From 00cc83cde8d03e85539ee06fbb3873ab80357a4f Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 18 Jan 2023 05:04:16 -0500 Subject: [PATCH 001/116] First version of AD/AND with memory duplication --- base/modules/serial/psb_c_mat_mod.F90 | 1 + base/modules/serial/psb_d_mat_mod.F90 | 1 + base/modules/serial/psb_s_mat_mod.F90 | 1 + base/modules/serial/psb_z_mat_mod.F90 | 1 + base/psblas/psb_cspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_dspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_sspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_zspmm.f90 | 21 ++++++++++++++----- base/tools/psb_cspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_dspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_sspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_zspasb.f90 | 30 ++++++++++++++++++++++++++- compile | 0 test/pargen/runs/ppde.inp | 2 +- 14 files changed, 185 insertions(+), 25 deletions(-) create mode 100644 compile diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index fd423de3..2e365858 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_c_mat_mod type :: psb_cspmat_type class(psb_c_base_sparse_mat), allocatable :: a + class(psb_c_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lc_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 8f967ce1..49a9545e 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_d_mat_mod type :: psb_dspmat_type class(psb_d_base_sparse_mat), allocatable :: a + class(psb_d_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ld_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index 43f1c619..eb444249 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_s_mat_mod type :: psb_sspmat_type class(psb_s_base_sparse_mat), allocatable :: a + class(psb_s_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ls_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index c534cad5..e70e48aa 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_z_mat_mod type :: psb_zspmat_type class(psb_z_base_sparse_mat), allocatable :: a + class(psb_z_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lz_coo_sparse_mat), allocatable :: rmta diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index fd8a9c39..555461df 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,cone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index a006c7e9..be8a493f 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,done,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 43ee0d48..79bfbdd1 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,sone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index b58ca303..f248db8b 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,zone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 0c5f14ab..ea7789f2 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_c_coo_sparse_mat) :: acoo + type(psb_c_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 3132f249..89ceef8d 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index cfa316eb..14ad5246 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index aeeef94d..f65be363 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_z_coo_sparse_mat) :: acoo + type(psb_z_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/compile b/compile new file mode 100644 index 00000000..e69de29b diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index e7e5dca2..5f040075 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,7 +2,7 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -040 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 0100 MAXIT From f09e25524ed30ab5f783ac2d6efbd1684717d317 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 18 Jan 2023 09:17:49 -0500 Subject: [PATCH 002/116] Create ECSR format and use it for A%AND --- base/modules/serial/psb_c_csr_mat_mod.f90 | 126 ++++++++++- base/modules/serial/psb_c_mat_mod.F90 | 3 +- base/modules/serial/psb_d_csr_mat_mod.f90 | 126 ++++++++++- base/modules/serial/psb_d_mat_mod.F90 | 3 +- base/modules/serial/psb_s_csr_mat_mod.f90 | 126 ++++++++++- base/modules/serial/psb_s_mat_mod.F90 | 3 +- base/modules/serial/psb_z_csr_mat_mod.f90 | 126 ++++++++++- base/modules/serial/psb_z_mat_mod.F90 | 3 +- base/serial/impl/psb_c_csr_impl.f90 | 263 ++++++++++++++++++++++ base/serial/impl/psb_d_csr_impl.f90 | 263 ++++++++++++++++++++++ base/serial/impl/psb_s_csr_impl.f90 | 263 ++++++++++++++++++++++ base/serial/impl/psb_z_csr_impl.f90 | 263 ++++++++++++++++++++++ base/tools/psb_cspasb.f90 | 7 +- base/tools/psb_dspasb.f90 | 7 +- base/tools/psb_sspasb.f90 | 7 +- base/tools/psb_zspasb.f90 | 7 +- test/pargen/runs/ppde.inp | 2 +- 17 files changed, 1577 insertions(+), 21 deletions(-) diff --git a/base/modules/serial/psb_c_csr_mat_mod.f90 b/base/modules/serial/psb_c_csr_mat_mod.f90 index 8b076cc2..d09eca2b 100644 --- a/base/modules/serial/psb_c_csr_mat_mod.f90 +++ b/base/modules/serial/psb_c_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_c_csr_mat_mod end subroutine psb_c_csr_scals end interface - !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat + + type, extends(psb_c_csr_sparse_mat) :: psb_c_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => c_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_c_ecsr_csmm + procedure, pass(a) :: csmv => psb_c_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_c_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_c_ecsr_cmp_nerwp + procedure, pass(a) :: free => c_ecsr_free + procedure, pass(a) :: mold => psb_c_ecsr_mold + + end type psb_c_ecsr_sparse_mat + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_csmv + interface + subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_c_ecsr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ecsr_csmv + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo + interface + subroutine psb_c_ecsr_cmp_nerwp(a,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ecsr_cmp_nerwp + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo + interface + subroutine psb_c_cp_ecsr_from_coo(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ecsr_from_coo + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_fmt + interface + subroutine psb_c_cp_ecsr_from_fmt(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ecsr_from_fmt + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_mv_from_coo + interface + subroutine psb_c_mv_ecsr_from_coo(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ecsr_from_coo + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_mv_from_fmt + interface + subroutine psb_c_mv_ecsr_from_fmt(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ecsr_from_fmt + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_c_ecsr_mold(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat !! \extends psb_lc_base_mat_mod::psb_lc_base_sparse_mat !! !! psb_lc_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function c_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function c_ecsr_get_fmt + + subroutine c_ecsr_free(a) + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_c_csr_sparse_mat%free() + + return + end subroutine c_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index 2e365858..aa891381 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -79,7 +79,8 @@ module psb_c_mat_mod use psb_c_base_mat_mod - use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat + use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat,& + & psb_c_ecsr_sparse_mat use psb_c_csc_mat_mod, only : psb_c_csc_sparse_mat, psb_lc_csc_sparse_mat type :: psb_cspmat_type diff --git a/base/modules/serial/psb_d_csr_mat_mod.f90 b/base/modules/serial/psb_d_csr_mat_mod.f90 index d0aa622b..12d71755 100644 --- a/base/modules/serial/psb_d_csr_mat_mod.f90 +++ b/base/modules/serial/psb_d_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_d_csr_mat_mod end subroutine psb_d_csr_scals end interface - !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat + + type, extends(psb_d_csr_sparse_mat) :: psb_d_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => d_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_d_ecsr_csmm + procedure, pass(a) :: csmv => psb_d_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_d_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_d_ecsr_cmp_nerwp + procedure, pass(a) :: free => d_ecsr_free + procedure, pass(a) :: mold => psb_d_ecsr_mold + + end type psb_d_ecsr_sparse_mat + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_csmv + interface + subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_d_ecsr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ecsr_csmv + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo + interface + subroutine psb_d_ecsr_cmp_nerwp(a,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ecsr_cmp_nerwp + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo + interface + subroutine psb_d_cp_ecsr_from_coo(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ecsr_from_coo + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_fmt + interface + subroutine psb_d_cp_ecsr_from_fmt(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ecsr_from_fmt + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_mv_from_coo + interface + subroutine psb_d_mv_ecsr_from_coo(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ecsr_from_coo + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_mv_from_fmt + interface + subroutine psb_d_mv_ecsr_from_fmt(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ecsr_from_fmt + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_d_ecsr_mold(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat !! \extends psb_ld_base_mat_mod::psb_ld_base_sparse_mat !! !! psb_ld_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function d_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function d_ecsr_get_fmt + + subroutine d_ecsr_free(a) + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_d_csr_sparse_mat%free() + + return + end subroutine d_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 49a9545e..c647e76b 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -79,7 +79,8 @@ module psb_d_mat_mod use psb_d_base_mat_mod - use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat + use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat,& + & psb_d_ecsr_sparse_mat use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat, psb_ld_csc_sparse_mat type :: psb_dspmat_type diff --git a/base/modules/serial/psb_s_csr_mat_mod.f90 b/base/modules/serial/psb_s_csr_mat_mod.f90 index 6b4c51c7..884ede38 100644 --- a/base/modules/serial/psb_s_csr_mat_mod.f90 +++ b/base/modules/serial/psb_s_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_s_csr_mat_mod end subroutine psb_s_csr_scals end interface - !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat + + type, extends(psb_s_csr_sparse_mat) :: psb_s_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => s_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_s_ecsr_csmm + procedure, pass(a) :: csmv => psb_s_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_s_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_s_ecsr_cmp_nerwp + procedure, pass(a) :: free => s_ecsr_free + procedure, pass(a) :: mold => psb_s_ecsr_mold + + end type psb_s_ecsr_sparse_mat + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_csmv + interface + subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_s_ecsr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ecsr_csmv + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo + interface + subroutine psb_s_ecsr_cmp_nerwp(a,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ecsr_cmp_nerwp + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo + interface + subroutine psb_s_cp_ecsr_from_coo(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ecsr_from_coo + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_fmt + interface + subroutine psb_s_cp_ecsr_from_fmt(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ecsr_from_fmt + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_mv_from_coo + interface + subroutine psb_s_mv_ecsr_from_coo(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ecsr_from_coo + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_mv_from_fmt + interface + subroutine psb_s_mv_ecsr_from_fmt(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ecsr_from_fmt + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_s_ecsr_mold(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat !! \extends psb_ls_base_mat_mod::psb_ls_base_sparse_mat !! !! psb_ls_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function s_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function s_ecsr_get_fmt + + subroutine s_ecsr_free(a) + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_s_csr_sparse_mat%free() + + return + end subroutine s_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index eb444249..3e6b286a 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -79,7 +79,8 @@ module psb_s_mat_mod use psb_s_base_mat_mod - use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat + use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat,& + & psb_s_ecsr_sparse_mat use psb_s_csc_mat_mod, only : psb_s_csc_sparse_mat, psb_ls_csc_sparse_mat type :: psb_sspmat_type diff --git a/base/modules/serial/psb_z_csr_mat_mod.f90 b/base/modules/serial/psb_z_csr_mat_mod.f90 index 4ec8dd00..c328fead 100644 --- a/base/modules/serial/psb_z_csr_mat_mod.f90 +++ b/base/modules/serial/psb_z_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_z_csr_mat_mod end subroutine psb_z_csr_scals end interface - !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat + + type, extends(psb_z_csr_sparse_mat) :: psb_z_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => z_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_z_ecsr_csmm + procedure, pass(a) :: csmv => psb_z_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_z_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_z_ecsr_cmp_nerwp + procedure, pass(a) :: free => z_ecsr_free + procedure, pass(a) :: mold => psb_z_ecsr_mold + + end type psb_z_ecsr_sparse_mat + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_csmv + interface + subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_z_ecsr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ecsr_csmv + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo + interface + subroutine psb_z_ecsr_cmp_nerwp(a,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ecsr_cmp_nerwp + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo + interface + subroutine psb_z_cp_ecsr_from_coo(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ecsr_from_coo + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_fmt + interface + subroutine psb_z_cp_ecsr_from_fmt(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ecsr_from_fmt + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_mv_from_coo + interface + subroutine psb_z_mv_ecsr_from_coo(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ecsr_from_coo + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_mv_from_fmt + interface + subroutine psb_z_mv_ecsr_from_fmt(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ecsr_from_fmt + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_z_ecsr_mold(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat !! \extends psb_lz_base_mat_mod::psb_lz_base_sparse_mat !! !! psb_lz_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function z_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function z_ecsr_get_fmt + + subroutine z_ecsr_free(a) + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_z_csr_sparse_mat%free() + + return + end subroutine z_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index e70e48aa..148e9ab9 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -79,7 +79,8 @@ module psb_z_mat_mod use psb_z_base_mat_mod - use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat + use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat,& + & psb_z_ecsr_sparse_mat use psb_z_csc_mat_mod, only : psb_z_csc_sparse_mat, psb_lz_csc_sparse_mat type :: psb_zspmat_type diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 55a91648..1fed09ba 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -3550,6 +3550,269 @@ contains end subroutine psb_ccsrspspmm +subroutine psb_c_ecsr_mold(a,b,info) + use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold + use psb_error_mod + implicit none + class(psb_c_ecsr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ecsr_mold + +subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_csmv + implicit none + class(psb_c_ecsr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='c_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ecsr_cmp_nerwp + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) +end subroutine psb_c_ecsr_cmp_nerwp + +subroutine psb_c_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_coo + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_cp_ecsr_from_coo + +subroutine psb_c_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_coo + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_mv_ecsr_from_coo + +subroutine psb_c_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_fmt + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_mv_ecsr_from_fmt + +subroutine psb_c_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_c_base_mat_mod + use psb_realloc_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_fmt + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_cp_ecsr_from_fmt + ! ! diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 2c59c1a5..1bcc82a9 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -3550,6 +3550,269 @@ contains end subroutine psb_dcsrspspmm +subroutine psb_d_ecsr_mold(a,b,info) + use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold + use psb_error_mod + implicit none + class(psb_d_ecsr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ecsr_mold + +subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_csmv + implicit none + class(psb_d_ecsr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='d_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ecsr_cmp_nerwp + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) +end subroutine psb_d_ecsr_cmp_nerwp + +subroutine psb_d_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_coo + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_cp_ecsr_from_coo + +subroutine psb_d_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_coo + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_mv_ecsr_from_coo + +subroutine psb_d_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_fmt + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_mv_ecsr_from_fmt + +subroutine psb_d_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_d_base_mat_mod + use psb_realloc_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_fmt + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_cp_ecsr_from_fmt + ! ! diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index 75358dbc..9670aeb9 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -3550,6 +3550,269 @@ contains end subroutine psb_scsrspspmm +subroutine psb_s_ecsr_mold(a,b,info) + use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold + use psb_error_mod + implicit none + class(psb_s_ecsr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ecsr_mold + +subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_csmv + implicit none + class(psb_s_ecsr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='s_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ecsr_cmp_nerwp + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) +end subroutine psb_s_ecsr_cmp_nerwp + +subroutine psb_s_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_coo + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_cp_ecsr_from_coo + +subroutine psb_s_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_coo + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_mv_ecsr_from_coo + +subroutine psb_s_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_fmt + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_mv_ecsr_from_fmt + +subroutine psb_s_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_s_base_mat_mod + use psb_realloc_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_fmt + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_cp_ecsr_from_fmt + ! ! diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index 4f2693c0..e9847849 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -3550,6 +3550,269 @@ contains end subroutine psb_zcsrspspmm +subroutine psb_z_ecsr_mold(a,b,info) + use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold + use psb_error_mod + implicit none + class(psb_z_ecsr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ecsr_mold + +subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_csmv + implicit none + class(psb_z_ecsr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='z_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ecsr_cmp_nerwp + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) +end subroutine psb_z_ecsr_cmp_nerwp + +subroutine psb_z_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_coo + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_cp_ecsr_from_coo + +subroutine psb_z_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_coo + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_mv_ecsr_from_coo + +subroutine psb_z_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_fmt + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_mv_ecsr_from_fmt + +subroutine psb_z_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_z_base_mat_mod + use psb_realloc_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_fmt + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_cp_ecsr_from_fmt + ! ! diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index ea7789f2..b4c957b0 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -175,13 +175,14 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) block character(len=1024) :: fname type(psb_c_coo_sparse_mat) :: acoo - type(psb_c_csr_sparse_mat), allocatable :: aclip, andclip + type(psb_c_csr_sparse_mat), allocatable :: aclip + type(psb_c_ecsr_sparse_mat), allocatable :: andclip allocate(aclip,andclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - call aclip%mv_from_coo(acoo,info) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) call andclip%mv_from_coo(acoo,info) - call move_alloc(aclip,a%ad) call move_alloc(andclip,a%and) if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 89ceef8d..5ebc47e8 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -175,13 +175,14 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) block character(len=1024) :: fname type(psb_d_coo_sparse_mat) :: acoo - type(psb_d_csr_sparse_mat), allocatable :: aclip, andclip + type(psb_d_csr_sparse_mat), allocatable :: aclip + type(psb_d_ecsr_sparse_mat), allocatable :: andclip allocate(aclip,andclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - call aclip%mv_from_coo(acoo,info) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) call andclip%mv_from_coo(acoo,info) - call move_alloc(aclip,a%ad) call move_alloc(andclip,a%and) if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index 14ad5246..5423c2a7 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -175,13 +175,14 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) block character(len=1024) :: fname type(psb_s_coo_sparse_mat) :: acoo - type(psb_s_csr_sparse_mat), allocatable :: aclip, andclip + type(psb_s_csr_sparse_mat), allocatable :: aclip + type(psb_s_ecsr_sparse_mat), allocatable :: andclip allocate(aclip,andclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - call aclip%mv_from_coo(acoo,info) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) call andclip%mv_from_coo(acoo,info) - call move_alloc(aclip,a%ad) call move_alloc(andclip,a%and) if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index f65be363..66fc8cd7 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -175,13 +175,14 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) block character(len=1024) :: fname type(psb_z_coo_sparse_mat) :: acoo - type(psb_z_csr_sparse_mat), allocatable :: aclip, andclip + type(psb_z_csr_sparse_mat), allocatable :: aclip + type(psb_z_ecsr_sparse_mat), allocatable :: andclip allocate(aclip,andclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - call aclip%mv_from_coo(acoo,info) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) call andclip%mv_from_coo(acoo,info) - call move_alloc(aclip,a%ad) call move_alloc(andclip,a%and) if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index 5f040075..57fda01a 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,7 +2,7 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +100 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 0100 MAXIT From 86b8a261efd23d244a034a2b1826cdc3ecae2c43 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 19 Jan 2023 08:36:22 -0500 Subject: [PATCH 003/116] Fixed conversion bug, changed SPASB interface --- base/modules/tools/psb_c_tools_mod.F90 | 3 ++- base/modules/tools/psb_d_tools_mod.F90 | 3 ++- base/modules/tools/psb_s_tools_mod.F90 | 3 ++- base/modules/tools/psb_z_tools_mod.F90 | 3 ++- base/psblas/psb_cspmm.f90 | 6 +++--- base/psblas/psb_dspmm.f90 | 6 +++--- base/psblas/psb_sspmm.f90 | 6 +++--- base/psblas/psb_zspmm.f90 | 6 +++--- base/serial/impl/psb_c_csr_impl.f90 | 9 +++------ base/serial/impl/psb_d_csr_impl.f90 | 9 +++------ base/serial/impl/psb_s_csr_impl.f90 | 9 +++------ base/serial/impl/psb_z_csr_impl.f90 | 9 +++------ base/tools/psb_cspasb.f90 | 28 ++++++++++++++++++++------ base/tools/psb_dspasb.f90 | 28 ++++++++++++++++++++------ base/tools/psb_sspasb.f90 | 28 ++++++++++++++++++++------ base/tools/psb_zspasb.f90 | 28 ++++++++++++++++++++------ test/pargen/psb_d_pde3d.F90 | 4 ++-- test/pargen/runs/ppde.inp | 6 +++--- 18 files changed, 125 insertions(+), 69 deletions(-) diff --git a/base/modules/tools/psb_c_tools_mod.F90 b/base/modules/tools/psb_c_tools_mod.F90 index 2de8f906..0ed2d82c 100644 --- a/base/modules/tools/psb_c_tools_mod.F90 +++ b/base/modules/tools/psb_c_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_c_tools_mod end interface interface psb_spasb - subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_cspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_c_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_c_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_cspasb end interface diff --git a/base/modules/tools/psb_d_tools_mod.F90 b/base/modules/tools/psb_d_tools_mod.F90 index 30e45d53..26f83201 100644 --- a/base/modules/tools/psb_d_tools_mod.F90 +++ b/base/modules/tools/psb_d_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_d_tools_mod end interface interface psb_spasb - subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_dspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_d_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_d_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_dspasb end interface diff --git a/base/modules/tools/psb_s_tools_mod.F90 b/base/modules/tools/psb_s_tools_mod.F90 index 5d2f8d00..0f70a31a 100644 --- a/base/modules/tools/psb_s_tools_mod.F90 +++ b/base/modules/tools/psb_s_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_s_tools_mod end interface interface psb_spasb - subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_sspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_s_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_s_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_sspasb end interface diff --git a/base/modules/tools/psb_z_tools_mod.F90 b/base/modules/tools/psb_z_tools_mod.F90 index 9d6bd77b..1f24e05a 100644 --- a/base/modules/tools/psb_z_tools_mod.F90 +++ b/base/modules/tools/psb_z_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_z_tools_mod end interface interface psb_spasb - subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_zspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_z_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_z_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_zspasb end interface diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index 555461df..84d8a7d8 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -179,11 +179,11 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (.true.) then - call psi_swapdata(psb_swap_send_,& + if (allocated(a%ad)) then + if (doswap_) call psi_swapdata(psb_swap_send_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%ad%spmm(alpha,x%v,beta,y%v,info) - call psi_swapdata(psb_swap_recv_,& + if (doswap_) call psi_swapdata(psb_swap_recv_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%and%spmm(alpha,x%v,cone,y%v,info) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index be8a493f..d5897f82 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -179,11 +179,11 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (.true.) then - call psi_swapdata(psb_swap_send_,& + if (allocated(a%ad)) then + if (doswap_) call psi_swapdata(psb_swap_send_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%ad%spmm(alpha,x%v,beta,y%v,info) - call psi_swapdata(psb_swap_recv_,& + if (doswap_) call psi_swapdata(psb_swap_recv_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%and%spmm(alpha,x%v,done,y%v,info) diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 79bfbdd1..7c1e0ab3 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -179,11 +179,11 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (.true.) then - call psi_swapdata(psb_swap_send_,& + if (allocated(a%ad)) then + if (doswap_) call psi_swapdata(psb_swap_send_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%ad%spmm(alpha,x%v,beta,y%v,info) - call psi_swapdata(psb_swap_recv_,& + if (doswap_) call psi_swapdata(psb_swap_recv_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%and%spmm(alpha,x%v,sone,y%v,info) diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index f248db8b..4dc73f83 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -179,11 +179,11 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (.true.) then - call psi_swapdata(psb_swap_send_,& + if (allocated(a%ad)) then + if (doswap_) call psi_swapdata(psb_swap_send_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%ad%spmm(alpha,x%v,beta,y%v,info) - call psi_swapdata(psb_swap_recv_,& + if (doswap_) call psi_swapdata(psb_swap_recv_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) call a%and%spmm(alpha,x%v,zone,y%v,info) diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90 index 1fed09ba..4744d947 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.f90 @@ -3643,9 +3643,8 @@ subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if - if (((beta == cone).and..not.(tra.or.ctra))& - & .or.(a%is_triangle()).or.(a%is_unit())) then - + if ((beta == cone).and.& + & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then call psb_c_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,& & a%nnerws,a%nerwp,x,y) else @@ -3672,9 +3671,6 @@ contains if (alpha == czero) return - - - if (alpha == cone) then !$omp parallel do private(ir,i,j,acc) do ir=1,nnerws @@ -3740,6 +3736,7 @@ subroutine psb_c_ecsr_cmp_nerwp(a,info) end if end do call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws end subroutine psb_c_ecsr_cmp_nerwp subroutine psb_c_cp_ecsr_from_coo(a,b,info) diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90 index 1bcc82a9..6d2b58ad 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.f90 @@ -3643,9 +3643,8 @@ subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if - if (((beta == done).and..not.(tra.or.ctra))& - & .or.(a%is_triangle()).or.(a%is_unit())) then - + if ((beta == done).and.& + & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then call psb_d_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,& & a%nnerws,a%nerwp,x,y) else @@ -3672,9 +3671,6 @@ contains if (alpha == dzero) return - - - if (alpha == done) then !$omp parallel do private(ir,i,j,acc) do ir=1,nnerws @@ -3740,6 +3736,7 @@ subroutine psb_d_ecsr_cmp_nerwp(a,info) end if end do call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws end subroutine psb_d_ecsr_cmp_nerwp subroutine psb_d_cp_ecsr_from_coo(a,b,info) diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90 index 9670aeb9..87cfff68 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.f90 @@ -3643,9 +3643,8 @@ subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if - if (((beta == sone).and..not.(tra.or.ctra))& - & .or.(a%is_triangle()).or.(a%is_unit())) then - + if ((beta == sone).and.& + & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then call psb_s_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,& & a%nnerws,a%nerwp,x,y) else @@ -3672,9 +3671,6 @@ contains if (alpha == szero) return - - - if (alpha == sone) then !$omp parallel do private(ir,i,j,acc) do ir=1,nnerws @@ -3740,6 +3736,7 @@ subroutine psb_s_ecsr_cmp_nerwp(a,info) end if end do call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws end subroutine psb_s_ecsr_cmp_nerwp subroutine psb_s_cp_ecsr_from_coo(a,b,info) diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90 index e9847849..a4a2dd5a 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.f90 @@ -3643,9 +3643,8 @@ subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if - if (((beta == zone).and..not.(tra.or.ctra))& - & .or.(a%is_triangle()).or.(a%is_unit())) then - + if ((beta == zone).and.& + & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then call psb_z_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,& & a%nnerws,a%nerwp,x,y) else @@ -3672,9 +3671,6 @@ contains if (alpha == zzero) return - - - if (alpha == zone) then !$omp parallel do private(ir,i,j,acc) do ir=1,nnerws @@ -3740,6 +3736,7 @@ subroutine psb_z_ecsr_cmp_nerwp(a,info) end if end do call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws end subroutine psb_z_ecsr_cmp_nerwp subroutine psb_z_cp_ecsr_from_coo(a,b,info) diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index b4c957b0..46258139 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_cspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_c_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,19 +177,26 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) end if - if (.true.) then + if (bld_and_) then block character(len=1024) :: fname type(psb_c_coo_sparse_mat) :: acoo type(psb_c_csr_sparse_mat), allocatable :: aclip type(psb_c_ecsr_sparse_mat), allocatable :: andclip - allocate(aclip,andclip) + logical, parameter :: use_ecsr=.false. + allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' open(25,file=fname) @@ -200,6 +213,9 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col end if end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 5ebc47e8..6beb0e6f 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_dspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_d_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,19 +177,26 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) end if - if (.true.) then + if (bld_and_) then block character(len=1024) :: fname type(psb_d_coo_sparse_mat) :: acoo type(psb_d_csr_sparse_mat), allocatable :: aclip type(psb_d_ecsr_sparse_mat), allocatable :: andclip - allocate(aclip,andclip) + logical, parameter :: use_ecsr=.true. + allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' open(25,file=fname) @@ -200,6 +213,9 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col end if end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index 5423c2a7..0edae30e 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_sspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_s_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,19 +177,26 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) end if - if (.true.) then + if (bld_and_) then block character(len=1024) :: fname type(psb_s_coo_sparse_mat) :: acoo type(psb_s_csr_sparse_mat), allocatable :: aclip type(psb_s_ecsr_sparse_mat), allocatable :: andclip - allocate(aclip,andclip) + logical, parameter :: use_ecsr=.false. + allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' open(25,file=fname) @@ -200,6 +213,9 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col end if end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index 66fc8cd7..cd77de15 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_zspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_z_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,19 +177,26 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) end if - if (.true.) then + if (bld_and_) then block character(len=1024) :: fname type(psb_z_coo_sparse_mat) :: acoo type(psb_z_csr_sparse_mat), allocatable :: aclip type(psb_z_ecsr_sparse_mat), allocatable :: andclip - allocate(aclip,andclip) + logical, parameter :: use_ecsr=.false. + allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) call a%ad%mv_from_coo(acoo,info) call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if if (.false.) then write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' open(25,file=fname) @@ -200,6 +213,9 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col end if end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index d4eeccf2..cd503d29 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -680,9 +680,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=.true.) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.true.) end if end if call psb_barrier(ctxt) diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index 57fda01a..c70a973f 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,11 +2,11 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -100 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +200 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 -0100 MAXIT -05 ITRACE +0300 MAXIT +10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH NONE If ILU : MILU or NONE othewise ignored From 1d5faa388dd5fdbfc39a93f318c2d2eaa9eb947b Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 27 Sep 2023 16:23:17 +0200 Subject: [PATCH 004/116] Merge psblas-ext into psblas, step 1: ext storage formats. --- Makefile | 10 +- ext/Makefile | 84 +++ ext/impl/Makefile | 412 ++++++++++++++ ext/impl/psb_c_cp_dia_from_coo.f90 | 70 +++ ext/impl/psb_c_cp_dia_to_coo.f90 | 65 +++ ext/impl/psb_c_cp_ell_from_coo.f90 | 71 +++ ext/impl/psb_c_cp_ell_from_fmt.f90 | 65 +++ ext/impl/psb_c_cp_ell_to_coo.f90 | 69 +++ ext/impl/psb_c_cp_ell_to_fmt.f90 | 67 +++ ext/impl/psb_c_cp_hdia_from_coo.f90 | 222 ++++++++ ext/impl/psb_c_cp_hdia_to_coo.f90 | 84 +++ ext/impl/psb_c_cp_hll_from_coo.f90 | 74 +++ ext/impl/psb_c_cp_hll_from_fmt.f90 | 70 +++ ext/impl/psb_c_cp_hll_to_coo.f90 | 104 ++++ ext/impl/psb_c_cp_hll_to_fmt.f90 | 68 +++ ext/impl/psb_c_dia_aclsum.f90 | 87 +++ ext/impl/psb_c_dia_allocate_mnnz.f90 | 88 +++ ext/impl/psb_c_dia_arwsum.f90 | 87 +++ ext/impl/psb_c_dia_colsum.f90 | 87 +++ ext/impl/psb_c_dia_csgetptn.f90 | 188 ++++++ ext/impl/psb_c_dia_csgetrow.f90 | 199 +++++++ ext/impl/psb_c_dia_csmm.f90 | 134 +++++ ext/impl/psb_c_dia_csmv.f90 | 135 +++++ ext/impl/psb_c_dia_get_diag.f90 | 75 +++ ext/impl/psb_c_dia_maxval.f90 | 54 ++ ext/impl/psb_c_dia_mold.f90 | 61 ++ ext/impl/psb_c_dia_print.f90 | 148 +++++ ext/impl/psb_c_dia_reallocate_nz.f90 | 56 ++ ext/impl/psb_c_dia_reinit.f90 | 78 +++ ext/impl/psb_c_dia_rowsum.f90 | 87 +++ ext/impl/psb_c_dia_scal.f90 | 108 ++++ ext/impl/psb_c_dia_scals.f90 | 63 +++ ext/impl/psb_c_dns_mat_impl.f90 | 724 ++++++++++++++++++++++++ ext/impl/psb_c_ell_aclsum.f90 | 82 +++ ext/impl/psb_c_ell_allocate_mnnz.f90 | 91 +++ ext/impl/psb_c_ell_arwsum.f90 | 78 +++ ext/impl/psb_c_ell_colsum.f90 | 80 +++ ext/impl/psb_c_ell_csgetblk.f90 | 83 +++ ext/impl/psb_c_ell_csgetptn.f90 | 189 +++++++ ext/impl/psb_c_ell_csgetrow.f90 | 205 +++++++ ext/impl/psb_c_ell_csmm.f90 | 377 ++++++++++++ ext/impl/psb_c_ell_csmv.f90 | 433 ++++++++++++++ ext/impl/psb_c_ell_csnm1.f90 | 73 +++ ext/impl/psb_c_ell_csnmi.f90 | 58 ++ ext/impl/psb_c_ell_csput.f90 | 208 +++++++ ext/impl/psb_c_ell_cssm.f90 | 375 ++++++++++++ ext/impl/psb_c_ell_cssv.f90 | 372 ++++++++++++ ext/impl/psb_c_ell_get_diag.f90 | 77 +++ ext/impl/psb_c_ell_maxval.f90 | 60 ++ ext/impl/psb_c_ell_mold.f90 | 63 +++ ext/impl/psb_c_ell_print.f90 | 99 ++++ ext/impl/psb_c_ell_reallocate_nz.f90 | 66 +++ ext/impl/psb_c_ell_reinit.f90 | 77 +++ ext/impl/psb_c_ell_rowsum.f90 | 77 +++ ext/impl/psb_c_ell_scal.f90 | 99 ++++ ext/impl/psb_c_ell_scals.f90 | 63 +++ ext/impl/psb_c_ell_trim.f90 | 60 ++ ext/impl/psb_c_hdia_allocate_mnnz.f90 | 75 +++ ext/impl/psb_c_hdia_csmv.f90 | 162 ++++++ ext/impl/psb_c_hdia_mold.f90 | 63 +++ ext/impl/psb_c_hdia_print.f90 | 121 ++++ ext/impl/psb_c_hll_aclsum.f90 | 109 ++++ ext/impl/psb_c_hll_allocate_mnnz.f90 | 93 +++ ext/impl/psb_c_hll_arwsum.f90 | 108 ++++ ext/impl/psb_c_hll_colsum.f90 | 109 ++++ ext/impl/psb_c_hll_csgetblk.f90 | 83 +++ ext/impl/psb_c_hll_csgetptn.f90 | 209 +++++++ ext/impl/psb_c_hll_csgetrow.f90 | 221 ++++++++ ext/impl/psb_c_hll_csmm.f90 | 235 ++++++++ ext/impl/psb_c_hll_csmv.f90 | 563 ++++++++++++++++++ ext/impl/psb_c_hll_csnm1.f90 | 111 ++++ ext/impl/psb_c_hll_csnmi.f90 | 104 ++++ ext/impl/psb_c_hll_csput.f90 | 233 ++++++++ ext/impl/psb_c_hll_cssm.f90 | 506 +++++++++++++++++ ext/impl/psb_c_hll_cssv.f90 | 498 ++++++++++++++++ ext/impl/psb_c_hll_get_diag.f90 | 110 ++++ ext/impl/psb_c_hll_maxval.f90 | 45 ++ ext/impl/psb_c_hll_mold.f90 | 65 +++ ext/impl/psb_c_hll_print.f90 | 134 +++++ ext/impl/psb_c_hll_reallocate_nz.f90 | 64 +++ ext/impl/psb_c_hll_reinit.f90 | 77 +++ ext/impl/psb_c_hll_rowsum.f90 | 110 ++++ ext/impl/psb_c_hll_scal.f90 | 135 +++++ ext/impl/psb_c_hll_scals.f90 | 63 +++ ext/impl/psb_c_mv_dia_from_coo.f90 | 62 ++ ext/impl/psb_c_mv_dia_to_coo.f90 | 55 ++ ext/impl/psb_c_mv_ell_from_coo.f90 | 56 ++ ext/impl/psb_c_mv_ell_from_fmt.f90 | 67 +++ ext/impl/psb_c_mv_ell_to_coo.f90 | 89 +++ ext/impl/psb_c_mv_ell_to_fmt.f90 | 67 +++ ext/impl/psb_c_mv_hdia_from_coo.f90 | 60 ++ ext/impl/psb_c_mv_hdia_to_coo.f90 | 55 ++ ext/impl/psb_c_mv_hll_from_coo.f90 | 58 ++ ext/impl/psb_c_mv_hll_from_fmt.f90 | 70 +++ ext/impl/psb_c_mv_hll_to_coo.f90 | 56 ++ ext/impl/psb_c_mv_hll_to_fmt.f90 | 69 +++ ext/impl/psb_d_cp_dia_from_coo.f90 | 70 +++ ext/impl/psb_d_cp_dia_to_coo.f90 | 65 +++ ext/impl/psb_d_cp_ell_from_coo.f90 | 71 +++ ext/impl/psb_d_cp_ell_from_fmt.f90 | 65 +++ ext/impl/psb_d_cp_ell_to_coo.f90 | 69 +++ ext/impl/psb_d_cp_ell_to_fmt.f90 | 67 +++ ext/impl/psb_d_cp_hdia_from_coo.f90 | 222 ++++++++ ext/impl/psb_d_cp_hdia_to_coo.f90 | 84 +++ ext/impl/psb_d_cp_hll_from_coo.f90 | 74 +++ ext/impl/psb_d_cp_hll_from_fmt.f90 | 70 +++ ext/impl/psb_d_cp_hll_to_coo.f90 | 104 ++++ ext/impl/psb_d_cp_hll_to_fmt.f90 | 68 +++ ext/impl/psb_d_dia_aclsum.f90 | 87 +++ ext/impl/psb_d_dia_allocate_mnnz.f90 | 88 +++ ext/impl/psb_d_dia_arwsum.f90 | 87 +++ ext/impl/psb_d_dia_colsum.f90 | 87 +++ ext/impl/psb_d_dia_csgetptn.f90 | 188 ++++++ ext/impl/psb_d_dia_csgetrow.f90 | 199 +++++++ ext/impl/psb_d_dia_csmm.f90 | 134 +++++ ext/impl/psb_d_dia_csmv.f90 | 135 +++++ ext/impl/psb_d_dia_get_diag.f90 | 75 +++ ext/impl/psb_d_dia_maxval.f90 | 54 ++ ext/impl/psb_d_dia_mold.f90 | 61 ++ ext/impl/psb_d_dia_print.f90 | 148 +++++ ext/impl/psb_d_dia_reallocate_nz.f90 | 56 ++ ext/impl/psb_d_dia_reinit.f90 | 78 +++ ext/impl/psb_d_dia_rowsum.f90 | 87 +++ ext/impl/psb_d_dia_scal.f90 | 108 ++++ ext/impl/psb_d_dia_scals.f90 | 63 +++ ext/impl/psb_d_dns_mat_impl.f90 | 724 ++++++++++++++++++++++++ ext/impl/psb_d_ell_aclsum.f90 | 82 +++ ext/impl/psb_d_ell_allocate_mnnz.f90 | 91 +++ ext/impl/psb_d_ell_arwsum.f90 | 78 +++ ext/impl/psb_d_ell_colsum.f90 | 80 +++ ext/impl/psb_d_ell_csgetblk.f90 | 83 +++ ext/impl/psb_d_ell_csgetptn.f90 | 189 +++++++ ext/impl/psb_d_ell_csgetrow.f90 | 205 +++++++ ext/impl/psb_d_ell_csmm.f90 | 377 ++++++++++++ ext/impl/psb_d_ell_csmv.f90 | 433 ++++++++++++++ ext/impl/psb_d_ell_csnm1.f90 | 73 +++ ext/impl/psb_d_ell_csnmi.f90 | 58 ++ ext/impl/psb_d_ell_csput.f90 | 208 +++++++ ext/impl/psb_d_ell_cssm.f90 | 375 ++++++++++++ ext/impl/psb_d_ell_cssv.f90 | 372 ++++++++++++ ext/impl/psb_d_ell_get_diag.f90 | 77 +++ ext/impl/psb_d_ell_maxval.f90 | 60 ++ ext/impl/psb_d_ell_mold.f90 | 63 +++ ext/impl/psb_d_ell_print.f90 | 99 ++++ ext/impl/psb_d_ell_reallocate_nz.f90 | 66 +++ ext/impl/psb_d_ell_reinit.f90 | 77 +++ ext/impl/psb_d_ell_rowsum.f90 | 77 +++ ext/impl/psb_d_ell_scal.f90 | 99 ++++ ext/impl/psb_d_ell_scals.f90 | 63 +++ ext/impl/psb_d_ell_trim.f90 | 60 ++ ext/impl/psb_d_hdia_allocate_mnnz.f90 | 75 +++ ext/impl/psb_d_hdia_csmv.f90 | 162 ++++++ ext/impl/psb_d_hdia_mold.f90 | 63 +++ ext/impl/psb_d_hdia_print.f90 | 121 ++++ ext/impl/psb_d_hll_aclsum.f90 | 109 ++++ ext/impl/psb_d_hll_allocate_mnnz.f90 | 93 +++ ext/impl/psb_d_hll_arwsum.f90 | 108 ++++ ext/impl/psb_d_hll_colsum.f90 | 109 ++++ ext/impl/psb_d_hll_csgetblk.f90 | 83 +++ ext/impl/psb_d_hll_csgetptn.f90 | 209 +++++++ ext/impl/psb_d_hll_csgetrow.f90 | 221 ++++++++ ext/impl/psb_d_hll_csmm.f90 | 235 ++++++++ ext/impl/psb_d_hll_csmv.f90 | 563 ++++++++++++++++++ ext/impl/psb_d_hll_csnm1.f90 | 111 ++++ ext/impl/psb_d_hll_csnmi.f90 | 104 ++++ ext/impl/psb_d_hll_csput.f90 | 233 ++++++++ ext/impl/psb_d_hll_cssm.f90 | 506 +++++++++++++++++ ext/impl/psb_d_hll_cssv.f90 | 498 ++++++++++++++++ ext/impl/psb_d_hll_get_diag.f90 | 110 ++++ ext/impl/psb_d_hll_maxval.f90 | 45 ++ ext/impl/psb_d_hll_mold.f90 | 65 +++ ext/impl/psb_d_hll_print.f90 | 134 +++++ ext/impl/psb_d_hll_reallocate_nz.f90 | 64 +++ ext/impl/psb_d_hll_reinit.f90 | 77 +++ ext/impl/psb_d_hll_rowsum.f90 | 110 ++++ ext/impl/psb_d_hll_scal.f90 | 135 +++++ ext/impl/psb_d_hll_scals.f90 | 63 +++ ext/impl/psb_d_mv_dia_from_coo.f90 | 62 ++ ext/impl/psb_d_mv_dia_to_coo.f90 | 55 ++ ext/impl/psb_d_mv_ell_from_coo.f90 | 56 ++ ext/impl/psb_d_mv_ell_from_fmt.f90 | 67 +++ ext/impl/psb_d_mv_ell_to_coo.f90 | 89 +++ ext/impl/psb_d_mv_ell_to_fmt.f90 | 67 +++ ext/impl/psb_d_mv_hdia_from_coo.f90 | 60 ++ ext/impl/psb_d_mv_hdia_to_coo.f90 | 55 ++ ext/impl/psb_d_mv_hll_from_coo.f90 | 58 ++ ext/impl/psb_d_mv_hll_from_fmt.f90 | 70 +++ ext/impl/psb_d_mv_hll_to_coo.f90 | 56 ++ ext/impl/psb_d_mv_hll_to_fmt.f90 | 69 +++ ext/impl/psb_s_cp_dia_from_coo.f90 | 70 +++ ext/impl/psb_s_cp_dia_to_coo.f90 | 65 +++ ext/impl/psb_s_cp_ell_from_coo.f90 | 71 +++ ext/impl/psb_s_cp_ell_from_fmt.f90 | 65 +++ ext/impl/psb_s_cp_ell_to_coo.f90 | 69 +++ ext/impl/psb_s_cp_ell_to_fmt.f90 | 67 +++ ext/impl/psb_s_cp_hdia_from_coo.f90 | 222 ++++++++ ext/impl/psb_s_cp_hdia_to_coo.f90 | 84 +++ ext/impl/psb_s_cp_hll_from_coo.f90 | 74 +++ ext/impl/psb_s_cp_hll_from_fmt.f90 | 70 +++ ext/impl/psb_s_cp_hll_to_coo.f90 | 104 ++++ ext/impl/psb_s_cp_hll_to_fmt.f90 | 68 +++ ext/impl/psb_s_dia_aclsum.f90 | 87 +++ ext/impl/psb_s_dia_allocate_mnnz.f90 | 88 +++ ext/impl/psb_s_dia_arwsum.f90 | 87 +++ ext/impl/psb_s_dia_colsum.f90 | 87 +++ ext/impl/psb_s_dia_csgetptn.f90 | 188 ++++++ ext/impl/psb_s_dia_csgetrow.f90 | 199 +++++++ ext/impl/psb_s_dia_csmm.f90 | 134 +++++ ext/impl/psb_s_dia_csmv.f90 | 135 +++++ ext/impl/psb_s_dia_get_diag.f90 | 75 +++ ext/impl/psb_s_dia_maxval.f90 | 54 ++ ext/impl/psb_s_dia_mold.f90 | 61 ++ ext/impl/psb_s_dia_print.f90 | 148 +++++ ext/impl/psb_s_dia_reallocate_nz.f90 | 56 ++ ext/impl/psb_s_dia_reinit.f90 | 78 +++ ext/impl/psb_s_dia_rowsum.f90 | 87 +++ ext/impl/psb_s_dia_scal.f90 | 108 ++++ ext/impl/psb_s_dia_scals.f90 | 63 +++ ext/impl/psb_s_dns_mat_impl.f90 | 724 ++++++++++++++++++++++++ ext/impl/psb_s_ell_aclsum.f90 | 82 +++ ext/impl/psb_s_ell_allocate_mnnz.f90 | 91 +++ ext/impl/psb_s_ell_arwsum.f90 | 78 +++ ext/impl/psb_s_ell_colsum.f90 | 80 +++ ext/impl/psb_s_ell_csgetblk.f90 | 83 +++ ext/impl/psb_s_ell_csgetptn.f90 | 189 +++++++ ext/impl/psb_s_ell_csgetrow.f90 | 205 +++++++ ext/impl/psb_s_ell_csmm.f90 | 377 ++++++++++++ ext/impl/psb_s_ell_csmv.f90 | 433 ++++++++++++++ ext/impl/psb_s_ell_csnm1.f90 | 73 +++ ext/impl/psb_s_ell_csnmi.f90 | 58 ++ ext/impl/psb_s_ell_csput.f90 | 208 +++++++ ext/impl/psb_s_ell_cssm.f90 | 375 ++++++++++++ ext/impl/psb_s_ell_cssv.f90 | 372 ++++++++++++ ext/impl/psb_s_ell_get_diag.f90 | 77 +++ ext/impl/psb_s_ell_maxval.f90 | 60 ++ ext/impl/psb_s_ell_mold.f90 | 63 +++ ext/impl/psb_s_ell_print.f90 | 99 ++++ ext/impl/psb_s_ell_reallocate_nz.f90 | 66 +++ ext/impl/psb_s_ell_reinit.f90 | 77 +++ ext/impl/psb_s_ell_rowsum.f90 | 77 +++ ext/impl/psb_s_ell_scal.f90 | 99 ++++ ext/impl/psb_s_ell_scals.f90 | 63 +++ ext/impl/psb_s_ell_trim.f90 | 60 ++ ext/impl/psb_s_hdia_allocate_mnnz.f90 | 75 +++ ext/impl/psb_s_hdia_csmv.f90 | 162 ++++++ ext/impl/psb_s_hdia_mold.f90 | 63 +++ ext/impl/psb_s_hdia_print.f90 | 121 ++++ ext/impl/psb_s_hll_aclsum.f90 | 109 ++++ ext/impl/psb_s_hll_allocate_mnnz.f90 | 93 +++ ext/impl/psb_s_hll_arwsum.f90 | 108 ++++ ext/impl/psb_s_hll_colsum.f90 | 109 ++++ ext/impl/psb_s_hll_csgetblk.f90 | 83 +++ ext/impl/psb_s_hll_csgetptn.f90 | 209 +++++++ ext/impl/psb_s_hll_csgetrow.f90 | 221 ++++++++ ext/impl/psb_s_hll_csmm.f90 | 235 ++++++++ ext/impl/psb_s_hll_csmv.f90 | 563 ++++++++++++++++++ ext/impl/psb_s_hll_csnm1.f90 | 111 ++++ ext/impl/psb_s_hll_csnmi.f90 | 104 ++++ ext/impl/psb_s_hll_csput.f90 | 233 ++++++++ ext/impl/psb_s_hll_cssm.f90 | 506 +++++++++++++++++ ext/impl/psb_s_hll_cssv.f90 | 498 ++++++++++++++++ ext/impl/psb_s_hll_get_diag.f90 | 110 ++++ ext/impl/psb_s_hll_maxval.f90 | 45 ++ ext/impl/psb_s_hll_mold.f90 | 65 +++ ext/impl/psb_s_hll_print.f90 | 134 +++++ ext/impl/psb_s_hll_reallocate_nz.f90 | 64 +++ ext/impl/psb_s_hll_reinit.f90 | 77 +++ ext/impl/psb_s_hll_rowsum.f90 | 110 ++++ ext/impl/psb_s_hll_scal.f90 | 135 +++++ ext/impl/psb_s_hll_scals.f90 | 63 +++ ext/impl/psb_s_mv_dia_from_coo.f90 | 62 ++ ext/impl/psb_s_mv_dia_to_coo.f90 | 55 ++ ext/impl/psb_s_mv_ell_from_coo.f90 | 56 ++ ext/impl/psb_s_mv_ell_from_fmt.f90 | 67 +++ ext/impl/psb_s_mv_ell_to_coo.f90 | 89 +++ ext/impl/psb_s_mv_ell_to_fmt.f90 | 67 +++ ext/impl/psb_s_mv_hdia_from_coo.f90 | 60 ++ ext/impl/psb_s_mv_hdia_to_coo.f90 | 55 ++ ext/impl/psb_s_mv_hll_from_coo.f90 | 58 ++ ext/impl/psb_s_mv_hll_from_fmt.f90 | 70 +++ ext/impl/psb_s_mv_hll_to_coo.f90 | 56 ++ ext/impl/psb_s_mv_hll_to_fmt.f90 | 69 +++ ext/impl/psb_z_cp_dia_from_coo.f90 | 70 +++ ext/impl/psb_z_cp_dia_to_coo.f90 | 65 +++ ext/impl/psb_z_cp_ell_from_coo.f90 | 71 +++ ext/impl/psb_z_cp_ell_from_fmt.f90 | 65 +++ ext/impl/psb_z_cp_ell_to_coo.f90 | 69 +++ ext/impl/psb_z_cp_ell_to_fmt.f90 | 67 +++ ext/impl/psb_z_cp_hdia_from_coo.f90 | 222 ++++++++ ext/impl/psb_z_cp_hdia_to_coo.f90 | 84 +++ ext/impl/psb_z_cp_hll_from_coo.f90 | 74 +++ ext/impl/psb_z_cp_hll_from_fmt.f90 | 70 +++ ext/impl/psb_z_cp_hll_to_coo.f90 | 104 ++++ ext/impl/psb_z_cp_hll_to_fmt.f90 | 68 +++ ext/impl/psb_z_dia_aclsum.f90 | 87 +++ ext/impl/psb_z_dia_allocate_mnnz.f90 | 88 +++ ext/impl/psb_z_dia_arwsum.f90 | 87 +++ ext/impl/psb_z_dia_colsum.f90 | 87 +++ ext/impl/psb_z_dia_csgetptn.f90 | 188 ++++++ ext/impl/psb_z_dia_csgetrow.f90 | 199 +++++++ ext/impl/psb_z_dia_csmm.f90 | 134 +++++ ext/impl/psb_z_dia_csmv.f90 | 135 +++++ ext/impl/psb_z_dia_get_diag.f90 | 75 +++ ext/impl/psb_z_dia_maxval.f90 | 54 ++ ext/impl/psb_z_dia_mold.f90 | 61 ++ ext/impl/psb_z_dia_print.f90 | 148 +++++ ext/impl/psb_z_dia_reallocate_nz.f90 | 56 ++ ext/impl/psb_z_dia_reinit.f90 | 78 +++ ext/impl/psb_z_dia_rowsum.f90 | 87 +++ ext/impl/psb_z_dia_scal.f90 | 108 ++++ ext/impl/psb_z_dia_scals.f90 | 63 +++ ext/impl/psb_z_dns_mat_impl.f90 | 724 ++++++++++++++++++++++++ ext/impl/psb_z_ell_aclsum.f90 | 82 +++ ext/impl/psb_z_ell_allocate_mnnz.f90 | 91 +++ ext/impl/psb_z_ell_arwsum.f90 | 78 +++ ext/impl/psb_z_ell_colsum.f90 | 80 +++ ext/impl/psb_z_ell_csgetblk.f90 | 83 +++ ext/impl/psb_z_ell_csgetptn.f90 | 189 +++++++ ext/impl/psb_z_ell_csgetrow.f90 | 205 +++++++ ext/impl/psb_z_ell_csmm.f90 | 377 ++++++++++++ ext/impl/psb_z_ell_csmv.f90 | 433 ++++++++++++++ ext/impl/psb_z_ell_csnm1.f90 | 73 +++ ext/impl/psb_z_ell_csnmi.f90 | 58 ++ ext/impl/psb_z_ell_csput.f90 | 208 +++++++ ext/impl/psb_z_ell_cssm.f90 | 375 ++++++++++++ ext/impl/psb_z_ell_cssv.f90 | 372 ++++++++++++ ext/impl/psb_z_ell_get_diag.f90 | 77 +++ ext/impl/psb_z_ell_maxval.f90 | 60 ++ ext/impl/psb_z_ell_mold.f90 | 63 +++ ext/impl/psb_z_ell_print.f90 | 99 ++++ ext/impl/psb_z_ell_reallocate_nz.f90 | 66 +++ ext/impl/psb_z_ell_reinit.f90 | 77 +++ ext/impl/psb_z_ell_rowsum.f90 | 77 +++ ext/impl/psb_z_ell_scal.f90 | 99 ++++ ext/impl/psb_z_ell_scals.f90 | 63 +++ ext/impl/psb_z_ell_trim.f90 | 60 ++ ext/impl/psb_z_hdia_allocate_mnnz.f90 | 75 +++ ext/impl/psb_z_hdia_csmv.f90 | 162 ++++++ ext/impl/psb_z_hdia_mold.f90 | 63 +++ ext/impl/psb_z_hdia_print.f90 | 121 ++++ ext/impl/psb_z_hll_aclsum.f90 | 109 ++++ ext/impl/psb_z_hll_allocate_mnnz.f90 | 93 +++ ext/impl/psb_z_hll_arwsum.f90 | 108 ++++ ext/impl/psb_z_hll_colsum.f90 | 109 ++++ ext/impl/psb_z_hll_csgetblk.f90 | 83 +++ ext/impl/psb_z_hll_csgetptn.f90 | 209 +++++++ ext/impl/psb_z_hll_csgetrow.f90 | 221 ++++++++ ext/impl/psb_z_hll_csmm.f90 | 235 ++++++++ ext/impl/psb_z_hll_csmv.f90 | 563 ++++++++++++++++++ ext/impl/psb_z_hll_csnm1.f90 | 111 ++++ ext/impl/psb_z_hll_csnmi.f90 | 104 ++++ ext/impl/psb_z_hll_csput.f90 | 233 ++++++++ ext/impl/psb_z_hll_cssm.f90 | 506 +++++++++++++++++ ext/impl/psb_z_hll_cssv.f90 | 498 ++++++++++++++++ ext/impl/psb_z_hll_get_diag.f90 | 110 ++++ ext/impl/psb_z_hll_maxval.f90 | 45 ++ ext/impl/psb_z_hll_mold.f90 | 65 +++ ext/impl/psb_z_hll_print.f90 | 134 +++++ ext/impl/psb_z_hll_reallocate_nz.f90 | 64 +++ ext/impl/psb_z_hll_reinit.f90 | 77 +++ ext/impl/psb_z_hll_rowsum.f90 | 110 ++++ ext/impl/psb_z_hll_scal.f90 | 135 +++++ ext/impl/psb_z_hll_scals.f90 | 63 +++ ext/impl/psb_z_mv_dia_from_coo.f90 | 62 ++ ext/impl/psb_z_mv_dia_to_coo.f90 | 55 ++ ext/impl/psb_z_mv_ell_from_coo.f90 | 56 ++ ext/impl/psb_z_mv_ell_from_fmt.f90 | 67 +++ ext/impl/psb_z_mv_ell_to_coo.f90 | 89 +++ ext/impl/psb_z_mv_ell_to_fmt.f90 | 67 +++ ext/impl/psb_z_mv_hdia_from_coo.f90 | 60 ++ ext/impl/psb_z_mv_hdia_to_coo.f90 | 55 ++ ext/impl/psb_z_mv_hll_from_coo.f90 | 58 ++ ext/impl/psb_z_mv_hll_from_fmt.f90 | 70 +++ ext/impl/psb_z_mv_hll_to_coo.f90 | 56 ++ ext/impl/psb_z_mv_hll_to_fmt.f90 | 69 +++ ext/impl/psi_c_convert_dia_from_coo.f90 | 73 +++ ext/impl/psi_c_convert_ell_from_coo.f90 | 87 +++ ext/impl/psi_c_convert_hll_from_coo.f90 | 122 ++++ ext/impl/psi_c_xtr_coo_from_dia.f90 | 80 +++ ext/impl/psi_c_xtr_dia_from_coo.f90 | 69 +++ ext/impl/psi_c_xtr_ell_from_coo.f90 | 63 +++ ext/impl/psi_d_convert_dia_from_coo.f90 | 73 +++ ext/impl/psi_d_convert_ell_from_coo.f90 | 87 +++ ext/impl/psi_d_convert_hll_from_coo.f90 | 122 ++++ ext/impl/psi_d_xtr_coo_from_dia.f90 | 80 +++ ext/impl/psi_d_xtr_dia_from_coo.f90 | 69 +++ ext/impl/psi_d_xtr_ell_from_coo.f90 | 63 +++ ext/impl/psi_s_convert_dia_from_coo.f90 | 73 +++ ext/impl/psi_s_convert_ell_from_coo.f90 | 87 +++ ext/impl/psi_s_convert_hll_from_coo.f90 | 122 ++++ ext/impl/psi_s_xtr_coo_from_dia.f90 | 80 +++ ext/impl/psi_s_xtr_dia_from_coo.f90 | 69 +++ ext/impl/psi_s_xtr_ell_from_coo.f90 | 63 +++ ext/impl/psi_z_convert_dia_from_coo.f90 | 73 +++ ext/impl/psi_z_convert_ell_from_coo.f90 | 87 +++ ext/impl/psi_z_convert_hll_from_coo.f90 | 122 ++++ ext/impl/psi_z_xtr_coo_from_dia.f90 | 80 +++ ext/impl/psi_z_xtr_dia_from_coo.f90 | 69 +++ ext/impl/psi_z_xtr_ell_from_coo.f90 | 63 +++ ext/psb_c_dia_mat_mod.f90 | 513 +++++++++++++++++ ext/psb_c_dns_mat_mod.f90 | 467 +++++++++++++++ ext/psb_c_ell_mat_mod.f90 | 544 ++++++++++++++++++ ext/psb_c_hdia_mat_mod.f90 | 534 +++++++++++++++++ ext/psb_c_hll_mat_mod.f90 | 564 ++++++++++++++++++ ext/psb_d_dia_mat_mod.f90 | 513 +++++++++++++++++ ext/psb_d_dns_mat_mod.f90 | 467 +++++++++++++++ ext/psb_d_ell_mat_mod.f90 | 544 ++++++++++++++++++ ext/psb_d_hdia_mat_mod.f90 | 534 +++++++++++++++++ ext/psb_d_hll_mat_mod.f90 | 564 ++++++++++++++++++ ext/psb_ext_mod.F90 | 65 +++ ext/psb_s_dia_mat_mod.f90 | 513 +++++++++++++++++ ext/psb_s_dns_mat_mod.f90 | 467 +++++++++++++++ ext/psb_s_ell_mat_mod.f90 | 544 ++++++++++++++++++ ext/psb_s_hdia_mat_mod.f90 | 534 +++++++++++++++++ ext/psb_s_hll_mat_mod.f90 | 564 ++++++++++++++++++ ext/psb_z_dia_mat_mod.f90 | 513 +++++++++++++++++ ext/psb_z_dns_mat_mod.f90 | 467 +++++++++++++++ ext/psb_z_ell_mat_mod.f90 | 544 ++++++++++++++++++ ext/psb_z_hdia_mat_mod.f90 | 534 +++++++++++++++++ ext/psb_z_hll_mat_mod.f90 | 564 ++++++++++++++++++ ext/psi_c_ext_util_mod.f90 | 80 +++ ext/psi_d_ext_util_mod.f90 | 80 +++ ext/psi_ext_util_mod.f90 | 41 ++ ext/psi_i_ext_util_mod.f90 | 175 ++++++ ext/psi_s_ext_util_mod.f90 | 80 +++ ext/psi_z_ext_util_mod.f90 | 80 +++ 426 files changed, 61981 insertions(+), 2 deletions(-) create mode 100755 ext/Makefile create mode 100755 ext/impl/Makefile create mode 100644 ext/impl/psb_c_cp_dia_from_coo.f90 create mode 100644 ext/impl/psb_c_cp_dia_to_coo.f90 create mode 100644 ext/impl/psb_c_cp_ell_from_coo.f90 create mode 100644 ext/impl/psb_c_cp_ell_from_fmt.f90 create mode 100644 ext/impl/psb_c_cp_ell_to_coo.f90 create mode 100644 ext/impl/psb_c_cp_ell_to_fmt.f90 create mode 100644 ext/impl/psb_c_cp_hdia_from_coo.f90 create mode 100644 ext/impl/psb_c_cp_hdia_to_coo.f90 create mode 100644 ext/impl/psb_c_cp_hll_from_coo.f90 create mode 100644 ext/impl/psb_c_cp_hll_from_fmt.f90 create mode 100644 ext/impl/psb_c_cp_hll_to_coo.f90 create mode 100644 ext/impl/psb_c_cp_hll_to_fmt.f90 create mode 100644 ext/impl/psb_c_dia_aclsum.f90 create mode 100644 ext/impl/psb_c_dia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_c_dia_arwsum.f90 create mode 100644 ext/impl/psb_c_dia_colsum.f90 create mode 100644 ext/impl/psb_c_dia_csgetptn.f90 create mode 100644 ext/impl/psb_c_dia_csgetrow.f90 create mode 100644 ext/impl/psb_c_dia_csmm.f90 create mode 100644 ext/impl/psb_c_dia_csmv.f90 create mode 100644 ext/impl/psb_c_dia_get_diag.f90 create mode 100644 ext/impl/psb_c_dia_maxval.f90 create mode 100644 ext/impl/psb_c_dia_mold.f90 create mode 100644 ext/impl/psb_c_dia_print.f90 create mode 100644 ext/impl/psb_c_dia_reallocate_nz.f90 create mode 100644 ext/impl/psb_c_dia_reinit.f90 create mode 100644 ext/impl/psb_c_dia_rowsum.f90 create mode 100644 ext/impl/psb_c_dia_scal.f90 create mode 100644 ext/impl/psb_c_dia_scals.f90 create mode 100644 ext/impl/psb_c_dns_mat_impl.f90 create mode 100644 ext/impl/psb_c_ell_aclsum.f90 create mode 100644 ext/impl/psb_c_ell_allocate_mnnz.f90 create mode 100644 ext/impl/psb_c_ell_arwsum.f90 create mode 100644 ext/impl/psb_c_ell_colsum.f90 create mode 100644 ext/impl/psb_c_ell_csgetblk.f90 create mode 100644 ext/impl/psb_c_ell_csgetptn.f90 create mode 100644 ext/impl/psb_c_ell_csgetrow.f90 create mode 100644 ext/impl/psb_c_ell_csmm.f90 create mode 100644 ext/impl/psb_c_ell_csmv.f90 create mode 100644 ext/impl/psb_c_ell_csnm1.f90 create mode 100644 ext/impl/psb_c_ell_csnmi.f90 create mode 100644 ext/impl/psb_c_ell_csput.f90 create mode 100644 ext/impl/psb_c_ell_cssm.f90 create mode 100644 ext/impl/psb_c_ell_cssv.f90 create mode 100644 ext/impl/psb_c_ell_get_diag.f90 create mode 100644 ext/impl/psb_c_ell_maxval.f90 create mode 100644 ext/impl/psb_c_ell_mold.f90 create mode 100644 ext/impl/psb_c_ell_print.f90 create mode 100644 ext/impl/psb_c_ell_reallocate_nz.f90 create mode 100644 ext/impl/psb_c_ell_reinit.f90 create mode 100644 ext/impl/psb_c_ell_rowsum.f90 create mode 100644 ext/impl/psb_c_ell_scal.f90 create mode 100644 ext/impl/psb_c_ell_scals.f90 create mode 100644 ext/impl/psb_c_ell_trim.f90 create mode 100644 ext/impl/psb_c_hdia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_c_hdia_csmv.f90 create mode 100644 ext/impl/psb_c_hdia_mold.f90 create mode 100644 ext/impl/psb_c_hdia_print.f90 create mode 100644 ext/impl/psb_c_hll_aclsum.f90 create mode 100644 ext/impl/psb_c_hll_allocate_mnnz.f90 create mode 100644 ext/impl/psb_c_hll_arwsum.f90 create mode 100644 ext/impl/psb_c_hll_colsum.f90 create mode 100644 ext/impl/psb_c_hll_csgetblk.f90 create mode 100644 ext/impl/psb_c_hll_csgetptn.f90 create mode 100644 ext/impl/psb_c_hll_csgetrow.f90 create mode 100644 ext/impl/psb_c_hll_csmm.f90 create mode 100644 ext/impl/psb_c_hll_csmv.f90 create mode 100644 ext/impl/psb_c_hll_csnm1.f90 create mode 100644 ext/impl/psb_c_hll_csnmi.f90 create mode 100644 ext/impl/psb_c_hll_csput.f90 create mode 100644 ext/impl/psb_c_hll_cssm.f90 create mode 100644 ext/impl/psb_c_hll_cssv.f90 create mode 100644 ext/impl/psb_c_hll_get_diag.f90 create mode 100644 ext/impl/psb_c_hll_maxval.f90 create mode 100644 ext/impl/psb_c_hll_mold.f90 create mode 100644 ext/impl/psb_c_hll_print.f90 create mode 100644 ext/impl/psb_c_hll_reallocate_nz.f90 create mode 100644 ext/impl/psb_c_hll_reinit.f90 create mode 100644 ext/impl/psb_c_hll_rowsum.f90 create mode 100644 ext/impl/psb_c_hll_scal.f90 create mode 100644 ext/impl/psb_c_hll_scals.f90 create mode 100644 ext/impl/psb_c_mv_dia_from_coo.f90 create mode 100644 ext/impl/psb_c_mv_dia_to_coo.f90 create mode 100644 ext/impl/psb_c_mv_ell_from_coo.f90 create mode 100644 ext/impl/psb_c_mv_ell_from_fmt.f90 create mode 100644 ext/impl/psb_c_mv_ell_to_coo.f90 create mode 100644 ext/impl/psb_c_mv_ell_to_fmt.f90 create mode 100644 ext/impl/psb_c_mv_hdia_from_coo.f90 create mode 100644 ext/impl/psb_c_mv_hdia_to_coo.f90 create mode 100644 ext/impl/psb_c_mv_hll_from_coo.f90 create mode 100644 ext/impl/psb_c_mv_hll_from_fmt.f90 create mode 100644 ext/impl/psb_c_mv_hll_to_coo.f90 create mode 100644 ext/impl/psb_c_mv_hll_to_fmt.f90 create mode 100644 ext/impl/psb_d_cp_dia_from_coo.f90 create mode 100644 ext/impl/psb_d_cp_dia_to_coo.f90 create mode 100644 ext/impl/psb_d_cp_ell_from_coo.f90 create mode 100644 ext/impl/psb_d_cp_ell_from_fmt.f90 create mode 100644 ext/impl/psb_d_cp_ell_to_coo.f90 create mode 100644 ext/impl/psb_d_cp_ell_to_fmt.f90 create mode 100644 ext/impl/psb_d_cp_hdia_from_coo.f90 create mode 100644 ext/impl/psb_d_cp_hdia_to_coo.f90 create mode 100644 ext/impl/psb_d_cp_hll_from_coo.f90 create mode 100644 ext/impl/psb_d_cp_hll_from_fmt.f90 create mode 100644 ext/impl/psb_d_cp_hll_to_coo.f90 create mode 100644 ext/impl/psb_d_cp_hll_to_fmt.f90 create mode 100644 ext/impl/psb_d_dia_aclsum.f90 create mode 100644 ext/impl/psb_d_dia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_d_dia_arwsum.f90 create mode 100644 ext/impl/psb_d_dia_colsum.f90 create mode 100644 ext/impl/psb_d_dia_csgetptn.f90 create mode 100644 ext/impl/psb_d_dia_csgetrow.f90 create mode 100644 ext/impl/psb_d_dia_csmm.f90 create mode 100644 ext/impl/psb_d_dia_csmv.f90 create mode 100644 ext/impl/psb_d_dia_get_diag.f90 create mode 100644 ext/impl/psb_d_dia_maxval.f90 create mode 100644 ext/impl/psb_d_dia_mold.f90 create mode 100644 ext/impl/psb_d_dia_print.f90 create mode 100644 ext/impl/psb_d_dia_reallocate_nz.f90 create mode 100644 ext/impl/psb_d_dia_reinit.f90 create mode 100644 ext/impl/psb_d_dia_rowsum.f90 create mode 100644 ext/impl/psb_d_dia_scal.f90 create mode 100644 ext/impl/psb_d_dia_scals.f90 create mode 100644 ext/impl/psb_d_dns_mat_impl.f90 create mode 100644 ext/impl/psb_d_ell_aclsum.f90 create mode 100644 ext/impl/psb_d_ell_allocate_mnnz.f90 create mode 100644 ext/impl/psb_d_ell_arwsum.f90 create mode 100644 ext/impl/psb_d_ell_colsum.f90 create mode 100644 ext/impl/psb_d_ell_csgetblk.f90 create mode 100644 ext/impl/psb_d_ell_csgetptn.f90 create mode 100644 ext/impl/psb_d_ell_csgetrow.f90 create mode 100644 ext/impl/psb_d_ell_csmm.f90 create mode 100644 ext/impl/psb_d_ell_csmv.f90 create mode 100644 ext/impl/psb_d_ell_csnm1.f90 create mode 100644 ext/impl/psb_d_ell_csnmi.f90 create mode 100644 ext/impl/psb_d_ell_csput.f90 create mode 100644 ext/impl/psb_d_ell_cssm.f90 create mode 100644 ext/impl/psb_d_ell_cssv.f90 create mode 100644 ext/impl/psb_d_ell_get_diag.f90 create mode 100644 ext/impl/psb_d_ell_maxval.f90 create mode 100644 ext/impl/psb_d_ell_mold.f90 create mode 100644 ext/impl/psb_d_ell_print.f90 create mode 100644 ext/impl/psb_d_ell_reallocate_nz.f90 create mode 100644 ext/impl/psb_d_ell_reinit.f90 create mode 100644 ext/impl/psb_d_ell_rowsum.f90 create mode 100644 ext/impl/psb_d_ell_scal.f90 create mode 100644 ext/impl/psb_d_ell_scals.f90 create mode 100644 ext/impl/psb_d_ell_trim.f90 create mode 100644 ext/impl/psb_d_hdia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_d_hdia_csmv.f90 create mode 100644 ext/impl/psb_d_hdia_mold.f90 create mode 100644 ext/impl/psb_d_hdia_print.f90 create mode 100644 ext/impl/psb_d_hll_aclsum.f90 create mode 100644 ext/impl/psb_d_hll_allocate_mnnz.f90 create mode 100644 ext/impl/psb_d_hll_arwsum.f90 create mode 100644 ext/impl/psb_d_hll_colsum.f90 create mode 100644 ext/impl/psb_d_hll_csgetblk.f90 create mode 100644 ext/impl/psb_d_hll_csgetptn.f90 create mode 100644 ext/impl/psb_d_hll_csgetrow.f90 create mode 100644 ext/impl/psb_d_hll_csmm.f90 create mode 100644 ext/impl/psb_d_hll_csmv.f90 create mode 100644 ext/impl/psb_d_hll_csnm1.f90 create mode 100644 ext/impl/psb_d_hll_csnmi.f90 create mode 100644 ext/impl/psb_d_hll_csput.f90 create mode 100644 ext/impl/psb_d_hll_cssm.f90 create mode 100644 ext/impl/psb_d_hll_cssv.f90 create mode 100644 ext/impl/psb_d_hll_get_diag.f90 create mode 100644 ext/impl/psb_d_hll_maxval.f90 create mode 100644 ext/impl/psb_d_hll_mold.f90 create mode 100644 ext/impl/psb_d_hll_print.f90 create mode 100644 ext/impl/psb_d_hll_reallocate_nz.f90 create mode 100644 ext/impl/psb_d_hll_reinit.f90 create mode 100644 ext/impl/psb_d_hll_rowsum.f90 create mode 100644 ext/impl/psb_d_hll_scal.f90 create mode 100644 ext/impl/psb_d_hll_scals.f90 create mode 100644 ext/impl/psb_d_mv_dia_from_coo.f90 create mode 100644 ext/impl/psb_d_mv_dia_to_coo.f90 create mode 100644 ext/impl/psb_d_mv_ell_from_coo.f90 create mode 100644 ext/impl/psb_d_mv_ell_from_fmt.f90 create mode 100644 ext/impl/psb_d_mv_ell_to_coo.f90 create mode 100644 ext/impl/psb_d_mv_ell_to_fmt.f90 create mode 100644 ext/impl/psb_d_mv_hdia_from_coo.f90 create mode 100644 ext/impl/psb_d_mv_hdia_to_coo.f90 create mode 100644 ext/impl/psb_d_mv_hll_from_coo.f90 create mode 100644 ext/impl/psb_d_mv_hll_from_fmt.f90 create mode 100644 ext/impl/psb_d_mv_hll_to_coo.f90 create mode 100644 ext/impl/psb_d_mv_hll_to_fmt.f90 create mode 100644 ext/impl/psb_s_cp_dia_from_coo.f90 create mode 100644 ext/impl/psb_s_cp_dia_to_coo.f90 create mode 100644 ext/impl/psb_s_cp_ell_from_coo.f90 create mode 100644 ext/impl/psb_s_cp_ell_from_fmt.f90 create mode 100644 ext/impl/psb_s_cp_ell_to_coo.f90 create mode 100644 ext/impl/psb_s_cp_ell_to_fmt.f90 create mode 100644 ext/impl/psb_s_cp_hdia_from_coo.f90 create mode 100644 ext/impl/psb_s_cp_hdia_to_coo.f90 create mode 100644 ext/impl/psb_s_cp_hll_from_coo.f90 create mode 100644 ext/impl/psb_s_cp_hll_from_fmt.f90 create mode 100644 ext/impl/psb_s_cp_hll_to_coo.f90 create mode 100644 ext/impl/psb_s_cp_hll_to_fmt.f90 create mode 100644 ext/impl/psb_s_dia_aclsum.f90 create mode 100644 ext/impl/psb_s_dia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_s_dia_arwsum.f90 create mode 100644 ext/impl/psb_s_dia_colsum.f90 create mode 100644 ext/impl/psb_s_dia_csgetptn.f90 create mode 100644 ext/impl/psb_s_dia_csgetrow.f90 create mode 100644 ext/impl/psb_s_dia_csmm.f90 create mode 100644 ext/impl/psb_s_dia_csmv.f90 create mode 100644 ext/impl/psb_s_dia_get_diag.f90 create mode 100644 ext/impl/psb_s_dia_maxval.f90 create mode 100644 ext/impl/psb_s_dia_mold.f90 create mode 100644 ext/impl/psb_s_dia_print.f90 create mode 100644 ext/impl/psb_s_dia_reallocate_nz.f90 create mode 100644 ext/impl/psb_s_dia_reinit.f90 create mode 100644 ext/impl/psb_s_dia_rowsum.f90 create mode 100644 ext/impl/psb_s_dia_scal.f90 create mode 100644 ext/impl/psb_s_dia_scals.f90 create mode 100644 ext/impl/psb_s_dns_mat_impl.f90 create mode 100644 ext/impl/psb_s_ell_aclsum.f90 create mode 100644 ext/impl/psb_s_ell_allocate_mnnz.f90 create mode 100644 ext/impl/psb_s_ell_arwsum.f90 create mode 100644 ext/impl/psb_s_ell_colsum.f90 create mode 100644 ext/impl/psb_s_ell_csgetblk.f90 create mode 100644 ext/impl/psb_s_ell_csgetptn.f90 create mode 100644 ext/impl/psb_s_ell_csgetrow.f90 create mode 100644 ext/impl/psb_s_ell_csmm.f90 create mode 100644 ext/impl/psb_s_ell_csmv.f90 create mode 100644 ext/impl/psb_s_ell_csnm1.f90 create mode 100644 ext/impl/psb_s_ell_csnmi.f90 create mode 100644 ext/impl/psb_s_ell_csput.f90 create mode 100644 ext/impl/psb_s_ell_cssm.f90 create mode 100644 ext/impl/psb_s_ell_cssv.f90 create mode 100644 ext/impl/psb_s_ell_get_diag.f90 create mode 100644 ext/impl/psb_s_ell_maxval.f90 create mode 100644 ext/impl/psb_s_ell_mold.f90 create mode 100644 ext/impl/psb_s_ell_print.f90 create mode 100644 ext/impl/psb_s_ell_reallocate_nz.f90 create mode 100644 ext/impl/psb_s_ell_reinit.f90 create mode 100644 ext/impl/psb_s_ell_rowsum.f90 create mode 100644 ext/impl/psb_s_ell_scal.f90 create mode 100644 ext/impl/psb_s_ell_scals.f90 create mode 100644 ext/impl/psb_s_ell_trim.f90 create mode 100644 ext/impl/psb_s_hdia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_s_hdia_csmv.f90 create mode 100644 ext/impl/psb_s_hdia_mold.f90 create mode 100644 ext/impl/psb_s_hdia_print.f90 create mode 100644 ext/impl/psb_s_hll_aclsum.f90 create mode 100644 ext/impl/psb_s_hll_allocate_mnnz.f90 create mode 100644 ext/impl/psb_s_hll_arwsum.f90 create mode 100644 ext/impl/psb_s_hll_colsum.f90 create mode 100644 ext/impl/psb_s_hll_csgetblk.f90 create mode 100644 ext/impl/psb_s_hll_csgetptn.f90 create mode 100644 ext/impl/psb_s_hll_csgetrow.f90 create mode 100644 ext/impl/psb_s_hll_csmm.f90 create mode 100644 ext/impl/psb_s_hll_csmv.f90 create mode 100644 ext/impl/psb_s_hll_csnm1.f90 create mode 100644 ext/impl/psb_s_hll_csnmi.f90 create mode 100644 ext/impl/psb_s_hll_csput.f90 create mode 100644 ext/impl/psb_s_hll_cssm.f90 create mode 100644 ext/impl/psb_s_hll_cssv.f90 create mode 100644 ext/impl/psb_s_hll_get_diag.f90 create mode 100644 ext/impl/psb_s_hll_maxval.f90 create mode 100644 ext/impl/psb_s_hll_mold.f90 create mode 100644 ext/impl/psb_s_hll_print.f90 create mode 100644 ext/impl/psb_s_hll_reallocate_nz.f90 create mode 100644 ext/impl/psb_s_hll_reinit.f90 create mode 100644 ext/impl/psb_s_hll_rowsum.f90 create mode 100644 ext/impl/psb_s_hll_scal.f90 create mode 100644 ext/impl/psb_s_hll_scals.f90 create mode 100644 ext/impl/psb_s_mv_dia_from_coo.f90 create mode 100644 ext/impl/psb_s_mv_dia_to_coo.f90 create mode 100644 ext/impl/psb_s_mv_ell_from_coo.f90 create mode 100644 ext/impl/psb_s_mv_ell_from_fmt.f90 create mode 100644 ext/impl/psb_s_mv_ell_to_coo.f90 create mode 100644 ext/impl/psb_s_mv_ell_to_fmt.f90 create mode 100644 ext/impl/psb_s_mv_hdia_from_coo.f90 create mode 100644 ext/impl/psb_s_mv_hdia_to_coo.f90 create mode 100644 ext/impl/psb_s_mv_hll_from_coo.f90 create mode 100644 ext/impl/psb_s_mv_hll_from_fmt.f90 create mode 100644 ext/impl/psb_s_mv_hll_to_coo.f90 create mode 100644 ext/impl/psb_s_mv_hll_to_fmt.f90 create mode 100644 ext/impl/psb_z_cp_dia_from_coo.f90 create mode 100644 ext/impl/psb_z_cp_dia_to_coo.f90 create mode 100644 ext/impl/psb_z_cp_ell_from_coo.f90 create mode 100644 ext/impl/psb_z_cp_ell_from_fmt.f90 create mode 100644 ext/impl/psb_z_cp_ell_to_coo.f90 create mode 100644 ext/impl/psb_z_cp_ell_to_fmt.f90 create mode 100644 ext/impl/psb_z_cp_hdia_from_coo.f90 create mode 100644 ext/impl/psb_z_cp_hdia_to_coo.f90 create mode 100644 ext/impl/psb_z_cp_hll_from_coo.f90 create mode 100644 ext/impl/psb_z_cp_hll_from_fmt.f90 create mode 100644 ext/impl/psb_z_cp_hll_to_coo.f90 create mode 100644 ext/impl/psb_z_cp_hll_to_fmt.f90 create mode 100644 ext/impl/psb_z_dia_aclsum.f90 create mode 100644 ext/impl/psb_z_dia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_z_dia_arwsum.f90 create mode 100644 ext/impl/psb_z_dia_colsum.f90 create mode 100644 ext/impl/psb_z_dia_csgetptn.f90 create mode 100644 ext/impl/psb_z_dia_csgetrow.f90 create mode 100644 ext/impl/psb_z_dia_csmm.f90 create mode 100644 ext/impl/psb_z_dia_csmv.f90 create mode 100644 ext/impl/psb_z_dia_get_diag.f90 create mode 100644 ext/impl/psb_z_dia_maxval.f90 create mode 100644 ext/impl/psb_z_dia_mold.f90 create mode 100644 ext/impl/psb_z_dia_print.f90 create mode 100644 ext/impl/psb_z_dia_reallocate_nz.f90 create mode 100644 ext/impl/psb_z_dia_reinit.f90 create mode 100644 ext/impl/psb_z_dia_rowsum.f90 create mode 100644 ext/impl/psb_z_dia_scal.f90 create mode 100644 ext/impl/psb_z_dia_scals.f90 create mode 100644 ext/impl/psb_z_dns_mat_impl.f90 create mode 100644 ext/impl/psb_z_ell_aclsum.f90 create mode 100644 ext/impl/psb_z_ell_allocate_mnnz.f90 create mode 100644 ext/impl/psb_z_ell_arwsum.f90 create mode 100644 ext/impl/psb_z_ell_colsum.f90 create mode 100644 ext/impl/psb_z_ell_csgetblk.f90 create mode 100644 ext/impl/psb_z_ell_csgetptn.f90 create mode 100644 ext/impl/psb_z_ell_csgetrow.f90 create mode 100644 ext/impl/psb_z_ell_csmm.f90 create mode 100644 ext/impl/psb_z_ell_csmv.f90 create mode 100644 ext/impl/psb_z_ell_csnm1.f90 create mode 100644 ext/impl/psb_z_ell_csnmi.f90 create mode 100644 ext/impl/psb_z_ell_csput.f90 create mode 100644 ext/impl/psb_z_ell_cssm.f90 create mode 100644 ext/impl/psb_z_ell_cssv.f90 create mode 100644 ext/impl/psb_z_ell_get_diag.f90 create mode 100644 ext/impl/psb_z_ell_maxval.f90 create mode 100644 ext/impl/psb_z_ell_mold.f90 create mode 100644 ext/impl/psb_z_ell_print.f90 create mode 100644 ext/impl/psb_z_ell_reallocate_nz.f90 create mode 100644 ext/impl/psb_z_ell_reinit.f90 create mode 100644 ext/impl/psb_z_ell_rowsum.f90 create mode 100644 ext/impl/psb_z_ell_scal.f90 create mode 100644 ext/impl/psb_z_ell_scals.f90 create mode 100644 ext/impl/psb_z_ell_trim.f90 create mode 100644 ext/impl/psb_z_hdia_allocate_mnnz.f90 create mode 100644 ext/impl/psb_z_hdia_csmv.f90 create mode 100644 ext/impl/psb_z_hdia_mold.f90 create mode 100644 ext/impl/psb_z_hdia_print.f90 create mode 100644 ext/impl/psb_z_hll_aclsum.f90 create mode 100644 ext/impl/psb_z_hll_allocate_mnnz.f90 create mode 100644 ext/impl/psb_z_hll_arwsum.f90 create mode 100644 ext/impl/psb_z_hll_colsum.f90 create mode 100644 ext/impl/psb_z_hll_csgetblk.f90 create mode 100644 ext/impl/psb_z_hll_csgetptn.f90 create mode 100644 ext/impl/psb_z_hll_csgetrow.f90 create mode 100644 ext/impl/psb_z_hll_csmm.f90 create mode 100644 ext/impl/psb_z_hll_csmv.f90 create mode 100644 ext/impl/psb_z_hll_csnm1.f90 create mode 100644 ext/impl/psb_z_hll_csnmi.f90 create mode 100644 ext/impl/psb_z_hll_csput.f90 create mode 100644 ext/impl/psb_z_hll_cssm.f90 create mode 100644 ext/impl/psb_z_hll_cssv.f90 create mode 100644 ext/impl/psb_z_hll_get_diag.f90 create mode 100644 ext/impl/psb_z_hll_maxval.f90 create mode 100644 ext/impl/psb_z_hll_mold.f90 create mode 100644 ext/impl/psb_z_hll_print.f90 create mode 100644 ext/impl/psb_z_hll_reallocate_nz.f90 create mode 100644 ext/impl/psb_z_hll_reinit.f90 create mode 100644 ext/impl/psb_z_hll_rowsum.f90 create mode 100644 ext/impl/psb_z_hll_scal.f90 create mode 100644 ext/impl/psb_z_hll_scals.f90 create mode 100644 ext/impl/psb_z_mv_dia_from_coo.f90 create mode 100644 ext/impl/psb_z_mv_dia_to_coo.f90 create mode 100644 ext/impl/psb_z_mv_ell_from_coo.f90 create mode 100644 ext/impl/psb_z_mv_ell_from_fmt.f90 create mode 100644 ext/impl/psb_z_mv_ell_to_coo.f90 create mode 100644 ext/impl/psb_z_mv_ell_to_fmt.f90 create mode 100644 ext/impl/psb_z_mv_hdia_from_coo.f90 create mode 100644 ext/impl/psb_z_mv_hdia_to_coo.f90 create mode 100644 ext/impl/psb_z_mv_hll_from_coo.f90 create mode 100644 ext/impl/psb_z_mv_hll_from_fmt.f90 create mode 100644 ext/impl/psb_z_mv_hll_to_coo.f90 create mode 100644 ext/impl/psb_z_mv_hll_to_fmt.f90 create mode 100644 ext/impl/psi_c_convert_dia_from_coo.f90 create mode 100644 ext/impl/psi_c_convert_ell_from_coo.f90 create mode 100644 ext/impl/psi_c_convert_hll_from_coo.f90 create mode 100644 ext/impl/psi_c_xtr_coo_from_dia.f90 create mode 100644 ext/impl/psi_c_xtr_dia_from_coo.f90 create mode 100644 ext/impl/psi_c_xtr_ell_from_coo.f90 create mode 100644 ext/impl/psi_d_convert_dia_from_coo.f90 create mode 100644 ext/impl/psi_d_convert_ell_from_coo.f90 create mode 100644 ext/impl/psi_d_convert_hll_from_coo.f90 create mode 100644 ext/impl/psi_d_xtr_coo_from_dia.f90 create mode 100644 ext/impl/psi_d_xtr_dia_from_coo.f90 create mode 100644 ext/impl/psi_d_xtr_ell_from_coo.f90 create mode 100644 ext/impl/psi_s_convert_dia_from_coo.f90 create mode 100644 ext/impl/psi_s_convert_ell_from_coo.f90 create mode 100644 ext/impl/psi_s_convert_hll_from_coo.f90 create mode 100644 ext/impl/psi_s_xtr_coo_from_dia.f90 create mode 100644 ext/impl/psi_s_xtr_dia_from_coo.f90 create mode 100644 ext/impl/psi_s_xtr_ell_from_coo.f90 create mode 100644 ext/impl/psi_z_convert_dia_from_coo.f90 create mode 100644 ext/impl/psi_z_convert_ell_from_coo.f90 create mode 100644 ext/impl/psi_z_convert_hll_from_coo.f90 create mode 100644 ext/impl/psi_z_xtr_coo_from_dia.f90 create mode 100644 ext/impl/psi_z_xtr_dia_from_coo.f90 create mode 100644 ext/impl/psi_z_xtr_ell_from_coo.f90 create mode 100644 ext/psb_c_dia_mat_mod.f90 create mode 100644 ext/psb_c_dns_mat_mod.f90 create mode 100644 ext/psb_c_ell_mat_mod.f90 create mode 100644 ext/psb_c_hdia_mat_mod.f90 create mode 100644 ext/psb_c_hll_mat_mod.f90 create mode 100644 ext/psb_d_dia_mat_mod.f90 create mode 100644 ext/psb_d_dns_mat_mod.f90 create mode 100644 ext/psb_d_ell_mat_mod.f90 create mode 100644 ext/psb_d_hdia_mat_mod.f90 create mode 100644 ext/psb_d_hll_mat_mod.f90 create mode 100644 ext/psb_ext_mod.F90 create mode 100644 ext/psb_s_dia_mat_mod.f90 create mode 100644 ext/psb_s_dns_mat_mod.f90 create mode 100644 ext/psb_s_ell_mat_mod.f90 create mode 100644 ext/psb_s_hdia_mat_mod.f90 create mode 100644 ext/psb_s_hll_mat_mod.f90 create mode 100644 ext/psb_z_dia_mat_mod.f90 create mode 100644 ext/psb_z_dns_mat_mod.f90 create mode 100644 ext/psb_z_ell_mat_mod.f90 create mode 100644 ext/psb_z_hdia_mat_mod.f90 create mode 100644 ext/psb_z_hll_mat_mod.f90 create mode 100644 ext/psi_c_ext_util_mod.f90 create mode 100644 ext/psi_d_ext_util_mod.f90 create mode 100644 ext/psi_ext_util_mod.f90 create mode 100644 ext/psi_i_ext_util_mod.f90 create mode 100644 ext/psi_s_ext_util_mod.f90 create mode 100644 ext/psi_z_ext_util_mod.f90 diff --git a/Makefile b/Makefile index a0f5ec3e..4a79afce 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd libd +all: dirs based precd kryld utild cbindd extd libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." @@ -12,15 +12,17 @@ dirs: precd: based utild: based kryld: precd +extd: based cbindd: based precd kryld utild -libd: based precd kryld utild cbindd +libd: based precd kryld utild cbindd extd $(MAKE) -C base lib $(MAKE) -C prec lib $(MAKE) -C krylov lib $(MAKE) -C util lib $(MAKE) -C cbind lib + $(MAKE) -C ext lib based: $(MAKE) -C base objs @@ -32,6 +34,8 @@ utild: $(MAKE) -C util objs cbindd: $(MAKE) -C cbind objs +extd: + $(MAKE) -C ext objs install: all @@ -56,6 +60,7 @@ clean: $(MAKE) -C krylov clean $(MAKE) -C util clean $(MAKE) -C cbind clean + $(MAKE) -C ext clean check: all make check -C test/serial @@ -71,6 +76,7 @@ veryclean: cleanlib cd krylov && $(MAKE) veryclean cd util && $(MAKE) veryclean cd cbind && $(MAKE) veryclean + cd ext && $(MAKE) veryclean cd test/fileread && $(MAKE) clean cd test/pargen && $(MAKE) clean cd test/util && $(MAKE) clean diff --git a/ext/Makefile b/ext/Makefile new file mode 100755 index 00000000..36b82433 --- /dev/null +++ b/ext/Makefile @@ -0,0 +1,84 @@ +include ../Make.inc +# +# Libraries used +# +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES= +LIBNAME=libpsb_ext.a + + +FOBJS= psb_d_ell_mat_mod.o psb_d_hll_mat_mod.o \ + psb_s_hll_mat_mod.o psb_s_ell_mat_mod.o \ + psb_c_hll_mat_mod.o psb_c_ell_mat_mod.o \ + psb_z_hll_mat_mod.o psb_z_ell_mat_mod.o \ + psb_d_dia_mat_mod.o psb_d_hdia_mat_mod.o \ + psb_s_dia_mat_mod.o psb_s_hdia_mat_mod.o \ + psb_c_dia_mat_mod.o psb_c_hdia_mat_mod.o \ + psb_z_dia_mat_mod.o psb_z_hdia_mat_mod.o \ + psb_s_dns_mat_mod.o psb_d_dns_mat_mod.o \ + psb_c_dns_mat_mod.o psb_z_dns_mat_mod.o \ + psi_ext_util_mod.o psi_i_ext_util_mod.o \ + psi_s_ext_util_mod.o psi_c_ext_util_mod.o \ + psi_d_ext_util_mod.o psi_z_ext_util_mod.o \ + psb_ext_mod.o + +COBJS= + +OBJS=$(COBJS) $(FOBJS) + +lib: objs ilib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +objs: $(OBJS) iobjs + /bin/cp -p *$(.mod) $(MODDIR) + + + +psb_ext_mod.o: psb_s_dia_mat_mod.o psb_d_dia_mat_mod.o \ + psb_c_dia_mat_mod.o psb_z_dia_mat_mod.o \ + psb_d_ell_mat_mod.o psb_d_hll_mat_mod.o \ + psb_s_hll_mat_mod.o psb_s_ell_mat_mod.o \ + psb_c_hll_mat_mod.o psb_c_ell_mat_mod.o \ + psb_z_hll_mat_mod.o psb_z_ell_mat_mod.o \ + psb_s_hdia_mat_mod.o psb_d_hdia_mat_mod.o \ + psb_c_hdia_mat_mod.o psb_z_hdia_mat_mod.o \ + psb_s_dns_mat_mod.o psb_d_dns_mat_mod.o \ + psb_c_dns_mat_mod.o psb_z_dns_mat_mod.o + +# psb_d_rsb_mat_mod.o psb_d_hdia_mat_mod.o +psi_ext_util_mod.o: psi_i_ext_util_mod.o \ + psi_s_ext_util_mod.o psi_c_ext_util_mod.o \ + psi_d_ext_util_mod.o psi_z_ext_util_mod.o + +psb_s_dia_mat_mod.o psb_c_dia_mat_mod.o psb_d_dia_mat_mod.o psb_z_dia_mat_mod.o: psi_ext_util_mod.o +psb_s_hdia_mat_mod.o psb_c_hdia_mat_mod.o psb_d_hdia_mat_mod.o psb_z_hdia_mat_mod.o: psi_ext_util_mod.o +psb_s_hll_mat_mod.o psb_c_hll_mat_mod.o psb_d_hll_mat_mod.o psb_z_hll_mat_mod.o: psi_ext_util_mod.o + +ilib: objs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) + +iobjs: $(OBJS) + $(MAKE) -C impl objs + +clean: cclean iclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean + +veryclean: clean + /bin/rm -f $(HERE)/$(LIBNAME) $(LIBMOD) *$(.mod) + + + + diff --git a/ext/impl/Makefile b/ext/impl/Makefile new file mode 100755 index 00000000..57593a54 --- /dev/null +++ b/ext/impl/Makefile @@ -0,0 +1,412 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. +LIBNAME=libpsb_ext.a + +OBJS= \ +psb_s_cp_dia_from_coo.o \ +psb_s_cp_dia_to_coo.o \ +psb_s_cp_ell_from_coo.o \ +psb_s_cp_ell_from_fmt.o \ +psb_s_cp_ell_to_coo.o \ +psb_s_cp_ell_to_fmt.o \ +psb_s_cp_hdia_from_coo.o \ +psb_s_cp_hdia_to_coo.o \ +psb_s_cp_hll_from_coo.o \ +psb_s_cp_hll_from_fmt.o \ +psb_s_cp_hll_to_coo.o \ +psb_s_cp_hll_to_fmt.o \ +psb_s_dia_aclsum.o \ +psb_s_dia_allocate_mnnz.o \ +psb_s_dia_arwsum.o \ +psb_s_dia_colsum.o \ +psb_s_dia_csgetptn.o \ +psb_s_dia_csgetrow.o \ +psb_s_dia_csmm.o \ +psb_s_dia_csmv.o \ +psb_s_dia_get_diag.o \ +psb_s_dia_maxval.o \ +psb_s_dia_mold.o \ +psb_s_dia_print.o \ +psb_s_dia_reallocate_nz.o \ +psb_s_dia_reinit.o \ +psb_s_dia_rowsum.o \ +psb_s_dia_scal.o \ +psb_s_dia_scals.o \ +psb_s_ell_aclsum.o \ +psb_s_ell_allocate_mnnz.o \ +psb_s_ell_arwsum.o \ +psb_s_ell_colsum.o \ +psb_s_ell_csgetblk.o \ +psb_s_ell_csgetptn.o \ +psb_s_ell_csgetrow.o \ +psb_s_ell_csmm.o \ +psb_s_ell_csmv.o \ +psb_s_ell_csnm1.o \ +psb_s_ell_csnmi.o \ +psb_s_ell_csput.o \ +psb_s_ell_cssm.o \ +psb_s_ell_cssv.o \ +psb_s_ell_get_diag.o \ +psb_s_ell_maxval.o \ +psb_s_ell_mold.o \ +psb_s_ell_print.o \ +psb_s_ell_reallocate_nz.o \ +psb_s_ell_reinit.o \ +psb_s_ell_rowsum.o \ +psb_s_ell_scal.o \ +psb_s_ell_scals.o \ +psb_s_ell_trim.o \ +psb_s_hdia_allocate_mnnz.o \ +psb_s_hdia_csmv.o \ +psb_s_hdia_mold.o \ +psb_s_hdia_print.o \ +psb_s_hll_aclsum.o \ +psb_s_hll_allocate_mnnz.o \ +psb_s_hll_arwsum.o \ +psb_s_hll_colsum.o \ +psb_s_hll_csgetblk.o \ +psb_s_hll_csgetptn.o \ +psb_s_hll_csgetrow.o \ +psb_s_hll_csmm.o \ +psb_s_hll_csmv.o \ +psb_s_hll_csnm1.o \ +psb_s_hll_csnmi.o \ +psb_s_hll_csput.o \ +psb_s_hll_cssm.o \ +psb_s_hll_cssv.o \ +psb_s_hll_get_diag.o \ +psb_s_hll_maxval.o \ +psb_s_hll_mold.o \ +psb_s_hll_print.o \ +psb_s_hll_reallocate_nz.o \ +psb_s_hll_reinit.o \ +psb_s_hll_rowsum.o \ +psb_s_hll_scal.o \ +psb_s_hll_scals.o \ +psb_s_mv_dia_from_coo.o \ +psb_s_mv_ell_from_coo.o \ +psb_s_mv_ell_from_fmt.o \ +psb_s_mv_ell_to_coo.o \ +psb_s_mv_ell_to_fmt.o \ +psb_s_mv_hdia_from_coo.o \ +psb_s_mv_hdia_to_coo.o \ +psb_s_mv_hll_from_coo.o \ +psb_s_mv_hll_from_fmt.o \ +psb_s_mv_hll_to_coo.o \ +psb_s_mv_hll_to_fmt.o \ +psb_c_cp_dia_from_coo.o \ +psb_c_cp_dia_to_coo.o \ +psb_c_cp_ell_from_coo.o \ +psb_c_cp_ell_from_fmt.o \ +psb_c_cp_ell_to_coo.o \ +psb_c_cp_ell_to_fmt.o \ +psb_c_cp_hdia_from_coo.o \ +psb_c_cp_hdia_to_coo.o \ +psb_c_cp_hll_from_coo.o \ +psb_c_cp_hll_from_fmt.o \ +psb_c_cp_hll_to_coo.o \ +psb_c_cp_hll_to_fmt.o \ +psb_c_dia_aclsum.o \ +psb_c_dia_allocate_mnnz.o \ +psb_c_dia_arwsum.o \ +psb_c_dia_colsum.o \ +psb_c_dia_csgetptn.o \ +psb_c_dia_csgetrow.o \ +psb_c_dia_csmm.o \ +psb_c_dia_csmv.o \ +psb_c_dia_get_diag.o \ +psb_c_dia_maxval.o \ +psb_c_dia_mold.o \ +psb_c_dia_print.o \ +psb_c_dia_reallocate_nz.o \ +psb_c_dia_reinit.o \ +psb_c_dia_rowsum.o \ +psb_c_dia_scal.o \ +psb_c_dia_scals.o \ +psb_c_ell_aclsum.o \ +psb_c_ell_allocate_mnnz.o \ +psb_c_ell_arwsum.o \ +psb_c_ell_colsum.o \ +psb_c_ell_csgetblk.o \ +psb_c_ell_csgetptn.o \ +psb_c_ell_csgetrow.o \ +psb_c_ell_csmm.o \ +psb_c_ell_csmv.o \ +psb_c_ell_csnm1.o \ +psb_c_ell_csnmi.o \ +psb_c_ell_csput.o \ +psb_c_ell_cssm.o \ +psb_c_ell_cssv.o \ +psb_c_ell_get_diag.o \ +psb_c_ell_maxval.o \ +psb_c_ell_mold.o \ +psb_c_ell_print.o \ +psb_c_ell_reallocate_nz.o \ +psb_c_ell_reinit.o \ +psb_c_ell_rowsum.o \ +psb_c_ell_scal.o \ +psb_c_ell_scals.o \ +psb_c_ell_trim.o \ +psb_c_hdia_allocate_mnnz.o \ +psb_c_hdia_csmv.o \ +psb_c_hdia_mold.o \ +psb_c_hdia_print.o \ +psb_c_hll_aclsum.o \ +psb_c_hll_allocate_mnnz.o \ +psb_c_hll_arwsum.o \ +psb_c_hll_colsum.o \ +psb_c_hll_csgetblk.o \ +psb_c_hll_csgetptn.o \ +psb_c_hll_csgetrow.o \ +psb_c_hll_csmm.o \ +psb_c_hll_csmv.o \ +psb_c_hll_csnm1.o \ +psb_c_hll_csnmi.o \ +psb_c_hll_csput.o \ +psb_c_hll_cssm.o \ +psb_c_hll_cssv.o \ +psb_c_hll_get_diag.o \ +psb_c_hll_maxval.o \ +psb_c_hll_mold.o \ +psb_c_hll_print.o \ +psb_c_hll_reallocate_nz.o \ +psb_c_hll_reinit.o \ +psb_c_hll_rowsum.o \ +psb_c_hll_scal.o \ +psb_c_hll_scals.o \ +psb_c_mv_dia_from_coo.o \ +psb_c_mv_ell_from_coo.o \ +psb_c_mv_ell_from_fmt.o \ +psb_c_mv_ell_to_coo.o \ +psb_c_mv_ell_to_fmt.o \ +psb_c_mv_hdia_from_coo.o \ +psb_c_mv_hdia_to_coo.o \ +psb_c_mv_hll_from_coo.o \ +psb_c_mv_hll_from_fmt.o \ +psb_c_mv_hll_to_coo.o \ +psb_c_mv_hll_to_fmt.o \ +psb_d_cp_dia_from_coo.o \ +psb_d_cp_dia_to_coo.o \ +psb_d_cp_ell_from_coo.o \ +psb_d_cp_ell_from_fmt.o \ +psb_d_cp_ell_to_coo.o \ +psb_d_cp_ell_to_fmt.o \ +psb_d_cp_hdia_from_coo.o \ +psb_d_cp_hdia_to_coo.o \ +psb_d_cp_hll_from_coo.o \ +psb_d_cp_hll_from_fmt.o \ +psb_d_cp_hll_to_coo.o \ +psb_d_cp_hll_to_fmt.o \ +psb_d_dia_aclsum.o \ +psb_d_dia_allocate_mnnz.o \ +psb_d_dia_arwsum.o \ +psb_d_dia_colsum.o \ +psb_d_dia_csgetptn.o \ +psb_d_dia_csgetrow.o \ +psb_d_dia_csmm.o \ +psb_d_dia_csmv.o \ +psb_d_dia_get_diag.o \ +psb_d_dia_maxval.o \ +psb_d_dia_mold.o \ +psb_d_dia_print.o \ +psb_d_dia_reallocate_nz.o \ +psb_d_dia_reinit.o \ +psb_d_dia_rowsum.o \ +psb_d_dia_scal.o \ +psb_d_dia_scals.o \ +psb_d_ell_aclsum.o \ +psb_d_ell_allocate_mnnz.o \ +psb_d_ell_arwsum.o \ +psb_d_ell_colsum.o \ +psb_d_ell_csgetblk.o \ +psb_d_ell_csgetptn.o \ +psb_d_ell_csgetrow.o \ +psb_d_ell_csmm.o \ +psb_d_ell_csmv.o \ +psb_d_ell_csnm1.o \ +psb_d_ell_csnmi.o \ +psb_d_ell_csput.o \ +psb_d_ell_cssm.o \ +psb_d_ell_cssv.o \ +psb_d_ell_get_diag.o \ +psb_d_ell_maxval.o \ +psb_d_ell_mold.o \ +psb_d_ell_print.o \ +psb_d_ell_reallocate_nz.o \ +psb_d_ell_reinit.o \ +psb_d_ell_rowsum.o \ +psb_d_ell_scal.o \ +psb_d_ell_scals.o \ +psb_d_ell_trim.o \ +psb_d_hdia_allocate_mnnz.o \ +psb_d_hdia_csmv.o \ +psb_d_hdia_mold.o \ +psb_d_hdia_print.o \ +psb_d_hll_aclsum.o \ +psb_d_hll_allocate_mnnz.o \ +psb_d_hll_arwsum.o \ +psb_d_hll_colsum.o \ +psb_d_hll_csgetblk.o \ +psb_d_hll_csgetptn.o \ +psb_d_hll_csgetrow.o \ +psb_d_hll_csmm.o \ +psb_d_hll_csmv.o \ +psb_d_hll_csnm1.o \ +psb_d_hll_csnmi.o \ +psb_d_hll_csput.o \ +psb_d_hll_cssm.o \ +psb_d_hll_cssv.o \ +psb_d_hll_get_diag.o \ +psb_d_hll_maxval.o \ +psb_d_hll_mold.o \ +psb_d_hll_print.o \ +psb_d_hll_reallocate_nz.o \ +psb_d_hll_reinit.o \ +psb_d_hll_rowsum.o \ +psb_d_hll_scal.o \ +psb_d_hll_scals.o \ +psb_d_mv_dia_from_coo.o \ +psb_d_mv_ell_from_coo.o \ +psb_d_mv_ell_from_fmt.o \ +psb_d_mv_ell_to_coo.o \ +psb_d_mv_ell_to_fmt.o \ +psb_d_mv_hdia_from_coo.o \ +psb_d_mv_hdia_to_coo.o \ +psb_d_mv_hll_from_coo.o \ +psb_d_mv_hll_from_fmt.o \ +psb_d_mv_hll_to_coo.o \ +psb_d_mv_hll_to_fmt.o \ +psb_z_cp_dia_from_coo.o \ +psb_z_cp_dia_to_coo.o \ +psb_z_cp_ell_from_coo.o \ +psb_z_cp_ell_from_fmt.o \ +psb_z_cp_ell_to_coo.o \ +psb_z_cp_ell_to_fmt.o \ +psb_z_cp_hdia_from_coo.o \ +psb_z_cp_hdia_to_coo.o \ +psb_z_cp_hll_from_coo.o \ +psb_z_cp_hll_from_fmt.o \ +psb_z_cp_hll_to_coo.o \ +psb_z_cp_hll_to_fmt.o \ +psb_z_dia_aclsum.o \ +psb_z_dia_allocate_mnnz.o \ +psb_z_dia_arwsum.o \ +psb_z_dia_colsum.o \ +psb_z_dia_csgetptn.o \ +psb_z_dia_csgetrow.o \ +psb_z_dia_csmm.o \ +psb_z_dia_csmv.o \ +psb_z_dia_get_diag.o \ +psb_z_dia_maxval.o \ +psb_z_dia_mold.o \ +psb_z_dia_print.o \ +psb_z_dia_reallocate_nz.o \ +psb_z_dia_reinit.o \ +psb_z_dia_rowsum.o \ +psb_z_dia_scal.o \ +psb_z_dia_scals.o \ +psb_z_ell_aclsum.o \ +psb_z_ell_allocate_mnnz.o \ +psb_z_ell_arwsum.o \ +psb_z_ell_colsum.o \ +psb_z_ell_csgetblk.o \ +psb_z_ell_csgetptn.o \ +psb_z_ell_csgetrow.o \ +psb_z_ell_csmm.o \ +psb_z_ell_csmv.o \ +psb_z_ell_csnm1.o \ +psb_z_ell_csnmi.o \ +psb_z_ell_csput.o \ +psb_z_ell_cssm.o \ +psb_z_ell_cssv.o \ +psb_z_ell_get_diag.o \ +psb_z_ell_maxval.o \ +psb_z_ell_mold.o \ +psb_z_ell_print.o \ +psb_z_ell_reallocate_nz.o \ +psb_z_ell_reinit.o \ +psb_z_ell_rowsum.o \ +psb_z_ell_scal.o \ +psb_z_ell_scals.o \ +psb_z_ell_trim.o \ +psb_z_hdia_allocate_mnnz.o \ +psb_z_hdia_csmv.o \ +psb_z_hdia_mold.o \ +psb_z_hdia_print.o \ +psb_z_hll_aclsum.o \ +psb_z_hll_allocate_mnnz.o \ +psb_z_hll_arwsum.o \ +psb_z_hll_colsum.o \ +psb_z_hll_csgetblk.o \ +psb_z_hll_csgetptn.o \ +psb_z_hll_csgetrow.o \ +psb_z_hll_csmm.o \ +psb_z_hll_csmv.o \ +psb_z_hll_csnm1.o \ +psb_z_hll_csnmi.o \ +psb_z_hll_csput.o \ +psb_z_hll_cssm.o \ +psb_z_hll_cssv.o \ +psb_z_hll_get_diag.o \ +psb_z_hll_maxval.o \ +psb_z_hll_mold.o \ +psb_z_hll_print.o \ +psb_z_hll_reallocate_nz.o \ +psb_z_hll_reinit.o \ +psb_z_hll_rowsum.o \ +psb_z_hll_scal.o \ +psb_z_hll_scals.o \ +psb_z_mv_dia_from_coo.o \ +psb_z_mv_ell_from_coo.o \ +psb_z_mv_ell_from_fmt.o \ +psb_z_mv_ell_to_coo.o \ +psb_z_mv_ell_to_fmt.o \ +psb_z_mv_hdia_from_coo.o \ +psb_z_mv_hdia_to_coo.o \ +psb_z_mv_hll_from_coo.o \ +psb_z_mv_hll_from_fmt.o \ +psb_z_mv_hll_to_coo.o \ +psb_z_mv_hll_to_fmt.o \ +psi_s_xtr_ell_from_coo.o \ +psi_c_xtr_ell_from_coo.o \ +psi_d_xtr_ell_from_coo.o \ +psi_z_xtr_ell_from_coo.o \ +psi_s_convert_ell_from_coo.o \ +psi_c_convert_ell_from_coo.o \ +psi_d_convert_ell_from_coo.o \ +psi_z_convert_ell_from_coo.o \ +psi_s_convert_hll_from_coo.o \ +psi_c_convert_hll_from_coo.o \ +psi_d_convert_hll_from_coo.o \ +psi_z_convert_hll_from_coo.o \ +psi_s_xtr_dia_from_coo.o \ +psi_c_xtr_dia_from_coo.o \ +psi_d_xtr_dia_from_coo.o \ +psi_z_xtr_dia_from_coo.o \ +psi_s_xtr_coo_from_dia.o \ +psi_d_xtr_coo_from_dia.o \ +psi_c_xtr_coo_from_dia.o \ +psi_z_xtr_coo_from_dia.o \ +psi_s_convert_dia_from_coo.o \ +psi_c_convert_dia_from_coo.o \ +psi_d_convert_dia_from_coo.o \ +psi_z_convert_dia_from_coo.o \ +psb_s_dns_mat_impl.o \ +psb_d_dns_mat_impl.o \ +psb_c_dns_mat_impl.o \ +psb_z_dns_mat_impl.o + +objs: $(OBJS) + +lib: objs + ar cur ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) diff --git a/ext/impl/psb_c_cp_dia_from_coo.f90 b/ext/impl/psb_c_cp_dia_from_coo.f90 new file mode 100644 index 00000000..2d2b1caa --- /dev/null +++ b/ext/impl/psb_c_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_cp_dia_from_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_dia_from_coo diff --git a/ext/impl/psb_c_cp_dia_to_coo.f90 b/ext/impl/psb_c_cp_dia_to_coo.f90 new file mode 100644 index 00000000..9975bec0 --- /dev/null +++ b/ext/impl/psb_c_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_cp_dia_to_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + call psi_c_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_c_cp_dia_to_coo diff --git a/ext/impl/psb_c_cp_ell_from_coo.f90 b/ext/impl/psb_c_cp_ell_from_coo.f90 new file mode 100644 index 00000000..28d7d242 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_c_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_c_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_c_cp_ell_from_coo diff --git a/ext/impl/psb_c_cp_ell_from_fmt.f90 b/ext/impl/psb_c_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..309063b9 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_from_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_c_cp_ell_from_fmt diff --git a/ext/impl/psb_c_cp_ell_to_coo.f90 b/ext/impl/psb_c_cp_ell_to_coo.f90 new file mode 100644 index 00000000..ec6bcff5 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_to_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_c_cp_ell_to_coo diff --git a/ext/impl/psb_c_cp_ell_to_fmt.f90 b/ext/impl/psb_c_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..0c6a6903 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_to_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cp_ell_to_fmt diff --git a/ext/impl/psb_c_cp_hdia_from_coo.f90 b/ext/impl/psb_c_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..a9e1ca21 --- /dev/null +++ b/ext/impl/psb_c_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_cp_hdia_from_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_c_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_c_cp_hdia_from_coo diff --git a/ext/impl/psb_c_cp_hdia_to_coo.f90 b/ext/impl/psb_c_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..32801653 --- /dev/null +++ b/ext/impl/psb_c_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_c_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_c_cp_hdia_to_coo diff --git a/ext/impl/psb_c_cp_hll_from_coo.f90 b/ext/impl/psb_c_cp_hll_from_coo.f90 new file mode 100644 index 00000000..506196c2 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_from_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hll_from_coo diff --git a/ext/impl/psb_c_cp_hll_from_fmt.f90 b/ext/impl/psb_c_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..0849561f --- /dev/null +++ b/ext/impl/psb_c_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_from_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_c_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_c_cp_hll_from_fmt diff --git a/ext/impl/psb_c_cp_hll_to_coo.f90 b/ext/impl/psb_c_cp_hll_to_coo.f90 new file mode 100644 index 00000000..0ff46352 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_to_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_c_cp_hll_to_coo diff --git a/ext/impl/psb_c_cp_hll_to_fmt.f90 b/ext/impl/psb_c_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..df8fa3b7 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_to_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_c_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cp_hll_to_fmt diff --git a/ext/impl/psb_c_dia_aclsum.f90 b/ext/impl/psb_c_dia_aclsum.f90 new file mode 100644 index 00000000..4bd8d440 --- /dev/null +++ b/ext/impl/psb_c_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_aclsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_aclsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_aclsum diff --git a/ext/impl/psb_c_dia_allocate_mnnz.f90 b/ext/impl/psb_c_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..37fb34e1 --- /dev/null +++ b/ext/impl/psb_c_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_allocate_mnnz diff --git a/ext/impl/psb_c_dia_arwsum.f90 b/ext/impl/psb_c_dia_arwsum.f90 new file mode 100644 index 00000000..fe40deb8 --- /dev/null +++ b/ext/impl/psb_c_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_arwsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_arwsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_arwsum diff --git a/ext/impl/psb_c_dia_colsum.f90 b/ext/impl/psb_c_dia_colsum.f90 new file mode 100644 index 00000000..ed43fa12 --- /dev/null +++ b/ext/impl/psb_c_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_colsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_colsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_colsum diff --git a/ext/impl/psb_c_dia_csgetptn.f90 b/ext/impl/psb_c_dia_csgetptn.f90 new file mode 100644 index 00000000..ad479d35 --- /dev/null +++ b/ext/impl/psb_c_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csgetptn + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_c_dia_csgetptn diff --git a/ext/impl/psb_c_dia_csgetrow.f90 b/ext/impl/psb_c_dia_csgetrow.f90 new file mode 100644 index 00000000..2989b20f --- /dev/null +++ b/ext/impl/psb_c_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csgetrow + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_c_dia_csgetrow diff --git a/ext/impl/psb_c_dia_csmm.f90 b/ext/impl/psb_c_dia_csmm.f90 new file mode 100644 index 00000000..b65c4651 --- /dev/null +++ b/ext/impl/psb_c_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csmm + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_c_dia_csmm_inner + +end subroutine psb_c_dia_csmm diff --git a/ext/impl/psb_c_dia_csmv.f90 b/ext/impl/psb_c_dia_csmv.f90 new file mode 100644 index 00000000..cf1ef677 --- /dev/null +++ b/ext/impl/psb_c_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csmv + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_c_dia_csmv_inner + +end subroutine psb_c_dia_csmv diff --git a/ext/impl/psb_c_dia_get_diag.f90 b/ext/impl/psb_c_dia_get_diag.f90 new file mode 100644 index 00000000..d868b62d --- /dev/null +++ b/ext/impl/psb_c_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_get_diag + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = cone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_get_diag diff --git a/ext/impl/psb_c_dia_maxval.f90 b/ext/impl/psb_c_dia_maxval.f90 new file mode 100644 index 00000000..03a2be82 --- /dev/null +++ b/ext/impl/psb_c_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_c_dia_maxval(a) result(res) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_maxval + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_c_dia_maxval diff --git a/ext/impl/psb_c_dia_mold.f90 b/ext/impl/psb_c_dia_mold.f90 new file mode 100644 index 00000000..1d694828 --- /dev/null +++ b/ext/impl/psb_c_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_mold(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_mold + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_mold diff --git a/ext/impl/psb_c_dia_print.f90 b/ext/impl/psb_c_dia_print.f90 new file mode 100644 index 00000000..f3233366 --- /dev/null +++ b/ext/impl/psb_c_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dia_print' + logical, parameter :: debug=.false. + + class(psb_c_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_c_dia_print diff --git a/ext/impl/psb_c_dia_reallocate_nz.f90 b/ext/impl/psb_c_dia_reallocate_nz.f90 new file mode 100644 index 00000000..c46cd465 --- /dev/null +++ b/ext/impl/psb_c_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_reallocate_nz diff --git a/ext/impl/psb_c_dia_reinit.f90 b/ext/impl/psb_c_dia_reinit.f90 new file mode 100644 index 00000000..04a345eb --- /dev/null +++ b/ext/impl/psb_c_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_reinit(a,clear) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_reinit + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = czero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_reinit diff --git a/ext/impl/psb_c_dia_rowsum.f90 b/ext/impl/psb_c_dia_rowsum.f90 new file mode 100644 index 00000000..1f36dab4 --- /dev/null +++ b/ext/impl/psb_c_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_rowsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_rowsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_rowsum diff --git a/ext/impl/psb_c_dia_scal.f90 b/ext/impl/psb_c_dia_scal.f90 new file mode 100644 index 00000000..8f35b7c1 --- /dev/null +++ b/ext/impl/psb_c_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_scal + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_scal diff --git a/ext/impl/psb_c_dia_scals.f90 b/ext/impl/psb_c_dia_scals.f90 new file mode 100644 index 00000000..a9ca5db1 --- /dev/null +++ b/ext/impl/psb_c_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_scals(d,a,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_scals + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_scals diff --git a/ext/impl/psb_c_dns_mat_impl.f90 b/ext/impl/psb_c_dns_mat_impl.f90 new file mode 100644 index 00000000..8e99af8b --- /dev/null +++ b/ext/impl/psb_c_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_c_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_c_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csmv + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='c_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call cgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_csmv + + +!> Function csmm: +!! \memberof psb_c_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_c_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csmm + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='c_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call cgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_c_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_c_dns_csnmi(a) result(res) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csnmi + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_spk_) :: acc + + res = szero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_c_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_c_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_c_dns_get_diag(a,d,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_get_diag + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = czero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_c_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_c_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_c_dns_sparse_mat +!! \brief Allocate a class(psb_c_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_c_dns_mold(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_mold + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_c_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_c_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_c_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = czero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_c_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_c_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csgetrow + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_c_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_c_dns_trim(a) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_trim + implicit none + class(psb_c_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Copy and convert from psb_c_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_c_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_cp_dns_from_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = czero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = czero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Copy and convert to psb_c_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_c_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_cp_dns_to_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= czero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_c_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Convert to psb_c_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_c_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_mv_dns_to_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_c_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Convert from psb_c_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_c_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_mv_dns_from_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_mv_dns_from_coo + diff --git a/ext/impl/psb_c_ell_aclsum.f90 b/ext/impl/psb_c_ell_aclsum.f90 new file mode 100644 index 00000000..3d5a292a --- /dev/null +++ b/ext/impl/psb_c_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_aclsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_aclsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_aclsum diff --git a/ext/impl/psb_c_ell_allocate_mnnz.f90 b/ext/impl/psb_c_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..b137eb04 --- /dev/null +++ b/ext/impl/psb_c_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_allocate_mnnz diff --git a/ext/impl/psb_c_ell_arwsum.f90 b/ext/impl/psb_c_ell_arwsum.f90 new file mode 100644 index 00000000..c047c742 --- /dev/null +++ b/ext/impl/psb_c_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_arwsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_arwsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_arwsum diff --git a/ext/impl/psb_c_ell_colsum.f90 b/ext/impl/psb_c_ell_colsum.f90 new file mode 100644 index 00000000..6d06b589 --- /dev/null +++ b/ext/impl/psb_c_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_colsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_colsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_colsum diff --git a/ext/impl/psb_c_ell_csgetblk.f90 b/ext/impl/psb_c_ell_csgetblk.f90 new file mode 100644 index 00000000..deb07c25 --- /dev/null +++ b/ext/impl/psb_c_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csgetblk + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_csgetblk diff --git a/ext/impl/psb_c_ell_csgetptn.f90 b/ext/impl/psb_c_ell_csgetptn.f90 new file mode 100644 index 00000000..821daa89 --- /dev/null +++ b/ext/impl/psb_c_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csgetptn + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_ell_csgetrow + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_ell_csmm + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ell_csmv + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ell_csnm1 + + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_spk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = sone + else + vt(:) = szero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_c_ell_csnm1 diff --git a/ext/impl/psb_c_ell_csnmi.f90 b/ext/impl/psb_c_ell_csnmi.f90 new file mode 100644 index 00000000..6dc9cfa4 --- /dev/null +++ b/ext/impl/psb_c_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csnmi + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + sone + res = max(res,acc) + end do + +end function psb_c_ell_csnmi diff --git a/ext/impl/psb_c_ell_csput.f90 b/ext/impl/psb_c_ell_csput.f90 new file mode 100644 index 00000000..e0b0f47f --- /dev/null +++ b/ext/impl/psb_c_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csput_a + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_c_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_c_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='c_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_c_ell_srch_upd +end subroutine psb_c_ell_csput_a diff --git a/ext/impl/psb_c_ell_cssm.f90 b/ext/impl/psb_c_ell_cssm.f90 new file mode 100644 index 00000000..26e76030 --- /dev/null +++ b/ext/impl/psb_c_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_cssm + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_c_ell_cssv + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_c_ell_get_diag + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = cone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = czero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_get_diag diff --git a/ext/impl/psb_c_ell_maxval.f90 b/ext/impl/psb_c_ell_maxval.f90 new file mode 100644 index 00000000..4de58b11 --- /dev/null +++ b/ext/impl/psb_c_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_ell_maxval(a) result(res) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_maxval + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_c_ell_maxval diff --git a/ext/impl/psb_c_ell_mold.f90 b/ext/impl/psb_c_ell_mold.f90 new file mode 100644 index 00000000..c7c5d621 --- /dev/null +++ b/ext/impl/psb_c_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_mold(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_mold + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_mold diff --git a/ext/impl/psb_c_ell_print.f90 b/ext/impl/psb_c_ell_print.f90 new file mode 100644 index 00000000..1b8117a8 --- /dev/null +++ b/ext/impl/psb_c_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_c_ell_print diff --git a/ext/impl/psb_c_ell_reallocate_nz.f90 b/ext/impl/psb_c_ell_reallocate_nz.f90 new file mode 100644 index 00000000..b0d77568 --- /dev/null +++ b/ext/impl/psb_c_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_reallocate_nz diff --git a/ext/impl/psb_c_ell_reinit.f90 b/ext/impl/psb_c_ell_reinit.f90 new file mode 100644 index 00000000..2b15dfea --- /dev/null +++ b/ext/impl/psb_c_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_reinit(a,clear) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_reinit + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = czero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_reinit diff --git a/ext/impl/psb_c_ell_rowsum.f90 b/ext/impl/psb_c_ell_rowsum.f90 new file mode 100644 index 00000000..5ae7d42c --- /dev/null +++ b/ext/impl/psb_c_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_rowsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_rowsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = cone + else + d(i) = czero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_rowsum diff --git a/ext/impl/psb_c_ell_scal.f90 b/ext/impl/psb_c_ell_scal.f90 new file mode 100644 index 00000000..63150f32 --- /dev/null +++ b/ext/impl/psb_c_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_scal + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_scal diff --git a/ext/impl/psb_c_ell_scals.f90 b/ext/impl/psb_c_ell_scals.f90 new file mode 100644 index 00000000..3e4cd92a --- /dev/null +++ b/ext/impl/psb_c_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_scals(d,a,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_scals + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_scals diff --git a/ext/impl/psb_c_ell_trim.f90 b/ext/impl/psb_c_ell_trim.f90 new file mode 100644 index 00000000..22aafefd --- /dev/null +++ b/ext/impl/psb_c_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_trim(a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_trim + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_trim diff --git a/ext/impl/psb_c_hdia_allocate_mnnz.f90 b/ext/impl/psb_c_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..17a49ffe --- /dev/null +++ b/ext/impl/psb_c_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hdia_allocate_mnnz diff --git a/ext/impl/psb_c_hdia_csmv.f90 b/ext/impl/psb_c_hdia_csmv.f90 new file mode 100644 index 00000000..a04fde07 --- /dev/null +++ b/ext/impl/psb_c_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_csmv + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_c_inner_dia_csmv + +end subroutine psb_c_hdia_csmv diff --git a/ext/impl/psb_c_hdia_mold.f90 b/ext/impl/psb_c_hdia_mold.f90 new file mode 100644 index 00000000..d9f85ec9 --- /dev/null +++ b/ext/impl/psb_c_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdia_mold(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_mold + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hdia_mold diff --git a/ext/impl/psb_c_hdia_print.f90 b/ext/impl/psb_c_hdia_print.f90 new file mode 100644 index 00000000..477a5433 --- /dev/null +++ b/ext/impl/psb_c_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_c_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + complex(psb_spk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_c_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_c_hdia_print diff --git a/ext/impl/psb_c_hll_aclsum.f90 b/ext/impl/psb_c_hll_aclsum.f90 new file mode 100644 index 00000000..f1bd8e89 --- /dev/null +++ b/ext/impl/psb_c_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_aclsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_aclsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_aclsum + +end subroutine psb_c_hll_aclsum diff --git a/ext/impl/psb_c_hll_allocate_mnnz.f90 b/ext/impl/psb_c_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..97b996bd --- /dev/null +++ b/ext/impl/psb_c_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_allocate_mnnz diff --git a/ext/impl/psb_c_hll_arwsum.f90 b/ext/impl/psb_c_hll_arwsum.f90 new file mode 100644 index 00000000..9c48e1c0 --- /dev/null +++ b/ext/impl/psb_c_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_arwsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_arwsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_arwsum + +end subroutine psb_c_hll_arwsum diff --git a/ext/impl/psb_c_hll_colsum.f90 b/ext/impl/psb_c_hll_colsum.f90 new file mode 100644 index 00000000..fbcb0934 --- /dev/null +++ b/ext/impl/psb_c_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_colsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_colsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_colsum + +end subroutine psb_c_hll_colsum diff --git a/ext/impl/psb_c_hll_csgetblk.f90 b/ext/impl/psb_c_hll_csgetblk.f90 new file mode 100644 index 00000000..9bf0b869 --- /dev/null +++ b/ext/impl/psb_c_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csgetblk + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_csgetblk diff --git a/ext/impl/psb_c_hll_csgetptn.f90 b/ext/impl/psb_c_hll_csgetptn.f90 new file mode 100644 index 00000000..0f6481ed --- /dev/null +++ b/ext/impl/psb_c_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csgetptn + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_hll_csgetrow + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_hll_csmm + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + complex(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_c_hll_csmv + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_c_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == cone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == cone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*conjg(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == czero) then + if (beta == czero) then + do i=1,m + y(ir+i-1) = czero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == czero) then + do i=1,m + tmp = czero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = czero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_c_hll_csmv_inner + + subroutine psb_c_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == czero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_8 + + subroutine psb_c_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == czero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_24 + + subroutine psb_c_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == czero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_16 + + subroutine psb_c_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == czero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_32 + + subroutine psb_c_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == czero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_4 + +end subroutine psb_c_hll_csmv diff --git a/ext/impl/psb_c_hll_csnm1.f90 b/ext/impl/psb_c_hll_csnm1.f90 new file mode 100644 index 00000000..25daa75d --- /dev/null +++ b/ext/impl/psb_c_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csnm1 + + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_spk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = szero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = sone + else + vt = szero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_c_hll_csnm1_inner + +end function psb_c_hll_csnm1 diff --git a/ext/impl/psb_c_hll_csnmi.f90 b/ext/impl/psb_c_hll_csnmi.f90 new file mode 100644 index 00000000..c70be9ce --- /dev/null +++ b/ext/impl/psb_c_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csnmi + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = szero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = sone + else + tmp = szero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_c_hll_csnmi_inner + +end function psb_c_hll_csnmi diff --git a/ext/impl/psb_c_hll_csput.f90 b/ext/impl/psb_c_hll_csput.f90 new file mode 100644 index 00000000..e46ae30a --- /dev/null +++ b/ext/impl/psb_c_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csput_a + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_c_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='c_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_c_hll_srch_upd + +end subroutine psb_c_hll_csput_a diff --git a/ext/impl/psb_c_hll_cssm.f90 b/ext/impl/psb_c_hll_cssm.f90 new file mode 100644 index 00000000..90e3b978 --- /dev/null +++ b/ext/impl/psb_c_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_cssm + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + complex(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_c_hll_cssv + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_c_hll_get_diag + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = cone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_c_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = czero + end if + end do + + end subroutine psb_c_hll_get_diag_inner + +end subroutine psb_c_hll_get_diag diff --git a/ext/impl/psb_c_hll_maxval.f90 b/ext/impl/psb_c_hll_maxval.f90 new file mode 100644 index 00000000..ff82bb40 --- /dev/null +++ b/ext/impl/psb_c_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_maxval(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_maxval + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,sone) + +end function psb_c_hll_maxval diff --git a/ext/impl/psb_c_hll_mold.f90 b/ext/impl/psb_c_hll_mold.f90 new file mode 100644 index 00000000..4a6204b0 --- /dev/null +++ b/ext/impl/psb_c_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_mold(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_mold + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_mold diff --git a/ext/impl/psb_c_hll_print.f90 b/ext/impl/psb_c_hll_print.f90 new file mode 100644 index 00000000..a5eec378 --- /dev/null +++ b/ext/impl/psb_c_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_c_hll_print diff --git a/ext/impl/psb_c_hll_reallocate_nz.f90 b/ext/impl/psb_c_hll_reallocate_nz.f90 new file mode 100644 index 00000000..44d9cfc9 --- /dev/null +++ b/ext/impl/psb_c_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_reallocate_nz diff --git a/ext/impl/psb_c_hll_reinit.f90 b/ext/impl/psb_c_hll_reinit.f90 new file mode 100644 index 00000000..82d5cb16 --- /dev/null +++ b/ext/impl/psb_c_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_reinit(a,clear) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_reinit + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = czero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_reinit diff --git a/ext/impl/psb_c_hll_rowsum.f90 b/ext/impl/psb_c_hll_rowsum.f90 new file mode 100644 index 00000000..e6eea227 --- /dev/null +++ b/ext/impl/psb_c_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_rowsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_rowsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = cone + else + d = czero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine c_hll_rowsum + +end subroutine psb_c_hll_rowsum diff --git a/ext/impl/psb_c_hll_scal.f90 b/ext/impl/psb_c_hll_scal.f90 new file mode 100644 index 00000000..0fd59f15 --- /dev/null +++ b/ext/impl/psb_c_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_scal + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: d(*) + complex(psb_spk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_c_hll_scal_inner + + +end subroutine psb_c_hll_scal diff --git a/ext/impl/psb_c_hll_scals.f90 b/ext/impl/psb_c_hll_scals.f90 new file mode 100644 index 00000000..13a03a22 --- /dev/null +++ b/ext/impl/psb_c_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_scals(d,a,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_scals + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_scals diff --git a/ext/impl/psb_c_mv_dia_from_coo.f90 b/ext/impl/psb_c_mv_dia_from_coo.f90 new file mode 100644 index 00000000..99871348 --- /dev/null +++ b/ext/impl/psb_c_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_mv_dia_from_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_dia_from_coo diff --git a/ext/impl/psb_c_mv_dia_to_coo.f90 b/ext/impl/psb_c_mv_dia_to_coo.f90 new file mode 100644 index 00000000..1382cec3 --- /dev/null +++ b/ext/impl/psb_c_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_mv_dia_to_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_dia_to_coo diff --git a/ext/impl/psb_c_mv_ell_from_coo.f90 b/ext/impl/psb_c_mv_ell_from_coo.f90 new file mode 100644 index 00000000..64da3e8d --- /dev/null +++ b/ext/impl/psb_c_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_from_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_mv_ell_from_coo diff --git a/ext/impl/psb_c_mv_ell_from_fmt.f90 b/ext/impl/psb_c_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..d0fa9bc4 --- /dev/null +++ b/ext/impl/psb_c_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_from_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_ell_from_fmt diff --git a/ext/impl/psb_c_mv_ell_to_coo.f90 b/ext/impl/psb_c_mv_ell_to_coo.f90 new file mode 100644 index 00000000..a49e2e3c --- /dev/null +++ b/ext/impl/psb_c_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_to_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_ell_to_coo diff --git a/ext/impl/psb_c_mv_ell_to_fmt.f90 b/ext/impl/psb_c_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..3ea02d6b --- /dev/null +++ b/ext/impl/psb_c_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_to_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_c_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_ell_to_fmt diff --git a/ext/impl/psb_c_mv_hdia_from_coo.f90 b/ext/impl/psb_c_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..4247fdf8 --- /dev/null +++ b/ext/impl/psb_c_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_mv_hdia_from_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hdia_from_coo diff --git a/ext/impl/psb_c_mv_hdia_to_coo.f90 b/ext/impl/psb_c_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..3a91917a --- /dev/null +++ b/ext/impl/psb_c_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_mv_hdia_to_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_hdia_to_coo diff --git a/ext/impl/psb_c_mv_hll_from_coo.f90 b/ext/impl/psb_c_mv_hll_from_coo.f90 new file mode 100644 index 00000000..b78bdd80 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_from_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hll_from_coo diff --git a/ext/impl/psb_c_mv_hll_from_fmt.f90 b/ext/impl/psb_c_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..add90355 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_from_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_c_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_hll_from_fmt diff --git a/ext/impl/psb_c_mv_hll_to_coo.f90 b/ext/impl/psb_c_mv_hll_to_coo.f90 new file mode 100644 index 00000000..fbf5dfcd --- /dev/null +++ b/ext/impl/psb_c_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_to_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_hll_to_coo diff --git a/ext/impl/psb_c_mv_hll_to_fmt.f90 b/ext/impl/psb_c_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..37d77e85 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_to_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_c_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_hll_to_fmt diff --git a/ext/impl/psb_d_cp_dia_from_coo.f90 b/ext/impl/psb_d_cp_dia_from_coo.f90 new file mode 100644 index 00000000..b640565f --- /dev/null +++ b/ext/impl/psb_d_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_cp_dia_from_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_dia_from_coo diff --git a/ext/impl/psb_d_cp_dia_to_coo.f90 b/ext/impl/psb_d_cp_dia_to_coo.f90 new file mode 100644 index 00000000..527c96d0 --- /dev/null +++ b/ext/impl/psb_d_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_cp_dia_to_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + call psi_d_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_d_cp_dia_to_coo diff --git a/ext/impl/psb_d_cp_ell_from_coo.f90 b/ext/impl/psb_d_cp_ell_from_coo.f90 new file mode 100644 index 00000000..cf23a0e0 --- /dev/null +++ b/ext/impl/psb_d_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_d_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_d_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_d_cp_ell_from_coo diff --git a/ext/impl/psb_d_cp_ell_from_fmt.f90 b/ext/impl/psb_d_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..ce8a8d7e --- /dev/null +++ b/ext/impl/psb_d_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_from_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_d_cp_ell_from_fmt diff --git a/ext/impl/psb_d_cp_ell_to_coo.f90 b/ext/impl/psb_d_cp_ell_to_coo.f90 new file mode 100644 index 00000000..8e7ad735 --- /dev/null +++ b/ext/impl/psb_d_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_to_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_d_cp_ell_to_coo diff --git a/ext/impl/psb_d_cp_ell_to_fmt.f90 b/ext/impl/psb_d_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..fd05d0fd --- /dev/null +++ b/ext/impl/psb_d_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_to_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_ell_to_fmt diff --git a/ext/impl/psb_d_cp_hdia_from_coo.f90 b/ext/impl/psb_d_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..bbc34195 --- /dev/null +++ b/ext/impl/psb_d_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_cp_hdia_from_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_d_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_d_cp_hdia_from_coo diff --git a/ext/impl/psb_d_cp_hdia_to_coo.f90 b/ext/impl/psb_d_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..bfa77b08 --- /dev/null +++ b/ext/impl/psb_d_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_d_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_d_cp_hdia_to_coo diff --git a/ext/impl/psb_d_cp_hll_from_coo.f90 b/ext/impl/psb_d_cp_hll_from_coo.f90 new file mode 100644 index 00000000..03028d20 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_from_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hll_from_coo diff --git a/ext/impl/psb_d_cp_hll_from_fmt.f90 b/ext/impl/psb_d_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..785b23ac --- /dev/null +++ b/ext/impl/psb_d_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_from_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_d_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_d_cp_hll_from_fmt diff --git a/ext/impl/psb_d_cp_hll_to_coo.f90 b/ext/impl/psb_d_cp_hll_to_coo.f90 new file mode 100644 index 00000000..b20144c5 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_to_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_d_cp_hll_to_coo diff --git a/ext/impl/psb_d_cp_hll_to_fmt.f90 b/ext/impl/psb_d_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..6c60c5b5 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_to_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_d_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_hll_to_fmt diff --git a/ext/impl/psb_d_dia_aclsum.f90 b/ext/impl/psb_d_dia_aclsum.f90 new file mode 100644 index 00000000..0f4df6ca --- /dev/null +++ b/ext/impl/psb_d_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_aclsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_aclsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_aclsum diff --git a/ext/impl/psb_d_dia_allocate_mnnz.f90 b/ext/impl/psb_d_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..309b7d4a --- /dev/null +++ b/ext/impl/psb_d_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_allocate_mnnz diff --git a/ext/impl/psb_d_dia_arwsum.f90 b/ext/impl/psb_d_dia_arwsum.f90 new file mode 100644 index 00000000..98eefc44 --- /dev/null +++ b/ext/impl/psb_d_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_arwsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_arwsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_arwsum diff --git a/ext/impl/psb_d_dia_colsum.f90 b/ext/impl/psb_d_dia_colsum.f90 new file mode 100644 index 00000000..6a6eb81c --- /dev/null +++ b/ext/impl/psb_d_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_colsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_colsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_colsum diff --git a/ext/impl/psb_d_dia_csgetptn.f90 b/ext/impl/psb_d_dia_csgetptn.f90 new file mode 100644 index 00000000..ad0e040a --- /dev/null +++ b/ext/impl/psb_d_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csgetptn + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_d_dia_csgetptn diff --git a/ext/impl/psb_d_dia_csgetrow.f90 b/ext/impl/psb_d_dia_csgetrow.f90 new file mode 100644 index 00000000..7e05a26e --- /dev/null +++ b/ext/impl/psb_d_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csgetrow + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_d_dia_csgetrow diff --git a/ext/impl/psb_d_dia_csmm.f90 b/ext/impl/psb_d_dia_csmm.f90 new file mode 100644 index 00000000..81ad967d --- /dev/null +++ b/ext/impl/psb_d_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csmm + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_d_dia_csmm_inner + +end subroutine psb_d_dia_csmm diff --git a/ext/impl/psb_d_dia_csmv.f90 b/ext/impl/psb_d_dia_csmv.f90 new file mode 100644 index 00000000..166b4c58 --- /dev/null +++ b/ext/impl/psb_d_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csmv + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_d_dia_csmv_inner + +end subroutine psb_d_dia_csmv diff --git a/ext/impl/psb_d_dia_get_diag.f90 b/ext/impl/psb_d_dia_get_diag.f90 new file mode 100644 index 00000000..bbcb4a12 --- /dev/null +++ b/ext/impl/psb_d_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_get_diag + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = done + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_get_diag diff --git a/ext/impl/psb_d_dia_maxval.f90 b/ext/impl/psb_d_dia_maxval.f90 new file mode 100644 index 00000000..f57be1ff --- /dev/null +++ b/ext/impl/psb_d_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_d_dia_maxval(a) result(res) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_maxval + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_d_dia_maxval diff --git a/ext/impl/psb_d_dia_mold.f90 b/ext/impl/psb_d_dia_mold.f90 new file mode 100644 index 00000000..2b3cef81 --- /dev/null +++ b/ext/impl/psb_d_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_mold(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_mold + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_mold diff --git a/ext/impl/psb_d_dia_print.f90 b/ext/impl/psb_d_dia_print.f90 new file mode 100644 index 00000000..e32dc2ed --- /dev/null +++ b/ext/impl/psb_d_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dia_print' + logical, parameter :: debug=.false. + + class(psb_d_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_d_dia_print diff --git a/ext/impl/psb_d_dia_reallocate_nz.f90 b/ext/impl/psb_d_dia_reallocate_nz.f90 new file mode 100644 index 00000000..83864dd8 --- /dev/null +++ b/ext/impl/psb_d_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_reallocate_nz diff --git a/ext/impl/psb_d_dia_reinit.f90 b/ext/impl/psb_d_dia_reinit.f90 new file mode 100644 index 00000000..f1e91ade --- /dev/null +++ b/ext/impl/psb_d_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_reinit(a,clear) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_reinit + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = dzero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_reinit diff --git a/ext/impl/psb_d_dia_rowsum.f90 b/ext/impl/psb_d_dia_rowsum.f90 new file mode 100644 index 00000000..7a5875ba --- /dev/null +++ b/ext/impl/psb_d_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_rowsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_rowsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_rowsum diff --git a/ext/impl/psb_d_dia_scal.f90 b/ext/impl/psb_d_dia_scal.f90 new file mode 100644 index 00000000..d87c0d25 --- /dev/null +++ b/ext/impl/psb_d_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_scal + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_scal diff --git a/ext/impl/psb_d_dia_scals.f90 b/ext/impl/psb_d_dia_scals.f90 new file mode 100644 index 00000000..a3958f57 --- /dev/null +++ b/ext/impl/psb_d_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_scals(d,a,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_scals + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_scals diff --git a/ext/impl/psb_d_dns_mat_impl.f90 b/ext/impl/psb_d_dns_mat_impl.f90 new file mode 100644 index 00000000..edf5cde4 --- /dev/null +++ b/ext/impl/psb_d_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_d_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_d_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csmv + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='d_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call dgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_csmv + + +!> Function csmm: +!! \memberof psb_d_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_d_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csmm + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='d_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call dgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_d_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_d_dns_csnmi(a) result(res) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csnmi + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_dpk_) :: acc + + res = dzero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_d_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_d_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_d_dns_get_diag(a,d,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_get_diag + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = dzero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_d_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_d_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_d_dns_sparse_mat +!! \brief Allocate a class(psb_d_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_d_dns_mold(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_mold + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_d_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_d_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_d_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = dzero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_d_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_d_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csgetrow + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_d_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_d_dns_trim(a) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_trim + implicit none + class(psb_d_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Copy and convert from psb_d_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_d_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_cp_dns_from_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = dzero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = dzero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Copy and convert to psb_d_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_d_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_cp_dns_to_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= dzero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_d_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Convert to psb_d_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_d_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_mv_dns_to_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_d_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Convert from psb_d_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_d_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_mv_dns_from_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_mv_dns_from_coo + diff --git a/ext/impl/psb_d_ell_aclsum.f90 b/ext/impl/psb_d_ell_aclsum.f90 new file mode 100644 index 00000000..e0bfc18d --- /dev/null +++ b/ext/impl/psb_d_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_aclsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_aclsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_aclsum diff --git a/ext/impl/psb_d_ell_allocate_mnnz.f90 b/ext/impl/psb_d_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..95e4558c --- /dev/null +++ b/ext/impl/psb_d_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_allocate_mnnz diff --git a/ext/impl/psb_d_ell_arwsum.f90 b/ext/impl/psb_d_ell_arwsum.f90 new file mode 100644 index 00000000..6bf3b888 --- /dev/null +++ b/ext/impl/psb_d_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_arwsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_arwsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_arwsum diff --git a/ext/impl/psb_d_ell_colsum.f90 b/ext/impl/psb_d_ell_colsum.f90 new file mode 100644 index 00000000..9eb30ca0 --- /dev/null +++ b/ext/impl/psb_d_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_colsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_colsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_colsum diff --git a/ext/impl/psb_d_ell_csgetblk.f90 b/ext/impl/psb_d_ell_csgetblk.f90 new file mode 100644 index 00000000..9725518f --- /dev/null +++ b/ext/impl/psb_d_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csgetblk + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_csgetblk diff --git a/ext/impl/psb_d_ell_csgetptn.f90 b/ext/impl/psb_d_ell_csgetptn.f90 new file mode 100644 index 00000000..a050fe54 --- /dev/null +++ b/ext/impl/psb_d_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csgetptn + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_ell_csgetrow + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_ell_csmm + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ell_csmv + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ell_csnm1 + + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = done + else + vt(:) = dzero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_d_ell_csnm1 diff --git a/ext/impl/psb_d_ell_csnmi.f90 b/ext/impl/psb_d_ell_csnmi.f90 new file mode 100644 index 00000000..b4e3d03e --- /dev/null +++ b/ext/impl/psb_d_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csnmi + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + done + res = max(res,acc) + end do + +end function psb_d_ell_csnmi diff --git a/ext/impl/psb_d_ell_csput.f90 b/ext/impl/psb_d_ell_csput.f90 new file mode 100644 index 00000000..d38d9d51 --- /dev/null +++ b/ext/impl/psb_d_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csput_a + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_d_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_d_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='d_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_d_ell_srch_upd +end subroutine psb_d_ell_csput_a diff --git a/ext/impl/psb_d_ell_cssm.f90 b/ext/impl/psb_d_ell_cssm.f90 new file mode 100644 index 00000000..3c8b5f21 --- /dev/null +++ b/ext/impl/psb_d_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_cssm + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_ell_cssv + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_ell_get_diag + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = done + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = dzero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_get_diag diff --git a/ext/impl/psb_d_ell_maxval.f90 b/ext/impl/psb_d_ell_maxval.f90 new file mode 100644 index 00000000..d0cb24d3 --- /dev/null +++ b/ext/impl/psb_d_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_ell_maxval(a) result(res) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_maxval + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_d_ell_maxval diff --git a/ext/impl/psb_d_ell_mold.f90 b/ext/impl/psb_d_ell_mold.f90 new file mode 100644 index 00000000..48814f3c --- /dev/null +++ b/ext/impl/psb_d_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_mold(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_mold + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_mold diff --git a/ext/impl/psb_d_ell_print.f90 b/ext/impl/psb_d_ell_print.f90 new file mode 100644 index 00000000..cf539662 --- /dev/null +++ b/ext/impl/psb_d_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_d_ell_print diff --git a/ext/impl/psb_d_ell_reallocate_nz.f90 b/ext/impl/psb_d_ell_reallocate_nz.f90 new file mode 100644 index 00000000..8f92ffad --- /dev/null +++ b/ext/impl/psb_d_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_reallocate_nz diff --git a/ext/impl/psb_d_ell_reinit.f90 b/ext/impl/psb_d_ell_reinit.f90 new file mode 100644 index 00000000..ab9a7ba2 --- /dev/null +++ b/ext/impl/psb_d_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_reinit(a,clear) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_reinit + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = dzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_reinit diff --git a/ext/impl/psb_d_ell_rowsum.f90 b/ext/impl/psb_d_ell_rowsum.f90 new file mode 100644 index 00000000..782775d4 --- /dev/null +++ b/ext/impl/psb_d_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_rowsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_rowsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_rowsum diff --git a/ext/impl/psb_d_ell_scal.f90 b/ext/impl/psb_d_ell_scal.f90 new file mode 100644 index 00000000..15be8a66 --- /dev/null +++ b/ext/impl/psb_d_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_scal + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_scal diff --git a/ext/impl/psb_d_ell_scals.f90 b/ext/impl/psb_d_ell_scals.f90 new file mode 100644 index 00000000..501f42b0 --- /dev/null +++ b/ext/impl/psb_d_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_scals(d,a,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_scals + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_scals diff --git a/ext/impl/psb_d_ell_trim.f90 b/ext/impl/psb_d_ell_trim.f90 new file mode 100644 index 00000000..8b1d52f7 --- /dev/null +++ b/ext/impl/psb_d_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_trim(a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_trim + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_trim diff --git a/ext/impl/psb_d_hdia_allocate_mnnz.f90 b/ext/impl/psb_d_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..e5721754 --- /dev/null +++ b/ext/impl/psb_d_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hdia_allocate_mnnz diff --git a/ext/impl/psb_d_hdia_csmv.f90 b/ext/impl/psb_d_hdia_csmv.f90 new file mode 100644 index 00000000..82599342 --- /dev/null +++ b/ext/impl/psb_d_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_csmv + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_d_inner_dia_csmv + +end subroutine psb_d_hdia_csmv diff --git a/ext/impl/psb_d_hdia_mold.f90 b/ext/impl/psb_d_hdia_mold.f90 new file mode 100644 index 00000000..cebedd44 --- /dev/null +++ b/ext/impl/psb_d_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdia_mold(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_mold + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hdia_mold diff --git a/ext/impl/psb_d_hdia_print.f90 b/ext/impl/psb_d_hdia_print.f90 new file mode 100644 index 00000000..43753299 --- /dev/null +++ b/ext/impl/psb_d_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_d_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + real(psb_dpk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_d_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_d_hdia_print diff --git a/ext/impl/psb_d_hll_aclsum.f90 b/ext/impl/psb_d_hll_aclsum.f90 new file mode 100644 index 00000000..1f868edc --- /dev/null +++ b/ext/impl/psb_d_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_aclsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_aclsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_aclsum + +end subroutine psb_d_hll_aclsum diff --git a/ext/impl/psb_d_hll_allocate_mnnz.f90 b/ext/impl/psb_d_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..f58d0e4a --- /dev/null +++ b/ext/impl/psb_d_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_allocate_mnnz diff --git a/ext/impl/psb_d_hll_arwsum.f90 b/ext/impl/psb_d_hll_arwsum.f90 new file mode 100644 index 00000000..e5ae24fb --- /dev/null +++ b/ext/impl/psb_d_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_arwsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_arwsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_arwsum + +end subroutine psb_d_hll_arwsum diff --git a/ext/impl/psb_d_hll_colsum.f90 b/ext/impl/psb_d_hll_colsum.f90 new file mode 100644 index 00000000..8c2020ec --- /dev/null +++ b/ext/impl/psb_d_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_colsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_colsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_colsum + +end subroutine psb_d_hll_colsum diff --git a/ext/impl/psb_d_hll_csgetblk.f90 b/ext/impl/psb_d_hll_csgetblk.f90 new file mode 100644 index 00000000..185baf29 --- /dev/null +++ b/ext/impl/psb_d_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csgetblk + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_csgetblk diff --git a/ext/impl/psb_d_hll_csgetptn.f90 b/ext/impl/psb_d_hll_csgetptn.f90 new file mode 100644 index 00000000..a7cdc148 --- /dev/null +++ b/ext/impl/psb_d_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csgetptn + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_hll_csgetrow + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_hll_csmm + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + real(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_d_hll_csmv + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_d_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == done) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == done) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == dzero) then + if (beta == dzero) then + do i=1,m + y(ir+i-1) = dzero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == dzero) then + do i=1,m + tmp = dzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = dzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_d_hll_csmv_inner + + subroutine psb_d_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_8 + + subroutine psb_d_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_24 + + subroutine psb_d_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_16 + + subroutine psb_d_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_32 + + subroutine psb_d_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_4 + +end subroutine psb_d_hll_csmv diff --git a/ext/impl/psb_d_hll_csnm1.f90 b/ext/impl/psb_d_hll_csnm1.f90 new file mode 100644 index 00000000..4627a4d2 --- /dev/null +++ b/ext/impl/psb_d_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csnm1 + + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_dpk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = dzero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = done + else + vt = dzero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_d_hll_csnm1_inner + +end function psb_d_hll_csnm1 diff --git a/ext/impl/psb_d_hll_csnmi.f90 b/ext/impl/psb_d_hll_csnmi.f90 new file mode 100644 index 00000000..2b758fa3 --- /dev/null +++ b/ext/impl/psb_d_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csnmi + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = dzero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = done + else + tmp = dzero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_d_hll_csnmi_inner + +end function psb_d_hll_csnmi diff --git a/ext/impl/psb_d_hll_csput.f90 b/ext/impl/psb_d_hll_csput.f90 new file mode 100644 index 00000000..064e6c59 --- /dev/null +++ b/ext/impl/psb_d_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csput_a + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_d_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='d_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_d_hll_srch_upd + +end subroutine psb_d_hll_csput_a diff --git a/ext/impl/psb_d_hll_cssm.f90 b/ext/impl/psb_d_hll_cssm.f90 new file mode 100644 index 00000000..f4f6e349 --- /dev/null +++ b/ext/impl/psb_d_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_cssm + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_d_hll_cssv + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_d_hll_get_diag + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = done + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_d_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = dzero + end if + end do + + end subroutine psb_d_hll_get_diag_inner + +end subroutine psb_d_hll_get_diag diff --git a/ext/impl/psb_d_hll_maxval.f90 b/ext/impl/psb_d_hll_maxval.f90 new file mode 100644 index 00000000..8408cc96 --- /dev/null +++ b/ext/impl/psb_d_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_maxval(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_maxval + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,done) + +end function psb_d_hll_maxval diff --git a/ext/impl/psb_d_hll_mold.f90 b/ext/impl/psb_d_hll_mold.f90 new file mode 100644 index 00000000..e9d721f0 --- /dev/null +++ b/ext/impl/psb_d_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_mold(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_mold + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_mold diff --git a/ext/impl/psb_d_hll_print.f90 b/ext/impl/psb_d_hll_print.f90 new file mode 100644 index 00000000..93c56d5c --- /dev/null +++ b/ext/impl/psb_d_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_d_hll_print diff --git a/ext/impl/psb_d_hll_reallocate_nz.f90 b/ext/impl/psb_d_hll_reallocate_nz.f90 new file mode 100644 index 00000000..7abdd58f --- /dev/null +++ b/ext/impl/psb_d_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_reallocate_nz diff --git a/ext/impl/psb_d_hll_reinit.f90 b/ext/impl/psb_d_hll_reinit.f90 new file mode 100644 index 00000000..6a0f34fa --- /dev/null +++ b/ext/impl/psb_d_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_reinit(a,clear) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_reinit + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = dzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_reinit diff --git a/ext/impl/psb_d_hll_rowsum.f90 b/ext/impl/psb_d_hll_rowsum.f90 new file mode 100644 index 00000000..bfa2d2e1 --- /dev/null +++ b/ext/impl/psb_d_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_rowsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_rowsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = done + else + d = dzero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine d_hll_rowsum + +end subroutine psb_d_hll_rowsum diff --git a/ext/impl/psb_d_hll_scal.f90 b/ext/impl/psb_d_hll_scal.f90 new file mode 100644 index 00000000..ed9dd9ce --- /dev/null +++ b/ext/impl/psb_d_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_scal + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: d(*) + real(psb_dpk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_d_hll_scal_inner + + +end subroutine psb_d_hll_scal diff --git a/ext/impl/psb_d_hll_scals.f90 b/ext/impl/psb_d_hll_scals.f90 new file mode 100644 index 00000000..8e05cddd --- /dev/null +++ b/ext/impl/psb_d_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_scals(d,a,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_scals + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_scals diff --git a/ext/impl/psb_d_mv_dia_from_coo.f90 b/ext/impl/psb_d_mv_dia_from_coo.f90 new file mode 100644 index 00000000..e38e975a --- /dev/null +++ b/ext/impl/psb_d_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_mv_dia_from_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_dia_from_coo diff --git a/ext/impl/psb_d_mv_dia_to_coo.f90 b/ext/impl/psb_d_mv_dia_to_coo.f90 new file mode 100644 index 00000000..d8ac7a69 --- /dev/null +++ b/ext/impl/psb_d_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_mv_dia_to_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_dia_to_coo diff --git a/ext/impl/psb_d_mv_ell_from_coo.f90 b/ext/impl/psb_d_mv_ell_from_coo.f90 new file mode 100644 index 00000000..8f98daab --- /dev/null +++ b/ext/impl/psb_d_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_from_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_mv_ell_from_coo diff --git a/ext/impl/psb_d_mv_ell_from_fmt.f90 b/ext/impl/psb_d_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..6589fd0a --- /dev/null +++ b/ext/impl/psb_d_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_from_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_ell_from_fmt diff --git a/ext/impl/psb_d_mv_ell_to_coo.f90 b/ext/impl/psb_d_mv_ell_to_coo.f90 new file mode 100644 index 00000000..a1220a6e --- /dev/null +++ b/ext/impl/psb_d_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_to_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_ell_to_coo diff --git a/ext/impl/psb_d_mv_ell_to_fmt.f90 b/ext/impl/psb_d_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..a5975360 --- /dev/null +++ b/ext/impl/psb_d_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_to_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_d_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_ell_to_fmt diff --git a/ext/impl/psb_d_mv_hdia_from_coo.f90 b/ext/impl/psb_d_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..68caea34 --- /dev/null +++ b/ext/impl/psb_d_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_mv_hdia_from_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hdia_from_coo diff --git a/ext/impl/psb_d_mv_hdia_to_coo.f90 b/ext/impl/psb_d_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..595e20a2 --- /dev/null +++ b/ext/impl/psb_d_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_mv_hdia_to_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_hdia_to_coo diff --git a/ext/impl/psb_d_mv_hll_from_coo.f90 b/ext/impl/psb_d_mv_hll_from_coo.f90 new file mode 100644 index 00000000..78faed4b --- /dev/null +++ b/ext/impl/psb_d_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_from_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hll_from_coo diff --git a/ext/impl/psb_d_mv_hll_from_fmt.f90 b/ext/impl/psb_d_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..76a2f2fb --- /dev/null +++ b/ext/impl/psb_d_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_from_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_d_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hll_from_fmt diff --git a/ext/impl/psb_d_mv_hll_to_coo.f90 b/ext/impl/psb_d_mv_hll_to_coo.f90 new file mode 100644 index 00000000..fbc9111b --- /dev/null +++ b/ext/impl/psb_d_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_to_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_hll_to_coo diff --git a/ext/impl/psb_d_mv_hll_to_fmt.f90 b/ext/impl/psb_d_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..8022b2e5 --- /dev/null +++ b/ext/impl/psb_d_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_to_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_d_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hll_to_fmt diff --git a/ext/impl/psb_s_cp_dia_from_coo.f90 b/ext/impl/psb_s_cp_dia_from_coo.f90 new file mode 100644 index 00000000..6d9a0749 --- /dev/null +++ b/ext/impl/psb_s_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_cp_dia_from_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_dia_from_coo diff --git a/ext/impl/psb_s_cp_dia_to_coo.f90 b/ext/impl/psb_s_cp_dia_to_coo.f90 new file mode 100644 index 00000000..c0cd5d32 --- /dev/null +++ b/ext/impl/psb_s_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_cp_dia_to_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + call psi_s_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_s_cp_dia_to_coo diff --git a/ext/impl/psb_s_cp_ell_from_coo.f90 b/ext/impl/psb_s_cp_ell_from_coo.f90 new file mode 100644 index 00000000..f178a05c --- /dev/null +++ b/ext/impl/psb_s_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_s_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_s_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_s_cp_ell_from_coo diff --git a/ext/impl/psb_s_cp_ell_from_fmt.f90 b/ext/impl/psb_s_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..bffe3d85 --- /dev/null +++ b/ext/impl/psb_s_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_from_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_s_cp_ell_from_fmt diff --git a/ext/impl/psb_s_cp_ell_to_coo.f90 b/ext/impl/psb_s_cp_ell_to_coo.f90 new file mode 100644 index 00000000..b8acddfc --- /dev/null +++ b/ext/impl/psb_s_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_to_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_s_cp_ell_to_coo diff --git a/ext/impl/psb_s_cp_ell_to_fmt.f90 b/ext/impl/psb_s_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..58fe3756 --- /dev/null +++ b/ext/impl/psb_s_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_to_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_ell_to_fmt diff --git a/ext/impl/psb_s_cp_hdia_from_coo.f90 b/ext/impl/psb_s_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..b3d427d9 --- /dev/null +++ b/ext/impl/psb_s_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_from_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_s_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_s_cp_hdia_from_coo diff --git a/ext/impl/psb_s_cp_hdia_to_coo.f90 b/ext/impl/psb_s_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..8e90e236 --- /dev/null +++ b/ext/impl/psb_s_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_s_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_s_cp_hdia_to_coo diff --git a/ext/impl/psb_s_cp_hll_from_coo.f90 b/ext/impl/psb_s_cp_hll_from_coo.f90 new file mode 100644 index 00000000..9d75f994 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_from_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hll_from_coo diff --git a/ext/impl/psb_s_cp_hll_from_fmt.f90 b/ext/impl/psb_s_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..8f010902 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_from_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_s_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_s_cp_hll_from_fmt diff --git a/ext/impl/psb_s_cp_hll_to_coo.f90 b/ext/impl/psb_s_cp_hll_to_coo.f90 new file mode 100644 index 00000000..74502ba2 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_to_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + real(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_s_cp_hll_to_coo diff --git a/ext/impl/psb_s_cp_hll_to_fmt.f90 b/ext/impl/psb_s_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..f7adaa54 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_to_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_s_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_hll_to_fmt diff --git a/ext/impl/psb_s_dia_aclsum.f90 b/ext/impl/psb_s_dia_aclsum.f90 new file mode 100644 index 00000000..718a2424 --- /dev/null +++ b/ext/impl/psb_s_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_aclsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_aclsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_aclsum diff --git a/ext/impl/psb_s_dia_allocate_mnnz.f90 b/ext/impl/psb_s_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..df56c4a6 --- /dev/null +++ b/ext/impl/psb_s_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_allocate_mnnz diff --git a/ext/impl/psb_s_dia_arwsum.f90 b/ext/impl/psb_s_dia_arwsum.f90 new file mode 100644 index 00000000..5a974bbf --- /dev/null +++ b/ext/impl/psb_s_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_arwsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_arwsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_arwsum diff --git a/ext/impl/psb_s_dia_colsum.f90 b/ext/impl/psb_s_dia_colsum.f90 new file mode 100644 index 00000000..e60eb88f --- /dev/null +++ b/ext/impl/psb_s_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_colsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_colsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_colsum diff --git a/ext/impl/psb_s_dia_csgetptn.f90 b/ext/impl/psb_s_dia_csgetptn.f90 new file mode 100644 index 00000000..f946eb73 --- /dev/null +++ b/ext/impl/psb_s_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csgetptn + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_s_dia_csgetptn diff --git a/ext/impl/psb_s_dia_csgetrow.f90 b/ext/impl/psb_s_dia_csgetrow.f90 new file mode 100644 index 00000000..b79e2470 --- /dev/null +++ b/ext/impl/psb_s_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csgetrow + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_s_dia_csgetrow diff --git a/ext/impl/psb_s_dia_csmm.f90 b/ext/impl/psb_s_dia_csmm.f90 new file mode 100644 index 00000000..9f586dbb --- /dev/null +++ b/ext/impl/psb_s_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csmm + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_s_dia_csmm_inner + +end subroutine psb_s_dia_csmm diff --git a/ext/impl/psb_s_dia_csmv.f90 b/ext/impl/psb_s_dia_csmv.f90 new file mode 100644 index 00000000..1a23932e --- /dev/null +++ b/ext/impl/psb_s_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csmv + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_s_dia_csmv_inner + +end subroutine psb_s_dia_csmv diff --git a/ext/impl/psb_s_dia_get_diag.f90 b/ext/impl/psb_s_dia_get_diag.f90 new file mode 100644 index 00000000..5909c72a --- /dev/null +++ b/ext/impl/psb_s_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_get_diag + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = sone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_get_diag diff --git a/ext/impl/psb_s_dia_maxval.f90 b/ext/impl/psb_s_dia_maxval.f90 new file mode 100644 index 00000000..5f672644 --- /dev/null +++ b/ext/impl/psb_s_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_s_dia_maxval(a) result(res) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_maxval + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_s_dia_maxval diff --git a/ext/impl/psb_s_dia_mold.f90 b/ext/impl/psb_s_dia_mold.f90 new file mode 100644 index 00000000..a65379a4 --- /dev/null +++ b/ext/impl/psb_s_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_mold(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_mold + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_mold diff --git a/ext/impl/psb_s_dia_print.f90 b/ext/impl/psb_s_dia_print.f90 new file mode 100644 index 00000000..a0de1ba8 --- /dev/null +++ b/ext/impl/psb_s_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dia_print' + logical, parameter :: debug=.false. + + class(psb_s_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_s_dia_print diff --git a/ext/impl/psb_s_dia_reallocate_nz.f90 b/ext/impl/psb_s_dia_reallocate_nz.f90 new file mode 100644 index 00000000..d37d9e5f --- /dev/null +++ b/ext/impl/psb_s_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_reallocate_nz diff --git a/ext/impl/psb_s_dia_reinit.f90 b/ext/impl/psb_s_dia_reinit.f90 new file mode 100644 index 00000000..dd109783 --- /dev/null +++ b/ext/impl/psb_s_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_reinit(a,clear) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_reinit + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = szero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_reinit diff --git a/ext/impl/psb_s_dia_rowsum.f90 b/ext/impl/psb_s_dia_rowsum.f90 new file mode 100644 index 00000000..3f32a2b2 --- /dev/null +++ b/ext/impl/psb_s_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_rowsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_rowsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_rowsum diff --git a/ext/impl/psb_s_dia_scal.f90 b/ext/impl/psb_s_dia_scal.f90 new file mode 100644 index 00000000..7ccf881a --- /dev/null +++ b/ext/impl/psb_s_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_scal + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_scal diff --git a/ext/impl/psb_s_dia_scals.f90 b/ext/impl/psb_s_dia_scals.f90 new file mode 100644 index 00000000..da1bc94e --- /dev/null +++ b/ext/impl/psb_s_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_scals(d,a,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_scals + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_scals diff --git a/ext/impl/psb_s_dns_mat_impl.f90 b/ext/impl/psb_s_dns_mat_impl.f90 new file mode 100644 index 00000000..f6f458c8 --- /dev/null +++ b/ext/impl/psb_s_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_s_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_s_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csmv + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='s_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call sgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_csmv + + +!> Function csmm: +!! \memberof psb_s_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_s_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csmm + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='s_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call sgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_s_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_s_dns_csnmi(a) result(res) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csnmi + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_spk_) :: acc + + res = szero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_s_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_s_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_s_dns_get_diag(a,d,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_get_diag + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = szero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_s_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_s_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_s_dns_sparse_mat +!! \brief Allocate a class(psb_s_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_s_dns_mold(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_mold + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_s_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_s_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_s_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = szero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_s_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_s_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csgetrow + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_s_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_s_dns_trim(a) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_trim + implicit none + class(psb_s_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Copy and convert from psb_s_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_s_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_cp_dns_from_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = szero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = szero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Copy and convert to psb_s_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_s_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_cp_dns_to_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= szero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_s_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Convert to psb_s_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_s_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_mv_dns_to_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_s_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Convert from psb_s_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_s_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_mv_dns_from_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_mv_dns_from_coo + diff --git a/ext/impl/psb_s_ell_aclsum.f90 b/ext/impl/psb_s_ell_aclsum.f90 new file mode 100644 index 00000000..2eea0cc9 --- /dev/null +++ b/ext/impl/psb_s_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_aclsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_aclsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_aclsum diff --git a/ext/impl/psb_s_ell_allocate_mnnz.f90 b/ext/impl/psb_s_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..fd9f1b49 --- /dev/null +++ b/ext/impl/psb_s_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_allocate_mnnz diff --git a/ext/impl/psb_s_ell_arwsum.f90 b/ext/impl/psb_s_ell_arwsum.f90 new file mode 100644 index 00000000..a47f8721 --- /dev/null +++ b/ext/impl/psb_s_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_arwsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_arwsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_arwsum diff --git a/ext/impl/psb_s_ell_colsum.f90 b/ext/impl/psb_s_ell_colsum.f90 new file mode 100644 index 00000000..0924d8a9 --- /dev/null +++ b/ext/impl/psb_s_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_colsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_colsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_colsum diff --git a/ext/impl/psb_s_ell_csgetblk.f90 b/ext/impl/psb_s_ell_csgetblk.f90 new file mode 100644 index 00000000..5468e93c --- /dev/null +++ b/ext/impl/psb_s_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csgetblk + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_csgetblk diff --git a/ext/impl/psb_s_ell_csgetptn.f90 b/ext/impl/psb_s_ell_csgetptn.f90 new file mode 100644 index 00000000..07463757 --- /dev/null +++ b/ext/impl/psb_s_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csgetptn + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_ell_csgetrow + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_ell_csmm + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ell_csmv + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ell_csnm1 + + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_spk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = sone + else + vt(:) = szero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_s_ell_csnm1 diff --git a/ext/impl/psb_s_ell_csnmi.f90 b/ext/impl/psb_s_ell_csnmi.f90 new file mode 100644 index 00000000..1df9bafa --- /dev/null +++ b/ext/impl/psb_s_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csnmi + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + sone + res = max(res,acc) + end do + +end function psb_s_ell_csnmi diff --git a/ext/impl/psb_s_ell_csput.f90 b/ext/impl/psb_s_ell_csput.f90 new file mode 100644 index 00000000..c0d69067 --- /dev/null +++ b/ext/impl/psb_s_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csput_a + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_s_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_s_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='s_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_s_ell_srch_upd +end subroutine psb_s_ell_csput_a diff --git a/ext/impl/psb_s_ell_cssm.f90 b/ext/impl/psb_s_ell_cssm.f90 new file mode 100644 index 00000000..ca50fa35 --- /dev/null +++ b/ext/impl/psb_s_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_cssm + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_s_ell_cssv + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_s_ell_get_diag + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = sone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = szero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_get_diag diff --git a/ext/impl/psb_s_ell_maxval.f90 b/ext/impl/psb_s_ell_maxval.f90 new file mode 100644 index 00000000..6e2635b8 --- /dev/null +++ b/ext/impl/psb_s_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_ell_maxval(a) result(res) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_maxval + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_s_ell_maxval diff --git a/ext/impl/psb_s_ell_mold.f90 b/ext/impl/psb_s_ell_mold.f90 new file mode 100644 index 00000000..4d137112 --- /dev/null +++ b/ext/impl/psb_s_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_mold(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_mold + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_mold diff --git a/ext/impl/psb_s_ell_print.f90 b/ext/impl/psb_s_ell_print.f90 new file mode 100644 index 00000000..aec15a6d --- /dev/null +++ b/ext/impl/psb_s_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_s_ell_print diff --git a/ext/impl/psb_s_ell_reallocate_nz.f90 b/ext/impl/psb_s_ell_reallocate_nz.f90 new file mode 100644 index 00000000..ff7dabda --- /dev/null +++ b/ext/impl/psb_s_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_reallocate_nz diff --git a/ext/impl/psb_s_ell_reinit.f90 b/ext/impl/psb_s_ell_reinit.f90 new file mode 100644 index 00000000..088e8398 --- /dev/null +++ b/ext/impl/psb_s_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_reinit(a,clear) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_reinit + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = szero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_reinit diff --git a/ext/impl/psb_s_ell_rowsum.f90 b/ext/impl/psb_s_ell_rowsum.f90 new file mode 100644 index 00000000..092329c9 --- /dev/null +++ b/ext/impl/psb_s_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_rowsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_rowsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_rowsum diff --git a/ext/impl/psb_s_ell_scal.f90 b/ext/impl/psb_s_ell_scal.f90 new file mode 100644 index 00000000..7f39f63d --- /dev/null +++ b/ext/impl/psb_s_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_scal + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_scal diff --git a/ext/impl/psb_s_ell_scals.f90 b/ext/impl/psb_s_ell_scals.f90 new file mode 100644 index 00000000..4bc77626 --- /dev/null +++ b/ext/impl/psb_s_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_scals(d,a,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_scals + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_scals diff --git a/ext/impl/psb_s_ell_trim.f90 b/ext/impl/psb_s_ell_trim.f90 new file mode 100644 index 00000000..758a8bb5 --- /dev/null +++ b/ext/impl/psb_s_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_trim(a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_trim + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_trim diff --git a/ext/impl/psb_s_hdia_allocate_mnnz.f90 b/ext/impl/psb_s_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..2c4e16fc --- /dev/null +++ b/ext/impl/psb_s_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hdia_allocate_mnnz diff --git a/ext/impl/psb_s_hdia_csmv.f90 b/ext/impl/psb_s_hdia_csmv.f90 new file mode 100644 index 00000000..d945f964 --- /dev/null +++ b/ext/impl/psb_s_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_csmv + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_s_inner_dia_csmv + +end subroutine psb_s_hdia_csmv diff --git a/ext/impl/psb_s_hdia_mold.f90 b/ext/impl/psb_s_hdia_mold.f90 new file mode 100644 index 00000000..a62630c0 --- /dev/null +++ b/ext/impl/psb_s_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdia_mold(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_mold + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hdia_mold diff --git a/ext/impl/psb_s_hdia_print.f90 b/ext/impl/psb_s_hdia_print.f90 new file mode 100644 index 00000000..f4b927bc --- /dev/null +++ b/ext/impl/psb_s_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_s_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + real(psb_spk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_s_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_s_hdia_print diff --git a/ext/impl/psb_s_hll_aclsum.f90 b/ext/impl/psb_s_hll_aclsum.f90 new file mode 100644 index 00000000..cf75dfb2 --- /dev/null +++ b/ext/impl/psb_s_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_aclsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_aclsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_aclsum + +end subroutine psb_s_hll_aclsum diff --git a/ext/impl/psb_s_hll_allocate_mnnz.f90 b/ext/impl/psb_s_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..549eccb4 --- /dev/null +++ b/ext/impl/psb_s_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_allocate_mnnz diff --git a/ext/impl/psb_s_hll_arwsum.f90 b/ext/impl/psb_s_hll_arwsum.f90 new file mode 100644 index 00000000..b93efb12 --- /dev/null +++ b/ext/impl/psb_s_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_arwsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_arwsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_arwsum + +end subroutine psb_s_hll_arwsum diff --git a/ext/impl/psb_s_hll_colsum.f90 b/ext/impl/psb_s_hll_colsum.f90 new file mode 100644 index 00000000..02cceac2 --- /dev/null +++ b/ext/impl/psb_s_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_colsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_colsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_colsum + +end subroutine psb_s_hll_colsum diff --git a/ext/impl/psb_s_hll_csgetblk.f90 b/ext/impl/psb_s_hll_csgetblk.f90 new file mode 100644 index 00000000..c925e3a2 --- /dev/null +++ b/ext/impl/psb_s_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csgetblk + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_csgetblk diff --git a/ext/impl/psb_s_hll_csgetptn.f90 b/ext/impl/psb_s_hll_csgetptn.f90 new file mode 100644 index 00000000..ccb1b6a1 --- /dev/null +++ b/ext/impl/psb_s_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csgetptn + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_hll_csgetrow + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_hll_csmm + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + real(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_s_hll_csmv + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_s_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == sone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == sone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == szero) then + if (beta == szero) then + do i=1,m + y(ir+i-1) = szero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == szero) then + do i=1,m + tmp = szero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = szero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_s_hll_csmv_inner + + subroutine psb_s_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == szero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_8 + + subroutine psb_s_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == szero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_24 + + subroutine psb_s_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == szero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_16 + + subroutine psb_s_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == szero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_32 + + subroutine psb_s_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == szero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_4 + +end subroutine psb_s_hll_csmv diff --git a/ext/impl/psb_s_hll_csnm1.f90 b/ext/impl/psb_s_hll_csnm1.f90 new file mode 100644 index 00000000..6e745081 --- /dev/null +++ b/ext/impl/psb_s_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csnm1 + + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_spk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = szero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = sone + else + vt = szero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_s_hll_csnm1_inner + +end function psb_s_hll_csnm1 diff --git a/ext/impl/psb_s_hll_csnmi.f90 b/ext/impl/psb_s_hll_csnmi.f90 new file mode 100644 index 00000000..3be15f9b --- /dev/null +++ b/ext/impl/psb_s_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csnmi + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = szero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = sone + else + tmp = szero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_s_hll_csnmi_inner + +end function psb_s_hll_csnmi diff --git a/ext/impl/psb_s_hll_csput.f90 b/ext/impl/psb_s_hll_csput.f90 new file mode 100644 index 00000000..b12678d4 --- /dev/null +++ b/ext/impl/psb_s_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csput_a + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_s_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='s_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_s_hll_srch_upd + +end subroutine psb_s_hll_csput_a diff --git a/ext/impl/psb_s_hll_cssm.f90 b/ext/impl/psb_s_hll_cssm.f90 new file mode 100644 index 00000000..30c77c8f --- /dev/null +++ b/ext/impl/psb_s_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_cssm + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + real(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_s_hll_cssv + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_s_hll_get_diag + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = sone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_s_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = szero + end if + end do + + end subroutine psb_s_hll_get_diag_inner + +end subroutine psb_s_hll_get_diag diff --git a/ext/impl/psb_s_hll_maxval.f90 b/ext/impl/psb_s_hll_maxval.f90 new file mode 100644 index 00000000..84625328 --- /dev/null +++ b/ext/impl/psb_s_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_maxval(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_maxval + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,sone) + +end function psb_s_hll_maxval diff --git a/ext/impl/psb_s_hll_mold.f90 b/ext/impl/psb_s_hll_mold.f90 new file mode 100644 index 00000000..eb04ccd9 --- /dev/null +++ b/ext/impl/psb_s_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_mold(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_mold + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_mold diff --git a/ext/impl/psb_s_hll_print.f90 b/ext/impl/psb_s_hll_print.f90 new file mode 100644 index 00000000..fb6bb38d --- /dev/null +++ b/ext/impl/psb_s_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_s_hll_print diff --git a/ext/impl/psb_s_hll_reallocate_nz.f90 b/ext/impl/psb_s_hll_reallocate_nz.f90 new file mode 100644 index 00000000..f7a3076f --- /dev/null +++ b/ext/impl/psb_s_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_reallocate_nz diff --git a/ext/impl/psb_s_hll_reinit.f90 b/ext/impl/psb_s_hll_reinit.f90 new file mode 100644 index 00000000..170abe08 --- /dev/null +++ b/ext/impl/psb_s_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_reinit(a,clear) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_reinit + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = szero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_reinit diff --git a/ext/impl/psb_s_hll_rowsum.f90 b/ext/impl/psb_s_hll_rowsum.f90 new file mode 100644 index 00000000..c7484698 --- /dev/null +++ b/ext/impl/psb_s_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_rowsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_rowsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = sone + else + d = szero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine s_hll_rowsum + +end subroutine psb_s_hll_rowsum diff --git a/ext/impl/psb_s_hll_scal.f90 b/ext/impl/psb_s_hll_scal.f90 new file mode 100644 index 00000000..c8f3ddd5 --- /dev/null +++ b/ext/impl/psb_s_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_scal + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: d(*) + real(psb_spk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_s_hll_scal_inner + + +end subroutine psb_s_hll_scal diff --git a/ext/impl/psb_s_hll_scals.f90 b/ext/impl/psb_s_hll_scals.f90 new file mode 100644 index 00000000..8f823a20 --- /dev/null +++ b/ext/impl/psb_s_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_scals(d,a,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_scals + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_scals diff --git a/ext/impl/psb_s_mv_dia_from_coo.f90 b/ext/impl/psb_s_mv_dia_from_coo.f90 new file mode 100644 index 00000000..d7dcfc1b --- /dev/null +++ b/ext/impl/psb_s_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_mv_dia_from_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_dia_from_coo diff --git a/ext/impl/psb_s_mv_dia_to_coo.f90 b/ext/impl/psb_s_mv_dia_to_coo.f90 new file mode 100644 index 00000000..c0944b21 --- /dev/null +++ b/ext/impl/psb_s_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_mv_dia_to_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_dia_to_coo diff --git a/ext/impl/psb_s_mv_ell_from_coo.f90 b/ext/impl/psb_s_mv_ell_from_coo.f90 new file mode 100644 index 00000000..90965e41 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_from_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_mv_ell_from_coo diff --git a/ext/impl/psb_s_mv_ell_from_fmt.f90 b/ext/impl/psb_s_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..03ebf8e4 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_from_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_ell_from_fmt diff --git a/ext/impl/psb_s_mv_ell_to_coo.f90 b/ext/impl/psb_s_mv_ell_to_coo.f90 new file mode 100644 index 00000000..151cbeff --- /dev/null +++ b/ext/impl/psb_s_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_to_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_ell_to_coo diff --git a/ext/impl/psb_s_mv_ell_to_fmt.f90 b/ext/impl/psb_s_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..66f33508 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_to_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_s_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_ell_to_fmt diff --git a/ext/impl/psb_s_mv_hdia_from_coo.f90 b/ext/impl/psb_s_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..88765079 --- /dev/null +++ b/ext/impl/psb_s_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_mv_hdia_from_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hdia_from_coo diff --git a/ext/impl/psb_s_mv_hdia_to_coo.f90 b/ext/impl/psb_s_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..56399b2a --- /dev/null +++ b/ext/impl/psb_s_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_mv_hdia_to_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_hdia_to_coo diff --git a/ext/impl/psb_s_mv_hll_from_coo.f90 b/ext/impl/psb_s_mv_hll_from_coo.f90 new file mode 100644 index 00000000..c8e46086 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_from_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hll_from_coo diff --git a/ext/impl/psb_s_mv_hll_from_fmt.f90 b/ext/impl/psb_s_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..19bda0a6 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_from_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_s_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_hll_from_fmt diff --git a/ext/impl/psb_s_mv_hll_to_coo.f90 b/ext/impl/psb_s_mv_hll_to_coo.f90 new file mode 100644 index 00000000..d36286a5 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_to_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_hll_to_coo diff --git a/ext/impl/psb_s_mv_hll_to_fmt.f90 b/ext/impl/psb_s_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..17618f69 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_to_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_s_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_hll_to_fmt diff --git a/ext/impl/psb_z_cp_dia_from_coo.f90 b/ext/impl/psb_z_cp_dia_from_coo.f90 new file mode 100644 index 00000000..e87bfb34 --- /dev/null +++ b/ext/impl/psb_z_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_cp_dia_from_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_dia_from_coo diff --git a/ext/impl/psb_z_cp_dia_to_coo.f90 b/ext/impl/psb_z_cp_dia_to_coo.f90 new file mode 100644 index 00000000..26fac30b --- /dev/null +++ b/ext/impl/psb_z_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_cp_dia_to_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + call psi_z_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_z_cp_dia_to_coo diff --git a/ext/impl/psb_z_cp_ell_from_coo.f90 b/ext/impl/psb_z_cp_ell_from_coo.f90 new file mode 100644 index 00000000..7559621d --- /dev/null +++ b/ext/impl/psb_z_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_z_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_z_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_z_cp_ell_from_coo diff --git a/ext/impl/psb_z_cp_ell_from_fmt.f90 b/ext/impl/psb_z_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..6d63b64e --- /dev/null +++ b/ext/impl/psb_z_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_from_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_z_cp_ell_from_fmt diff --git a/ext/impl/psb_z_cp_ell_to_coo.f90 b/ext/impl/psb_z_cp_ell_to_coo.f90 new file mode 100644 index 00000000..38a1696b --- /dev/null +++ b/ext/impl/psb_z_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_to_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_z_cp_ell_to_coo diff --git a/ext/impl/psb_z_cp_ell_to_fmt.f90 b/ext/impl/psb_z_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..7fb64a90 --- /dev/null +++ b/ext/impl/psb_z_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_to_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cp_ell_to_fmt diff --git a/ext/impl/psb_z_cp_hdia_from_coo.f90 b/ext/impl/psb_z_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..ed77914e --- /dev/null +++ b/ext/impl/psb_z_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_cp_hdia_from_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_z_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_z_cp_hdia_from_coo diff --git a/ext/impl/psb_z_cp_hdia_to_coo.f90 b/ext/impl/psb_z_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..c0544ff0 --- /dev/null +++ b/ext/impl/psb_z_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_z_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_z_cp_hdia_to_coo diff --git a/ext/impl/psb_z_cp_hll_from_coo.f90 b/ext/impl/psb_z_cp_hll_from_coo.f90 new file mode 100644 index 00000000..15a8d1c2 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_from_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hll_from_coo diff --git a/ext/impl/psb_z_cp_hll_from_fmt.f90 b/ext/impl/psb_z_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..3bdb2271 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_from_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_z_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_z_cp_hll_from_fmt diff --git a/ext/impl/psb_z_cp_hll_to_coo.f90 b/ext/impl/psb_z_cp_hll_to_coo.f90 new file mode 100644 index 00000000..409fe7b5 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_to_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + complex(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_z_cp_hll_to_coo diff --git a/ext/impl/psb_z_cp_hll_to_fmt.f90 b/ext/impl/psb_z_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..b0417c92 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_to_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_z_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cp_hll_to_fmt diff --git a/ext/impl/psb_z_dia_aclsum.f90 b/ext/impl/psb_z_dia_aclsum.f90 new file mode 100644 index 00000000..5aed7ff0 --- /dev/null +++ b/ext/impl/psb_z_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_aclsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_aclsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_aclsum diff --git a/ext/impl/psb_z_dia_allocate_mnnz.f90 b/ext/impl/psb_z_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..e9c614f6 --- /dev/null +++ b/ext/impl/psb_z_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_allocate_mnnz diff --git a/ext/impl/psb_z_dia_arwsum.f90 b/ext/impl/psb_z_dia_arwsum.f90 new file mode 100644 index 00000000..42805349 --- /dev/null +++ b/ext/impl/psb_z_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_arwsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_arwsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_arwsum diff --git a/ext/impl/psb_z_dia_colsum.f90 b/ext/impl/psb_z_dia_colsum.f90 new file mode 100644 index 00000000..69919736 --- /dev/null +++ b/ext/impl/psb_z_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_colsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_colsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_colsum diff --git a/ext/impl/psb_z_dia_csgetptn.f90 b/ext/impl/psb_z_dia_csgetptn.f90 new file mode 100644 index 00000000..d63304f8 --- /dev/null +++ b/ext/impl/psb_z_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csgetptn + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_z_dia_csgetptn diff --git a/ext/impl/psb_z_dia_csgetrow.f90 b/ext/impl/psb_z_dia_csgetrow.f90 new file mode 100644 index 00000000..6571264e --- /dev/null +++ b/ext/impl/psb_z_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csgetrow + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_z_dia_csgetrow diff --git a/ext/impl/psb_z_dia_csmm.f90 b/ext/impl/psb_z_dia_csmm.f90 new file mode 100644 index 00000000..cbebd10e --- /dev/null +++ b/ext/impl/psb_z_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csmm + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_z_dia_csmm_inner + +end subroutine psb_z_dia_csmm diff --git a/ext/impl/psb_z_dia_csmv.f90 b/ext/impl/psb_z_dia_csmv.f90 new file mode 100644 index 00000000..9d1f5a2a --- /dev/null +++ b/ext/impl/psb_z_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csmv + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_z_dia_csmv_inner + +end subroutine psb_z_dia_csmv diff --git a/ext/impl/psb_z_dia_get_diag.f90 b/ext/impl/psb_z_dia_get_diag.f90 new file mode 100644 index 00000000..9b403923 --- /dev/null +++ b/ext/impl/psb_z_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_get_diag + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = zone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_get_diag diff --git a/ext/impl/psb_z_dia_maxval.f90 b/ext/impl/psb_z_dia_maxval.f90 new file mode 100644 index 00000000..d3518c17 --- /dev/null +++ b/ext/impl/psb_z_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_z_dia_maxval(a) result(res) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_maxval + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_z_dia_maxval diff --git a/ext/impl/psb_z_dia_mold.f90 b/ext/impl/psb_z_dia_mold.f90 new file mode 100644 index 00000000..421af284 --- /dev/null +++ b/ext/impl/psb_z_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_mold(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_mold + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_mold diff --git a/ext/impl/psb_z_dia_print.f90 b/ext/impl/psb_z_dia_print.f90 new file mode 100644 index 00000000..1f7853ef --- /dev/null +++ b/ext/impl/psb_z_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dia_print' + logical, parameter :: debug=.false. + + class(psb_z_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_z_dia_print diff --git a/ext/impl/psb_z_dia_reallocate_nz.f90 b/ext/impl/psb_z_dia_reallocate_nz.f90 new file mode 100644 index 00000000..2d204a64 --- /dev/null +++ b/ext/impl/psb_z_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_reallocate_nz diff --git a/ext/impl/psb_z_dia_reinit.f90 b/ext/impl/psb_z_dia_reinit.f90 new file mode 100644 index 00000000..0f58a9ed --- /dev/null +++ b/ext/impl/psb_z_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_reinit(a,clear) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_reinit + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = zzero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_reinit diff --git a/ext/impl/psb_z_dia_rowsum.f90 b/ext/impl/psb_z_dia_rowsum.f90 new file mode 100644 index 00000000..6918ada1 --- /dev/null +++ b/ext/impl/psb_z_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_rowsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_rowsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_rowsum diff --git a/ext/impl/psb_z_dia_scal.f90 b/ext/impl/psb_z_dia_scal.f90 new file mode 100644 index 00000000..65957e60 --- /dev/null +++ b/ext/impl/psb_z_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_scal + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_scal diff --git a/ext/impl/psb_z_dia_scals.f90 b/ext/impl/psb_z_dia_scals.f90 new file mode 100644 index 00000000..895763d9 --- /dev/null +++ b/ext/impl/psb_z_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_scals(d,a,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_scals + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_scals diff --git a/ext/impl/psb_z_dns_mat_impl.f90 b/ext/impl/psb_z_dns_mat_impl.f90 new file mode 100644 index 00000000..b249a3f2 --- /dev/null +++ b/ext/impl/psb_z_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_z_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_z_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csmv + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='z_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call zgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_csmv + + +!> Function csmm: +!! \memberof psb_z_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_z_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csmm + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='z_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call zgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_z_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_z_dns_csnmi(a) result(res) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csnmi + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_dpk_) :: acc + + res = dzero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_z_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_z_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_z_dns_get_diag(a,d,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_get_diag + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = zzero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_z_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_z_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_z_dns_sparse_mat +!! \brief Allocate a class(psb_z_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_z_dns_mold(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_mold + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_z_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_z_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_z_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = zzero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_z_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_z_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csgetrow + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_z_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_z_dns_trim(a) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_trim + implicit none + class(psb_z_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Copy and convert from psb_z_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_z_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_cp_dns_from_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = zzero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = zzero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Copy and convert to psb_z_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_z_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_cp_dns_to_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= zzero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_z_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Convert to psb_z_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_z_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_mv_dns_to_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_z_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Convert from psb_z_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_z_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_mv_dns_from_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_mv_dns_from_coo + diff --git a/ext/impl/psb_z_ell_aclsum.f90 b/ext/impl/psb_z_ell_aclsum.f90 new file mode 100644 index 00000000..b03121fd --- /dev/null +++ b/ext/impl/psb_z_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_aclsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_aclsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_aclsum diff --git a/ext/impl/psb_z_ell_allocate_mnnz.f90 b/ext/impl/psb_z_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..f7f7f67e --- /dev/null +++ b/ext/impl/psb_z_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_allocate_mnnz diff --git a/ext/impl/psb_z_ell_arwsum.f90 b/ext/impl/psb_z_ell_arwsum.f90 new file mode 100644 index 00000000..9d4b4949 --- /dev/null +++ b/ext/impl/psb_z_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_arwsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_arwsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_arwsum diff --git a/ext/impl/psb_z_ell_colsum.f90 b/ext/impl/psb_z_ell_colsum.f90 new file mode 100644 index 00000000..e9c2bc0b --- /dev/null +++ b/ext/impl/psb_z_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_colsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_colsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_colsum diff --git a/ext/impl/psb_z_ell_csgetblk.f90 b/ext/impl/psb_z_ell_csgetblk.f90 new file mode 100644 index 00000000..d2e56e1d --- /dev/null +++ b/ext/impl/psb_z_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csgetblk + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_csgetblk diff --git a/ext/impl/psb_z_ell_csgetptn.f90 b/ext/impl/psb_z_ell_csgetptn.f90 new file mode 100644 index 00000000..97ed7d90 --- /dev/null +++ b/ext/impl/psb_z_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csgetptn + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_ell_csgetrow + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_ell_csmm + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ell_csmv + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ell_csnm1 + + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = done + else + vt(:) = dzero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_z_ell_csnm1 diff --git a/ext/impl/psb_z_ell_csnmi.f90 b/ext/impl/psb_z_ell_csnmi.f90 new file mode 100644 index 00000000..ecbfb1e1 --- /dev/null +++ b/ext/impl/psb_z_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csnmi + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + done + res = max(res,acc) + end do + +end function psb_z_ell_csnmi diff --git a/ext/impl/psb_z_ell_csput.f90 b/ext/impl/psb_z_ell_csput.f90 new file mode 100644 index 00000000..cf45070f --- /dev/null +++ b/ext/impl/psb_z_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csput_a + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_z_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_z_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='z_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_z_ell_srch_upd +end subroutine psb_z_ell_csput_a diff --git a/ext/impl/psb_z_ell_cssm.f90 b/ext/impl/psb_z_ell_cssm.f90 new file mode 100644 index 00000000..2e26c656 --- /dev/null +++ b/ext/impl/psb_z_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_cssm + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_z_ell_cssv + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_z_ell_get_diag + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = zone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = zzero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_get_diag diff --git a/ext/impl/psb_z_ell_maxval.f90 b/ext/impl/psb_z_ell_maxval.f90 new file mode 100644 index 00000000..9596f124 --- /dev/null +++ b/ext/impl/psb_z_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_ell_maxval(a) result(res) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_maxval + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_z_ell_maxval diff --git a/ext/impl/psb_z_ell_mold.f90 b/ext/impl/psb_z_ell_mold.f90 new file mode 100644 index 00000000..3e1db6cc --- /dev/null +++ b/ext/impl/psb_z_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_mold(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_mold + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_mold diff --git a/ext/impl/psb_z_ell_print.f90 b/ext/impl/psb_z_ell_print.f90 new file mode 100644 index 00000000..502abb94 --- /dev/null +++ b/ext/impl/psb_z_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_z_ell_print diff --git a/ext/impl/psb_z_ell_reallocate_nz.f90 b/ext/impl/psb_z_ell_reallocate_nz.f90 new file mode 100644 index 00000000..58237508 --- /dev/null +++ b/ext/impl/psb_z_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_reallocate_nz diff --git a/ext/impl/psb_z_ell_reinit.f90 b/ext/impl/psb_z_ell_reinit.f90 new file mode 100644 index 00000000..d73620d8 --- /dev/null +++ b/ext/impl/psb_z_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_reinit(a,clear) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_reinit + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = zzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_reinit diff --git a/ext/impl/psb_z_ell_rowsum.f90 b/ext/impl/psb_z_ell_rowsum.f90 new file mode 100644 index 00000000..60eb70af --- /dev/null +++ b/ext/impl/psb_z_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_rowsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_rowsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = zone + else + d(i) = zzero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_rowsum diff --git a/ext/impl/psb_z_ell_scal.f90 b/ext/impl/psb_z_ell_scal.f90 new file mode 100644 index 00000000..7f2f8944 --- /dev/null +++ b/ext/impl/psb_z_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_scal + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_scal diff --git a/ext/impl/psb_z_ell_scals.f90 b/ext/impl/psb_z_ell_scals.f90 new file mode 100644 index 00000000..4086d8cc --- /dev/null +++ b/ext/impl/psb_z_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_scals(d,a,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_scals + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_scals diff --git a/ext/impl/psb_z_ell_trim.f90 b/ext/impl/psb_z_ell_trim.f90 new file mode 100644 index 00000000..7cc2ed65 --- /dev/null +++ b/ext/impl/psb_z_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_trim(a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_trim + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_trim diff --git a/ext/impl/psb_z_hdia_allocate_mnnz.f90 b/ext/impl/psb_z_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..abed0c58 --- /dev/null +++ b/ext/impl/psb_z_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hdia_allocate_mnnz diff --git a/ext/impl/psb_z_hdia_csmv.f90 b/ext/impl/psb_z_hdia_csmv.f90 new file mode 100644 index 00000000..73d11da6 --- /dev/null +++ b/ext/impl/psb_z_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_csmv + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_z_inner_dia_csmv + +end subroutine psb_z_hdia_csmv diff --git a/ext/impl/psb_z_hdia_mold.f90 b/ext/impl/psb_z_hdia_mold.f90 new file mode 100644 index 00000000..d91bdb35 --- /dev/null +++ b/ext/impl/psb_z_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdia_mold(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_mold + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hdia_mold diff --git a/ext/impl/psb_z_hdia_print.f90 b/ext/impl/psb_z_hdia_print.f90 new file mode 100644 index 00000000..46f7769d --- /dev/null +++ b/ext/impl/psb_z_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_z_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + complex(psb_dpk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_z_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_z_hdia_print diff --git a/ext/impl/psb_z_hll_aclsum.f90 b/ext/impl/psb_z_hll_aclsum.f90 new file mode 100644 index 00000000..e4add299 --- /dev/null +++ b/ext/impl/psb_z_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_aclsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_aclsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_aclsum + +end subroutine psb_z_hll_aclsum diff --git a/ext/impl/psb_z_hll_allocate_mnnz.f90 b/ext/impl/psb_z_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..6ba9d7f1 --- /dev/null +++ b/ext/impl/psb_z_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_allocate_mnnz diff --git a/ext/impl/psb_z_hll_arwsum.f90 b/ext/impl/psb_z_hll_arwsum.f90 new file mode 100644 index 00000000..a6e020fd --- /dev/null +++ b/ext/impl/psb_z_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_arwsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_arwsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_arwsum + +end subroutine psb_z_hll_arwsum diff --git a/ext/impl/psb_z_hll_colsum.f90 b/ext/impl/psb_z_hll_colsum.f90 new file mode 100644 index 00000000..196a694e --- /dev/null +++ b/ext/impl/psb_z_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_colsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_colsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_colsum + +end subroutine psb_z_hll_colsum diff --git a/ext/impl/psb_z_hll_csgetblk.f90 b/ext/impl/psb_z_hll_csgetblk.f90 new file mode 100644 index 00000000..0cdf1fef --- /dev/null +++ b/ext/impl/psb_z_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csgetblk + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_csgetblk diff --git a/ext/impl/psb_z_hll_csgetptn.f90 b/ext/impl/psb_z_hll_csgetptn.f90 new file mode 100644 index 00000000..9d4c6714 --- /dev/null +++ b/ext/impl/psb_z_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csgetptn + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_hll_csgetrow + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_hll_csmm + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + complex(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_z_hll_csmv + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_z_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == zone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == zone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*conjg(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == zzero) then + if (beta == zzero) then + do i=1,m + y(ir+i-1) = zzero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == zzero) then + do i=1,m + tmp = zzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = zzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_z_hll_csmv_inner + + subroutine psb_z_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_8 + + subroutine psb_z_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_24 + + subroutine psb_z_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_16 + + subroutine psb_z_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_32 + + subroutine psb_z_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_4 + +end subroutine psb_z_hll_csmv diff --git a/ext/impl/psb_z_hll_csnm1.f90 b/ext/impl/psb_z_hll_csnm1.f90 new file mode 100644 index 00000000..eb5c5b6b --- /dev/null +++ b/ext/impl/psb_z_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csnm1 + + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_dpk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = dzero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = done + else + vt = dzero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_z_hll_csnm1_inner + +end function psb_z_hll_csnm1 diff --git a/ext/impl/psb_z_hll_csnmi.f90 b/ext/impl/psb_z_hll_csnmi.f90 new file mode 100644 index 00000000..6243e5cf --- /dev/null +++ b/ext/impl/psb_z_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csnmi + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = dzero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = done + else + tmp = dzero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_z_hll_csnmi_inner + +end function psb_z_hll_csnmi diff --git a/ext/impl/psb_z_hll_csput.f90 b/ext/impl/psb_z_hll_csput.f90 new file mode 100644 index 00000000..e47664c7 --- /dev/null +++ b/ext/impl/psb_z_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csput_a + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_z_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='z_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_z_hll_srch_upd + +end subroutine psb_z_hll_csput_a diff --git a/ext/impl/psb_z_hll_cssm.f90 b/ext/impl/psb_z_hll_cssm.f90 new file mode 100644 index 00000000..ba1aa150 --- /dev/null +++ b/ext/impl/psb_z_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_cssm + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + complex(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_z_hll_cssv + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_z_hll_get_diag + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = zone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_z_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = zzero + end if + end do + + end subroutine psb_z_hll_get_diag_inner + +end subroutine psb_z_hll_get_diag diff --git a/ext/impl/psb_z_hll_maxval.f90 b/ext/impl/psb_z_hll_maxval.f90 new file mode 100644 index 00000000..22258c3a --- /dev/null +++ b/ext/impl/psb_z_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_maxval(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_maxval + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,done) + +end function psb_z_hll_maxval diff --git a/ext/impl/psb_z_hll_mold.f90 b/ext/impl/psb_z_hll_mold.f90 new file mode 100644 index 00000000..e108e9ce --- /dev/null +++ b/ext/impl/psb_z_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_mold(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_mold + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_mold diff --git a/ext/impl/psb_z_hll_print.f90 b/ext/impl/psb_z_hll_print.f90 new file mode 100644 index 00000000..43882264 --- /dev/null +++ b/ext/impl/psb_z_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_z_hll_print diff --git a/ext/impl/psb_z_hll_reallocate_nz.f90 b/ext/impl/psb_z_hll_reallocate_nz.f90 new file mode 100644 index 00000000..23432f9f --- /dev/null +++ b/ext/impl/psb_z_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_reallocate_nz diff --git a/ext/impl/psb_z_hll_reinit.f90 b/ext/impl/psb_z_hll_reinit.f90 new file mode 100644 index 00000000..b6851c61 --- /dev/null +++ b/ext/impl/psb_z_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_reinit(a,clear) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_reinit + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = zzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_reinit diff --git a/ext/impl/psb_z_hll_rowsum.f90 b/ext/impl/psb_z_hll_rowsum.f90 new file mode 100644 index 00000000..027c5b22 --- /dev/null +++ b/ext/impl/psb_z_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_rowsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_rowsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine z_hll_rowsum + +end subroutine psb_z_hll_rowsum diff --git a/ext/impl/psb_z_hll_scal.f90 b/ext/impl/psb_z_hll_scal.f90 new file mode 100644 index 00000000..a11d0da8 --- /dev/null +++ b/ext/impl/psb_z_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_scal + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: d(*) + complex(psb_dpk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_z_hll_scal_inner + + +end subroutine psb_z_hll_scal diff --git a/ext/impl/psb_z_hll_scals.f90 b/ext/impl/psb_z_hll_scals.f90 new file mode 100644 index 00000000..432f11e6 --- /dev/null +++ b/ext/impl/psb_z_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_scals(d,a,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_scals + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_scals diff --git a/ext/impl/psb_z_mv_dia_from_coo.f90 b/ext/impl/psb_z_mv_dia_from_coo.f90 new file mode 100644 index 00000000..29e27dfc --- /dev/null +++ b/ext/impl/psb_z_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_mv_dia_from_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_dia_from_coo diff --git a/ext/impl/psb_z_mv_dia_to_coo.f90 b/ext/impl/psb_z_mv_dia_to_coo.f90 new file mode 100644 index 00000000..1679c9e0 --- /dev/null +++ b/ext/impl/psb_z_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_mv_dia_to_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_dia_to_coo diff --git a/ext/impl/psb_z_mv_ell_from_coo.f90 b/ext/impl/psb_z_mv_ell_from_coo.f90 new file mode 100644 index 00000000..de39604e --- /dev/null +++ b/ext/impl/psb_z_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_from_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_mv_ell_from_coo diff --git a/ext/impl/psb_z_mv_ell_from_fmt.f90 b/ext/impl/psb_z_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..a2c7c190 --- /dev/null +++ b/ext/impl/psb_z_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_from_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_ell_from_fmt diff --git a/ext/impl/psb_z_mv_ell_to_coo.f90 b/ext/impl/psb_z_mv_ell_to_coo.f90 new file mode 100644 index 00000000..3f8afb0a --- /dev/null +++ b/ext/impl/psb_z_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_to_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_ell_to_coo diff --git a/ext/impl/psb_z_mv_ell_to_fmt.f90 b/ext/impl/psb_z_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..d34ae80e --- /dev/null +++ b/ext/impl/psb_z_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_to_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_z_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_ell_to_fmt diff --git a/ext/impl/psb_z_mv_hdia_from_coo.f90 b/ext/impl/psb_z_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..b9593f34 --- /dev/null +++ b/ext/impl/psb_z_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_mv_hdia_from_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hdia_from_coo diff --git a/ext/impl/psb_z_mv_hdia_to_coo.f90 b/ext/impl/psb_z_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..f4c8df55 --- /dev/null +++ b/ext/impl/psb_z_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_mv_hdia_to_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_hdia_to_coo diff --git a/ext/impl/psb_z_mv_hll_from_coo.f90 b/ext/impl/psb_z_mv_hll_from_coo.f90 new file mode 100644 index 00000000..abe988b3 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_from_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hll_from_coo diff --git a/ext/impl/psb_z_mv_hll_from_fmt.f90 b/ext/impl/psb_z_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..81626aba --- /dev/null +++ b/ext/impl/psb_z_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_from_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_z_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_hll_from_fmt diff --git a/ext/impl/psb_z_mv_hll_to_coo.f90 b/ext/impl/psb_z_mv_hll_to_coo.f90 new file mode 100644 index 00000000..af033004 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_to_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_hll_to_coo diff --git a/ext/impl/psb_z_mv_hll_to_fmt.f90 b/ext/impl/psb_z_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..a2fd7027 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_to_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_z_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_hll_to_fmt diff --git a/ext/impl/psi_c_convert_dia_from_coo.f90 b/ext/impl/psi_c_convert_dia_from_coo.f90 new file mode 100644 index 00000000..29565748 --- /dev/null +++ b/ext/impl/psi_c_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psi_c_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_c_convert_dia_from_coo diff --git a/ext/impl/psi_c_convert_ell_from_coo.f90 b/ext/impl/psi_c_convert_ell_from_coo.f90 new file mode 100644 index 00000000..b4e0c7e4 --- /dev/null +++ b/ext/impl/psi_c_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psi_c_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_c_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_c_convert_ell_from_coo + diff --git a/ext/impl/psi_c_convert_hll_from_coo.f90 b/ext/impl/psi_c_convert_hll_from_coo.f90 new file mode 100644 index 00000000..2ebb86a6 --- /dev/null +++ b/ext/impl/psi_c_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psi_c_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = czero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_c_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_c_convert_hll_from_coo diff --git a/ext/impl/psi_c_xtr_coo_from_dia.f90 b/ext/impl/psi_c_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..eab82a11 --- /dev/null +++ b/ext/impl/psi_c_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_c_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + complex(psb_spk_), intent(inout) :: val(:) + complex(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_c_xtr_coo_from_dia + diff --git a/ext/impl/psi_c_xtr_dia_from_coo.f90 b/ext/impl/psi_c_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..f72a03df --- /dev/null +++ b/ext/impl/psi_c_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_c_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = czero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_c_xtr_dia_from_coo diff --git a/ext/impl/psi_c_xtr_ell_from_coo.f90 b/ext/impl/psi_c_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..706e6c1f --- /dev/null +++ b/ext/impl/psi_c_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = czero + end do + end do +end subroutine psi_c_xtr_ell_from_coo + diff --git a/ext/impl/psi_d_convert_dia_from_coo.f90 b/ext/impl/psi_d_convert_dia_from_coo.f90 new file mode 100644 index 00000000..5f821967 --- /dev/null +++ b/ext/impl/psi_d_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psi_d_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_d_convert_dia_from_coo diff --git a/ext/impl/psi_d_convert_ell_from_coo.f90 b/ext/impl/psi_d_convert_ell_from_coo.f90 new file mode 100644 index 00000000..51471c19 --- /dev/null +++ b/ext/impl/psi_d_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psi_d_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_d_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_d_convert_ell_from_coo + diff --git a/ext/impl/psi_d_convert_hll_from_coo.f90 b/ext/impl/psi_d_convert_hll_from_coo.f90 new file mode 100644 index 00000000..cb07e52c --- /dev/null +++ b/ext/impl/psi_d_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psi_d_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = dzero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_d_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_d_convert_hll_from_coo diff --git a/ext/impl/psi_d_xtr_coo_from_dia.f90 b/ext/impl/psi_d_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..5fc98b82 --- /dev/null +++ b/ext/impl/psi_d_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_d_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + real(psb_dpk_), intent(inout) :: val(:) + real(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_d_xtr_coo_from_dia + diff --git a/ext/impl/psi_d_xtr_dia_from_coo.f90 b/ext/impl/psi_d_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..cd95b64e --- /dev/null +++ b/ext/impl/psi_d_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_d_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = dzero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_d_xtr_dia_from_coo diff --git a/ext/impl/psi_d_xtr_ell_from_coo.f90 b/ext/impl/psi_d_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..ec520797 --- /dev/null +++ b/ext/impl/psi_d_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = dzero + end do + end do +end subroutine psi_d_xtr_ell_from_coo + diff --git a/ext/impl/psi_s_convert_dia_from_coo.f90 b/ext/impl/psi_s_convert_dia_from_coo.f90 new file mode 100644 index 00000000..d2f58778 --- /dev/null +++ b/ext/impl/psi_s_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psi_s_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_s_convert_dia_from_coo diff --git a/ext/impl/psi_s_convert_ell_from_coo.f90 b/ext/impl/psi_s_convert_ell_from_coo.f90 new file mode 100644 index 00000000..ecdd9b1e --- /dev/null +++ b/ext/impl/psi_s_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psi_s_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_s_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_s_convert_ell_from_coo + diff --git a/ext/impl/psi_s_convert_hll_from_coo.f90 b/ext/impl/psi_s_convert_hll_from_coo.f90 new file mode 100644 index 00000000..dcf6c4e2 --- /dev/null +++ b/ext/impl/psi_s_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psi_s_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = szero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_s_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_s_convert_hll_from_coo diff --git a/ext/impl/psi_s_xtr_coo_from_dia.f90 b/ext/impl/psi_s_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..3745365b --- /dev/null +++ b/ext/impl/psi_s_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_s_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + real(psb_spk_), intent(inout) :: val(:) + real(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_s_xtr_coo_from_dia + diff --git a/ext/impl/psi_s_xtr_dia_from_coo.f90 b/ext/impl/psi_s_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..a8ee7c4b --- /dev/null +++ b/ext/impl/psi_s_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_s_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + real(psb_spk_), intent(in) :: val(:) + real(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = szero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_s_xtr_dia_from_coo diff --git a/ext/impl/psi_s_xtr_ell_from_coo.f90 b/ext/impl/psi_s_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..0bac2ec0 --- /dev/null +++ b/ext/impl/psi_s_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = szero + end do + end do +end subroutine psi_s_xtr_ell_from_coo + diff --git a/ext/impl/psi_z_convert_dia_from_coo.f90 b/ext/impl/psi_z_convert_dia_from_coo.f90 new file mode 100644 index 00000000..ddc9d2fd --- /dev/null +++ b/ext/impl/psi_z_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psi_z_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_z_convert_dia_from_coo diff --git a/ext/impl/psi_z_convert_ell_from_coo.f90 b/ext/impl/psi_z_convert_ell_from_coo.f90 new file mode 100644 index 00000000..3d37c11f --- /dev/null +++ b/ext/impl/psi_z_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psi_z_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_z_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_z_convert_ell_from_coo + diff --git a/ext/impl/psi_z_convert_hll_from_coo.f90 b/ext/impl/psi_z_convert_hll_from_coo.f90 new file mode 100644 index 00000000..bc9fdde1 --- /dev/null +++ b/ext/impl/psi_z_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psi_z_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = zzero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_z_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_z_convert_hll_from_coo diff --git a/ext/impl/psi_z_xtr_coo_from_dia.f90 b/ext/impl/psi_z_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..70d0938f --- /dev/null +++ b/ext/impl/psi_z_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_z_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + complex(psb_dpk_), intent(inout) :: val(:) + complex(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_z_xtr_coo_from_dia + diff --git a/ext/impl/psi_z_xtr_dia_from_coo.f90 b/ext/impl/psi_z_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..6b2542c6 --- /dev/null +++ b/ext/impl/psi_z_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_z_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + complex(psb_dpk_), intent(in) :: val(:) + complex(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = zzero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_z_xtr_dia_from_coo diff --git a/ext/impl/psi_z_xtr_ell_from_coo.f90 b/ext/impl/psi_z_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..7133f2ae --- /dev/null +++ b/ext/impl/psi_z_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = zzero + end do + end do +end subroutine psi_z_xtr_ell_from_coo + diff --git a/ext/psb_c_dia_mat_mod.f90 b/ext/psb_c_dia_mat_mod.f90 new file mode 100644 index 00000000..8311487b --- /dev/null +++ b/ext/psb_c_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_dia_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + complex(psb_spk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => c_dia_get_size + procedure, pass(a) :: get_nzeros => c_dia_get_nzeros + procedure, nopass :: get_fmt => c_dia_get_fmt + procedure, pass(a) :: sizeof => c_dia_sizeof + procedure, pass(a) :: csmm => psb_c_dia_csmm + procedure, pass(a) :: csmv => psb_c_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_c_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_c_dia_cssv + procedure, pass(a) :: scals => psb_c_dia_scals + procedure, pass(a) :: scalv => psb_c_dia_scal + procedure, pass(a) :: maxval => psb_c_dia_maxval + procedure, pass(a) :: rowsum => psb_c_dia_rowsum + procedure, pass(a) :: arwsum => psb_c_dia_arwsum + procedure, pass(a) :: colsum => psb_c_dia_colsum + procedure, pass(a) :: aclsum => psb_c_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_c_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_c_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_c_dia_csput_a + procedure, pass(a) :: get_diag => psb_c_dia_get_diag + procedure, pass(a) :: csgetptn => psb_c_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_c_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => c_dia_get_nz_row + procedure, pass(a) :: reinit => psb_c_dia_reinit + ! procedure, pass(a) :: trim => psb_c_dia_trim + procedure, pass(a) :: print => psb_c_dia_print + procedure, pass(a) :: free => c_dia_free + procedure, pass(a) :: mold => psb_c_dia_mold + + end type psb_c_dia_sparse_mat + + private :: c_dia_get_nzeros, c_dia_free, c_dia_get_fmt, & + & c_dia_sizeof !, c_dia_get_size, c_dia_get_nz_row + + interface + subroutine psb_c_dia_reallocate_nz(nz,a) + import :: psb_c_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dia_sparse_mat), intent(inout) :: a + end subroutine psb_c_dia_reallocate_nz + end interface + + interface + subroutine psb_c_dia_reinit(a,clear) + import :: psb_c_dia_sparse_mat + class(psb_c_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_dia_reinit + end interface + + interface + subroutine psb_c_dia_trim(a) + import :: psb_c_dia_sparse_mat + class(psb_c_dia_sparse_mat), intent(inout) :: a + end subroutine psb_c_dia_trim + end interface + + interface + subroutine psb_c_dia_mold(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_mold + end interface + + interface + subroutine psb_c_dia_allocate_mnnz(m,n,a,nz) + import :: psb_c_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_dia_allocate_mnnz + end interface + + interface + subroutine psb_c_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_dia_print + end interface + + interface + subroutine psb_c_cp_dia_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_dia_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_to_coo + end interface + + interface + subroutine psb_c_cp_dia_from_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_from_coo + end interface + + interface + subroutine psb_c_cp_dia_to_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_to_fmt + end interface + + interface + subroutine psb_c_cp_dia_from_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_from_fmt + end interface + + interface + subroutine psb_c_mv_dia_to_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_to_coo + end interface + + interface + subroutine psb_c_mv_dia_from_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_from_coo + end interface + + interface + subroutine psb_c_mv_dia_to_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_to_fmt + end interface + + interface + subroutine psb_c_mv_dia_from_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_from_fmt + end interface + + interface + subroutine psb_c_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_csput_a + end interface + + interface + subroutine psb_c_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_dia_csgetptn + end interface + + interface + subroutine psb_c_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_dia_csgetrow + end interface + + interface + subroutine psb_c_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_dia_csgetblk + end interface + + interface + subroutine psb_c_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_cssv + subroutine psb_c_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_cssm + end interface + + interface + subroutine psb_c_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_csmv + subroutine psb_c_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_csmm + end interface + + + interface + function psb_c_dia_maxval(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_maxval + end interface + + interface + function psb_c_dia_csnmi(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_csnmi + end interface + + interface + function psb_c_dia_csnm1(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_csnm1 + end interface + + interface + subroutine psb_c_dia_rowsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_rowsum + end interface + + interface + subroutine psb_c_dia_arwsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_arwsum + end interface + + interface + subroutine psb_c_dia_colsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_colsum + end interface + + interface + subroutine psb_c_dia_aclsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_aclsum + end interface + + interface + subroutine psb_c_dia_get_diag(a,d,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_get_diag + end interface + + interface + subroutine psb_c_dia_scal(d,a,info,side) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_dia_scal + end interface + + interface + subroutine psb_c_dia_scals(d,a,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_c_convert_dia_from_coo(a,tmp,info) + import :: psb_c_dia_sparse_mat, psb_ipk_, psb_c_coo_sparse_mat + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_c_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_dia_sizeof(a) result(res) + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function c_dia_sizeof + + function c_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function c_dia_get_fmt + + function c_dia_get_nzeros(a) result(res) + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function c_dia_get_nzeros + + ! function c_dia_get_size(a) result(res) + ! implicit none + ! class(psb_c_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function c_dia_get_size + + + ! function c_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_c_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function c_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine c_dia_free(a) + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_dia_free + + +end module psb_c_dia_mat_mod diff --git a/ext/psb_c_dns_mat_mod.f90 b/ext/psb_c_dns_mat_mod.f90 new file mode 100644 index 00000000..5e5a191d --- /dev/null +++ b/ext/psb_c_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_c_dns_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_spk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + complex(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => c_dns_get_size + procedure, pass(a) :: get_nzeros => c_dns_get_nzeros + procedure, nopass :: get_fmt => c_dns_get_fmt + procedure, pass(a) :: sizeof => c_dns_sizeof + procedure, pass(a) :: csmv => psb_c_dns_csmv + procedure, pass(a) :: csmm => psb_c_dns_csmm + procedure, pass(a) :: csnmi => psb_c_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_c_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_c_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_c_dns_get_diag + procedure, pass(a) :: csgetrow => psb_c_dns_csgetrow + procedure, pass(a) :: get_nz_row => c_dns_get_nz_row + procedure, pass(a) :: trim => psb_c_dns_trim + procedure, pass(a) :: free => c_dns_free + procedure, pass(a) :: mold => psb_c_dns_mold + + end type psb_c_dns_sparse_mat + + private :: c_dns_get_nzeros, c_dns_free, c_dns_get_fmt, & + & c_dns_get_size, c_dns_sizeof, c_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_c_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_c_dns_reallocate_nz(nz,a) + import :: psb_c_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dns_sparse_mat), intent(inout) :: a + end subroutine psb_c_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_c_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_c_dns_trim(a) + import :: psb_c_dns_sparse_mat + class(psb_c_dns_sparse_mat), intent(inout) :: a + end subroutine psb_c_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_c_dns_sparse_mat + !! \brief Allocate a class(psb_c_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_dns_mold(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_c_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_c_dns_allocate_mnnz(m,n,a,nz) + import :: psb_c_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Copy and convert to psb_c_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_cp_dns_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_dns_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Copy and convert from psb_c_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_c_cp_dns_from_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Convert to psb_c_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_mv_dns_to_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Convert from psb_c_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_c_mv_dns_from_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_c_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_c_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_c_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_c_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_c_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_c_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_c_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_c_dns_csnmi(a) result(res) + import :: psb_c_dns_sparse_mat, psb_spk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_c_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_c_dns_get_diag(a,d,info) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_c_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function c_dns_sizeof(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function c_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_c_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function c_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function c_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_c_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function c_dns_get_nzeros(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function c_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_c_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function c_dns_get_size(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function c_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_c_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function c_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function c_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_c_dns_sparse_mat + !! Name says all + + subroutine c_dns_free(a) + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_dns_free + + +end module psb_c_dns_mat_mod diff --git a/ext/psb_c_ell_mat_mod.f90 b/ext/psb_c_ell_mat_mod.f90 new file mode 100644 index 00000000..8eaf01ba --- /dev/null +++ b/ext/psb_c_ell_mat_mod.f90 @@ -0,0 +1,544 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_ell_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + complex(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => c_ell_is_by_rows + procedure, pass(a) :: get_size => c_ell_get_size + procedure, pass(a) :: get_nzeros => c_ell_get_nzeros + procedure, nopass :: get_fmt => c_ell_get_fmt + procedure, pass(a) :: sizeof => c_ell_sizeof + procedure, pass(a) :: csmm => psb_c_ell_csmm + procedure, pass(a) :: csmv => psb_c_ell_csmv + procedure, pass(a) :: inner_cssm => psb_c_ell_cssm + procedure, pass(a) :: inner_cssv => psb_c_ell_cssv + procedure, pass(a) :: scals => psb_c_ell_scals + procedure, pass(a) :: scalv => psb_c_ell_scal + procedure, pass(a) :: maxval => psb_c_ell_maxval + procedure, pass(a) :: csnmi => psb_c_ell_csnmi + procedure, pass(a) :: csnm1 => psb_c_ell_csnm1 + procedure, pass(a) :: rowsum => psb_c_ell_rowsum + procedure, pass(a) :: arwsum => psb_c_ell_arwsum + procedure, pass(a) :: colsum => psb_c_ell_colsum + procedure, pass(a) :: aclsum => psb_c_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_c_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_c_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_c_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_c_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_c_ell_csput_a + procedure, pass(a) :: get_diag => psb_c_ell_get_diag + procedure, pass(a) :: csgetptn => psb_c_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_c_ell_csgetrow + procedure, pass(a) :: get_nz_row => c_ell_get_nz_row + procedure, pass(a) :: reinit => psb_c_ell_reinit + procedure, pass(a) :: trim => psb_c_ell_trim + procedure, pass(a) :: print => psb_c_ell_print + procedure, pass(a) :: free => c_ell_free + procedure, pass(a) :: mold => psb_c_ell_mold + + end type psb_c_ell_sparse_mat + + private :: c_ell_get_nzeros, c_ell_free, c_ell_get_fmt, & + & c_ell_get_size, c_ell_sizeof, c_ell_get_nz_row, & + & c_ell_is_by_rows + + interface + subroutine psb_c_ell_reallocate_nz(nz,a) + import :: psb_c_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_ell_sparse_mat), intent(inout) :: a + end subroutine psb_c_ell_reallocate_nz + end interface + + interface + subroutine psb_c_ell_reinit(a,clear) + import :: psb_c_ell_sparse_mat + class(psb_c_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_ell_reinit + end interface + + interface + subroutine psb_c_ell_trim(a) + import :: psb_c_ell_sparse_mat + class(psb_c_ell_sparse_mat), intent(inout) :: a + end subroutine psb_c_ell_trim + end interface + + interface + subroutine psb_c_ell_mold(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_mold + end interface + + interface + subroutine psb_c_ell_allocate_mnnz(m,n,a,nz) + import :: psb_c_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_ell_allocate_mnnz + end interface + + interface + subroutine psb_c_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_ell_print + end interface + + interface + subroutine psb_c_cp_ell_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_ell_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_to_coo + end interface + + interface + subroutine psb_c_cp_ell_from_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_from_coo + end interface + + interface + subroutine psb_c_cp_ell_to_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_to_fmt + end interface + + interface + subroutine psb_c_cp_ell_from_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_from_fmt + end interface + + interface + subroutine psb_c_mv_ell_to_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_to_coo + end interface + + interface + subroutine psb_c_mv_ell_from_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_from_coo + end interface + + interface + subroutine psb_c_mv_ell_to_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_to_fmt + end interface + + interface + subroutine psb_c_mv_ell_from_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_from_fmt + end interface + + interface + subroutine psb_c_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_csput_a + end interface + + interface + subroutine psb_c_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_ell_csgetptn + end interface + + interface + subroutine psb_c_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_ell_csgetrow + end interface + + interface + subroutine psb_c_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_ell_csgetblk + end interface + + interface + subroutine psb_c_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_cssv + subroutine psb_c_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_cssm + end interface + + interface + subroutine psb_c_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_csmv + subroutine psb_c_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_csmm + end interface + + + interface + function psb_c_ell_maxval(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_maxval + end interface + + interface + function psb_c_ell_csnmi(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_csnmi + end interface + + interface + function psb_c_ell_csnm1(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_csnm1 + end interface + + interface + subroutine psb_c_ell_rowsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_rowsum + end interface + + interface + subroutine psb_c_ell_arwsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_arwsum + end interface + + interface + subroutine psb_c_ell_colsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_colsum + end interface + + interface + subroutine psb_c_ell_aclsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_aclsum + end interface + + interface + subroutine psb_c_ell_get_diag(a,d,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_get_diag + end interface + + interface + subroutine psb_c_ell_scal(d,a,info,side) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_ell_scal + end interface + + interface + subroutine psb_c_ell_scals(d,a,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_scals + end interface + + interface + subroutine psi_c_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_c_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_ell_is_by_rows(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function c_ell_is_by_rows + + function c_ell_sizeof(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function c_ell_sizeof + + function c_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function c_ell_get_fmt + + function c_ell_get_nzeros(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function c_ell_get_nzeros + + function c_ell_get_size(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function c_ell_get_size + + + function c_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function c_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_ell_free(a) + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_ell_free + + +end module psb_c_ell_mat_mod diff --git a/ext/psb_c_hdia_mat_mod.f90 b/ext/psb_c_hdia_mat_mod.f90 new file mode 100644 index 00000000..fbac05de --- /dev/null +++ b/ext/psb_c_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_c_hdia_mat_mod + + use psb_c_base_mat_mod + + + type, extends(psb_c_base_sparse_mat) :: psb_c_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + complex(psb_spk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => c_hdia_get_size + procedure, pass(a) :: get_nzeros => c_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => c_hdia_set_nzeros + procedure, nopass :: get_fmt => c_hdia_get_fmt + procedure, pass(a) :: sizeof => c_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_c_hdia_csmm + procedure, pass(a) :: csmv => psb_c_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_c_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_c_hdia_cssv + ! procedure, pass(a) :: scals => psb_c_hdia_scals + ! procedure, pass(a) :: scalv => psb_c_hdia_scal + ! procedure, pass(a) :: maxval => psb_c_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_c_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_c_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_c_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_c_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_c_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_c_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_c_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_c_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_c_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_c_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_c_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_c_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_c_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => c_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_c_hdia_reinit + ! procedure, pass(a) :: trim => psb_c_hdia_trim + procedure, pass(a) :: print => psb_c_hdia_print + procedure, pass(a) :: free => c_hdia_free + procedure, pass(a) :: mold => psb_c_hdia_mold + + end type psb_c_hdia_sparse_mat + + private :: c_hdia_get_nzeros, c_hdia_set_nzeros, c_hdia_free, & + & c_hdia_get_fmt, c_hdia_sizeof +!!$ & +!!$ & c_hdia_get_nz_row c_hdia_get_size, + +!!$ interface +!!$ subroutine psb_c_hdia_reallocate_nz(nz,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_reinit(a,clear) +!!$ import :: psb_c_hdia_sparse_mat +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_c_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_trim(a) +!!$ import :: psb_c_hdia_sparse_mat +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdia_trim +!!$ end interface + + interface + subroutine psb_c_hdia_mold(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdia_mold + end interface + + interface + subroutine psb_c_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_c_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hdia_allocate_mnnz + end interface + + interface + subroutine psb_c_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_hdia_print + end interface + + interface + subroutine psb_c_cp_hdia_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_hdia_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdia_to_coo + end interface + + interface + subroutine psb_c_cp_hdia_from_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_c_mv_hdia_to_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdia_to_coo + end interface + + interface + subroutine psb_c_mv_hdia_from_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ complex(psb_spk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_c_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_cssv +!!$ subroutine psb_c_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_cssm +!!$ end interface + + interface + subroutine psb_c_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hdia_csmv +!!$ subroutine psb_c_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_c_hdia_maxval(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_c_hdia_csnmi(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_c_hdia_csnm1(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_rowsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_arwsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_colsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_aclsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_get_diag(a,d,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_scal(d,a,info,side) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_hdia_scals(d,a,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * (2*psb_sizeof_sp) + + end function c_hdia_sizeof + + function c_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function c_hdia_get_fmt + + function c_hdia_get_nzeros(a) result(res) + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function c_hdia_get_nzeros + + subroutine c_hdia_set_nzeros(a,nz) + implicit none + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine c_hdia_set_nzeros + + ! function c_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_c_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function c_hdia_get_size + + + ! function c_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_c_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function c_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine c_hdia_free(a) + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_hdia_free + + +end module psb_c_hdia_mat_mod diff --git a/ext/psb_c_hll_mat_mod.f90 b/ext/psb_c_hll_mat_mod.f90 new file mode 100644 index 00000000..966b60f5 --- /dev/null +++ b/ext/psb_c_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_hll_mat_mod + + use psb_c_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + complex(psb_spk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => c_hll_get_hksz + procedure, pass(a) :: set_hksz => c_hll_set_hksz + procedure, pass(a) :: get_size => c_hll_get_size + procedure, pass(a) :: set_nzeros => c_hll_set_nzeros + procedure, pass(a) :: get_nzeros => c_hll_get_nzeros + procedure, nopass :: get_fmt => c_hll_get_fmt + procedure, pass(a) :: sizeof => c_hll_sizeof + procedure, pass(a) :: csmm => psb_c_hll_csmm + procedure, pass(a) :: csmv => psb_c_hll_csmv + procedure, pass(a) :: inner_cssm => psb_c_hll_cssm + procedure, pass(a) :: inner_cssv => psb_c_hll_cssv + procedure, pass(a) :: scals => psb_c_hll_scals + procedure, pass(a) :: scalv => psb_c_hll_scal + procedure, pass(a) :: maxval => psb_c_hll_maxval + procedure, pass(a) :: csnmi => psb_c_hll_csnmi + procedure, pass(a) :: csnm1 => psb_c_hll_csnm1 + procedure, pass(a) :: rowsum => psb_c_hll_rowsum + procedure, pass(a) :: arwsum => psb_c_hll_arwsum + procedure, pass(a) :: colsum => psb_c_hll_colsum + procedure, pass(a) :: aclsum => psb_c_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_c_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_c_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_c_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_c_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_c_hll_csput_a + procedure, pass(a) :: get_diag => psb_c_hll_get_diag + procedure, pass(a) :: csgetptn => psb_c_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_c_hll_csgetrow + procedure, pass(a) :: get_nz_row => c_hll_get_nz_row + procedure, pass(a) :: reinit => psb_c_hll_reinit + procedure, pass(a) :: print => psb_c_hll_print + procedure, pass(a) :: free => c_hll_free + procedure, pass(a) :: mold => psb_c_hll_mold + + end type psb_c_hll_sparse_mat + + private :: c_hll_get_nzeros, c_hll_free, c_hll_get_fmt, & + & c_hll_get_size, c_hll_sizeof, c_hll_get_nz_row, & + & c_hll_set_nzeros, c_hll_get_hksz, c_hll_set_hksz + + interface + subroutine psb_c_hll_reallocate_nz(nz,a) + import :: psb_c_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hll_sparse_mat), intent(inout) :: a + end subroutine psb_c_hll_reallocate_nz + end interface + + interface + subroutine psb_c_hll_reinit(a,clear) + import :: psb_c_hll_sparse_mat + class(psb_c_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_hll_reinit + end interface + + interface + subroutine psb_c_hll_mold(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_mold + end interface + + interface + subroutine psb_c_hll_allocate_mnnz(m,n,a,nz) + import :: psb_c_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hll_allocate_mnnz + end interface + + interface + subroutine psb_c_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_hll_print + end interface + + interface + subroutine psb_c_cp_hll_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_hll_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_to_coo + end interface + + interface + subroutine psb_c_cp_hll_from_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_from_coo + end interface + + interface + subroutine psb_c_cp_hll_to_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_to_fmt + end interface + + interface + subroutine psb_c_cp_hll_from_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_from_fmt + end interface + + interface + subroutine psb_c_mv_hll_to_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_to_coo + end interface + + interface + subroutine psb_c_mv_hll_from_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_from_coo + end interface + + interface + subroutine psb_c_mv_hll_to_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_to_fmt + end interface + + interface + subroutine psb_c_mv_hll_from_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_from_fmt + end interface + + interface + subroutine psb_c_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_csput_a + end interface + + interface + subroutine psb_c_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_hll_csgetptn + end interface + + interface + subroutine psb_c_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_hll_csgetrow + end interface + + interface + subroutine psb_c_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_hll_csgetblk + end interface + + interface + subroutine psb_c_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_cssv + subroutine psb_c_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_cssm + end interface + + interface + subroutine psb_c_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_csmv + subroutine psb_c_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_csmm + end interface + + + interface + function psb_c_hll_maxval(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_maxval + end interface + + interface + function psb_c_hll_csnmi(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_csnmi + end interface + + interface + function psb_c_hll_csnm1(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_csnm1 + end interface + + interface + subroutine psb_c_hll_rowsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_rowsum + end interface + + interface + subroutine psb_c_hll_arwsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_arwsum + end interface + + interface + subroutine psb_c_hll_colsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_colsum + end interface + + interface + subroutine psb_c_hll_aclsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_aclsum + end interface + + interface + subroutine psb_c_hll_get_diag(a,d,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_get_diag + end interface + + interface + subroutine psb_c_hll_scal(d,a,info,side) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_hll_scal + end interface + + interface + subroutine psb_c_hll_scals(d,a,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_c_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_c_hll_sparse_mat, psb_ipk_, psb_c_coo_sparse_mat + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_c_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hll_sizeof(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function c_hll_sizeof + + function c_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function c_hll_get_fmt + + subroutine c_hll_set_nzeros(a,n) + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine c_hll_set_nzeros + + function c_hll_get_nzeros(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function c_hll_get_nzeros + + function c_hll_get_size(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function c_hll_get_size + + + + function c_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function c_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hll_free(a) + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine c_hll_free + + subroutine c_hll_set_hksz(a,n) + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine c_hll_set_hksz + + function c_hll_get_hksz(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function c_hll_get_hksz + +end module psb_c_hll_mat_mod diff --git a/ext/psb_d_dia_mat_mod.f90 b/ext/psb_d_dia_mat_mod.f90 new file mode 100644 index 00000000..7df615ac --- /dev/null +++ b/ext/psb_d_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_dia_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => d_dia_get_size + procedure, pass(a) :: get_nzeros => d_dia_get_nzeros + procedure, nopass :: get_fmt => d_dia_get_fmt + procedure, pass(a) :: sizeof => d_dia_sizeof + procedure, pass(a) :: csmm => psb_d_dia_csmm + procedure, pass(a) :: csmv => psb_d_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_dia_cssv + procedure, pass(a) :: scals => psb_d_dia_scals + procedure, pass(a) :: scalv => psb_d_dia_scal + procedure, pass(a) :: maxval => psb_d_dia_maxval + procedure, pass(a) :: rowsum => psb_d_dia_rowsum + procedure, pass(a) :: arwsum => psb_d_dia_arwsum + procedure, pass(a) :: colsum => psb_d_dia_colsum + procedure, pass(a) :: aclsum => psb_d_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_d_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_d_dia_csput_a + procedure, pass(a) :: get_diag => psb_d_dia_get_diag + procedure, pass(a) :: csgetptn => psb_d_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_d_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => d_dia_get_nz_row + procedure, pass(a) :: reinit => psb_d_dia_reinit + ! procedure, pass(a) :: trim => psb_d_dia_trim + procedure, pass(a) :: print => psb_d_dia_print + procedure, pass(a) :: free => d_dia_free + procedure, pass(a) :: mold => psb_d_dia_mold + + end type psb_d_dia_sparse_mat + + private :: d_dia_get_nzeros, d_dia_free, d_dia_get_fmt, & + & d_dia_sizeof !, d_dia_get_size, d_dia_get_nz_row + + interface + subroutine psb_d_dia_reallocate_nz(nz,a) + import :: psb_d_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dia_sparse_mat), intent(inout) :: a + end subroutine psb_d_dia_reallocate_nz + end interface + + interface + subroutine psb_d_dia_reinit(a,clear) + import :: psb_d_dia_sparse_mat + class(psb_d_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_dia_reinit + end interface + + interface + subroutine psb_d_dia_trim(a) + import :: psb_d_dia_sparse_mat + class(psb_d_dia_sparse_mat), intent(inout) :: a + end subroutine psb_d_dia_trim + end interface + + interface + subroutine psb_d_dia_mold(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_mold + end interface + + interface + subroutine psb_d_dia_allocate_mnnz(m,n,a,nz) + import :: psb_d_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_dia_allocate_mnnz + end interface + + interface + subroutine psb_d_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_dia_print + end interface + + interface + subroutine psb_d_cp_dia_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_dia_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_to_coo + end interface + + interface + subroutine psb_d_cp_dia_from_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_from_coo + end interface + + interface + subroutine psb_d_cp_dia_to_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_to_fmt + end interface + + interface + subroutine psb_d_cp_dia_from_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_from_fmt + end interface + + interface + subroutine psb_d_mv_dia_to_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_to_coo + end interface + + interface + subroutine psb_d_mv_dia_from_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_from_coo + end interface + + interface + subroutine psb_d_mv_dia_to_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_to_fmt + end interface + + interface + subroutine psb_d_mv_dia_from_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_from_fmt + end interface + + interface + subroutine psb_d_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_csput_a + end interface + + interface + subroutine psb_d_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_dia_csgetptn + end interface + + interface + subroutine psb_d_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_dia_csgetrow + end interface + + interface + subroutine psb_d_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_dia_csgetblk + end interface + + interface + subroutine psb_d_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_cssv + subroutine psb_d_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_cssm + end interface + + interface + subroutine psb_d_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_csmv + subroutine psb_d_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_csmm + end interface + + + interface + function psb_d_dia_maxval(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_maxval + end interface + + interface + function psb_d_dia_csnmi(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_csnmi + end interface + + interface + function psb_d_dia_csnm1(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_csnm1 + end interface + + interface + subroutine psb_d_dia_rowsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_rowsum + end interface + + interface + subroutine psb_d_dia_arwsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_arwsum + end interface + + interface + subroutine psb_d_dia_colsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_colsum + end interface + + interface + subroutine psb_d_dia_aclsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_aclsum + end interface + + interface + subroutine psb_d_dia_get_diag(a,d,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_get_diag + end interface + + interface + subroutine psb_d_dia_scal(d,a,info,side) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_dia_scal + end interface + + interface + subroutine psb_d_dia_scals(d,a,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_d_convert_dia_from_coo(a,tmp,info) + import :: psb_d_dia_sparse_mat, psb_ipk_, psb_d_coo_sparse_mat + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_dia_sizeof(a) result(res) + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function d_dia_sizeof + + function d_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function d_dia_get_fmt + + function d_dia_get_nzeros(a) result(res) + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function d_dia_get_nzeros + + ! function d_dia_get_size(a) result(res) + ! implicit none + ! class(psb_d_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function d_dia_get_size + + + ! function d_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_d_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function d_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine d_dia_free(a) + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_dia_free + + +end module psb_d_dia_mat_mod diff --git a/ext/psb_d_dns_mat_mod.f90 b/ext/psb_d_dns_mat_mod.f90 new file mode 100644 index 00000000..f8c977bc --- /dev/null +++ b/ext/psb_d_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_d_dns_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_dpk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + real(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => d_dns_get_size + procedure, pass(a) :: get_nzeros => d_dns_get_nzeros + procedure, nopass :: get_fmt => d_dns_get_fmt + procedure, pass(a) :: sizeof => d_dns_sizeof + procedure, pass(a) :: csmv => psb_d_dns_csmv + procedure, pass(a) :: csmm => psb_d_dns_csmm + procedure, pass(a) :: csnmi => psb_d_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_d_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_d_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_d_dns_get_diag + procedure, pass(a) :: csgetrow => psb_d_dns_csgetrow + procedure, pass(a) :: get_nz_row => d_dns_get_nz_row + procedure, pass(a) :: trim => psb_d_dns_trim + procedure, pass(a) :: free => d_dns_free + procedure, pass(a) :: mold => psb_d_dns_mold + + end type psb_d_dns_sparse_mat + + private :: d_dns_get_nzeros, d_dns_free, d_dns_get_fmt, & + & d_dns_get_size, d_dns_sizeof, d_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_d_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_d_dns_reallocate_nz(nz,a) + import :: psb_d_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dns_sparse_mat), intent(inout) :: a + end subroutine psb_d_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_d_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_d_dns_trim(a) + import :: psb_d_dns_sparse_mat + class(psb_d_dns_sparse_mat), intent(inout) :: a + end subroutine psb_d_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_d_dns_sparse_mat + !! \brief Allocate a class(psb_d_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_dns_mold(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_d_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_d_dns_allocate_mnnz(m,n,a,nz) + import :: psb_d_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Copy and convert to psb_d_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_cp_dns_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_dns_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Copy and convert from psb_d_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_d_cp_dns_from_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Convert to psb_d_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_mv_dns_to_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Convert from psb_d_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_d_mv_dns_from_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_d_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_d_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_d_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_d_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_d_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_d_dns_csnmi(a) result(res) + import :: psb_d_dns_sparse_mat, psb_dpk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_d_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_d_dns_get_diag(a,d,info) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_d_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function d_dns_sizeof(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function d_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_d_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function d_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function d_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_d_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function d_dns_get_nzeros(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function d_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_d_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function d_dns_get_size(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function d_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_d_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function d_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function d_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_d_dns_sparse_mat + !! Name says all + + subroutine d_dns_free(a) + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_dns_free + + +end module psb_d_dns_mat_mod diff --git a/ext/psb_d_ell_mat_mod.f90 b/ext/psb_d_ell_mat_mod.f90 new file mode 100644 index 00000000..3e34d63e --- /dev/null +++ b/ext/psb_d_ell_mat_mod.f90 @@ -0,0 +1,544 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_ell_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => d_ell_is_by_rows + procedure, pass(a) :: get_size => d_ell_get_size + procedure, pass(a) :: get_nzeros => d_ell_get_nzeros + procedure, nopass :: get_fmt => d_ell_get_fmt + procedure, pass(a) :: sizeof => d_ell_sizeof + procedure, pass(a) :: csmm => psb_d_ell_csmm + procedure, pass(a) :: csmv => psb_d_ell_csmv + procedure, pass(a) :: inner_cssm => psb_d_ell_cssm + procedure, pass(a) :: inner_cssv => psb_d_ell_cssv + procedure, pass(a) :: scals => psb_d_ell_scals + procedure, pass(a) :: scalv => psb_d_ell_scal + procedure, pass(a) :: maxval => psb_d_ell_maxval + procedure, pass(a) :: csnmi => psb_d_ell_csnmi + procedure, pass(a) :: csnm1 => psb_d_ell_csnm1 + procedure, pass(a) :: rowsum => psb_d_ell_rowsum + procedure, pass(a) :: arwsum => psb_d_ell_arwsum + procedure, pass(a) :: colsum => psb_d_ell_colsum + procedure, pass(a) :: aclsum => psb_d_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_d_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_d_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_d_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_d_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_d_ell_csput_a + procedure, pass(a) :: get_diag => psb_d_ell_get_diag + procedure, pass(a) :: csgetptn => psb_d_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_d_ell_csgetrow + procedure, pass(a) :: get_nz_row => d_ell_get_nz_row + procedure, pass(a) :: reinit => psb_d_ell_reinit + procedure, pass(a) :: trim => psb_d_ell_trim + procedure, pass(a) :: print => psb_d_ell_print + procedure, pass(a) :: free => d_ell_free + procedure, pass(a) :: mold => psb_d_ell_mold + + end type psb_d_ell_sparse_mat + + private :: d_ell_get_nzeros, d_ell_free, d_ell_get_fmt, & + & d_ell_get_size, d_ell_sizeof, d_ell_get_nz_row, & + & d_ell_is_by_rows + + interface + subroutine psb_d_ell_reallocate_nz(nz,a) + import :: psb_d_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_ell_sparse_mat), intent(inout) :: a + end subroutine psb_d_ell_reallocate_nz + end interface + + interface + subroutine psb_d_ell_reinit(a,clear) + import :: psb_d_ell_sparse_mat + class(psb_d_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_ell_reinit + end interface + + interface + subroutine psb_d_ell_trim(a) + import :: psb_d_ell_sparse_mat + class(psb_d_ell_sparse_mat), intent(inout) :: a + end subroutine psb_d_ell_trim + end interface + + interface + subroutine psb_d_ell_mold(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_mold + end interface + + interface + subroutine psb_d_ell_allocate_mnnz(m,n,a,nz) + import :: psb_d_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_ell_allocate_mnnz + end interface + + interface + subroutine psb_d_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_ell_print + end interface + + interface + subroutine psb_d_cp_ell_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_ell_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_to_coo + end interface + + interface + subroutine psb_d_cp_ell_from_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_from_coo + end interface + + interface + subroutine psb_d_cp_ell_to_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_to_fmt + end interface + + interface + subroutine psb_d_cp_ell_from_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_from_fmt + end interface + + interface + subroutine psb_d_mv_ell_to_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_to_coo + end interface + + interface + subroutine psb_d_mv_ell_from_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_from_coo + end interface + + interface + subroutine psb_d_mv_ell_to_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_to_fmt + end interface + + interface + subroutine psb_d_mv_ell_from_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_from_fmt + end interface + + interface + subroutine psb_d_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_csput_a + end interface + + interface + subroutine psb_d_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_ell_csgetptn + end interface + + interface + subroutine psb_d_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_ell_csgetrow + end interface + + interface + subroutine psb_d_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_ell_csgetblk + end interface + + interface + subroutine psb_d_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_cssv + subroutine psb_d_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_cssm + end interface + + interface + subroutine psb_d_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_csmv + subroutine psb_d_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_csmm + end interface + + + interface + function psb_d_ell_maxval(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_maxval + end interface + + interface + function psb_d_ell_csnmi(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_csnmi + end interface + + interface + function psb_d_ell_csnm1(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_csnm1 + end interface + + interface + subroutine psb_d_ell_rowsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_rowsum + end interface + + interface + subroutine psb_d_ell_arwsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_arwsum + end interface + + interface + subroutine psb_d_ell_colsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_colsum + end interface + + interface + subroutine psb_d_ell_aclsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_aclsum + end interface + + interface + subroutine psb_d_ell_get_diag(a,d,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_get_diag + end interface + + interface + subroutine psb_d_ell_scal(d,a,info,side) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_ell_scal + end interface + + interface + subroutine psb_d_ell_scals(d,a,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_scals + end interface + + interface + subroutine psi_d_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_d_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_ell_is_by_rows(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function d_ell_is_by_rows + + function d_ell_sizeof(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function d_ell_sizeof + + function d_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function d_ell_get_fmt + + function d_ell_get_nzeros(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function d_ell_get_nzeros + + function d_ell_get_size(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function d_ell_get_size + + + function d_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function d_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_ell_free(a) + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_ell_free + + +end module psb_d_ell_mat_mod diff --git a/ext/psb_d_hdia_mat_mod.f90 b/ext/psb_d_hdia_mat_mod.f90 new file mode 100644 index 00000000..25bc6898 --- /dev/null +++ b/ext/psb_d_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_d_hdia_mat_mod + + use psb_d_base_mat_mod + + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + real(psb_dpk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => d_hdia_get_size + procedure, pass(a) :: get_nzeros => d_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => d_hdia_set_nzeros + procedure, nopass :: get_fmt => d_hdia_get_fmt + procedure, pass(a) :: sizeof => d_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_d_hdia_csmm + procedure, pass(a) :: csmv => psb_d_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_hdia_cssv + ! procedure, pass(a) :: scals => psb_d_hdia_scals + ! procedure, pass(a) :: scalv => psb_d_hdia_scal + ! procedure, pass(a) :: maxval => psb_d_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_d_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_d_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_d_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_d_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_d_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_d_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_d_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_d_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_d_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_d_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_d_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_d_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => d_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_d_hdia_reinit + ! procedure, pass(a) :: trim => psb_d_hdia_trim + procedure, pass(a) :: print => psb_d_hdia_print + procedure, pass(a) :: free => d_hdia_free + procedure, pass(a) :: mold => psb_d_hdia_mold + + end type psb_d_hdia_sparse_mat + + private :: d_hdia_get_nzeros, d_hdia_set_nzeros, d_hdia_free, & + & d_hdia_get_fmt, d_hdia_sizeof +!!$ & +!!$ & d_hdia_get_nz_row d_hdia_get_size, + +!!$ interface +!!$ subroutine psb_d_hdia_reallocate_nz(nz,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_reinit(a,clear) +!!$ import :: psb_d_hdia_sparse_mat +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_d_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_trim(a) +!!$ import :: psb_d_hdia_sparse_mat +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_hdia_trim +!!$ end interface + + interface + subroutine psb_d_hdia_mold(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdia_mold + end interface + + interface + subroutine psb_d_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_d_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hdia_allocate_mnnz + end interface + + interface + subroutine psb_d_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_hdia_print + end interface + + interface + subroutine psb_d_cp_hdia_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_hdia_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdia_to_coo + end interface + + interface + subroutine psb_d_cp_hdia_from_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_d_mv_hdia_to_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdia_to_coo + end interface + + interface + subroutine psb_d_mv_hdia_from_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ real(psb_dpk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_d_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_cssv +!!$ subroutine psb_d_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_cssm +!!$ end interface + + interface + subroutine psb_d_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hdia_csmv +!!$ subroutine psb_d_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_d_hdia_maxval(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_d_hdia_csnmi(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_d_hdia_csnm1(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_rowsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_arwsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_colsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_aclsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_get_diag(a,d,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_scal(d,a,info,side) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_hdia_scals(d,a,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * psb_sizeof_dp + + end function d_hdia_sizeof + + function d_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function d_hdia_get_fmt + + function d_hdia_get_nzeros(a) result(res) + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function d_hdia_get_nzeros + + subroutine d_hdia_set_nzeros(a,nz) + implicit none + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine d_hdia_set_nzeros + + ! function d_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_d_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function d_hdia_get_size + + + ! function d_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_d_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function d_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine d_hdia_free(a) + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_hdia_free + + +end module psb_d_hdia_mat_mod diff --git a/ext/psb_d_hll_mat_mod.f90 b/ext/psb_d_hll_mat_mod.f90 new file mode 100644 index 00000000..acc3b312 --- /dev/null +++ b/ext/psb_d_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_hll_mat_mod + + use psb_d_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => d_hll_get_hksz + procedure, pass(a) :: set_hksz => d_hll_set_hksz + procedure, pass(a) :: get_size => d_hll_get_size + procedure, pass(a) :: set_nzeros => d_hll_set_nzeros + procedure, pass(a) :: get_nzeros => d_hll_get_nzeros + procedure, nopass :: get_fmt => d_hll_get_fmt + procedure, pass(a) :: sizeof => d_hll_sizeof + procedure, pass(a) :: csmm => psb_d_hll_csmm + procedure, pass(a) :: csmv => psb_d_hll_csmv + procedure, pass(a) :: inner_cssm => psb_d_hll_cssm + procedure, pass(a) :: inner_cssv => psb_d_hll_cssv + procedure, pass(a) :: scals => psb_d_hll_scals + procedure, pass(a) :: scalv => psb_d_hll_scal + procedure, pass(a) :: maxval => psb_d_hll_maxval + procedure, pass(a) :: csnmi => psb_d_hll_csnmi + procedure, pass(a) :: csnm1 => psb_d_hll_csnm1 + procedure, pass(a) :: rowsum => psb_d_hll_rowsum + procedure, pass(a) :: arwsum => psb_d_hll_arwsum + procedure, pass(a) :: colsum => psb_d_hll_colsum + procedure, pass(a) :: aclsum => psb_d_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_d_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_d_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_d_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_d_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_d_hll_csput_a + procedure, pass(a) :: get_diag => psb_d_hll_get_diag + procedure, pass(a) :: csgetptn => psb_d_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_d_hll_csgetrow + procedure, pass(a) :: get_nz_row => d_hll_get_nz_row + procedure, pass(a) :: reinit => psb_d_hll_reinit + procedure, pass(a) :: print => psb_d_hll_print + procedure, pass(a) :: free => d_hll_free + procedure, pass(a) :: mold => psb_d_hll_mold + + end type psb_d_hll_sparse_mat + + private :: d_hll_get_nzeros, d_hll_free, d_hll_get_fmt, & + & d_hll_get_size, d_hll_sizeof, d_hll_get_nz_row, & + & d_hll_set_nzeros, d_hll_get_hksz, d_hll_set_hksz + + interface + subroutine psb_d_hll_reallocate_nz(nz,a) + import :: psb_d_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hll_sparse_mat), intent(inout) :: a + end subroutine psb_d_hll_reallocate_nz + end interface + + interface + subroutine psb_d_hll_reinit(a,clear) + import :: psb_d_hll_sparse_mat + class(psb_d_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_hll_reinit + end interface + + interface + subroutine psb_d_hll_mold(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_mold + end interface + + interface + subroutine psb_d_hll_allocate_mnnz(m,n,a,nz) + import :: psb_d_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hll_allocate_mnnz + end interface + + interface + subroutine psb_d_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_hll_print + end interface + + interface + subroutine psb_d_cp_hll_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_hll_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_to_coo + end interface + + interface + subroutine psb_d_cp_hll_from_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_from_coo + end interface + + interface + subroutine psb_d_cp_hll_to_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_to_fmt + end interface + + interface + subroutine psb_d_cp_hll_from_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_from_fmt + end interface + + interface + subroutine psb_d_mv_hll_to_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_to_coo + end interface + + interface + subroutine psb_d_mv_hll_from_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_from_coo + end interface + + interface + subroutine psb_d_mv_hll_to_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_to_fmt + end interface + + interface + subroutine psb_d_mv_hll_from_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_from_fmt + end interface + + interface + subroutine psb_d_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_csput_a + end interface + + interface + subroutine psb_d_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_hll_csgetptn + end interface + + interface + subroutine psb_d_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_hll_csgetrow + end interface + + interface + subroutine psb_d_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_hll_csgetblk + end interface + + interface + subroutine psb_d_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_cssv + subroutine psb_d_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_cssm + end interface + + interface + subroutine psb_d_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_csmv + subroutine psb_d_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_csmm + end interface + + + interface + function psb_d_hll_maxval(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_maxval + end interface + + interface + function psb_d_hll_csnmi(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_csnmi + end interface + + interface + function psb_d_hll_csnm1(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_csnm1 + end interface + + interface + subroutine psb_d_hll_rowsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_rowsum + end interface + + interface + subroutine psb_d_hll_arwsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_arwsum + end interface + + interface + subroutine psb_d_hll_colsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_colsum + end interface + + interface + subroutine psb_d_hll_aclsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_aclsum + end interface + + interface + subroutine psb_d_hll_get_diag(a,d,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_get_diag + end interface + + interface + subroutine psb_d_hll_scal(d,a,info,side) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_hll_scal + end interface + + interface + subroutine psb_d_hll_scals(d,a,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_d_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_d_hll_sparse_mat, psb_ipk_, psb_d_coo_sparse_mat + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hll_sizeof(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function d_hll_sizeof + + function d_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function d_hll_get_fmt + + subroutine d_hll_set_nzeros(a,n) + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine d_hll_set_nzeros + + function d_hll_get_nzeros(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function d_hll_get_nzeros + + function d_hll_get_size(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function d_hll_get_size + + + + function d_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function d_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hll_free(a) + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine d_hll_free + + subroutine d_hll_set_hksz(a,n) + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine d_hll_set_hksz + + function d_hll_get_hksz(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function d_hll_get_hksz + +end module psb_d_hll_mat_mod diff --git a/ext/psb_ext_mod.F90 b/ext/psb_ext_mod.F90 new file mode 100644 index 00000000..b1dbdb59 --- /dev/null +++ b/ext/psb_ext_mod.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_ext_mod + use psb_const_mod + use psi_ext_util_mod + + use psb_s_dns_mat_mod + use psb_d_dns_mat_mod + use psb_c_dns_mat_mod + use psb_z_dns_mat_mod + + use psb_d_ell_mat_mod + use psb_s_ell_mat_mod + use psb_z_ell_mat_mod + use psb_c_ell_mat_mod + + use psb_s_hll_mat_mod + use psb_d_hll_mat_mod + use psb_c_hll_mat_mod + use psb_z_hll_mat_mod + + use psb_s_dia_mat_mod + use psb_d_dia_mat_mod + use psb_c_dia_mat_mod + use psb_z_dia_mat_mod + + use psb_s_hdia_mat_mod + use psb_d_hdia_mat_mod + use psb_c_hdia_mat_mod + use psb_z_hdia_mat_mod + +#ifdef HAVE_RSB + use psb_d_rsb_mat_mod +#endif +end module psb_ext_mod diff --git a/ext/psb_s_dia_mat_mod.f90 b/ext/psb_s_dia_mat_mod.f90 new file mode 100644 index 00000000..3a11d959 --- /dev/null +++ b/ext/psb_s_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_dia_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_spk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => s_dia_get_size + procedure, pass(a) :: get_nzeros => s_dia_get_nzeros + procedure, nopass :: get_fmt => s_dia_get_fmt + procedure, pass(a) :: sizeof => s_dia_sizeof + procedure, pass(a) :: csmm => psb_s_dia_csmm + procedure, pass(a) :: csmv => psb_s_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_s_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_s_dia_cssv + procedure, pass(a) :: scals => psb_s_dia_scals + procedure, pass(a) :: scalv => psb_s_dia_scal + procedure, pass(a) :: maxval => psb_s_dia_maxval + procedure, pass(a) :: rowsum => psb_s_dia_rowsum + procedure, pass(a) :: arwsum => psb_s_dia_arwsum + procedure, pass(a) :: colsum => psb_s_dia_colsum + procedure, pass(a) :: aclsum => psb_s_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_s_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_s_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_s_dia_csput_a + procedure, pass(a) :: get_diag => psb_s_dia_get_diag + procedure, pass(a) :: csgetptn => psb_s_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_s_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => s_dia_get_nz_row + procedure, pass(a) :: reinit => psb_s_dia_reinit + ! procedure, pass(a) :: trim => psb_s_dia_trim + procedure, pass(a) :: print => psb_s_dia_print + procedure, pass(a) :: free => s_dia_free + procedure, pass(a) :: mold => psb_s_dia_mold + + end type psb_s_dia_sparse_mat + + private :: s_dia_get_nzeros, s_dia_free, s_dia_get_fmt, & + & s_dia_sizeof !, s_dia_get_size, s_dia_get_nz_row + + interface + subroutine psb_s_dia_reallocate_nz(nz,a) + import :: psb_s_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dia_sparse_mat), intent(inout) :: a + end subroutine psb_s_dia_reallocate_nz + end interface + + interface + subroutine psb_s_dia_reinit(a,clear) + import :: psb_s_dia_sparse_mat + class(psb_s_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_dia_reinit + end interface + + interface + subroutine psb_s_dia_trim(a) + import :: psb_s_dia_sparse_mat + class(psb_s_dia_sparse_mat), intent(inout) :: a + end subroutine psb_s_dia_trim + end interface + + interface + subroutine psb_s_dia_mold(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_mold + end interface + + interface + subroutine psb_s_dia_allocate_mnnz(m,n,a,nz) + import :: psb_s_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_dia_allocate_mnnz + end interface + + interface + subroutine psb_s_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_dia_print + end interface + + interface + subroutine psb_s_cp_dia_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_dia_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_to_coo + end interface + + interface + subroutine psb_s_cp_dia_from_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_from_coo + end interface + + interface + subroutine psb_s_cp_dia_to_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_to_fmt + end interface + + interface + subroutine psb_s_cp_dia_from_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_from_fmt + end interface + + interface + subroutine psb_s_mv_dia_to_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_to_coo + end interface + + interface + subroutine psb_s_mv_dia_from_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_from_coo + end interface + + interface + subroutine psb_s_mv_dia_to_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_to_fmt + end interface + + interface + subroutine psb_s_mv_dia_from_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_from_fmt + end interface + + interface + subroutine psb_s_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_csput_a + end interface + + interface + subroutine psb_s_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_dia_csgetptn + end interface + + interface + subroutine psb_s_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_dia_csgetrow + end interface + + interface + subroutine psb_s_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_dia_csgetblk + end interface + + interface + subroutine psb_s_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_cssv + subroutine psb_s_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_cssm + end interface + + interface + subroutine psb_s_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_csmv + subroutine psb_s_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_csmm + end interface + + + interface + function psb_s_dia_maxval(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_maxval + end interface + + interface + function psb_s_dia_csnmi(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_csnmi + end interface + + interface + function psb_s_dia_csnm1(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_csnm1 + end interface + + interface + subroutine psb_s_dia_rowsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_rowsum + end interface + + interface + subroutine psb_s_dia_arwsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_arwsum + end interface + + interface + subroutine psb_s_dia_colsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_colsum + end interface + + interface + subroutine psb_s_dia_aclsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_aclsum + end interface + + interface + subroutine psb_s_dia_get_diag(a,d,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_get_diag + end interface + + interface + subroutine psb_s_dia_scal(d,a,info,side) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_dia_scal + end interface + + interface + subroutine psb_s_dia_scals(d,a,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_s_convert_dia_from_coo(a,tmp,info) + import :: psb_s_dia_sparse_mat, psb_ipk_, psb_s_coo_sparse_mat + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_s_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_dia_sizeof(a) result(res) + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function s_dia_sizeof + + function s_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function s_dia_get_fmt + + function s_dia_get_nzeros(a) result(res) + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function s_dia_get_nzeros + + ! function s_dia_get_size(a) result(res) + ! implicit none + ! class(psb_s_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function s_dia_get_size + + + ! function s_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_s_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function s_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine s_dia_free(a) + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_dia_free + + +end module psb_s_dia_mat_mod diff --git a/ext/psb_s_dns_mat_mod.f90 b/ext/psb_s_dns_mat_mod.f90 new file mode 100644 index 00000000..e9ea5f26 --- /dev/null +++ b/ext/psb_s_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_s_dns_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_spk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + real(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => s_dns_get_size + procedure, pass(a) :: get_nzeros => s_dns_get_nzeros + procedure, nopass :: get_fmt => s_dns_get_fmt + procedure, pass(a) :: sizeof => s_dns_sizeof + procedure, pass(a) :: csmv => psb_s_dns_csmv + procedure, pass(a) :: csmm => psb_s_dns_csmm + procedure, pass(a) :: csnmi => psb_s_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_s_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_s_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_s_dns_get_diag + procedure, pass(a) :: csgetrow => psb_s_dns_csgetrow + procedure, pass(a) :: get_nz_row => s_dns_get_nz_row + procedure, pass(a) :: trim => psb_s_dns_trim + procedure, pass(a) :: free => s_dns_free + procedure, pass(a) :: mold => psb_s_dns_mold + + end type psb_s_dns_sparse_mat + + private :: s_dns_get_nzeros, s_dns_free, s_dns_get_fmt, & + & s_dns_get_size, s_dns_sizeof, s_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_s_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_s_dns_reallocate_nz(nz,a) + import :: psb_s_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dns_sparse_mat), intent(inout) :: a + end subroutine psb_s_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_s_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_s_dns_trim(a) + import :: psb_s_dns_sparse_mat + class(psb_s_dns_sparse_mat), intent(inout) :: a + end subroutine psb_s_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_s_dns_sparse_mat + !! \brief Allocate a class(psb_s_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_dns_mold(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_s_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_s_dns_allocate_mnnz(m,n,a,nz) + import :: psb_s_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Copy and convert to psb_s_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_cp_dns_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_dns_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Copy and convert from psb_s_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_s_cp_dns_from_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Convert to psb_s_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_mv_dns_to_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Convert from psb_s_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_s_mv_dns_from_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_s_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_s_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_s_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_s_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_s_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_s_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_s_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_s_dns_csnmi(a) result(res) + import :: psb_s_dns_sparse_mat, psb_spk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_s_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_s_dns_get_diag(a,d,info) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_s_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function s_dns_sizeof(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function s_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_s_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function s_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function s_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_s_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function s_dns_get_nzeros(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function s_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_s_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function s_dns_get_size(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function s_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_s_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function s_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function s_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_s_dns_sparse_mat + !! Name says all + + subroutine s_dns_free(a) + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_dns_free + + +end module psb_s_dns_mat_mod diff --git a/ext/psb_s_ell_mat_mod.f90 b/ext/psb_s_ell_mat_mod.f90 new file mode 100644 index 00000000..5f09913a --- /dev/null +++ b/ext/psb_s_ell_mat_mod.f90 @@ -0,0 +1,544 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_ell_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => s_ell_is_by_rows + procedure, pass(a) :: get_size => s_ell_get_size + procedure, pass(a) :: get_nzeros => s_ell_get_nzeros + procedure, nopass :: get_fmt => s_ell_get_fmt + procedure, pass(a) :: sizeof => s_ell_sizeof + procedure, pass(a) :: csmm => psb_s_ell_csmm + procedure, pass(a) :: csmv => psb_s_ell_csmv + procedure, pass(a) :: inner_cssm => psb_s_ell_cssm + procedure, pass(a) :: inner_cssv => psb_s_ell_cssv + procedure, pass(a) :: scals => psb_s_ell_scals + procedure, pass(a) :: scalv => psb_s_ell_scal + procedure, pass(a) :: maxval => psb_s_ell_maxval + procedure, pass(a) :: csnmi => psb_s_ell_csnmi + procedure, pass(a) :: csnm1 => psb_s_ell_csnm1 + procedure, pass(a) :: rowsum => psb_s_ell_rowsum + procedure, pass(a) :: arwsum => psb_s_ell_arwsum + procedure, pass(a) :: colsum => psb_s_ell_colsum + procedure, pass(a) :: aclsum => psb_s_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_s_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_s_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_s_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_s_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_s_ell_csput_a + procedure, pass(a) :: get_diag => psb_s_ell_get_diag + procedure, pass(a) :: csgetptn => psb_s_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_s_ell_csgetrow + procedure, pass(a) :: get_nz_row => s_ell_get_nz_row + procedure, pass(a) :: reinit => psb_s_ell_reinit + procedure, pass(a) :: trim => psb_s_ell_trim + procedure, pass(a) :: print => psb_s_ell_print + procedure, pass(a) :: free => s_ell_free + procedure, pass(a) :: mold => psb_s_ell_mold + + end type psb_s_ell_sparse_mat + + private :: s_ell_get_nzeros, s_ell_free, s_ell_get_fmt, & + & s_ell_get_size, s_ell_sizeof, s_ell_get_nz_row, & + & s_ell_is_by_rows + + interface + subroutine psb_s_ell_reallocate_nz(nz,a) + import :: psb_s_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_ell_sparse_mat), intent(inout) :: a + end subroutine psb_s_ell_reallocate_nz + end interface + + interface + subroutine psb_s_ell_reinit(a,clear) + import :: psb_s_ell_sparse_mat + class(psb_s_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_ell_reinit + end interface + + interface + subroutine psb_s_ell_trim(a) + import :: psb_s_ell_sparse_mat + class(psb_s_ell_sparse_mat), intent(inout) :: a + end subroutine psb_s_ell_trim + end interface + + interface + subroutine psb_s_ell_mold(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_mold + end interface + + interface + subroutine psb_s_ell_allocate_mnnz(m,n,a,nz) + import :: psb_s_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_ell_allocate_mnnz + end interface + + interface + subroutine psb_s_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_ell_print + end interface + + interface + subroutine psb_s_cp_ell_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_ell_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_to_coo + end interface + + interface + subroutine psb_s_cp_ell_from_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_from_coo + end interface + + interface + subroutine psb_s_cp_ell_to_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_to_fmt + end interface + + interface + subroutine psb_s_cp_ell_from_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_from_fmt + end interface + + interface + subroutine psb_s_mv_ell_to_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_to_coo + end interface + + interface + subroutine psb_s_mv_ell_from_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_from_coo + end interface + + interface + subroutine psb_s_mv_ell_to_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_to_fmt + end interface + + interface + subroutine psb_s_mv_ell_from_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_from_fmt + end interface + + interface + subroutine psb_s_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_csput_a + end interface + + interface + subroutine psb_s_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_ell_csgetptn + end interface + + interface + subroutine psb_s_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_ell_csgetrow + end interface + + interface + subroutine psb_s_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_ell_csgetblk + end interface + + interface + subroutine psb_s_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_cssv + subroutine psb_s_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_cssm + end interface + + interface + subroutine psb_s_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_csmv + subroutine psb_s_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_csmm + end interface + + + interface + function psb_s_ell_maxval(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_maxval + end interface + + interface + function psb_s_ell_csnmi(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_csnmi + end interface + + interface + function psb_s_ell_csnm1(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_csnm1 + end interface + + interface + subroutine psb_s_ell_rowsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_rowsum + end interface + + interface + subroutine psb_s_ell_arwsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_arwsum + end interface + + interface + subroutine psb_s_ell_colsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_colsum + end interface + + interface + subroutine psb_s_ell_aclsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_aclsum + end interface + + interface + subroutine psb_s_ell_get_diag(a,d,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_get_diag + end interface + + interface + subroutine psb_s_ell_scal(d,a,info,side) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_ell_scal + end interface + + interface + subroutine psb_s_ell_scals(d,a,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_scals + end interface + + interface + subroutine psi_s_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_s_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_ell_is_by_rows(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function s_ell_is_by_rows + + function s_ell_sizeof(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function s_ell_sizeof + + function s_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function s_ell_get_fmt + + function s_ell_get_nzeros(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function s_ell_get_nzeros + + function s_ell_get_size(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function s_ell_get_size + + + function s_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function s_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_ell_free(a) + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_ell_free + + +end module psb_s_ell_mat_mod diff --git a/ext/psb_s_hdia_mat_mod.f90 b/ext/psb_s_hdia_mat_mod.f90 new file mode 100644 index 00000000..b7b2b110 --- /dev/null +++ b/ext/psb_s_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_s_hdia_mat_mod + + use psb_s_base_mat_mod + + + type, extends(psb_s_base_sparse_mat) :: psb_s_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + real(psb_spk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => s_hdia_get_size + procedure, pass(a) :: get_nzeros => s_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => s_hdia_set_nzeros + procedure, nopass :: get_fmt => s_hdia_get_fmt + procedure, pass(a) :: sizeof => s_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_s_hdia_csmm + procedure, pass(a) :: csmv => psb_s_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_s_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_s_hdia_cssv + ! procedure, pass(a) :: scals => psb_s_hdia_scals + ! procedure, pass(a) :: scalv => psb_s_hdia_scal + ! procedure, pass(a) :: maxval => psb_s_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_s_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_s_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_s_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_s_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_s_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_s_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_s_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_s_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_s_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_s_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_s_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_s_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_s_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => s_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_s_hdia_reinit + ! procedure, pass(a) :: trim => psb_s_hdia_trim + procedure, pass(a) :: print => psb_s_hdia_print + procedure, pass(a) :: free => s_hdia_free + procedure, pass(a) :: mold => psb_s_hdia_mold + + end type psb_s_hdia_sparse_mat + + private :: s_hdia_get_nzeros, s_hdia_set_nzeros, s_hdia_free, & + & s_hdia_get_fmt, s_hdia_sizeof +!!$ & +!!$ & s_hdia_get_nz_row s_hdia_get_size, + +!!$ interface +!!$ subroutine psb_s_hdia_reallocate_nz(nz,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_reinit(a,clear) +!!$ import :: psb_s_hdia_sparse_mat +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_s_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_trim(a) +!!$ import :: psb_s_hdia_sparse_mat +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_hdia_trim +!!$ end interface + + interface + subroutine psb_s_hdia_mold(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdia_mold + end interface + + interface + subroutine psb_s_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_s_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hdia_allocate_mnnz + end interface + + interface + subroutine psb_s_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_hdia_print + end interface + + interface + subroutine psb_s_cp_hdia_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_hdia_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hdia_to_coo + end interface + + interface + subroutine psb_s_cp_hdia_from_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_s_mv_hdia_to_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hdia_to_coo + end interface + + interface + subroutine psb_s_mv_hdia_from_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ real(psb_spk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_s_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_cssv +!!$ subroutine psb_s_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_cssm +!!$ end interface + + interface + subroutine psb_s_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hdia_csmv +!!$ subroutine psb_s_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_s_hdia_maxval(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_s_hdia_csnmi(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_s_hdia_csnm1(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_rowsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_arwsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_colsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_aclsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_get_diag(a,d,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_scal(d,a,info,side) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_hdia_scals(d,a,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * psb_sizeof_sp + + end function s_hdia_sizeof + + function s_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function s_hdia_get_fmt + + function s_hdia_get_nzeros(a) result(res) + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function s_hdia_get_nzeros + + subroutine s_hdia_set_nzeros(a,nz) + implicit none + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine s_hdia_set_nzeros + + ! function s_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_s_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function s_hdia_get_size + + + ! function s_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_s_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function s_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine s_hdia_free(a) + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_hdia_free + + +end module psb_s_hdia_mat_mod diff --git a/ext/psb_s_hll_mat_mod.f90 b/ext/psb_s_hll_mat_mod.f90 new file mode 100644 index 00000000..735091c8 --- /dev/null +++ b/ext/psb_s_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_hll_mat_mod + + use psb_s_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_spk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => s_hll_get_hksz + procedure, pass(a) :: set_hksz => s_hll_set_hksz + procedure, pass(a) :: get_size => s_hll_get_size + procedure, pass(a) :: set_nzeros => s_hll_set_nzeros + procedure, pass(a) :: get_nzeros => s_hll_get_nzeros + procedure, nopass :: get_fmt => s_hll_get_fmt + procedure, pass(a) :: sizeof => s_hll_sizeof + procedure, pass(a) :: csmm => psb_s_hll_csmm + procedure, pass(a) :: csmv => psb_s_hll_csmv + procedure, pass(a) :: inner_cssm => psb_s_hll_cssm + procedure, pass(a) :: inner_cssv => psb_s_hll_cssv + procedure, pass(a) :: scals => psb_s_hll_scals + procedure, pass(a) :: scalv => psb_s_hll_scal + procedure, pass(a) :: maxval => psb_s_hll_maxval + procedure, pass(a) :: csnmi => psb_s_hll_csnmi + procedure, pass(a) :: csnm1 => psb_s_hll_csnm1 + procedure, pass(a) :: rowsum => psb_s_hll_rowsum + procedure, pass(a) :: arwsum => psb_s_hll_arwsum + procedure, pass(a) :: colsum => psb_s_hll_colsum + procedure, pass(a) :: aclsum => psb_s_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_s_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_s_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_s_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_s_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_s_hll_csput_a + procedure, pass(a) :: get_diag => psb_s_hll_get_diag + procedure, pass(a) :: csgetptn => psb_s_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_s_hll_csgetrow + procedure, pass(a) :: get_nz_row => s_hll_get_nz_row + procedure, pass(a) :: reinit => psb_s_hll_reinit + procedure, pass(a) :: print => psb_s_hll_print + procedure, pass(a) :: free => s_hll_free + procedure, pass(a) :: mold => psb_s_hll_mold + + end type psb_s_hll_sparse_mat + + private :: s_hll_get_nzeros, s_hll_free, s_hll_get_fmt, & + & s_hll_get_size, s_hll_sizeof, s_hll_get_nz_row, & + & s_hll_set_nzeros, s_hll_get_hksz, s_hll_set_hksz + + interface + subroutine psb_s_hll_reallocate_nz(nz,a) + import :: psb_s_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hll_sparse_mat), intent(inout) :: a + end subroutine psb_s_hll_reallocate_nz + end interface + + interface + subroutine psb_s_hll_reinit(a,clear) + import :: psb_s_hll_sparse_mat + class(psb_s_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_hll_reinit + end interface + + interface + subroutine psb_s_hll_mold(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_mold + end interface + + interface + subroutine psb_s_hll_allocate_mnnz(m,n,a,nz) + import :: psb_s_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hll_allocate_mnnz + end interface + + interface + subroutine psb_s_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_hll_print + end interface + + interface + subroutine psb_s_cp_hll_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_hll_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_to_coo + end interface + + interface + subroutine psb_s_cp_hll_from_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_from_coo + end interface + + interface + subroutine psb_s_cp_hll_to_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_to_fmt + end interface + + interface + subroutine psb_s_cp_hll_from_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_from_fmt + end interface + + interface + subroutine psb_s_mv_hll_to_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_to_coo + end interface + + interface + subroutine psb_s_mv_hll_from_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_from_coo + end interface + + interface + subroutine psb_s_mv_hll_to_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_to_fmt + end interface + + interface + subroutine psb_s_mv_hll_from_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_from_fmt + end interface + + interface + subroutine psb_s_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_csput_a + end interface + + interface + subroutine psb_s_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_hll_csgetptn + end interface + + interface + subroutine psb_s_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_hll_csgetrow + end interface + + interface + subroutine psb_s_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_hll_csgetblk + end interface + + interface + subroutine psb_s_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_cssv + subroutine psb_s_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_cssm + end interface + + interface + subroutine psb_s_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_csmv + subroutine psb_s_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_csmm + end interface + + + interface + function psb_s_hll_maxval(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_maxval + end interface + + interface + function psb_s_hll_csnmi(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_csnmi + end interface + + interface + function psb_s_hll_csnm1(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_csnm1 + end interface + + interface + subroutine psb_s_hll_rowsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_rowsum + end interface + + interface + subroutine psb_s_hll_arwsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_arwsum + end interface + + interface + subroutine psb_s_hll_colsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_colsum + end interface + + interface + subroutine psb_s_hll_aclsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_aclsum + end interface + + interface + subroutine psb_s_hll_get_diag(a,d,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_get_diag + end interface + + interface + subroutine psb_s_hll_scal(d,a,info,side) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_hll_scal + end interface + + interface + subroutine psb_s_hll_scals(d,a,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_s_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_s_hll_sparse_mat, psb_ipk_, psb_s_coo_sparse_mat + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_s_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hll_sizeof(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function s_hll_sizeof + + function s_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function s_hll_get_fmt + + subroutine s_hll_set_nzeros(a,n) + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine s_hll_set_nzeros + + function s_hll_get_nzeros(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function s_hll_get_nzeros + + function s_hll_get_size(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function s_hll_get_size + + + + function s_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function s_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hll_free(a) + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine s_hll_free + + subroutine s_hll_set_hksz(a,n) + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine s_hll_set_hksz + + function s_hll_get_hksz(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function s_hll_get_hksz + +end module psb_s_hll_mat_mod diff --git a/ext/psb_z_dia_mat_mod.f90 b/ext/psb_z_dia_mat_mod.f90 new file mode 100644 index 00000000..76d071af --- /dev/null +++ b/ext/psb_z_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_dia_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + complex(psb_dpk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => z_dia_get_size + procedure, pass(a) :: get_nzeros => z_dia_get_nzeros + procedure, nopass :: get_fmt => z_dia_get_fmt + procedure, pass(a) :: sizeof => z_dia_sizeof + procedure, pass(a) :: csmm => psb_z_dia_csmm + procedure, pass(a) :: csmv => psb_z_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_z_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_z_dia_cssv + procedure, pass(a) :: scals => psb_z_dia_scals + procedure, pass(a) :: scalv => psb_z_dia_scal + procedure, pass(a) :: maxval => psb_z_dia_maxval + procedure, pass(a) :: rowsum => psb_z_dia_rowsum + procedure, pass(a) :: arwsum => psb_z_dia_arwsum + procedure, pass(a) :: colsum => psb_z_dia_colsum + procedure, pass(a) :: aclsum => psb_z_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_z_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_z_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_z_dia_csput_a + procedure, pass(a) :: get_diag => psb_z_dia_get_diag + procedure, pass(a) :: csgetptn => psb_z_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_z_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => z_dia_get_nz_row + procedure, pass(a) :: reinit => psb_z_dia_reinit + ! procedure, pass(a) :: trim => psb_z_dia_trim + procedure, pass(a) :: print => psb_z_dia_print + procedure, pass(a) :: free => z_dia_free + procedure, pass(a) :: mold => psb_z_dia_mold + + end type psb_z_dia_sparse_mat + + private :: z_dia_get_nzeros, z_dia_free, z_dia_get_fmt, & + & z_dia_sizeof !, z_dia_get_size, z_dia_get_nz_row + + interface + subroutine psb_z_dia_reallocate_nz(nz,a) + import :: psb_z_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dia_sparse_mat), intent(inout) :: a + end subroutine psb_z_dia_reallocate_nz + end interface + + interface + subroutine psb_z_dia_reinit(a,clear) + import :: psb_z_dia_sparse_mat + class(psb_z_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_dia_reinit + end interface + + interface + subroutine psb_z_dia_trim(a) + import :: psb_z_dia_sparse_mat + class(psb_z_dia_sparse_mat), intent(inout) :: a + end subroutine psb_z_dia_trim + end interface + + interface + subroutine psb_z_dia_mold(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_mold + end interface + + interface + subroutine psb_z_dia_allocate_mnnz(m,n,a,nz) + import :: psb_z_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_dia_allocate_mnnz + end interface + + interface + subroutine psb_z_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_dia_print + end interface + + interface + subroutine psb_z_cp_dia_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_dia_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_to_coo + end interface + + interface + subroutine psb_z_cp_dia_from_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_from_coo + end interface + + interface + subroutine psb_z_cp_dia_to_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_to_fmt + end interface + + interface + subroutine psb_z_cp_dia_from_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_from_fmt + end interface + + interface + subroutine psb_z_mv_dia_to_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_to_coo + end interface + + interface + subroutine psb_z_mv_dia_from_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_from_coo + end interface + + interface + subroutine psb_z_mv_dia_to_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_to_fmt + end interface + + interface + subroutine psb_z_mv_dia_from_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_from_fmt + end interface + + interface + subroutine psb_z_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_csput_a + end interface + + interface + subroutine psb_z_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_dia_csgetptn + end interface + + interface + subroutine psb_z_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_dia_csgetrow + end interface + + interface + subroutine psb_z_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_dia_csgetblk + end interface + + interface + subroutine psb_z_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_cssv + subroutine psb_z_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_cssm + end interface + + interface + subroutine psb_z_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_csmv + subroutine psb_z_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_csmm + end interface + + + interface + function psb_z_dia_maxval(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_maxval + end interface + + interface + function psb_z_dia_csnmi(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_csnmi + end interface + + interface + function psb_z_dia_csnm1(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_csnm1 + end interface + + interface + subroutine psb_z_dia_rowsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_rowsum + end interface + + interface + subroutine psb_z_dia_arwsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_arwsum + end interface + + interface + subroutine psb_z_dia_colsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_colsum + end interface + + interface + subroutine psb_z_dia_aclsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_aclsum + end interface + + interface + subroutine psb_z_dia_get_diag(a,d,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_get_diag + end interface + + interface + subroutine psb_z_dia_scal(d,a,info,side) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_dia_scal + end interface + + interface + subroutine psb_z_dia_scals(d,a,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_z_convert_dia_from_coo(a,tmp,info) + import :: psb_z_dia_sparse_mat, psb_ipk_, psb_z_coo_sparse_mat + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_dia_sizeof(a) result(res) + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function z_dia_sizeof + + function z_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function z_dia_get_fmt + + function z_dia_get_nzeros(a) result(res) + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function z_dia_get_nzeros + + ! function z_dia_get_size(a) result(res) + ! implicit none + ! class(psb_z_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function z_dia_get_size + + + ! function z_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_z_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function z_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine z_dia_free(a) + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_dia_free + + +end module psb_z_dia_mat_mod diff --git a/ext/psb_z_dns_mat_mod.f90 b/ext/psb_z_dns_mat_mod.f90 new file mode 100644 index 00000000..6147057d --- /dev/null +++ b/ext/psb_z_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_z_dns_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_dpk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + complex(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => z_dns_get_size + procedure, pass(a) :: get_nzeros => z_dns_get_nzeros + procedure, nopass :: get_fmt => z_dns_get_fmt + procedure, pass(a) :: sizeof => z_dns_sizeof + procedure, pass(a) :: csmv => psb_z_dns_csmv + procedure, pass(a) :: csmm => psb_z_dns_csmm + procedure, pass(a) :: csnmi => psb_z_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_z_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_z_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_z_dns_get_diag + procedure, pass(a) :: csgetrow => psb_z_dns_csgetrow + procedure, pass(a) :: get_nz_row => z_dns_get_nz_row + procedure, pass(a) :: trim => psb_z_dns_trim + procedure, pass(a) :: free => z_dns_free + procedure, pass(a) :: mold => psb_z_dns_mold + + end type psb_z_dns_sparse_mat + + private :: z_dns_get_nzeros, z_dns_free, z_dns_get_fmt, & + & z_dns_get_size, z_dns_sizeof, z_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_z_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_z_dns_reallocate_nz(nz,a) + import :: psb_z_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dns_sparse_mat), intent(inout) :: a + end subroutine psb_z_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_z_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_z_dns_trim(a) + import :: psb_z_dns_sparse_mat + class(psb_z_dns_sparse_mat), intent(inout) :: a + end subroutine psb_z_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_z_dns_sparse_mat + !! \brief Allocate a class(psb_z_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_dns_mold(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_z_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_z_dns_allocate_mnnz(m,n,a,nz) + import :: psb_z_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Copy and convert to psb_z_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_cp_dns_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_dns_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Copy and convert from psb_z_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_z_cp_dns_from_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Convert to psb_z_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_mv_dns_to_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Convert from psb_z_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_z_mv_dns_from_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_z_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_z_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_z_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_z_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_z_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_z_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_z_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_z_dns_csnmi(a) result(res) + import :: psb_z_dns_sparse_mat, psb_dpk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_z_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_z_dns_get_diag(a,d,info) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_z_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function z_dns_sizeof(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function z_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_z_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function z_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function z_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_z_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function z_dns_get_nzeros(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function z_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_z_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function z_dns_get_size(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function z_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_z_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function z_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function z_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_z_dns_sparse_mat + !! Name says all + + subroutine z_dns_free(a) + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_dns_free + + +end module psb_z_dns_mat_mod diff --git a/ext/psb_z_ell_mat_mod.f90 b/ext/psb_z_ell_mat_mod.f90 new file mode 100644 index 00000000..52dc62b1 --- /dev/null +++ b/ext/psb_z_ell_mat_mod.f90 @@ -0,0 +1,544 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_ell_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + complex(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => z_ell_is_by_rows + procedure, pass(a) :: get_size => z_ell_get_size + procedure, pass(a) :: get_nzeros => z_ell_get_nzeros + procedure, nopass :: get_fmt => z_ell_get_fmt + procedure, pass(a) :: sizeof => z_ell_sizeof + procedure, pass(a) :: csmm => psb_z_ell_csmm + procedure, pass(a) :: csmv => psb_z_ell_csmv + procedure, pass(a) :: inner_cssm => psb_z_ell_cssm + procedure, pass(a) :: inner_cssv => psb_z_ell_cssv + procedure, pass(a) :: scals => psb_z_ell_scals + procedure, pass(a) :: scalv => psb_z_ell_scal + procedure, pass(a) :: maxval => psb_z_ell_maxval + procedure, pass(a) :: csnmi => psb_z_ell_csnmi + procedure, pass(a) :: csnm1 => psb_z_ell_csnm1 + procedure, pass(a) :: rowsum => psb_z_ell_rowsum + procedure, pass(a) :: arwsum => psb_z_ell_arwsum + procedure, pass(a) :: colsum => psb_z_ell_colsum + procedure, pass(a) :: aclsum => psb_z_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_z_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_z_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_z_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_z_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_z_ell_csput_a + procedure, pass(a) :: get_diag => psb_z_ell_get_diag + procedure, pass(a) :: csgetptn => psb_z_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_z_ell_csgetrow + procedure, pass(a) :: get_nz_row => z_ell_get_nz_row + procedure, pass(a) :: reinit => psb_z_ell_reinit + procedure, pass(a) :: trim => psb_z_ell_trim + procedure, pass(a) :: print => psb_z_ell_print + procedure, pass(a) :: free => z_ell_free + procedure, pass(a) :: mold => psb_z_ell_mold + + end type psb_z_ell_sparse_mat + + private :: z_ell_get_nzeros, z_ell_free, z_ell_get_fmt, & + & z_ell_get_size, z_ell_sizeof, z_ell_get_nz_row, & + & z_ell_is_by_rows + + interface + subroutine psb_z_ell_reallocate_nz(nz,a) + import :: psb_z_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_ell_sparse_mat), intent(inout) :: a + end subroutine psb_z_ell_reallocate_nz + end interface + + interface + subroutine psb_z_ell_reinit(a,clear) + import :: psb_z_ell_sparse_mat + class(psb_z_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_ell_reinit + end interface + + interface + subroutine psb_z_ell_trim(a) + import :: psb_z_ell_sparse_mat + class(psb_z_ell_sparse_mat), intent(inout) :: a + end subroutine psb_z_ell_trim + end interface + + interface + subroutine psb_z_ell_mold(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_mold + end interface + + interface + subroutine psb_z_ell_allocate_mnnz(m,n,a,nz) + import :: psb_z_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_ell_allocate_mnnz + end interface + + interface + subroutine psb_z_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_ell_print + end interface + + interface + subroutine psb_z_cp_ell_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_ell_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_to_coo + end interface + + interface + subroutine psb_z_cp_ell_from_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_from_coo + end interface + + interface + subroutine psb_z_cp_ell_to_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_to_fmt + end interface + + interface + subroutine psb_z_cp_ell_from_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_from_fmt + end interface + + interface + subroutine psb_z_mv_ell_to_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_to_coo + end interface + + interface + subroutine psb_z_mv_ell_from_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_from_coo + end interface + + interface + subroutine psb_z_mv_ell_to_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_to_fmt + end interface + + interface + subroutine psb_z_mv_ell_from_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_from_fmt + end interface + + interface + subroutine psb_z_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_csput_a + end interface + + interface + subroutine psb_z_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_ell_csgetptn + end interface + + interface + subroutine psb_z_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_ell_csgetrow + end interface + + interface + subroutine psb_z_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_ell_csgetblk + end interface + + interface + subroutine psb_z_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_cssv + subroutine psb_z_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_cssm + end interface + + interface + subroutine psb_z_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_csmv + subroutine psb_z_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_csmm + end interface + + + interface + function psb_z_ell_maxval(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_maxval + end interface + + interface + function psb_z_ell_csnmi(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_csnmi + end interface + + interface + function psb_z_ell_csnm1(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_csnm1 + end interface + + interface + subroutine psb_z_ell_rowsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_rowsum + end interface + + interface + subroutine psb_z_ell_arwsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_arwsum + end interface + + interface + subroutine psb_z_ell_colsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_colsum + end interface + + interface + subroutine psb_z_ell_aclsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_aclsum + end interface + + interface + subroutine psb_z_ell_get_diag(a,d,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_get_diag + end interface + + interface + subroutine psb_z_ell_scal(d,a,info,side) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_ell_scal + end interface + + interface + subroutine psb_z_ell_scals(d,a,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_scals + end interface + + interface + subroutine psi_z_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_z_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_ell_is_by_rows(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function z_ell_is_by_rows + + function z_ell_sizeof(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function z_ell_sizeof + + function z_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function z_ell_get_fmt + + function z_ell_get_nzeros(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function z_ell_get_nzeros + + function z_ell_get_size(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function z_ell_get_size + + + function z_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function z_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_ell_free(a) + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_ell_free + + +end module psb_z_ell_mat_mod diff --git a/ext/psb_z_hdia_mat_mod.f90 b/ext/psb_z_hdia_mat_mod.f90 new file mode 100644 index 00000000..e7c11321 --- /dev/null +++ b/ext/psb_z_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_z_hdia_mat_mod + + use psb_z_base_mat_mod + + + type, extends(psb_z_base_sparse_mat) :: psb_z_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + complex(psb_dpk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => z_hdia_get_size + procedure, pass(a) :: get_nzeros => z_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => z_hdia_set_nzeros + procedure, nopass :: get_fmt => z_hdia_get_fmt + procedure, pass(a) :: sizeof => z_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_z_hdia_csmm + procedure, pass(a) :: csmv => psb_z_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_z_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_z_hdia_cssv + ! procedure, pass(a) :: scals => psb_z_hdia_scals + ! procedure, pass(a) :: scalv => psb_z_hdia_scal + ! procedure, pass(a) :: maxval => psb_z_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_z_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_z_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_z_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_z_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_z_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_z_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_z_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_z_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_z_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_z_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_z_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_z_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_z_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => z_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_z_hdia_reinit + ! procedure, pass(a) :: trim => psb_z_hdia_trim + procedure, pass(a) :: print => psb_z_hdia_print + procedure, pass(a) :: free => z_hdia_free + procedure, pass(a) :: mold => psb_z_hdia_mold + + end type psb_z_hdia_sparse_mat + + private :: z_hdia_get_nzeros, z_hdia_set_nzeros, z_hdia_free, & + & z_hdia_get_fmt, z_hdia_sizeof +!!$ & +!!$ & z_hdia_get_nz_row z_hdia_get_size, + +!!$ interface +!!$ subroutine psb_z_hdia_reallocate_nz(nz,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_reinit(a,clear) +!!$ import :: psb_z_hdia_sparse_mat +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_z_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_trim(a) +!!$ import :: psb_z_hdia_sparse_mat +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdia_trim +!!$ end interface + + interface + subroutine psb_z_hdia_mold(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdia_mold + end interface + + interface + subroutine psb_z_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_z_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hdia_allocate_mnnz + end interface + + interface + subroutine psb_z_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_hdia_print + end interface + + interface + subroutine psb_z_cp_hdia_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_hdia_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdia_to_coo + end interface + + interface + subroutine psb_z_cp_hdia_from_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_z_mv_hdia_to_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdia_to_coo + end interface + + interface + subroutine psb_z_mv_hdia_from_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ complex(psb_dpk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_z_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_cssv +!!$ subroutine psb_z_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_cssm +!!$ end interface + + interface + subroutine psb_z_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hdia_csmv +!!$ subroutine psb_z_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_z_hdia_maxval(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_z_hdia_csnmi(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_z_hdia_csnm1(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_rowsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_arwsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_colsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_aclsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_get_diag(a,d,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_scal(d,a,info,side) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_hdia_scals(d,a,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * (2*psb_sizeof_dp) + + end function z_hdia_sizeof + + function z_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function z_hdia_get_fmt + + function z_hdia_get_nzeros(a) result(res) + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function z_hdia_get_nzeros + + subroutine z_hdia_set_nzeros(a,nz) + implicit none + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine z_hdia_set_nzeros + + ! function z_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_z_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function z_hdia_get_size + + + ! function z_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_z_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function z_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine z_hdia_free(a) + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_hdia_free + + +end module psb_z_hdia_mat_mod diff --git a/ext/psb_z_hll_mat_mod.f90 b/ext/psb_z_hll_mat_mod.f90 new file mode 100644 index 00000000..98eb403f --- /dev/null +++ b/ext/psb_z_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_hll_mat_mod + + use psb_z_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + complex(psb_dpk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => z_hll_get_hksz + procedure, pass(a) :: set_hksz => z_hll_set_hksz + procedure, pass(a) :: get_size => z_hll_get_size + procedure, pass(a) :: set_nzeros => z_hll_set_nzeros + procedure, pass(a) :: get_nzeros => z_hll_get_nzeros + procedure, nopass :: get_fmt => z_hll_get_fmt + procedure, pass(a) :: sizeof => z_hll_sizeof + procedure, pass(a) :: csmm => psb_z_hll_csmm + procedure, pass(a) :: csmv => psb_z_hll_csmv + procedure, pass(a) :: inner_cssm => psb_z_hll_cssm + procedure, pass(a) :: inner_cssv => psb_z_hll_cssv + procedure, pass(a) :: scals => psb_z_hll_scals + procedure, pass(a) :: scalv => psb_z_hll_scal + procedure, pass(a) :: maxval => psb_z_hll_maxval + procedure, pass(a) :: csnmi => psb_z_hll_csnmi + procedure, pass(a) :: csnm1 => psb_z_hll_csnm1 + procedure, pass(a) :: rowsum => psb_z_hll_rowsum + procedure, pass(a) :: arwsum => psb_z_hll_arwsum + procedure, pass(a) :: colsum => psb_z_hll_colsum + procedure, pass(a) :: aclsum => psb_z_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_z_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_z_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_z_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_z_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_z_hll_csput_a + procedure, pass(a) :: get_diag => psb_z_hll_get_diag + procedure, pass(a) :: csgetptn => psb_z_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_z_hll_csgetrow + procedure, pass(a) :: get_nz_row => z_hll_get_nz_row + procedure, pass(a) :: reinit => psb_z_hll_reinit + procedure, pass(a) :: print => psb_z_hll_print + procedure, pass(a) :: free => z_hll_free + procedure, pass(a) :: mold => psb_z_hll_mold + + end type psb_z_hll_sparse_mat + + private :: z_hll_get_nzeros, z_hll_free, z_hll_get_fmt, & + & z_hll_get_size, z_hll_sizeof, z_hll_get_nz_row, & + & z_hll_set_nzeros, z_hll_get_hksz, z_hll_set_hksz + + interface + subroutine psb_z_hll_reallocate_nz(nz,a) + import :: psb_z_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hll_sparse_mat), intent(inout) :: a + end subroutine psb_z_hll_reallocate_nz + end interface + + interface + subroutine psb_z_hll_reinit(a,clear) + import :: psb_z_hll_sparse_mat + class(psb_z_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_hll_reinit + end interface + + interface + subroutine psb_z_hll_mold(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_mold + end interface + + interface + subroutine psb_z_hll_allocate_mnnz(m,n,a,nz) + import :: psb_z_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hll_allocate_mnnz + end interface + + interface + subroutine psb_z_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_hll_print + end interface + + interface + subroutine psb_z_cp_hll_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_hll_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_to_coo + end interface + + interface + subroutine psb_z_cp_hll_from_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_from_coo + end interface + + interface + subroutine psb_z_cp_hll_to_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_to_fmt + end interface + + interface + subroutine psb_z_cp_hll_from_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_from_fmt + end interface + + interface + subroutine psb_z_mv_hll_to_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_to_coo + end interface + + interface + subroutine psb_z_mv_hll_from_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_from_coo + end interface + + interface + subroutine psb_z_mv_hll_to_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_to_fmt + end interface + + interface + subroutine psb_z_mv_hll_from_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_from_fmt + end interface + + interface + subroutine psb_z_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_csput_a + end interface + + interface + subroutine psb_z_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_hll_csgetptn + end interface + + interface + subroutine psb_z_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_hll_csgetrow + end interface + + interface + subroutine psb_z_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_hll_csgetblk + end interface + + interface + subroutine psb_z_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_cssv + subroutine psb_z_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_cssm + end interface + + interface + subroutine psb_z_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_csmv + subroutine psb_z_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_csmm + end interface + + + interface + function psb_z_hll_maxval(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_maxval + end interface + + interface + function psb_z_hll_csnmi(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_csnmi + end interface + + interface + function psb_z_hll_csnm1(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_csnm1 + end interface + + interface + subroutine psb_z_hll_rowsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_rowsum + end interface + + interface + subroutine psb_z_hll_arwsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_arwsum + end interface + + interface + subroutine psb_z_hll_colsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_colsum + end interface + + interface + subroutine psb_z_hll_aclsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_aclsum + end interface + + interface + subroutine psb_z_hll_get_diag(a,d,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_get_diag + end interface + + interface + subroutine psb_z_hll_scal(d,a,info,side) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_hll_scal + end interface + + interface + subroutine psb_z_hll_scals(d,a,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_z_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_z_hll_sparse_mat, psb_ipk_, psb_z_coo_sparse_mat + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hll_sizeof(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function z_hll_sizeof + + function z_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function z_hll_get_fmt + + subroutine z_hll_set_nzeros(a,n) + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine z_hll_set_nzeros + + function z_hll_get_nzeros(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function z_hll_get_nzeros + + function z_hll_get_size(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function z_hll_get_size + + + + function z_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function z_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hll_free(a) + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine z_hll_free + + subroutine z_hll_set_hksz(a,n) + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine z_hll_set_hksz + + function z_hll_get_hksz(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function z_hll_get_hksz + +end module psb_z_hll_mat_mod diff --git a/ext/psi_c_ext_util_mod.f90 b/ext/psi_c_ext_util_mod.f90 new file mode 100644 index 00000000..e58c0d93 --- /dev/null +++ b/ext/psi_c_ext_util_mod.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_c_ext_util_mod + + use psb_base_mod, only : psb_ipk_, psb_spk_ + + interface psi_xtr_dia_from_coo + subroutine psi_c_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata,rdisp) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd, ncd, ia(:), ja(:), d(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + end subroutine psi_c_xtr_dia_from_coo + end interface + + interface psi_xtr_ell_from_coo + subroutine psi_c_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,& + & valc,ja,val,irn,diag,ld) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + end subroutine psi_c_xtr_ell_from_coo + end interface psi_xtr_ell_from_coo + + interface psi_xtr_coo_from_dia + subroutine psi_c_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,& + & info,rdisp) + import :: psb_ipk_, psb_spk_ + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), nz + complex(psb_spk_), intent(inout) :: val(:) + complex(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + end subroutine psi_c_xtr_coo_from_dia + end interface + +end module psi_c_ext_util_mod diff --git a/ext/psi_d_ext_util_mod.f90 b/ext/psi_d_ext_util_mod.f90 new file mode 100644 index 00000000..07de8ad1 --- /dev/null +++ b/ext/psi_d_ext_util_mod.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_d_ext_util_mod + + use psb_base_mod, only : psb_ipk_, psb_dpk_ + + interface psi_xtr_dia_from_coo + subroutine psi_d_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata,rdisp) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd, ncd, ia(:), ja(:), d(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + end subroutine psi_d_xtr_dia_from_coo + end interface + + interface psi_xtr_ell_from_coo + subroutine psi_d_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,& + & valc,ja,val,irn,diag,ld) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + end subroutine psi_d_xtr_ell_from_coo + end interface psi_xtr_ell_from_coo + + interface psi_xtr_coo_from_dia + subroutine psi_d_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,& + & info,rdisp) + import :: psb_ipk_, psb_dpk_ + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), nz + real(psb_dpk_), intent(inout) :: val(:) + real(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + end subroutine psi_d_xtr_coo_from_dia + end interface + +end module psi_d_ext_util_mod diff --git a/ext/psi_ext_util_mod.f90 b/ext/psi_ext_util_mod.f90 new file mode 100644 index 00000000..afb2c749 --- /dev/null +++ b/ext/psi_ext_util_mod.f90 @@ -0,0 +1,41 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_ext_util_mod + + use psi_i_ext_util_mod + use psi_s_ext_util_mod + use psi_c_ext_util_mod + use psi_d_ext_util_mod + use psi_z_ext_util_mod + +end module psi_ext_util_mod diff --git a/ext/psi_i_ext_util_mod.f90 b/ext/psi_i_ext_util_mod.f90 new file mode 100644 index 00000000..ac073f1d --- /dev/null +++ b/ext/psi_i_ext_util_mod.f90 @@ -0,0 +1,175 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_i_ext_util_mod + + use psb_base_mod, only : psb_ipk_ + ! + ! Hack size for HLL format. + ! + integer(psb_ipk_), parameter :: psb_hksz_def_ = 32 + integer(psb_ipk_), private, save :: psb_hksz = psb_hksz_def_ + logical, private, save :: psb_hll_use_vector = .true. +contains + + function psi_get_hksz() result(res) + implicit none + integer(psb_ipk_) :: res + res = psb_hksz + end function psi_get_hksz + + subroutine psi_set_hksz(size) + implicit none + integer(psb_ipk_), intent(in) :: size + if (size > 0) psb_hksz = size + end subroutine psi_set_hksz + + subroutine psi_set_hll_vector(val) + implicit none + logical, optional :: val + if (present(val)) then + psb_hll_use_vector = val + else + psb_hll_use_vector = .true. + end if + + end subroutine psi_set_hll_vector + + function psi_get_hll_vector() result(res) + implicit none + logical :: res + + res = psb_hll_use_vector + end function psi_get_hll_vector + + + ! + ! Compute offsets and allocation for DIAgonal storage. + ! Input: + ! nr,nc,nz,ia,ja: the matrix pattern in COO + ! Note: This routine is designed to be called + ! with either a full matrix or an horizontal stripe, + ! with the COO entries sorted in row major order, hence + ! it will handle the conversion of a strip, so it can + ! be used by both DIA and HDIA. In both cases NR and NC + ! *MUST* be the *GLOBAL* number of rows/columns, not those + ! of the strips, i.e. it must be that all entris in IA <=NR + ! and JA <= NC. + ! Output: + ! nd: number of nonzero diagonals + ! d: d(k) contains the index inside offset of diagonal k, + ! which is, if A(I,J) /= 0 then K=NR+J-I, or (optionally) 0. + ! *MUST* be allocated on the *global* size NR+NC-1 + ! + ! offset: for each of the ND nonzero diagonals, its offset J-I + ! + ! Notes: D and OFFSET together represent the set of diagonals; + ! D can be used outside to quickly find which entry of OFFSET + ! a given a(i,j) corresponds to, without doing a search. + ! + ! 1. Optionally init D vector to zeros + ! 2. Walk through the NZ pairs (I,J): + ! a. if it is a new diagonal add to a heap; + ! b. increase its population count stored in D(J-I+NR) + ! c. Keep track of maximum population count. + ! 3. Go through the ND diagonals, getting them K out of the heap in order: + ! a. Set offset(i) to K-NR == J-I + ! b. Set D(K) = i or 0 (depending on cleard) + ! + ! Setting to 0 allows to reuse this function in a loop in a dry run + ! to estimate the allocation size for HDIA; without settng to 0 we + ! would need to zero the whole vector, resulting + ! in a quadratic overall cost. Outside this subroutine, it is possible + ! to zero selectively the entres in D by using the indices in OFFSET. + ! + ! + subroutine psi_dia_offset_from_coo(nr,nc,nz,ia,ja,nd,d,offset,info,& + & initd,cleard) + use psb_base_mod + + implicit none + + integer(psb_ipk_), intent(in) :: nr, nc, nz, ia(:), ja(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_), intent(out) :: offset(:) + integer(psb_ipk_), intent(out) :: nd + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initd,cleard + + type(psb_i_heap) :: heap + integer(psb_ipk_) :: k,i,j,ir,ic, ndiag, id + logical :: initd_, cleard_ + character(len=20) :: name + + info = psb_success_ + initd_ = .true. + if (present(initd)) initd_ = initd + cleard_ = .false. + if (present(cleard)) cleard_ = cleard + + if (initd_) d(:) = 0 + + ndiag = nr+nc-1 + if (size(d) Date: Fri, 29 Sep 2023 13:54:58 +0200 Subject: [PATCH 005/116] Merge psblas-ext into psblas step 2: GPU formats and configry. --- Make.inc.in | 22 + Makefile | 17 +- config/pac.m4 | 249 ++++ configure | 497 +++++- configure.ac | 69 +- gpu/CUDA/Makefile | 38 + gpu/CUDA/psi_cuda_CopyCooToElg.cuh | 104 ++ gpu/CUDA/psi_cuda_CopyCooToHlg.cuh | 108 ++ gpu/CUDA/psi_cuda_c_CopyCooToElg.cu | 10 + gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu | 10 + gpu/CUDA/psi_cuda_common.cuh | 16 + gpu/CUDA/psi_cuda_d_CopyCooToElg.cu | 10 + gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu | 10 + gpu/CUDA/psi_cuda_s_CopyCooToElg.cu | 10 + gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu | 10 + gpu/CUDA/psi_cuda_z_CopyCooToElg.cu | 10 + gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu | 10 + gpu/Makefile | 134 ++ gpu/base_cusparse_mod.F90 | 117 ++ gpu/c_cusparse_mod.F90 | 305 ++++ gpu/ccusparse.c | 97 ++ gpu/cintrf.h | 51 + gpu/core_mod.f90 | 53 + gpu/cuda_util.c | 808 ++++++++++ gpu/cuda_util.h | 139 ++ gpu/cusparse_mod.F90 | 38 + gpu/cvectordev.c | 325 ++++ gpu/cvectordev.h | 81 + gpu/d_cusparse_mod.F90 | 305 ++++ gpu/dcusparse.c | 95 ++ gpu/diagdev.c | 291 ++++ gpu/diagdev.h | 95 ++ gpu/diagdev_mod.F90 | 231 +++ gpu/dnsdev.c | 383 +++++ gpu/dnsdev.h | 122 ++ gpu/dnsdev_mod.F90 | 275 ++++ gpu/dvectordev.c | 305 ++++ gpu/dvectordev.h | 78 + gpu/elldev.c | 773 ++++++++++ gpu/elldev.h | 183 +++ gpu/elldev_mod.F90 | 326 ++++ gpu/fcusparse.c | 77 + gpu/fcusparse.h | 70 + gpu/fcusparse_fct.h | 770 ++++++++++ gpu/hdiagdev.c | 425 ++++++ gpu/hdiagdev.h | 111 ++ gpu/hdiagdev_mod.F90 | 203 +++ gpu/hlldev.c | 615 ++++++++ gpu/hlldev.h | 161 ++ gpu/hlldev_mod.F90 | 273 ++++ gpu/impl/Makefile | 294 ++++ gpu/impl/psb_c_cp_csrg_from_coo.F90 | 62 + gpu/impl/psb_c_cp_csrg_from_fmt.F90 | 61 + gpu/impl/psb_c_cp_diag_from_coo.F90 | 64 + gpu/impl/psb_c_cp_elg_from_coo.F90 | 184 +++ gpu/impl/psb_c_cp_elg_from_fmt.F90 | 101 ++ gpu/impl/psb_c_cp_hdiag_from_coo.F90 | 73 + gpu/impl/psb_c_cp_hlg_from_coo.F90 | 198 +++ gpu/impl/psb_c_cp_hlg_from_fmt.F90 | 68 + gpu/impl/psb_c_cp_hybg_from_coo.F90 | 64 + gpu/impl/psb_c_cp_hybg_from_fmt.F90 | 62 + gpu/impl/psb_c_csrg_allocate_mnnz.F90 | 68 + gpu/impl/psb_c_csrg_csmm.F90 | 134 ++ gpu/impl/psb_c_csrg_csmv.F90 | 139 ++ gpu/impl/psb_c_csrg_from_gpu.F90 | 73 + gpu/impl/psb_c_csrg_inner_vect_sv.F90 | 136 ++ gpu/impl/psb_c_csrg_mold.F90 | 65 + gpu/impl/psb_c_csrg_reallocate_nz.F90 | 70 + gpu/impl/psb_c_csrg_scal.F90 | 73 + gpu/impl/psb_c_csrg_scals.F90 | 71 + gpu/impl/psb_c_csrg_to_gpu.F90 | 325 ++++ gpu/impl/psb_c_csrg_vect_mv.F90 | 125 ++ gpu/impl/psb_c_diag_csmv.F90 | 136 ++ gpu/impl/psb_c_diag_mold.F90 | 65 + gpu/impl/psb_c_diag_to_gpu.F90 | 74 + gpu/impl/psb_c_diag_vect_mv.F90 | 126 ++ gpu/impl/psb_c_dnsg_mat_impl.F90 | 461 ++++++ gpu/impl/psb_c_elg_allocate_mnnz.F90 | 113 ++ gpu/impl/psb_c_elg_asb.f90 | 65 + gpu/impl/psb_c_elg_csmm.F90 | 134 ++ gpu/impl/psb_c_elg_csmv.F90 | 136 ++ gpu/impl/psb_c_elg_csput.F90 | 248 +++ gpu/impl/psb_c_elg_from_gpu.F90 | 74 + gpu/impl/psb_c_elg_inner_vect_sv.F90 | 89 ++ gpu/impl/psb_c_elg_mold.F90 | 65 + gpu/impl/psb_c_elg_reallocate_nz.F90 | 79 + gpu/impl/psb_c_elg_scal.F90 | 78 + gpu/impl/psb_c_elg_scals.F90 | 73 + gpu/impl/psb_c_elg_to_gpu.F90 | 93 ++ gpu/impl/psb_c_elg_trim.f90 | 62 + gpu/impl/psb_c_elg_vect_mv.F90 | 131 ++ gpu/impl/psb_c_hdiag_csmv.F90 | 136 ++ gpu/impl/psb_c_hdiag_mold.F90 | 65 + gpu/impl/psb_c_hdiag_to_gpu.F90 | 86 ++ gpu/impl/psb_c_hdiag_vect_mv.F90 | 126 ++ gpu/impl/psb_c_hlg_allocate_mnnz.F90 | 71 + gpu/impl/psb_c_hlg_csmm.F90 | 132 ++ gpu/impl/psb_c_hlg_csmv.F90 | 135 ++ gpu/impl/psb_c_hlg_from_gpu.F90 | 76 + gpu/impl/psb_c_hlg_inner_vect_sv.F90 | 81 + gpu/impl/psb_c_hlg_mold.F90 | 64 + gpu/impl/psb_c_hlg_reallocate_nz.F90 | 67 + gpu/impl/psb_c_hlg_scal.F90 | 75 + gpu/impl/psb_c_hlg_scals.F90 | 73 + gpu/impl/psb_c_hlg_to_gpu.F90 | 68 + gpu/impl/psb_c_hlg_vect_mv.F90 | 129 ++ gpu/impl/psb_c_hybg_allocate_mnnz.F90 | 69 + gpu/impl/psb_c_hybg_csmm.F90 | 135 ++ gpu/impl/psb_c_hybg_csmv.F90 | 138 ++ gpu/impl/psb_c_hybg_inner_vect_sv.F90 | 138 ++ gpu/impl/psb_c_hybg_mold.F90 | 66 + gpu/impl/psb_c_hybg_reallocate_nz.F90 | 71 + gpu/impl/psb_c_hybg_scal.F90 | 76 + gpu/impl/psb_c_hybg_scals.F90 | 76 + gpu/impl/psb_c_hybg_to_gpu.F90 | 154 ++ gpu/impl/psb_c_hybg_vect_mv.F90 | 127 ++ gpu/impl/psb_c_mv_csrg_from_coo.F90 | 65 + gpu/impl/psb_c_mv_csrg_from_fmt.F90 | 63 + gpu/impl/psb_c_mv_diag_from_coo.F90 | 69 + gpu/impl/psb_c_mv_elg_from_coo.F90 | 61 + gpu/impl/psb_c_mv_elg_from_fmt.F90 | 99 ++ gpu/impl/psb_c_mv_hdiag_from_coo.F90 | 74 + gpu/impl/psb_c_mv_hlg_from_coo.F90 | 61 + gpu/impl/psb_c_mv_hlg_from_fmt.F90 | 62 + gpu/impl/psb_c_mv_hybg_from_coo.F90 | 65 + gpu/impl/psb_c_mv_hybg_from_fmt.F90 | 62 + gpu/impl/psb_d_cp_csrg_from_coo.F90 | 62 + gpu/impl/psb_d_cp_csrg_from_fmt.F90 | 61 + gpu/impl/psb_d_cp_diag_from_coo.F90 | 64 + gpu/impl/psb_d_cp_elg_from_coo.F90 | 184 +++ gpu/impl/psb_d_cp_elg_from_fmt.F90 | 101 ++ gpu/impl/psb_d_cp_hdiag_from_coo.F90 | 73 + gpu/impl/psb_d_cp_hlg_from_coo.F90 | 198 +++ gpu/impl/psb_d_cp_hlg_from_fmt.F90 | 68 + gpu/impl/psb_d_cp_hybg_from_coo.F90 | 64 + gpu/impl/psb_d_cp_hybg_from_fmt.F90 | 62 + gpu/impl/psb_d_csrg_allocate_mnnz.F90 | 68 + gpu/impl/psb_d_csrg_csmm.F90 | 134 ++ gpu/impl/psb_d_csrg_csmv.F90 | 139 ++ gpu/impl/psb_d_csrg_from_gpu.F90 | 73 + gpu/impl/psb_d_csrg_inner_vect_sv.F90 | 136 ++ gpu/impl/psb_d_csrg_mold.F90 | 65 + gpu/impl/psb_d_csrg_reallocate_nz.F90 | 70 + gpu/impl/psb_d_csrg_scal.F90 | 73 + gpu/impl/psb_d_csrg_scals.F90 | 71 + gpu/impl/psb_d_csrg_to_gpu.F90 | 325 ++++ gpu/impl/psb_d_csrg_vect_mv.F90 | 125 ++ gpu/impl/psb_d_diag_csmv.F90 | 136 ++ gpu/impl/psb_d_diag_mold.F90 | 65 + gpu/impl/psb_d_diag_to_gpu.F90 | 74 + gpu/impl/psb_d_diag_vect_mv.F90 | 126 ++ gpu/impl/psb_d_dnsg_mat_impl.F90 | 461 ++++++ gpu/impl/psb_d_elg_allocate_mnnz.F90 | 113 ++ gpu/impl/psb_d_elg_asb.f90 | 65 + gpu/impl/psb_d_elg_csmm.F90 | 134 ++ gpu/impl/psb_d_elg_csmv.F90 | 136 ++ gpu/impl/psb_d_elg_csput.F90 | 248 +++ gpu/impl/psb_d_elg_from_gpu.F90 | 74 + gpu/impl/psb_d_elg_inner_vect_sv.F90 | 89 ++ gpu/impl/psb_d_elg_mold.F90 | 65 + gpu/impl/psb_d_elg_reallocate_nz.F90 | 79 + gpu/impl/psb_d_elg_scal.F90 | 78 + gpu/impl/psb_d_elg_scals.F90 | 73 + gpu/impl/psb_d_elg_to_gpu.F90 | 93 ++ gpu/impl/psb_d_elg_trim.f90 | 62 + gpu/impl/psb_d_elg_vect_mv.F90 | 131 ++ gpu/impl/psb_d_hdiag_csmv.F90 | 136 ++ gpu/impl/psb_d_hdiag_mold.F90 | 65 + gpu/impl/psb_d_hdiag_to_gpu.F90 | 86 ++ gpu/impl/psb_d_hdiag_vect_mv.F90 | 126 ++ gpu/impl/psb_d_hlg_allocate_mnnz.F90 | 71 + gpu/impl/psb_d_hlg_csmm.F90 | 132 ++ gpu/impl/psb_d_hlg_csmv.F90 | 135 ++ gpu/impl/psb_d_hlg_from_gpu.F90 | 76 + gpu/impl/psb_d_hlg_inner_vect_sv.F90 | 81 + gpu/impl/psb_d_hlg_mold.F90 | 64 + gpu/impl/psb_d_hlg_reallocate_nz.F90 | 67 + gpu/impl/psb_d_hlg_scal.F90 | 75 + gpu/impl/psb_d_hlg_scals.F90 | 73 + gpu/impl/psb_d_hlg_to_gpu.F90 | 68 + gpu/impl/psb_d_hlg_vect_mv.F90 | 129 ++ gpu/impl/psb_d_hybg_allocate_mnnz.F90 | 69 + gpu/impl/psb_d_hybg_csmm.F90 | 135 ++ gpu/impl/psb_d_hybg_csmv.F90 | 138 ++ gpu/impl/psb_d_hybg_inner_vect_sv.F90 | 138 ++ gpu/impl/psb_d_hybg_mold.F90 | 66 + gpu/impl/psb_d_hybg_reallocate_nz.F90 | 71 + gpu/impl/psb_d_hybg_scal.F90 | 76 + gpu/impl/psb_d_hybg_scals.F90 | 76 + gpu/impl/psb_d_hybg_to_gpu.F90 | 154 ++ gpu/impl/psb_d_hybg_vect_mv.F90 | 127 ++ gpu/impl/psb_d_mv_csrg_from_coo.F90 | 65 + gpu/impl/psb_d_mv_csrg_from_fmt.F90 | 63 + gpu/impl/psb_d_mv_diag_from_coo.F90 | 69 + gpu/impl/psb_d_mv_elg_from_coo.F90 | 61 + gpu/impl/psb_d_mv_elg_from_fmt.F90 | 99 ++ gpu/impl/psb_d_mv_hdiag_from_coo.F90 | 74 + gpu/impl/psb_d_mv_hlg_from_coo.F90 | 61 + gpu/impl/psb_d_mv_hlg_from_fmt.F90 | 62 + gpu/impl/psb_d_mv_hybg_from_coo.F90 | 65 + gpu/impl/psb_d_mv_hybg_from_fmt.F90 | 62 + gpu/impl/psb_s_cp_csrg_from_coo.F90 | 62 + gpu/impl/psb_s_cp_csrg_from_fmt.F90 | 61 + gpu/impl/psb_s_cp_diag_from_coo.F90 | 64 + gpu/impl/psb_s_cp_elg_from_coo.F90 | 184 +++ gpu/impl/psb_s_cp_elg_from_fmt.F90 | 101 ++ gpu/impl/psb_s_cp_hdiag_from_coo.F90 | 73 + gpu/impl/psb_s_cp_hlg_from_coo.F90 | 198 +++ gpu/impl/psb_s_cp_hlg_from_fmt.F90 | 68 + gpu/impl/psb_s_cp_hybg_from_coo.F90 | 64 + gpu/impl/psb_s_cp_hybg_from_fmt.F90 | 62 + gpu/impl/psb_s_csrg_allocate_mnnz.F90 | 68 + gpu/impl/psb_s_csrg_csmm.F90 | 134 ++ gpu/impl/psb_s_csrg_csmv.F90 | 139 ++ gpu/impl/psb_s_csrg_from_gpu.F90 | 73 + gpu/impl/psb_s_csrg_inner_vect_sv.F90 | 136 ++ gpu/impl/psb_s_csrg_mold.F90 | 65 + gpu/impl/psb_s_csrg_reallocate_nz.F90 | 70 + gpu/impl/psb_s_csrg_scal.F90 | 73 + gpu/impl/psb_s_csrg_scals.F90 | 71 + gpu/impl/psb_s_csrg_to_gpu.F90 | 325 ++++ gpu/impl/psb_s_csrg_vect_mv.F90 | 125 ++ gpu/impl/psb_s_diag_csmv.F90 | 136 ++ gpu/impl/psb_s_diag_mold.F90 | 65 + gpu/impl/psb_s_diag_to_gpu.F90 | 74 + gpu/impl/psb_s_diag_vect_mv.F90 | 126 ++ gpu/impl/psb_s_dnsg_mat_impl.F90 | 461 ++++++ gpu/impl/psb_s_elg_allocate_mnnz.F90 | 113 ++ gpu/impl/psb_s_elg_asb.f90 | 65 + gpu/impl/psb_s_elg_csmm.F90 | 134 ++ gpu/impl/psb_s_elg_csmv.F90 | 136 ++ gpu/impl/psb_s_elg_csput.F90 | 248 +++ gpu/impl/psb_s_elg_from_gpu.F90 | 74 + gpu/impl/psb_s_elg_inner_vect_sv.F90 | 89 ++ gpu/impl/psb_s_elg_mold.F90 | 65 + gpu/impl/psb_s_elg_reallocate_nz.F90 | 79 + gpu/impl/psb_s_elg_scal.F90 | 78 + gpu/impl/psb_s_elg_scals.F90 | 73 + gpu/impl/psb_s_elg_to_gpu.F90 | 93 ++ gpu/impl/psb_s_elg_trim.f90 | 62 + gpu/impl/psb_s_elg_vect_mv.F90 | 131 ++ gpu/impl/psb_s_hdiag_csmv.F90 | 136 ++ gpu/impl/psb_s_hdiag_mold.F90 | 65 + gpu/impl/psb_s_hdiag_to_gpu.F90 | 86 ++ gpu/impl/psb_s_hdiag_vect_mv.F90 | 126 ++ gpu/impl/psb_s_hlg_allocate_mnnz.F90 | 71 + gpu/impl/psb_s_hlg_csmm.F90 | 132 ++ gpu/impl/psb_s_hlg_csmv.F90 | 135 ++ gpu/impl/psb_s_hlg_from_gpu.F90 | 76 + gpu/impl/psb_s_hlg_inner_vect_sv.F90 | 81 + gpu/impl/psb_s_hlg_mold.F90 | 64 + gpu/impl/psb_s_hlg_reallocate_nz.F90 | 67 + gpu/impl/psb_s_hlg_scal.F90 | 75 + gpu/impl/psb_s_hlg_scals.F90 | 73 + gpu/impl/psb_s_hlg_to_gpu.F90 | 68 + gpu/impl/psb_s_hlg_vect_mv.F90 | 129 ++ gpu/impl/psb_s_hybg_allocate_mnnz.F90 | 69 + gpu/impl/psb_s_hybg_csmm.F90 | 135 ++ gpu/impl/psb_s_hybg_csmv.F90 | 138 ++ gpu/impl/psb_s_hybg_inner_vect_sv.F90 | 138 ++ gpu/impl/psb_s_hybg_mold.F90 | 66 + gpu/impl/psb_s_hybg_reallocate_nz.F90 | 71 + gpu/impl/psb_s_hybg_scal.F90 | 76 + gpu/impl/psb_s_hybg_scals.F90 | 76 + gpu/impl/psb_s_hybg_to_gpu.F90 | 154 ++ gpu/impl/psb_s_hybg_vect_mv.F90 | 127 ++ gpu/impl/psb_s_mv_csrg_from_coo.F90 | 65 + gpu/impl/psb_s_mv_csrg_from_fmt.F90 | 63 + gpu/impl/psb_s_mv_diag_from_coo.F90 | 69 + gpu/impl/psb_s_mv_elg_from_coo.F90 | 61 + gpu/impl/psb_s_mv_elg_from_fmt.F90 | 99 ++ gpu/impl/psb_s_mv_hdiag_from_coo.F90 | 74 + gpu/impl/psb_s_mv_hlg_from_coo.F90 | 61 + gpu/impl/psb_s_mv_hlg_from_fmt.F90 | 62 + gpu/impl/psb_s_mv_hybg_from_coo.F90 | 65 + gpu/impl/psb_s_mv_hybg_from_fmt.F90 | 62 + gpu/impl/psb_z_cp_csrg_from_coo.F90 | 62 + gpu/impl/psb_z_cp_csrg_from_fmt.F90 | 61 + gpu/impl/psb_z_cp_diag_from_coo.F90 | 64 + gpu/impl/psb_z_cp_elg_from_coo.F90 | 184 +++ gpu/impl/psb_z_cp_elg_from_fmt.F90 | 101 ++ gpu/impl/psb_z_cp_hdiag_from_coo.F90 | 73 + gpu/impl/psb_z_cp_hlg_from_coo.F90 | 198 +++ gpu/impl/psb_z_cp_hlg_from_fmt.F90 | 68 + gpu/impl/psb_z_cp_hybg_from_coo.F90 | 64 + gpu/impl/psb_z_cp_hybg_from_fmt.F90 | 62 + gpu/impl/psb_z_csrg_allocate_mnnz.F90 | 68 + gpu/impl/psb_z_csrg_csmm.F90 | 134 ++ gpu/impl/psb_z_csrg_csmv.F90 | 139 ++ gpu/impl/psb_z_csrg_from_gpu.F90 | 73 + gpu/impl/psb_z_csrg_inner_vect_sv.F90 | 136 ++ gpu/impl/psb_z_csrg_mold.F90 | 65 + gpu/impl/psb_z_csrg_reallocate_nz.F90 | 70 + gpu/impl/psb_z_csrg_scal.F90 | 73 + gpu/impl/psb_z_csrg_scals.F90 | 71 + gpu/impl/psb_z_csrg_to_gpu.F90 | 325 ++++ gpu/impl/psb_z_csrg_vect_mv.F90 | 125 ++ gpu/impl/psb_z_diag_csmv.F90 | 136 ++ gpu/impl/psb_z_diag_mold.F90 | 65 + gpu/impl/psb_z_diag_to_gpu.F90 | 74 + gpu/impl/psb_z_diag_vect_mv.F90 | 126 ++ gpu/impl/psb_z_dnsg_mat_impl.F90 | 461 ++++++ gpu/impl/psb_z_elg_allocate_mnnz.F90 | 113 ++ gpu/impl/psb_z_elg_asb.f90 | 65 + gpu/impl/psb_z_elg_csmm.F90 | 134 ++ gpu/impl/psb_z_elg_csmv.F90 | 136 ++ gpu/impl/psb_z_elg_csput.F90 | 248 +++ gpu/impl/psb_z_elg_from_gpu.F90 | 74 + gpu/impl/psb_z_elg_inner_vect_sv.F90 | 89 ++ gpu/impl/psb_z_elg_mold.F90 | 65 + gpu/impl/psb_z_elg_reallocate_nz.F90 | 79 + gpu/impl/psb_z_elg_scal.F90 | 78 + gpu/impl/psb_z_elg_scals.F90 | 73 + gpu/impl/psb_z_elg_to_gpu.F90 | 93 ++ gpu/impl/psb_z_elg_trim.f90 | 62 + gpu/impl/psb_z_elg_vect_mv.F90 | 131 ++ gpu/impl/psb_z_hdiag_csmv.F90 | 136 ++ gpu/impl/psb_z_hdiag_mold.F90 | 65 + gpu/impl/psb_z_hdiag_to_gpu.F90 | 86 ++ gpu/impl/psb_z_hdiag_vect_mv.F90 | 126 ++ gpu/impl/psb_z_hlg_allocate_mnnz.F90 | 71 + gpu/impl/psb_z_hlg_csmm.F90 | 132 ++ gpu/impl/psb_z_hlg_csmv.F90 | 135 ++ gpu/impl/psb_z_hlg_from_gpu.F90 | 76 + gpu/impl/psb_z_hlg_inner_vect_sv.F90 | 81 + gpu/impl/psb_z_hlg_mold.F90 | 64 + gpu/impl/psb_z_hlg_reallocate_nz.F90 | 67 + gpu/impl/psb_z_hlg_scal.F90 | 75 + gpu/impl/psb_z_hlg_scals.F90 | 73 + gpu/impl/psb_z_hlg_to_gpu.F90 | 68 + gpu/impl/psb_z_hlg_vect_mv.F90 | 129 ++ gpu/impl/psb_z_hybg_allocate_mnnz.F90 | 69 + gpu/impl/psb_z_hybg_csmm.F90 | 135 ++ gpu/impl/psb_z_hybg_csmv.F90 | 138 ++ gpu/impl/psb_z_hybg_inner_vect_sv.F90 | 138 ++ gpu/impl/psb_z_hybg_mold.F90 | 66 + gpu/impl/psb_z_hybg_reallocate_nz.F90 | 71 + gpu/impl/psb_z_hybg_scal.F90 | 76 + gpu/impl/psb_z_hybg_scals.F90 | 76 + gpu/impl/psb_z_hybg_to_gpu.F90 | 154 ++ gpu/impl/psb_z_hybg_vect_mv.F90 | 127 ++ gpu/impl/psb_z_mv_csrg_from_coo.F90 | 65 + gpu/impl/psb_z_mv_csrg_from_fmt.F90 | 63 + gpu/impl/psb_z_mv_diag_from_coo.F90 | 69 + gpu/impl/psb_z_mv_elg_from_coo.F90 | 61 + gpu/impl/psb_z_mv_elg_from_fmt.F90 | 99 ++ gpu/impl/psb_z_mv_hdiag_from_coo.F90 | 74 + gpu/impl/psb_z_mv_hlg_from_coo.F90 | 61 + gpu/impl/psb_z_mv_hlg_from_fmt.F90 | 62 + gpu/impl/psb_z_mv_hybg_from_coo.F90 | 65 + gpu/impl/psb_z_mv_hybg_from_fmt.F90 | 62 + gpu/ivectordev.c | 182 +++ gpu/ivectordev.h | 64 + gpu/psb_base_vectordev_mod.F90 | 104 ++ gpu/psb_c_csrg_mat_mod.F90 | 393 +++++ gpu/psb_c_diag_mat_mod.F90 | 308 ++++ gpu/psb_c_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_c_elg_mat_mod.F90 | 483 ++++++ gpu/psb_c_gpu_vect_mod.F90 | 1989 +++++++++++++++++++++++++ gpu/psb_c_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_c_hlg_mat_mod.F90 | 398 +++++ gpu/psb_c_hybg_mat_mod.F90 | 306 ++++ gpu/psb_c_vectordev_mod.F90 | 390 +++++ gpu/psb_d_csrg_mat_mod.F90 | 393 +++++ gpu/psb_d_diag_mat_mod.F90 | 308 ++++ gpu/psb_d_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_d_elg_mat_mod.F90 | 483 ++++++ gpu/psb_d_gpu_vect_mod.F90 | 1989 +++++++++++++++++++++++++ gpu/psb_d_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_d_hlg_mat_mod.F90 | 398 +++++ gpu/psb_d_hybg_mat_mod.F90 | 306 ++++ gpu/psb_d_vectordev_mod.F90 | 390 +++++ gpu/psb_gpu_env_mod.F90 | 340 +++++ gpu/psb_gpu_mod.F90 | 89 ++ gpu/psb_i_csrg_mat_mod.F90 | 393 +++++ gpu/psb_i_diag_mat_mod.F90 | 308 ++++ gpu/psb_i_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_i_elg_mat_mod.F90 | 483 ++++++ gpu/psb_i_gpu_vect_mod.F90 | 1671 +++++++++++++++++++++ gpu/psb_i_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_i_hlg_mat_mod.F90 | 398 +++++ gpu/psb_i_hybg_mat_mod.F90 | 306 ++++ gpu/psb_i_vectordev_mod.F90 | 283 ++++ gpu/psb_s_csrg_mat_mod.F90 | 393 +++++ gpu/psb_s_diag_mat_mod.F90 | 308 ++++ gpu/psb_s_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_s_elg_mat_mod.F90 | 483 ++++++ gpu/psb_s_gpu_vect_mod.F90 | 1989 +++++++++++++++++++++++++ gpu/psb_s_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_s_hlg_mat_mod.F90 | 398 +++++ gpu/psb_s_hybg_mat_mod.F90 | 306 ++++ gpu/psb_s_vectordev_mod.F90 | 390 +++++ gpu/psb_vectordev_mod.f90 | 8 + gpu/psb_z_csrg_mat_mod.F90 | 393 +++++ gpu/psb_z_diag_mat_mod.F90 | 308 ++++ gpu/psb_z_dnsg_mat_mod.F90 | 294 ++++ gpu/psb_z_elg_mat_mod.F90 | 483 ++++++ gpu/psb_z_gpu_vect_mod.F90 | 1989 +++++++++++++++++++++++++ gpu/psb_z_hdiag_mat_mod.F90 | 287 ++++ gpu/psb_z_hlg_mat_mod.F90 | 398 +++++ gpu/psb_z_hybg_mat_mod.F90 | 306 ++++ gpu/psb_z_vectordev_mod.F90 | 390 +++++ gpu/s_cusparse_mod.F90 | 305 ++++ gpu/scusparse.c | 95 ++ gpu/svectordev.c | 304 ++++ gpu/svectordev.h | 78 + gpu/vectordev.c | 198 +++ gpu/vectordev.h | 90 ++ gpu/z_cusparse_mod.F90 | 305 ++++ gpu/zcusparse.c | 94 ++ gpu/zvectordev.c | 321 ++++ gpu/zvectordev.h | 91 ++ 412 files changed, 66802 insertions(+), 22 deletions(-) create mode 100644 gpu/CUDA/Makefile create mode 100644 gpu/CUDA/psi_cuda_CopyCooToElg.cuh create mode 100644 gpu/CUDA/psi_cuda_CopyCooToHlg.cuh create mode 100644 gpu/CUDA/psi_cuda_c_CopyCooToElg.cu create mode 100644 gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu create mode 100644 gpu/CUDA/psi_cuda_common.cuh create mode 100644 gpu/CUDA/psi_cuda_d_CopyCooToElg.cu create mode 100644 gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu create mode 100644 gpu/CUDA/psi_cuda_s_CopyCooToElg.cu create mode 100644 gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu create mode 100644 gpu/CUDA/psi_cuda_z_CopyCooToElg.cu create mode 100644 gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu create mode 100755 gpu/Makefile create mode 100644 gpu/base_cusparse_mod.F90 create mode 100644 gpu/c_cusparse_mod.F90 create mode 100644 gpu/ccusparse.c create mode 100644 gpu/cintrf.h create mode 100644 gpu/core_mod.f90 create mode 100644 gpu/cuda_util.c create mode 100644 gpu/cuda_util.h create mode 100644 gpu/cusparse_mod.F90 create mode 100644 gpu/cvectordev.c create mode 100644 gpu/cvectordev.h create mode 100644 gpu/d_cusparse_mod.F90 create mode 100644 gpu/dcusparse.c create mode 100644 gpu/diagdev.c create mode 100644 gpu/diagdev.h create mode 100644 gpu/diagdev_mod.F90 create mode 100644 gpu/dnsdev.c create mode 100644 gpu/dnsdev.h create mode 100644 gpu/dnsdev_mod.F90 create mode 100644 gpu/dvectordev.c create mode 100644 gpu/dvectordev.h create mode 100644 gpu/elldev.c create mode 100644 gpu/elldev.h create mode 100644 gpu/elldev_mod.F90 create mode 100644 gpu/fcusparse.c create mode 100644 gpu/fcusparse.h create mode 100644 gpu/fcusparse_fct.h create mode 100644 gpu/hdiagdev.c create mode 100644 gpu/hdiagdev.h create mode 100644 gpu/hdiagdev_mod.F90 create mode 100644 gpu/hlldev.c create mode 100644 gpu/hlldev.h create mode 100644 gpu/hlldev_mod.F90 create mode 100755 gpu/impl/Makefile create mode 100644 gpu/impl/psb_c_cp_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_cp_diag_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_elg_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_cp_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_cp_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_c_cp_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_csrg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_c_csrg_csmm.F90 create mode 100644 gpu/impl/psb_c_csrg_csmv.F90 create mode 100644 gpu/impl/psb_c_csrg_from_gpu.F90 create mode 100644 gpu/impl/psb_c_csrg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_c_csrg_mold.F90 create mode 100644 gpu/impl/psb_c_csrg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_c_csrg_scal.F90 create mode 100644 gpu/impl/psb_c_csrg_scals.F90 create mode 100644 gpu/impl/psb_c_csrg_to_gpu.F90 create mode 100644 gpu/impl/psb_c_csrg_vect_mv.F90 create mode 100644 gpu/impl/psb_c_diag_csmv.F90 create mode 100644 gpu/impl/psb_c_diag_mold.F90 create mode 100644 gpu/impl/psb_c_diag_to_gpu.F90 create mode 100644 gpu/impl/psb_c_diag_vect_mv.F90 create mode 100644 gpu/impl/psb_c_dnsg_mat_impl.F90 create mode 100644 gpu/impl/psb_c_elg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_c_elg_asb.f90 create mode 100644 gpu/impl/psb_c_elg_csmm.F90 create mode 100644 gpu/impl/psb_c_elg_csmv.F90 create mode 100644 gpu/impl/psb_c_elg_csput.F90 create mode 100644 gpu/impl/psb_c_elg_from_gpu.F90 create mode 100644 gpu/impl/psb_c_elg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_c_elg_mold.F90 create mode 100644 gpu/impl/psb_c_elg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_c_elg_scal.F90 create mode 100644 gpu/impl/psb_c_elg_scals.F90 create mode 100644 gpu/impl/psb_c_elg_to_gpu.F90 create mode 100644 gpu/impl/psb_c_elg_trim.f90 create mode 100644 gpu/impl/psb_c_elg_vect_mv.F90 create mode 100644 gpu/impl/psb_c_hdiag_csmv.F90 create mode 100644 gpu/impl/psb_c_hdiag_mold.F90 create mode 100644 gpu/impl/psb_c_hdiag_to_gpu.F90 create mode 100644 gpu/impl/psb_c_hdiag_vect_mv.F90 create mode 100644 gpu/impl/psb_c_hlg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_c_hlg_csmm.F90 create mode 100644 gpu/impl/psb_c_hlg_csmv.F90 create mode 100644 gpu/impl/psb_c_hlg_from_gpu.F90 create mode 100644 gpu/impl/psb_c_hlg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_c_hlg_mold.F90 create mode 100644 gpu/impl/psb_c_hlg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_c_hlg_scal.F90 create mode 100644 gpu/impl/psb_c_hlg_scals.F90 create mode 100644 gpu/impl/psb_c_hlg_to_gpu.F90 create mode 100644 gpu/impl/psb_c_hlg_vect_mv.F90 create mode 100644 gpu/impl/psb_c_hybg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_c_hybg_csmm.F90 create mode 100644 gpu/impl/psb_c_hybg_csmv.F90 create mode 100644 gpu/impl/psb_c_hybg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_c_hybg_mold.F90 create mode 100644 gpu/impl/psb_c_hybg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_c_hybg_scal.F90 create mode 100644 gpu/impl/psb_c_hybg_scals.F90 create mode 100644 gpu/impl/psb_c_hybg_to_gpu.F90 create mode 100644 gpu/impl/psb_c_hybg_vect_mv.F90 create mode 100644 gpu/impl/psb_c_mv_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_mv_diag_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_elg_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_mv_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_c_mv_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_c_mv_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_cp_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_cp_diag_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_elg_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_cp_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_cp_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_d_cp_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_csrg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_d_csrg_csmm.F90 create mode 100644 gpu/impl/psb_d_csrg_csmv.F90 create mode 100644 gpu/impl/psb_d_csrg_from_gpu.F90 create mode 100644 gpu/impl/psb_d_csrg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_d_csrg_mold.F90 create mode 100644 gpu/impl/psb_d_csrg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_d_csrg_scal.F90 create mode 100644 gpu/impl/psb_d_csrg_scals.F90 create mode 100644 gpu/impl/psb_d_csrg_to_gpu.F90 create mode 100644 gpu/impl/psb_d_csrg_vect_mv.F90 create mode 100644 gpu/impl/psb_d_diag_csmv.F90 create mode 100644 gpu/impl/psb_d_diag_mold.F90 create mode 100644 gpu/impl/psb_d_diag_to_gpu.F90 create mode 100644 gpu/impl/psb_d_diag_vect_mv.F90 create mode 100644 gpu/impl/psb_d_dnsg_mat_impl.F90 create mode 100644 gpu/impl/psb_d_elg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_d_elg_asb.f90 create mode 100644 gpu/impl/psb_d_elg_csmm.F90 create mode 100644 gpu/impl/psb_d_elg_csmv.F90 create mode 100644 gpu/impl/psb_d_elg_csput.F90 create mode 100644 gpu/impl/psb_d_elg_from_gpu.F90 create mode 100644 gpu/impl/psb_d_elg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_d_elg_mold.F90 create mode 100644 gpu/impl/psb_d_elg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_d_elg_scal.F90 create mode 100644 gpu/impl/psb_d_elg_scals.F90 create mode 100644 gpu/impl/psb_d_elg_to_gpu.F90 create mode 100644 gpu/impl/psb_d_elg_trim.f90 create mode 100644 gpu/impl/psb_d_elg_vect_mv.F90 create mode 100644 gpu/impl/psb_d_hdiag_csmv.F90 create mode 100644 gpu/impl/psb_d_hdiag_mold.F90 create mode 100644 gpu/impl/psb_d_hdiag_to_gpu.F90 create mode 100644 gpu/impl/psb_d_hdiag_vect_mv.F90 create mode 100644 gpu/impl/psb_d_hlg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_d_hlg_csmm.F90 create mode 100644 gpu/impl/psb_d_hlg_csmv.F90 create mode 100644 gpu/impl/psb_d_hlg_from_gpu.F90 create mode 100644 gpu/impl/psb_d_hlg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_d_hlg_mold.F90 create mode 100644 gpu/impl/psb_d_hlg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_d_hlg_scal.F90 create mode 100644 gpu/impl/psb_d_hlg_scals.F90 create mode 100644 gpu/impl/psb_d_hlg_to_gpu.F90 create mode 100644 gpu/impl/psb_d_hlg_vect_mv.F90 create mode 100644 gpu/impl/psb_d_hybg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_d_hybg_csmm.F90 create mode 100644 gpu/impl/psb_d_hybg_csmv.F90 create mode 100644 gpu/impl/psb_d_hybg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_d_hybg_mold.F90 create mode 100644 gpu/impl/psb_d_hybg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_d_hybg_scal.F90 create mode 100644 gpu/impl/psb_d_hybg_scals.F90 create mode 100644 gpu/impl/psb_d_hybg_to_gpu.F90 create mode 100644 gpu/impl/psb_d_hybg_vect_mv.F90 create mode 100644 gpu/impl/psb_d_mv_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_mv_diag_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_elg_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_mv_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_d_mv_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_d_mv_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_cp_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_cp_diag_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_elg_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_cp_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_cp_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_s_cp_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_csrg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_s_csrg_csmm.F90 create mode 100644 gpu/impl/psb_s_csrg_csmv.F90 create mode 100644 gpu/impl/psb_s_csrg_from_gpu.F90 create mode 100644 gpu/impl/psb_s_csrg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_s_csrg_mold.F90 create mode 100644 gpu/impl/psb_s_csrg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_s_csrg_scal.F90 create mode 100644 gpu/impl/psb_s_csrg_scals.F90 create mode 100644 gpu/impl/psb_s_csrg_to_gpu.F90 create mode 100644 gpu/impl/psb_s_csrg_vect_mv.F90 create mode 100644 gpu/impl/psb_s_diag_csmv.F90 create mode 100644 gpu/impl/psb_s_diag_mold.F90 create mode 100644 gpu/impl/psb_s_diag_to_gpu.F90 create mode 100644 gpu/impl/psb_s_diag_vect_mv.F90 create mode 100644 gpu/impl/psb_s_dnsg_mat_impl.F90 create mode 100644 gpu/impl/psb_s_elg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_s_elg_asb.f90 create mode 100644 gpu/impl/psb_s_elg_csmm.F90 create mode 100644 gpu/impl/psb_s_elg_csmv.F90 create mode 100644 gpu/impl/psb_s_elg_csput.F90 create mode 100644 gpu/impl/psb_s_elg_from_gpu.F90 create mode 100644 gpu/impl/psb_s_elg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_s_elg_mold.F90 create mode 100644 gpu/impl/psb_s_elg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_s_elg_scal.F90 create mode 100644 gpu/impl/psb_s_elg_scals.F90 create mode 100644 gpu/impl/psb_s_elg_to_gpu.F90 create mode 100644 gpu/impl/psb_s_elg_trim.f90 create mode 100644 gpu/impl/psb_s_elg_vect_mv.F90 create mode 100644 gpu/impl/psb_s_hdiag_csmv.F90 create mode 100644 gpu/impl/psb_s_hdiag_mold.F90 create mode 100644 gpu/impl/psb_s_hdiag_to_gpu.F90 create mode 100644 gpu/impl/psb_s_hdiag_vect_mv.F90 create mode 100644 gpu/impl/psb_s_hlg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_s_hlg_csmm.F90 create mode 100644 gpu/impl/psb_s_hlg_csmv.F90 create mode 100644 gpu/impl/psb_s_hlg_from_gpu.F90 create mode 100644 gpu/impl/psb_s_hlg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_s_hlg_mold.F90 create mode 100644 gpu/impl/psb_s_hlg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_s_hlg_scal.F90 create mode 100644 gpu/impl/psb_s_hlg_scals.F90 create mode 100644 gpu/impl/psb_s_hlg_to_gpu.F90 create mode 100644 gpu/impl/psb_s_hlg_vect_mv.F90 create mode 100644 gpu/impl/psb_s_hybg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_s_hybg_csmm.F90 create mode 100644 gpu/impl/psb_s_hybg_csmv.F90 create mode 100644 gpu/impl/psb_s_hybg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_s_hybg_mold.F90 create mode 100644 gpu/impl/psb_s_hybg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_s_hybg_scal.F90 create mode 100644 gpu/impl/psb_s_hybg_scals.F90 create mode 100644 gpu/impl/psb_s_hybg_to_gpu.F90 create mode 100644 gpu/impl/psb_s_hybg_vect_mv.F90 create mode 100644 gpu/impl/psb_s_mv_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_mv_diag_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_elg_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_mv_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_s_mv_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_s_mv_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_cp_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_cp_diag_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_elg_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_cp_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_cp_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_z_cp_hybg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_csrg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_z_csrg_csmm.F90 create mode 100644 gpu/impl/psb_z_csrg_csmv.F90 create mode 100644 gpu/impl/psb_z_csrg_from_gpu.F90 create mode 100644 gpu/impl/psb_z_csrg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_z_csrg_mold.F90 create mode 100644 gpu/impl/psb_z_csrg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_z_csrg_scal.F90 create mode 100644 gpu/impl/psb_z_csrg_scals.F90 create mode 100644 gpu/impl/psb_z_csrg_to_gpu.F90 create mode 100644 gpu/impl/psb_z_csrg_vect_mv.F90 create mode 100644 gpu/impl/psb_z_diag_csmv.F90 create mode 100644 gpu/impl/psb_z_diag_mold.F90 create mode 100644 gpu/impl/psb_z_diag_to_gpu.F90 create mode 100644 gpu/impl/psb_z_diag_vect_mv.F90 create mode 100644 gpu/impl/psb_z_dnsg_mat_impl.F90 create mode 100644 gpu/impl/psb_z_elg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_z_elg_asb.f90 create mode 100644 gpu/impl/psb_z_elg_csmm.F90 create mode 100644 gpu/impl/psb_z_elg_csmv.F90 create mode 100644 gpu/impl/psb_z_elg_csput.F90 create mode 100644 gpu/impl/psb_z_elg_from_gpu.F90 create mode 100644 gpu/impl/psb_z_elg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_z_elg_mold.F90 create mode 100644 gpu/impl/psb_z_elg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_z_elg_scal.F90 create mode 100644 gpu/impl/psb_z_elg_scals.F90 create mode 100644 gpu/impl/psb_z_elg_to_gpu.F90 create mode 100644 gpu/impl/psb_z_elg_trim.f90 create mode 100644 gpu/impl/psb_z_elg_vect_mv.F90 create mode 100644 gpu/impl/psb_z_hdiag_csmv.F90 create mode 100644 gpu/impl/psb_z_hdiag_mold.F90 create mode 100644 gpu/impl/psb_z_hdiag_to_gpu.F90 create mode 100644 gpu/impl/psb_z_hdiag_vect_mv.F90 create mode 100644 gpu/impl/psb_z_hlg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_z_hlg_csmm.F90 create mode 100644 gpu/impl/psb_z_hlg_csmv.F90 create mode 100644 gpu/impl/psb_z_hlg_from_gpu.F90 create mode 100644 gpu/impl/psb_z_hlg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_z_hlg_mold.F90 create mode 100644 gpu/impl/psb_z_hlg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_z_hlg_scal.F90 create mode 100644 gpu/impl/psb_z_hlg_scals.F90 create mode 100644 gpu/impl/psb_z_hlg_to_gpu.F90 create mode 100644 gpu/impl/psb_z_hlg_vect_mv.F90 create mode 100644 gpu/impl/psb_z_hybg_allocate_mnnz.F90 create mode 100644 gpu/impl/psb_z_hybg_csmm.F90 create mode 100644 gpu/impl/psb_z_hybg_csmv.F90 create mode 100644 gpu/impl/psb_z_hybg_inner_vect_sv.F90 create mode 100644 gpu/impl/psb_z_hybg_mold.F90 create mode 100644 gpu/impl/psb_z_hybg_reallocate_nz.F90 create mode 100644 gpu/impl/psb_z_hybg_scal.F90 create mode 100644 gpu/impl/psb_z_hybg_scals.F90 create mode 100644 gpu/impl/psb_z_hybg_to_gpu.F90 create mode 100644 gpu/impl/psb_z_hybg_vect_mv.F90 create mode 100644 gpu/impl/psb_z_mv_csrg_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_csrg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_mv_diag_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_elg_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_elg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_mv_hdiag_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_hlg_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_hlg_from_fmt.F90 create mode 100644 gpu/impl/psb_z_mv_hybg_from_coo.F90 create mode 100644 gpu/impl/psb_z_mv_hybg_from_fmt.F90 create mode 100644 gpu/ivectordev.c create mode 100644 gpu/ivectordev.h create mode 100644 gpu/psb_base_vectordev_mod.F90 create mode 100644 gpu/psb_c_csrg_mat_mod.F90 create mode 100644 gpu/psb_c_diag_mat_mod.F90 create mode 100644 gpu/psb_c_dnsg_mat_mod.F90 create mode 100644 gpu/psb_c_elg_mat_mod.F90 create mode 100644 gpu/psb_c_gpu_vect_mod.F90 create mode 100644 gpu/psb_c_hdiag_mat_mod.F90 create mode 100644 gpu/psb_c_hlg_mat_mod.F90 create mode 100644 gpu/psb_c_hybg_mat_mod.F90 create mode 100644 gpu/psb_c_vectordev_mod.F90 create mode 100644 gpu/psb_d_csrg_mat_mod.F90 create mode 100644 gpu/psb_d_diag_mat_mod.F90 create mode 100644 gpu/psb_d_dnsg_mat_mod.F90 create mode 100644 gpu/psb_d_elg_mat_mod.F90 create mode 100644 gpu/psb_d_gpu_vect_mod.F90 create mode 100644 gpu/psb_d_hdiag_mat_mod.F90 create mode 100644 gpu/psb_d_hlg_mat_mod.F90 create mode 100644 gpu/psb_d_hybg_mat_mod.F90 create mode 100644 gpu/psb_d_vectordev_mod.F90 create mode 100644 gpu/psb_gpu_env_mod.F90 create mode 100644 gpu/psb_gpu_mod.F90 create mode 100644 gpu/psb_i_csrg_mat_mod.F90 create mode 100644 gpu/psb_i_diag_mat_mod.F90 create mode 100644 gpu/psb_i_dnsg_mat_mod.F90 create mode 100644 gpu/psb_i_elg_mat_mod.F90 create mode 100644 gpu/psb_i_gpu_vect_mod.F90 create mode 100644 gpu/psb_i_hdiag_mat_mod.F90 create mode 100644 gpu/psb_i_hlg_mat_mod.F90 create mode 100644 gpu/psb_i_hybg_mat_mod.F90 create mode 100644 gpu/psb_i_vectordev_mod.F90 create mode 100644 gpu/psb_s_csrg_mat_mod.F90 create mode 100644 gpu/psb_s_diag_mat_mod.F90 create mode 100644 gpu/psb_s_dnsg_mat_mod.F90 create mode 100644 gpu/psb_s_elg_mat_mod.F90 create mode 100644 gpu/psb_s_gpu_vect_mod.F90 create mode 100644 gpu/psb_s_hdiag_mat_mod.F90 create mode 100644 gpu/psb_s_hlg_mat_mod.F90 create mode 100644 gpu/psb_s_hybg_mat_mod.F90 create mode 100644 gpu/psb_s_vectordev_mod.F90 create mode 100644 gpu/psb_vectordev_mod.f90 create mode 100644 gpu/psb_z_csrg_mat_mod.F90 create mode 100644 gpu/psb_z_diag_mat_mod.F90 create mode 100644 gpu/psb_z_dnsg_mat_mod.F90 create mode 100644 gpu/psb_z_elg_mat_mod.F90 create mode 100644 gpu/psb_z_gpu_vect_mod.F90 create mode 100644 gpu/psb_z_hdiag_mat_mod.F90 create mode 100644 gpu/psb_z_hlg_mat_mod.F90 create mode 100644 gpu/psb_z_hybg_mat_mod.F90 create mode 100644 gpu/psb_z_vectordev_mod.F90 create mode 100644 gpu/s_cusparse_mod.F90 create mode 100644 gpu/scusparse.c create mode 100644 gpu/svectordev.c create mode 100644 gpu/svectordev.h create mode 100644 gpu/vectordev.c create mode 100644 gpu/vectordev.h create mode 100644 gpu/z_cusparse_mod.F90 create mode 100644 gpu/zcusparse.c create mode 100644 gpu/zvectordev.c create mode 100644 gpu/zvectordev.h diff --git a/Make.inc.in b/Make.inc.in index 1fa3179c..e28e9bee 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -67,6 +67,28 @@ UTILMODNAME=@UTILMODNAME@ CBINDLIBNAME=libpsb_cbind.a +GPUD=@GPUD@ +GPULD=@GPULD@ + +SPGPUDIR=@SPGPU_DIR@ +SPGPU_INCDIR=@SPGPU_INCDIR@ +SPGPU_LIBS=@SPGPU_LIBS@ +SPGPU_DEFINES=@SPGPU_DEFINES@ +SPGPU_INCLUDES=@SPGPU_INCLUDES@ + +CUDA_DIR=@CUDA_DIR@ +CUDA_DEFINES=@CUDA_DEFINES@ +CUDA_INCLUDES=@CUDA_INCLUDES@ +CUDA_LIBS=@CUDA_LIBS@ +CUDA_VERSION=@CUDA_VERSION@ +CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@ +NVCC=@CUDA_NVCC@ +CUDEFINES=@CUDEFINES@ + +.SUFFIXES: .cu +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $< + @PSBLASRULES@ diff --git a/Makefile b/Makefile index 4a79afce..49879270 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd extd libd +all: dirs based precd kryld utild cbindd extd $(GPUD) libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." @@ -12,17 +12,20 @@ dirs: precd: based utild: based kryld: precd -extd: based - +extd: based +gpud: extd cbindd: based precd kryld utild -libd: based precd kryld utild cbindd extd +libd: based precd kryld utild cbindd extd $(GPULD) $(MAKE) -C base lib $(MAKE) -C prec lib $(MAKE) -C krylov lib $(MAKE) -C util lib $(MAKE) -C cbind lib $(MAKE) -C ext lib +gpuld: gpud + $(MAKE) -C gpu lib + based: $(MAKE) -C base objs @@ -34,8 +37,10 @@ utild: $(MAKE) -C util objs cbindd: $(MAKE) -C cbind objs -extd: +extd: based $(MAKE) -C ext objs +gpud: based extd + $(MAKE) -C gpu objs install: all @@ -61,6 +66,7 @@ clean: $(MAKE) -C util clean $(MAKE) -C cbind clean $(MAKE) -C ext clean + $(MAKE) -C gpu clean check: all make check -C test/serial @@ -77,6 +83,7 @@ veryclean: cleanlib cd util && $(MAKE) veryclean cd cbind && $(MAKE) veryclean cd ext && $(MAKE) veryclean + cd gpu && $(MAKE) veryclean cd test/fileread && $(MAKE) clean cd test/pargen && $(MAKE) clean cd test/util && $(MAKE) clean diff --git a/config/pac.m4 b/config/pac.m4 index 69d2f863..7a9ee07e 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2018,3 +2018,252 @@ CPPFLAGS="$SAVE_CPPFLAGS"; ])dnl +dnl @synopsis PAC_CHECK_SPGPU +dnl +dnl Will try to find the spgpu library and headers. +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_SPGPU, + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + if test "x$pac_cv_have_cuda" == "x"; then + PAC_CHECK_CUDA() + fi +dnl AC_MSG_NOTICE([From CUDA: $pac_cv_have_cuda ]) + if test "x$pac_cv_have_cuda" == "xyes"; then + AC_ARG_WITH(spgpu, AC_HELP_STRING([--with-spgpu=DIR], [Specify the directory for SPGPU library and includes.]), + [pac_cv_spgpudir=$withval], + [pac_cv_spgpudir='']) + + AC_LANG([C]) + if test "x$pac_cv_spgpudir" != "x"; then + LIBS="-L$pac_cv_spgpudir/lib $LIBS" + GPU_INCLUDES="-I$pac_cv_spgpudir/include" + CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + GPU_LIBDIR="-L$pac_cv_spgpudir/lib" + fi + AC_MSG_CHECKING([spgpu dir $pac_cv_spgpudir]) + AC_CHECK_HEADER([core.h], + [pac_gpu_header_ok=yes], + [pac_gpu_header_ok=no; GPU_INCLUDES=""]) + + if test "x$pac_gpu_header_ok" == "xyes" ; then + GPU_LIBS="-lspgpu $GPU_LIBDIR" + LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + AC_TRY_LINK_FUNC(spgpuCreate, + [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + AC_MSG_RESULT($pac_gpu_lib_ok) + if test "x$pac_cv_have_spgpu" == "xyes" ; then + AC_MSG_NOTICE([Have found SPGPU]) + SPGPULIBNAME="libpsbgpu.a"; + SPGPU_DIR="$pac_cv_spgpudir"; + SPGPU_DEFINES="-DHAVE_SPGPU"; + SPGPU_INCDIR="$SPGPU_DIR/include"; + SPGPU_INCLUDES="-I$SPGPU_INCDIR"; + SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib"; + LGPU=-lpsb_gpu + CUDA_DIR="$pac_cv_cuda_dir"; + CUDA_DEFINES="-DHAVE_CUDA"; + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES"; + CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ; + fi + fi +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + + + + +dnl @synopsis PAC_CHECK_CUDA +dnl +dnl Will try to find the cuda library and headers. +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_CUDA, +[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the directory for CUDA library and includes.]), + [pac_cv_cuda_dir=$withval], + [pac_cv_cuda_dir='']) + +AC_LANG([C]) +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +AC_MSG_CHECKING([cuda dir $pac_cv_cuda_dir]) +AC_CHECK_HEADER([cuda_runtime.h], + [pac_cuda_header_ok=yes], + [pac_cuda_header_ok=no; CUDA_INCLUDES=""]) + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for cudaMemcpy in $CUDA_LIBS]) + AC_TRY_LINK_FUNC(cudaMemcpy, + [pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; ], + [pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""]) + AC_MSG_RESULT($pac_cuda_lib_ok) + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + +dnl @synopsis PAC_ARG_WITH_CUDACC +dnl +dnl Test for --with-cudacc="set_of_cc". +dnl +dnl Defines the CC to compile for +dnl +dnl +dnl Example use: +dnl +dnl PAC_ARG_WITH_CUDACC +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_WITH_CUDACC], +[ +AC_ARG_WITH(cudacc, +AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example, + --with-cudacc=30,35,37,50,60]), +[pac_cv_cudacc=$withval], +[pac_cv_cudacc='']) +]) + +AC_DEFUN(PAC_ARG_WITH_LIBRSB, + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + + AC_ARG_WITH(librsb, + AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example, + --with-librsb=/opt/packages/librsb]), + [pac_cv_librsb_dir=$withval], + [pac_cv_librsb_dir='']) + + if test "x$pac_cv_librsb_dir" != "x"; then + LIBS="-L$pac_cv_librsb_dir $LIBS" + RSB_INCLUDES="-I$pac_cv_librsb_dir" + # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + RSB_LIBDIR="-L$pac_cv_librsb_dir" + fi + #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir]) + AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h], + [pac_rsb_header_ok=yes], + [pac_rsb_header_ok=no; RSB_INCLUDES=""]) + + if test "x$pac_rsb_header_ok" == "xyes" ; then + RSB_LIBS="-lrsb $RSB_LIBDIR" + # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + # AC_TRY_LINK_FUNC(spgpuCreate, + # [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + # [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + # AC_MSG_RESULT($pac_gpu_lib_ok) + # if test "x$pac_cv_have_spgpu" == "xyes" ; then + # AC_MSG_NOTICE([Have found SPGPU]) + RSBLIBNAME="librsb.a"; + LIBRSB_DIR="$pac_cv_librsb_dir"; + # SPGPU_DEFINES="-DHAVE_SPGPU"; + LIBRSB_INCDIR="$LIBRSB_DIR"; + LIBRSB_INCLUDES="-I$LIBRSB_INCDIR"; + LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR"; + # CUDA_DIR="$pac_cv_cuda_dir"; + LIBRSB_DEFINES="-DHAVE_RSB"; + LRSB=-lpsb_rsb + # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES"; + CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES"; + fi +# fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +]) +dnl + +dnl @synopsis PAC_CHECK_CUDA_VERSION +dnl +dnl Will try to find the cuda version +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_CUDA_VERSION, +[AC_LANG_PUSH([C]) +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_have_cuda" == "x"; then + PAC_CHECK_CUDA() +fi +if test "x$pac_cv_have_cuda" == "xyes"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for CUDA version]) + AC_LINK_IFELSE([AC_LANG_SOURCE([ +#include +#include + +int main(int argc, char *argv[]) +{ + printf("%d",CUDA_VERSION); + return(0); +} ])], + [pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`;], + [pac_cv_cuda_version="unknown";]) + + AC_MSG_RESULT($pac_cv_cuda_version) + fi +AC_LANG_POP([C]) +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + + diff --git a/configure b/configure index 752fe192..5c3444b3 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for PSBLAS 3.7.0. +# Generated by GNU Autoconf 2.71 for PSBLAS 3.8.1. # # Report bugs to . # @@ -611,8 +611,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='PSBLAS' PACKAGE_TARNAME='psblas' -PACKAGE_VERSION='3.7.0' -PACKAGE_STRING='PSBLAS 3.7.0' +PACKAGE_VERSION='3.8.1' +PACKAGE_STRING='PSBLAS 3.8.1' PACKAGE_BUGREPORT='https://github.com/sfilippone/psblas3/issues' PACKAGE_URL='' @@ -653,6 +653,23 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS +GPULD +GPUD +CUDEFINES +CUDA_NVCC +CUDA_SHORT_VERSION +CUDA_VERSION +CUDA_LIBS +CUDA_INCLUDES +CUDA_DEFINES +CUDA_DIR +EXTRALDLIBS +SPGPU_INCDIR +SPGPU_INCLUDES +SPGPU_DEFINES +SPGPU_DIR +SPGPU_LIBS +SPGPU_FLAGS METISINCFILE UTILLIBNAME METHDLIBNAME @@ -825,6 +842,9 @@ with_amd with_amddir with_amdincdir with_amdlibdir +with_cuda +with_spgpu +with_cudacc ' ac_precious_vars='build_alias host_alias @@ -1390,7 +1410,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures PSBLAS 3.7.0 to adapt to many kinds of systems. +\`configure' configures PSBLAS 3.8.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1457,7 +1477,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PSBLAS 3.7.0:";; + short | recursive ) echo "Configuration of PSBLAS 3.8.1:";; esac cat <<\_ACEOF @@ -1523,6 +1543,11 @@ Optional Packages: --with-amddir=DIR Specify the directory for AMD library and includes. --with-amdincdir=DIR Specify the directory for AMD includes. --with-amdlibdir=DIR Specify the directory for AMD library. + --with-cuda=DIR Specify the directory for CUDA library and includes. + --with-spgpu=DIR Specify the directory for SPGPU library and + includes. + --with-cudacc A comma-separated list of CCs to compile to, for + example, --with-cudacc=30,35,37,50,60 Some influential environment variables: FC Fortran compiler command @@ -1607,7 +1632,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PSBLAS configure 3.7.0 +PSBLAS configure 3.8.1 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -2291,7 +2316,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by PSBLAS $as_me 3.7.0, which was +It was created by PSBLAS $as_me 3.8.1, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3265,7 +3290,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.7.0" +psblas_cv_version="3.8.1" # A sample source file @@ -3280,7 +3305,8 @@ psblas_cv_version="3.7.0" documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + --with-spgpu=/path/to/spgpu FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -3294,7 +3320,8 @@ printf "%s\n" "$as_me: documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + --with-spgpu=/path/to/spgpu FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -6393,7 +6420,7 @@ fi # Define the identity of the package. PACKAGE='psblas' - VERSION='3.7.0' + VERSION='3.8.1' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -10610,6 +10637,434 @@ fi + +# Check whether --with-cuda was given. +if test ${with_cuda+y} +then : + withval=$with_cuda; pac_cv_cuda_dir=$withval +else $as_nop + pac_cv_cuda_dir='' +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else $as_nop + pac_cuda_header_ok=no; CUDA_INCLUDES="" +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cudaMemcpy (); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else $as_nop + pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + + +if test "x$pac_cv_have_cuda" == "xyes"; then + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_have_cuda" == "x"; then + +# Check whether --with-cuda was given. +if test ${with_cuda+y} +then : + withval=$with_cuda; pac_cv_cuda_dir=$withval +else $as_nop + pac_cv_cuda_dir='' +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else $as_nop + pac_cuda_header_ok=no; CUDA_INCLUDES="" +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cudaMemcpy (); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else $as_nop + pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + +fi +if test "x$pac_cv_have_cuda" == "xyes"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for CUDA version" >&5 +printf %s "checking for CUDA version... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include + +int main(int argc, char *argv) +{ + printf("%d",CUDA_VERSION); + return(0); +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`; +else $as_nop + pac_cv_cuda_version="unknown"; +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cv_cuda_version" >&5 +printf "%s\n" "$pac_cv_cuda_version" >&6; } + fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + +CUDA_VERSION="$pac_cv_cuda_version"; +CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); +SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + if test "x$pac_cv_have_cuda" == "x"; then + +# Check whether --with-cuda was given. +if test ${with_cuda+y} +then : + withval=$with_cuda; pac_cv_cuda_dir=$withval +else $as_nop + pac_cv_cuda_dir='' +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else $as_nop + pac_cuda_header_ok=no; CUDA_INCLUDES="" +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cudaMemcpy (); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else $as_nop + pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + + fi + if test "x$pac_cv_have_cuda" == "xyes"; then + +# Check whether --with-spgpu was given. +if test ${with_spgpu+y} +then : + withval=$with_spgpu; pac_cv_spgpudir=$withval +else $as_nop + pac_cv_spgpudir='' +fi + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "x$pac_cv_spgpudir" != "x"; then + LIBS="-L$pac_cv_spgpudir/lib $LIBS" + GPU_INCLUDES="-I$pac_cv_spgpudir/include" + CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + GPU_LIBDIR="-L$pac_cv_spgpudir/lib" + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking spgpu dir $pac_cv_spgpudir" >&5 +printf %s "checking spgpu dir $pac_cv_spgpudir... " >&6; } + ac_fn_c_check_header_compile "$LINENO" "core.h" "ac_cv_header_core_h" "$ac_includes_default" +if test "x$ac_cv_header_core_h" = xyes +then : + pac_gpu_header_ok=yes +else $as_nop + pac_gpu_header_ok=no; GPU_INCLUDES="" +fi + + + if test "x$pac_gpu_header_ok" == "xyes" ; then + GPU_LIBS="-lspgpu $GPU_LIBDIR" + LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for spgpuCreate in $GPU_LIBS" >&5 +printf %s "checking for spgpuCreate in $GPU_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char spgpuCreate (); +int +main (void) +{ +return spgpuCreate (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; +else $as_nop + pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_gpu_lib_ok" >&5 +printf "%s\n" "$pac_gpu_lib_ok" >&6; } + if test "x$pac_cv_have_spgpu" == "xyes" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Have found SPGPU" >&5 +printf "%s\n" "$as_me: Have found SPGPU" >&6;} + SPGPULIBNAME="libpsbgpu.a"; + SPGPU_DIR="$pac_cv_spgpudir"; + SPGPU_DEFINES="-DHAVE_SPGPU"; + SPGPU_INCDIR="$SPGPU_DIR/include"; + SPGPU_INCLUDES="-I$SPGPU_INCDIR"; + SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib"; + LGPU=-lpsb_gpu + CUDA_DIR="$pac_cv_cuda_dir"; + CUDA_DEFINES="-DHAVE_CUDA"; + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES"; + CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ; + fi + fi +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + +if test "x$pac_cv_have_spgpu" == "xyes" ; then + GPUD=gpud; + GPULD=gpuld; + EXTRALDLIBS="-lstdc++"; +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $GPUD $GPULD" >&5 +printf "%s\n" "$as_me: At this point GPUTARGET is $GPUD $GPULD" >&6;} + + + +# Check whether --with-cudacc was given. +if test ${with_cudacc+y} +then : + withval=$with_cudacc; pac_cv_cudacc=$withval +else $as_nop + pac_cv_cudacc='' +fi + + +if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="30,35,37,50,60"; +fi +CUDEFINES=""; +for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` +do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; +done +if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" +fi + +fi + +if test "x$pac_cv_ipk_size" != "x4"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 +printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} + GPUD=""; + GPULD=""; + CUDEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; +fi + + + + ############################################################################### # Library target directory and archive files. ############################################################################### @@ -10687,6 +11142,22 @@ FDEFINES=$(PSBFDEFINES) + + + + + + + + + + + + + + + + @@ -11262,7 +11733,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by PSBLAS $as_me 3.7.0, which was +This file was extended by PSBLAS $as_me 3.8.1, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -11321,7 +11792,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -PSBLAS config.status 3.7.0 +PSBLAS config.status 3.8.1 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index d8a02a50..4b7f82e1 100755 --- a/configure.ac +++ b/configure.ac @@ -36,11 +36,11 @@ dnl NOTE : There is no cross compilation support. ############################################################################### # NOTE: the literal for version (the second argument to AC_INIT should be a literal!) -AC_INIT([PSBLAS],3.7.0, [https://github.com/sfilippone/psblas3/issues]) +AC_INIT([PSBLAS],3.8.1, [https://github.com/sfilippone/psblas3/issues]) # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.7.0" +psblas_cv_version="3.8.1" # A sample source file AC_CONFIG_SRCDIR([base/modules/psb_base_mod.f90]) @@ -56,7 +56,8 @@ AC_MSG_NOTICE([ documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + [ --with-spgpu=/path/to/spgpu] FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -790,6 +791,50 @@ fi +PAC_CHECK_CUDA() + +if test "x$pac_cv_have_cuda" == "xyes"; then + +PAC_CHECK_CUDA_VERSION() +CUDA_VERSION="$pac_cv_cuda_version"; +CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); +PAC_CHECK_SPGPU() +if test "x$pac_cv_have_spgpu" == "xyes" ; then + GPUD=gpud; + GPULD=gpuld; + EXTRALDLIBS="-lstdc++"; +fi +AC_MSG_NOTICE([At this point GPUTARGET is $GPUD $GPULD]) + +PAC_ARG_WITH_CUDACC() +if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="30,35,37,50,60"; +fi +CUDEFINES=""; +for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` +do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; +done +if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" +fi + +fi + +if test "x$pac_cv_ipk_size" != "x4"; then + AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) + GPUD=""; + GPULD=""; + CUDEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; +fi + + + + ############################################################################### # Library target directory and archive files. ############################################################################### @@ -871,7 +916,23 @@ AC_SUBST(PRECLIBNAME) AC_SUBST(METHDLIBNAME) AC_SUBST(UTILLIBNAME) AC_SUBST(METISINCFILE) - +AC_SUBST(SPGPU_FLAGS) +AC_SUBST(SPGPU_LIBS) +AC_SUBST(SPGPU_DIR) +AC_SUBST(SPGPU_DEFINES) +AC_SUBST(SPGPU_INCLUDES) +AC_SUBST(SPGPU_INCDIR) +AC_SUBST(EXTRALDLIBS) +AC_SUBST(CUDA_DIR) +AC_SUBST(CUDA_DEFINES) +AC_SUBST(CUDA_INCLUDES) +AC_SUBST(CUDA_LIBS) +AC_SUBST(CUDA_VERSION) +AC_SUBST(CUDA_SHORT_VERSION) +AC_SUBST(CUDA_NVCC) +AC_SUBST(CUDEFINES) +AC_SUBST(GPUD) +AC_SUBST(GPULD) ############################################################################### # the following files will be created by Automake diff --git a/gpu/CUDA/Makefile b/gpu/CUDA/Makefile new file mode 100644 index 00000000..a1f9d48b --- /dev/null +++ b/gpu/CUDA/Makefile @@ -0,0 +1,38 @@ +TOPDIR=../.. +include $(TOPDIR)/Make.inc +# +# Libraries used +# +PSBLIBDIR=$(PSBLASDIR)/lib/ +PSBINCDIR=$(PSBLASDIR)/include +LIBDIR=$(TOPDIR)/lib +INCDIR=$(TOPDIR)/include +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. +LIBNAME=libpsb_gpu.a + + + +CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \ +psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \ +psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \ +psi_cuda_z_CopyCooToElg.o psi_cuda_z_CopyCooToHlg.o + + + +objs: $(CUDAOBJS) + +lib: objs + ar cur ../$(LIBNAME) $(CUDAOBJS) + +$(CUDAOBJS): psi_cuda_common.cuh psi_cuda_CopyCooToElg.cuh psi_cuda_CopyCooToHlg.cuh + +clean: + /bin/rm -f $(CUDAOBJS) diff --git a/gpu/CUDA/psi_cuda_CopyCooToElg.cuh b/gpu/CUDA/psi_cuda_CopyCooToElg.cuh new file mode 100644 index 00000000..10a81a36 --- /dev/null +++ b/gpu/CUDA/psi_cuda_CopyCooToElg.cuh @@ -0,0 +1,104 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" +#include "psi_cuda_common.cuh" + + +#undef GEN_PSI_FUNC_NAME +#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToElg) + +#define THREAD_BLOCK 256 + +#ifdef __cplusplus +extern "C" { +#endif + + + void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, + int baseIdx, int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM); + + +#ifdef __cplusplus +} +#endif + + + + + +__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza, + int baseIdx, int hacksz, int ldv, int nzm, + int *rS, int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ + int ir, k, ipnt, rsz,jc; + int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + int i=ii+ki; + int idval=0; + + if (ki >= nrws) return; + if (i >= nr) return; + + ipnt=devIdisp[i]; + rsz=rS[i]; + ir = i; + for (k=0; kcurrentStream >>>(i,nrws, nr, nza, baseIdx, hacksz, ldv, nzm, + rS,devIdisp,devJa,devVal,idiag, rP,cM); + +} + + + + +void +GEN_PSI_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ int i,j,k, nrws; + //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + + //fprintf(stderr,"Loop on j: %d\n",j); + for (i=0; i +#include + +#include "cintrf.h" +#include "vectordev.h" +#include "psi_cuda_common.cuh" + + +#undef GEN_PSI_FUNC_NAME +#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToHlg) + +#define THREAD_BLOCK 256 + +#ifdef __cplusplus +extern "C" { +#endif + +void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, + int noffs, int isz, int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM); + + + +#ifdef __cplusplus +} +#endif + + +__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ + int ir, k, ipnt, rsz,jc; + int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + int i=ii+ki; + + if (ki >= nrws) return; + + + if (icurrentStream >>>(i,nrws,nr, nza, baseIdx, hacksz, noffs, isz, + rS,hackOffs,devIdisp,devJa,devVal,idiag,rP,cM); + +} + + +void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ int i, nrws; + //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + //fprintf(stderr,"Loop on j: %d\n",j); + for (i=0; i +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL c +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu b/gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu new file mode 100644 index 00000000..f2b5c86d --- /dev/null +++ b/gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL c +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/gpu/CUDA/psi_cuda_common.cuh b/gpu/CUDA/psi_cuda_common.cuh new file mode 100644 index 00000000..12d81f03 --- /dev/null +++ b/gpu/CUDA/psi_cuda_common.cuh @@ -0,0 +1,16 @@ +#pragma once + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) +#define MIN(A,B) ( (A)<(B) ? (A) : (B) ) +#define SQUARE(x) ((x)*(x)) +#define GET_ADDR(a,ix,iy,nc) a[(nc)*(ix)+(iy)] +#define GET_VAL(a,ix,iy,nc) (GET_ADDR(a,ix,iy,nc)) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } + +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +#endif diff --git a/gpu/CUDA/psi_cuda_d_CopyCooToElg.cu b/gpu/CUDA/psi_cuda_d_CopyCooToElg.cu new file mode 100644 index 00000000..f306ffe1 --- /dev/null +++ b/gpu/CUDA/psi_cuda_d_CopyCooToElg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE double +#define TYPE_SYMBOL d +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu b/gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu new file mode 100644 index 00000000..9c0e371e --- /dev/null +++ b/gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE double +#define TYPE_SYMBOL d +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/gpu/CUDA/psi_cuda_s_CopyCooToElg.cu b/gpu/CUDA/psi_cuda_s_CopyCooToElg.cu new file mode 100644 index 00000000..76e10de1 --- /dev/null +++ b/gpu/CUDA/psi_cuda_s_CopyCooToElg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE float +#define TYPE_SYMBOL s +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu b/gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu new file mode 100644 index 00000000..c2d76c0a --- /dev/null +++ b/gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE float +#define TYPE_SYMBOL s +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/gpu/CUDA/psi_cuda_z_CopyCooToElg.cu b/gpu/CUDA/psi_cuda_z_CopyCooToElg.cu new file mode 100644 index 00000000..a57ad637 --- /dev/null +++ b/gpu/CUDA/psi_cuda_z_CopyCooToElg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL z +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu b/gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu new file mode 100644 index 00000000..2ff9b869 --- /dev/null +++ b/gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu @@ -0,0 +1,10 @@ +#include +#include + +#include "cintrf.h" +#include "vectordev.h" + + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL z +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/gpu/Makefile b/gpu/Makefile new file mode 100755 index 00000000..16e9c084 --- /dev/null +++ b/gpu/Makefile @@ -0,0 +1,134 @@ +include ../Make.inc +# +# Libraries used +# +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +PSBLAS_LIB= -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) +LIBNAME=libpsb_gpu.a + + +FOBJS=cusparse_mod.o base_cusparse_mod.o \ + s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o \ + psb_vectordev_mod.o core_mod.o \ + psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\ + psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \ + elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \ + psb_i_gpu_vect_mod.o \ + psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ + psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ + psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ + psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ + psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ + psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ + psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ + psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ + psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ + psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ + psb_gpu_env_mod.o psb_gpu_mod.o \ + psb_d_diag_mat_mod.o\ + psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ + psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ + psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o \ + dnsdev_mod.o + +COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\ + svectordev.o dvectordev.o cvectordev.o zvectordev.o cuda_util.o \ + fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o + +OBJS=$(COBJS) $(FOBJS) + +lib: objs + +objs: $(OBJS) iobjs cudaobjs + /bin/cp -p *$(.mod) $(MODDIR) + /bin/cp -p *.h $(INCDIR) + +lib: ilib cudalib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o +psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o: psb_i_gpu_vect_mod.o +psb_i_gpu_vect_mod.o : psb_vectordev_mod.o psb_gpu_env_mod.o +cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o +s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o +psb_d_hlg_mat_mod.o: hlldev_mod.o psb_d_gpu_vect_mod.o psb_gpu_env_mod.o +psb_d_elg_mat_mod.o: elldev_mod.o psb_d_gpu_vect_mod.o +psb_d_diag_mat_mod.o: diagdev_mod.o psb_d_gpu_vect_mod.o +psb_d_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_gpu_vect_mod.o +psb_s_dnsg_mat_mod.o: dnsdev_mod.o psb_s_gpu_vect_mod.o +psb_d_dnsg_mat_mod.o: dnsdev_mod.o psb_d_gpu_vect_mod.o +psb_c_dnsg_mat_mod.o: dnsdev_mod.o psb_c_gpu_vect_mod.o +psb_z_dnsg_mat_mod.o: dnsdev_mod.o psb_z_gpu_vect_mod.o +psb_s_hlg_mat_mod.o: hlldev_mod.o psb_s_gpu_vect_mod.o psb_gpu_env_mod.o +psb_s_elg_mat_mod.o: elldev_mod.o psb_s_gpu_vect_mod.o +psb_s_diag_mat_mod.o: diagdev_mod.o psb_s_gpu_vect_mod.o +psb_s_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_gpu_vect_mod.o +psb_s_csrg_mat_mod.o psb_s_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_csrg_mat_mod.o psb_d_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_hlg_mat_mod.o: hlldev_mod.o psb_z_gpu_vect_mod.o psb_gpu_env_mod.o +psb_z_elg_mat_mod.o: elldev_mod.o psb_z_gpu_vect_mod.o +psb_c_hlg_mat_mod.o: hlldev_mod.o psb_c_gpu_vect_mod.o psb_gpu_env_mod.o +psb_c_elg_mat_mod.o: elldev_mod.o psb_c_gpu_vect_mod.o +psb_c_csrg_mat_mod.o psb_c_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_csrg_mat_mod.o psb_z_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o +psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o +vectordev.o: cuda_util.o vectordev.h +elldev.o: elldev.c +dnsdev.o: dnsdev.c +fcusparse.h elldev.c: elldev.h vectordev.h +fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse.h +fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse_fct.h +svectordev.o: svectordev.h vectordev.h +dvectordev.o: dvectordev.h vectordev.h +cvectordev.o: cvectordev.h vectordev.h +zvectordev.o: zvectordev.h vectordev.h +psb_gpu_env_mod.o: base_cusparse_mod.o +psb_gpu_mod.o: psb_gpu_env_mod.o psb_i_gpu_vect_mod.o\ + psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ + psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ + psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ + psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ + psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ + psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ + psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ + psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ + psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ + psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ + psb_d_diag_mat_mod.o \ + psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ + psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ + psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o + +iobjs: $(FOBJS) + $(MAKE) -C impl objs +cudaobjs: $(FOBJS) + $(MAKE) -C CUDA objs + +ilib: objs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) +cudalib: objs + $(MAKE) -C CUDA lib LIBNAME=$(LIBNAME) + +clean: cclean iclean cudaclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean +cudaclean: + $(MAKE) -C CUDA clean + +veryclean: clean diff --git a/gpu/base_cusparse_mod.F90 b/gpu/base_cusparse_mod.F90 new file mode 100644 index 00000000..9f5628be --- /dev/null +++ b/gpu/base_cusparse_mod.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module base_cusparse_mod + use iso_c_binding + ! Interface to CUSPARSE. + + enum, bind(c) + enumerator cusparse_status_success + enumerator cusparse_status_not_initialized + enumerator cusparse_status_alloc_failed + enumerator cusparse_status_invalid_value + enumerator cusparse_status_arch_mismatch + enumerator cusparse_status_mapping_error + enumerator cusparse_status_execution_failed + enumerator cusparse_status_internal_error + enumerator cusparse_status_matrix_type_not_supported + end enum + + enum, bind(c) + enumerator cusparse_matrix_type_general + enumerator cusparse_matrix_type_symmetric + enumerator cusparse_matrix_type_hermitian + enumerator cusparse_matrix_type_triangular + end enum + + enum, bind(c) + enumerator cusparse_fill_mode_lower + enumerator cusparse_fill_mode_upper + end enum + + enum, bind(c) + enumerator cusparse_diag_type_non_unit + enumerator cusparse_diag_type_unit + end enum + + enum, bind(c) + enumerator cusparse_index_base_zero + enumerator cusparse_index_base_one + end enum + + enum, bind(c) + enumerator cusparse_operation_non_transpose + enumerator cusparse_operation_transpose + enumerator cusparse_operation_conjugate_transpose + end enum + + enum, bind(c) + enumerator cusparse_direction_row + enumerator cusparse_direction_column + end enum + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface + function FcusparseCreate() & + & bind(c,name="FcusparseCreate") result(res) + use iso_c_binding + integer(c_int) :: res + end function FcusparseCreate + end interface + + interface + function FcusparseDestroy() & + & bind(c,name="FcusparseDestroy") result(res) + use iso_c_binding + integer(c_int) :: res + end function FcusparseDestroy + end interface + +contains + + function initFcusparse() result(res) + implicit none + integer(c_int) :: res + + res = FcusparseCreate() + end function initFcusparse + + function closeFcusparse() result(res) + implicit none + integer(c_int) :: res + res = FcusparseDestroy() + end function closeFcusparse + +#endif +end module base_cusparse_mod diff --git a/gpu/c_cusparse_mod.F90 b/gpu/c_cusparse_mod.F90 new file mode 100644 index 00000000..e7d37173 --- /dev/null +++ b/gpu/c_cusparse_mod.F90 @@ -0,0 +1,305 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module c_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: c_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type c_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: c_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type c_Hmat +#endif + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface CSRGDeviceFree + function c_CSRGDeviceFree(Mat) & + & bind(c,name="c_CSRGDeviceFree") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function c_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function c_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function c_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function c_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatIndexBase + end interface + + interface CSRGDeviceCsrsmAnalysis + function c_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGDeviceCsrsmAnalysis + end interface + + interface CSRGDeviceAlloc + function c_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="c_CSRGDeviceAlloc") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function c_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function c_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="c_CSRGDeviceGetParms") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function c_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function c_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spsvCSRGDevice") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function c_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spmvCSRGDevice") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function c_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_CSRGHost2Device") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function c_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_CSRGDevice2Host") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <=10 + interface HYBGDeviceAlloc + function c_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="c_HYBGDeviceAlloc") result(res) + use iso_c_binding + import c_hmat + type(c_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function c_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function c_HYBGDeviceFree(Mat) & + & bind(c,name="c_HYBGDeviceFree") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int) :: res + end function c_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function c_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function c_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function c_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function c_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function c_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="c_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int) :: res + end function c_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function c_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spsvHYBGDevice") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function c_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spmvHYBGDevice") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function c_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_HYBGHost2Device") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_HYBGHost2Device + end interface +#endif + +#endif + +end module c_cusparse_mod diff --git a/gpu/ccusparse.c b/gpu/ccusparse.c new file mode 100644 index 00000000..6f1cfdb3 --- /dev/null +++ b/gpu/ccusparse.c @@ -0,0 +1,97 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + +/* Single precision complex */ +#define TYPE float complex +#define CUSPARSE_BASE_TYPE CUDA_C_32F +#define T_CSRGDeviceMat c_CSRGDeviceMat +#define T_Cmat c_Cmat +#define T_spmvCSRGDevice c_spmvCSRGDevice +#define T_spsvCSRGDevice c_spsvCSRGDevice +#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc +#define T_CSRGDeviceFree c_CSRGDeviceFree +#define T_CSRGHost2Device c_CSRGHost2Device +#define T_CSRGDevice2Host c_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseCcsrmv +#define cusparseTcsrsv_solve cusparseCcsrsv_solve +#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve + +#else + +#define T_HYBGDeviceMat c_HYBGDeviceMat +#define T_Hmat c_Hmat +#define T_HYBGDeviceFree c_HYBGDeviceFree +#define T_spmvHYBGDevice c_spmvHYBGDevice +#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice c_spsvHYBGDevice +#define T_HYBGHost2Device c_HYBGHost2Device +#define cusparseThybmv cusparseChybmv +#define cusparseThybsv_solve cusparseChybsv_solve +#define cusparseThybsv_analysis cusparseChybsv_analysis +#define cusparseTcsr2hyb cusparseCcsr2hyb +#endif + +#include "fcusparse_fct.h" + +#endif diff --git a/gpu/cintrf.h b/gpu/cintrf.h new file mode 100644 index 00000000..1a3528aa --- /dev/null +++ b/gpu/cintrf.h @@ -0,0 +1,51 @@ + /* Parallel Sparse BLAS SPGPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _CINTRF_H_ +#define _CINTRF_H_ + +#include +#include + +#if defined(HAVE_SPGPU) && defined(HAVE_CUDA) +#include "core.h" +#include "cuda_util.h" +#include "vector.h" +#include "vectordev.h" + +#define ELL_PITCH_ALIGN_S 32 +#define ELL_PITCH_ALIGN_D 16 + + +#endif + +#endif diff --git a/gpu/core_mod.f90 b/gpu/core_mod.f90 new file mode 100644 index 00000000..d30f8a99 --- /dev/null +++ b/gpu/core_mod.f90 @@ -0,0 +1,53 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module core_mod + use iso_c_binding + + integer(c_int), parameter :: spgpu_type_int = 0 + integer(c_int), parameter :: spgpu_type_float = 1 + integer(c_int), parameter :: spgpu_type_double = 2 + integer(c_int), parameter :: spgpu_type_complex_float = 3 + integer(c_int), parameter :: spgpu_type_complex_double = 4 + integer(c_int), parameter :: spgpu_success = 0 + integer(c_int), parameter :: spgpu_unsupported = 1 + integer(c_int), parameter :: spgpu_unspecified = 2 + integer(c_int), parameter :: spgpu_outofmem = 3 + + interface + subroutine psb_cudaSync() & + & bind(c,name='cudaSync') + use iso_c_binding + end subroutine psb_cudaSync + end interface + +end module core_mod diff --git a/gpu/cuda_util.c b/gpu/cuda_util.c new file mode 100644 index 00000000..63c38b53 --- /dev/null +++ b/gpu/cuda_util.c @@ -0,0 +1,808 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include "cuda_util.h" + +#if defined(HAVE_CUDA) + + +static int hasUVA=-1; +static struct cudaDeviceProp *prop=NULL; +static spgpuHandle_t psb_gpu_handle = NULL; +static cublasHandle_t psb_cublas_handle = NULL; + + +int allocRemoteBuffer(void** buffer, int count) +{ + cudaError_t err = cudaMalloc(buffer, count); + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA allocRemoteBuffer for %d bytes Error: %s \n", + count, cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int hostRegisterMapped(void *pointer, long size) +{ + cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA hostRegisterMapped Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int getDevicePointer(void **d_p, void * h_p) +{ + cudaError_t err = cudaHostGetDevicePointer(d_p,h_p,0); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA getDevicePointer Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int registerMappedMemory(void *buffer, void **dp, int size) +{ + //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped); + if (err == cudaSuccess) err = cudaHostGetDevicePointer(dp,buffer,0); + + if (err == cudaSuccess) + { + err = cudaHostGetDevicePointer(dp,buffer,0); + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + } + else + { + fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int allocMappedMemory(void **buffer, void **dp, int size) +{ + cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA allocMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int unregisterMappedMemory(void *buffer) +{ + //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + cudaError_t err = cudaHostUnregister(buffer); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA unregisterMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int writeRemoteBuffer(void* hostSrc, void* buffer, int count) +{ + cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice); + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error writeRemoteBuffer: %s %p %p %d\n", + cudaGetErrorString(err),buffer, hostSrc, count); + return SPGPU_UNSPECIFIED; + } +} + +int readRemoteBuffer(void* hostDest, void* buffer, int count) +{ + + + cudaError_t err1; + cudaError_t err; +#if 0 + { + err1 =cudaGetLastError(); + fprintf(stderr,"CUDA Error prior to readRemoteBuffer: %s %d\n", + cudaGetErrorString(err1),err1); + } + +#endif + err = cudaMemcpy(hostDest, buffer, count, cudaMemcpyDeviceToHost); + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error readRemoteBuffer: %s %p %p %d %d\n", + cudaGetErrorString(err),hostDest,buffer,count,err); + return SPGPU_UNSPECIFIED; + } +} + +int freeRemoteBuffer(void* buffer) +{ + cudaError_t err = cudaFree(buffer); + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer); + return SPGPU_UNSPECIFIED; + } +} + +int gpuInit(int dev) +{ + + int count,err; + + if ((err=cudaSetDeviceFlags(cudaDeviceMapHost))!=cudaSuccess) + fprintf(stderr,"Error On SetDeviceFlags: %d '%s'\n",err,cudaGetErrorString(err)); + if ((prop=(struct cudaDeviceProp *) malloc(sizeof(struct cudaDeviceProp)))==NULL) { + fprintf(stderr,"CUDA Error gpuInit3: not malloced prop\n"); + return SPGPU_UNSPECIFIED; + } + err = setDevice(dev); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + if (!psb_cublas_handle) + psb_gpuCreateCublasHandle(); + hasUVA=getDeviceHasUVA(); + + return err; + +} + +void gpuClose() +{ + cudaStream_t st1, st2; + if (! psb_gpu_handle) + st1=spgpuGetStream(psb_gpu_handle); + if (! psb_cublas_handle) + cublasGetStream(psb_cublas_handle,&st2); + + psb_gpuDestroyHandle(); + if (st1 != st2) + psb_gpuDestroyCublasHandle(); + free(prop); + prop=NULL; + hasUVA=-1; +} + + +int setDevice(int dev) +{ + int count,err,idev; + + err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error setDevice: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + + if ((0<=dev)&&(devunifiedAddressing; + return(count); +} + +int getGPUMultiProcessors() +{ int count=0; + if (prop!=NULL) + count = prop->multiProcessorCount; + return(count); +} + + +int getGPUMemoryBusWidth() +{ int count=0; +#if CUDART_VERSION >= 5000 + if (prop!=NULL) + count = prop->memoryBusWidth; +#endif + return(count); +} +int getGPUMemoryClockRate() +{ int count=0; +#if CUDART_VERSION >= 5000 + if (prop!=NULL) + count = prop->memoryClockRate; +#endif + return(count); +} +int getGPUWarpSize() +{ int count=0; + if (prop!=NULL) + count = prop->warpSize; + return(count); +} +int getGPUMaxThreadsPerBlock() +{ int count=0; + if (prop!=NULL) + count = prop->maxThreadsPerBlock; + return(count); +} +int getGPUMaxThreadsPerMP() +{ int count=0; + if (prop!=NULL) + count = prop->maxThreadsPerMultiProcessor; + return(count); +} +int getGPUMaxRegistersPerBlock() +{ int count=0; + if (prop!=NULL) + count = prop->regsPerBlock; + return(count); +} + +void cpyGPUNameString(char *cstring) +{ + *cstring='\0'; + if (prop!=NULL) + strcpy(cstring,prop->name); + +} + +int DeviceHasUVA() +{ + return(hasUVA == 1); +} + + +int getDeviceCount() +{ int count; + cudaError_t err; + err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error getDeviceCount: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + return(count); +} + +void cudaSync() +{ + cudaError_t err; + err = cudaDeviceSynchronize(); + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } +} + +void cudaReset() +{ + cudaError_t err; + err = cudaDeviceReset(); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } +} + + +spgpuHandle_t psb_gpuGetHandle() +{ + return psb_gpu_handle; +} + +void psb_gpuCreateHandle() +{ + if (!psb_gpu_handle) + spgpuCreate(&psb_gpu_handle, getDevice()); + +} + +void psb_gpuDestroyHandle() +{ + if (!psb_gpu_handle) + spgpuDestroy(psb_gpu_handle); + psb_gpu_handle = NULL; +} + +cudaStream_t psb_gpuGetStream() +{ + return spgpuGetStream(psb_gpu_handle); +} + +void psb_gpuSetStream(cudaStream_t stream) +{ + spgpuSetStream(psb_gpu_handle, stream); + return ; +} + + + +cublasHandle_t psb_gpuGetCublasHandle() +{ + if (!psb_cublas_handle) + psb_gpuCreateCublasHandle(); + return psb_cublas_handle; +} +void psb_gpuCreateCublasHandle() +{ if (!psb_cublas_handle) + cublasCreate(&psb_cublas_handle); +} +void psb_gpuDestroyCublasHandle() +{ + if (!psb_cublas_handle) + cublasDestroy(psb_cublas_handle); + psb_cublas_handle=NULL; +} + + + + + +/* Simple memory tools */ + +int allocateInt(void **d_int, int n) +{ + return allocRemoteBuffer((void **)(d_int), n*sizeof(int)); +} + +int writeInt(void *d_int, int* h_int, int n) +{ + int i,j; + int *di; + i = writeRemoteBuffer((void*)h_int, (void*)d_int, n*sizeof(int)); + return i; +} + +int readInt(void* d_int, int* h_int, int n) +{ int i; + i = readRemoteBuffer((void *) h_int, (void *) d_int, n*sizeof(int)); + //cudaSync(); + return(i); +} + +int writeIntFirst(int first, void *d_int, int* h_int, int n, int IndexBase) +{ + int i,j; + int *di=(int *) d_int; + di = &(di[first-IndexBase]); + i = writeRemoteBuffer((void*)h_int, (void*)di, n*sizeof(int)); + return i; +} + +int readIntFirst(int first,void* d_int, int* h_int, int n, int IndexBase) +{ int i; + int *di=(int *) d_int; + di = &(di[first-IndexBase]); + i = readRemoteBuffer((void *) h_int, (void *) di, n*sizeof(int)); + //cudaSync(); + return(i); +} + +int allocateMultiInt(void **d_int, int m, int n) +{ + return allocRemoteBuffer((void **)(d_int), m*n*sizeof(int)); +} + +int writeMultiInt(void *d_int, int* h_int, int m, int n) +{ + int i,j; + int *di; + i = writeRemoteBuffer((void*)h_int, (void*)d_int, m*n*sizeof(int)); + return i; +} + +int readMultiInt(void* d_int, int* h_int, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_int, (void *) d_int, m*n*sizeof(int)); + //cudaSync(); + return(i); +} + +void freeInt(void *d_int) +{ + //printf("Before freeInt\n"); + freeRemoteBuffer(d_int); +} + + + + +int allocateFloat(void **d_float, int n) +{ + return allocRemoteBuffer((void **)(d_float), n*sizeof(float)); +} + +int writeFloat(void *d_float, float* h_float, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_float, (void*)d_float, n*sizeof(float)); + + return i; +} + +int readFloat(void* d_float, float* h_float, int n) +{ int i; + i = readRemoteBuffer((void *) h_float, (void *) d_float, n*sizeof(float)); + + return(i); +} + +int writeFloatFirst(int df, void *d_float, float* h_float, int n, int IndexBase) +{ + int i; + + float *dv=(float *) d_float; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_float, (void*)dv, n*sizeof(float)); + + return i; +} + +int readFloatFirst(int df, void* d_float, float* h_float, int n, int IndexBase) +{ int i; + float *dv=(float *) d_float; + dv = &dv[df-IndexBase]; + //fprintf(stderr,"readFloatFirst: %d %p %p %p %d \n",df,d_float,dv,h_float,n); + i = readRemoteBuffer((void *) h_float, (void *) dv, n*sizeof(float)); + + return(i); +} + + +int allocateMultiFloat(void **d_float, int m, int n) +{ + return allocRemoteBuffer((void **)(d_float), m*n*sizeof(float)); +} + +int writeMultiFloat(void *d_float, float* h_float, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_float, (void*)d_float, m*n*sizeof(float)); + return i; +} + +int readMultiFloat(void* d_float, float* h_float, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_float, (void *) d_float, m*n*sizeof(float)); + //cudaSync(); + return(i); +} + +void freeFloat(void *d_float) +{ + freeRemoteBuffer(d_float); +} + + + +int allocateDouble(void **d_double, int n) +{ + return allocRemoteBuffer((void **)(d_double), n*sizeof(double)); +} + +int writeDouble(void *d_double, double* h_double, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_double, (void*)d_double, n*sizeof(double)); + + return i; +} + +int readDouble(void* d_double, double* h_double, int n) +{ int i; + i = readRemoteBuffer((void *) h_double, (void *) d_double, n*sizeof(double)); + + return(i); +} + +int writeDoubleFirst(int df, void *d_double, double* h_double, int n, int IndexBase) +{ + int i; + + double *dv=(double *) d_double; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_double, (void*)dv, n*sizeof(double)); + + return i; +} + +int readDoubleFirst(int df, void* d_double, double* h_double, int n, int IndexBase) +{ int i; + double *dv=(double *) d_double; + dv = &dv[df-IndexBase]; + //fprintf(stderr,"readDoubleFirst: %d %p %p %p %d \n",df,d_double,dv,h_double,n); + i = readRemoteBuffer((void *) h_double, (void *) dv, n*sizeof(double)); + + return(i); +} + +int allocateMultiDouble(void **d_double, int m, int n) +{ + return allocRemoteBuffer((void **)(d_double), m*n*sizeof(double)); +} + +int writeMultiDouble(void *d_double, double* h_double, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_double, (void*)d_double, m*n*sizeof(double)); + return i; +} + +int readMultiDouble(void* d_double, double* h_double, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_double, (void *) d_double, m*n*sizeof(double)); + //cudaSync(); + return(i); +} + +void freeDouble(void *d_double) +{ + freeRemoteBuffer(d_double); +} + + + +int allocateFloatComplex(void **d_FloatComplex, int n) +{ + return allocRemoteBuffer((void **)(d_FloatComplex), n*sizeof(cuFloatComplex)); +} + +int writeFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, n*sizeof(cuFloatComplex)); + + return i; +} + +int readFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int n) +{ int i; + i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, n*sizeof(cuFloatComplex)); + + return(i); +} + +int allocateMultiFloatComplex(void **d_FloatComplex, int m, int n) +{ + return allocRemoteBuffer((void **)(d_FloatComplex), m*n*sizeof(cuFloatComplex)); +} + +int writeMultiFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, m*n*sizeof(cuFloatComplex)); + return i; +} + +int readMultiFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, m*n*sizeof(cuFloatComplex)); + //cudaSync(); + return(i); +} + +int writeFloatComplexFirst(int df, void *d_floatComplex, + cuFloatComplex* h_floatComplex, int n, int IndexBase) +{ + int i; + + cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_floatComplex, (void*)dv, n*sizeof(cuFloatComplex)); + + return i; +} + +int readFloatComplexFirst(int df, void* d_floatComplex, cuFloatComplex* h_floatComplex, + int n, int IndexBase) +{ int i; + cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex; + dv = &dv[df-IndexBase]; + i = readRemoteBuffer((void *) h_floatComplex, (void *) dv, n*sizeof(cuFloatComplex)); + + return(i); +} + +void freeFloatComplex(void *d_FloatComplex) +{ + freeRemoteBuffer(d_FloatComplex); +} + + + + +int allocateDoubleComplex(void **d_DoubleComplex, int n) +{ + return allocRemoteBuffer((void **)(d_DoubleComplex), n*sizeof(cuDoubleComplex)); +} + +int writeDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, n*sizeof(cuDoubleComplex)); + + return i; +} + +int readDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n) +{ int i; + i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, n*sizeof(cuDoubleComplex)); + + return(i); +} + +int writeDoubleComplexFirst(int df, void *d_doubleComplex, + cuDoubleComplex* h_doubleComplex, int n, int IndexBase) +{ + int i; + + cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_doubleComplex, (void*)dv, n*sizeof(cuDoubleComplex)); + + return i; +} + +int readDoubleComplexFirst(int df, void* d_doubleComplex, cuDoubleComplex* h_doubleComplex, + int n, int IndexBase) +{ int i; + cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex; + dv = &dv[df-IndexBase]; + i = readRemoteBuffer((void *) h_doubleComplex, (void *) dv, n*sizeof(cuDoubleComplex)); + + return(i); +} + +int allocateMultiDoubleComplex(void **d_DoubleComplex, int m, int n) +{ + return allocRemoteBuffer((void **)(d_DoubleComplex), m*n*sizeof(cuDoubleComplex)); +} + +int writeMultiDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, m*n*sizeof(cuDoubleComplex)); + return i; +} + +int readMultiDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, m*n*sizeof(cuDoubleComplex)); + //cudaSync(); + return(i); +} + +void freeDoubleComplex(void *d_DoubleComplex) +{ + freeRemoteBuffer(d_DoubleComplex); +} + + + +double etime() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + + + + +#endif diff --git a/gpu/cuda_util.h b/gpu/cuda_util.h new file mode 100644 index 00000000..03c7b488 --- /dev/null +++ b/gpu/cuda_util.h @@ -0,0 +1,139 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _CUDA_UTIL_H_ +#define _CUDA_UTIL_H_ + +#include +#include +#include +#include + +#if defined(HAVE_CUDA) +#include "cuda_runtime.h" +#include "core.h" +#include "cuComplex.h" +#include "fcusparse.h" +#include "cublas_v2.h" + +int allocRemoteBuffer(void** buffer, int count); +int allocMappedMemory(void **buffer, void **dp, int size); +int registerMappedMemory(void *buffer, void **dp, int size); +int unregisterMappedMemory(void *buffer); +int writeRemoteBuffer(void* hostSrc, void* buffer, int count); +int readRemoteBuffer(void* hostDest, void* buffer, int count); +int freeRemoteBuffer(void* buffer); +int gpuInit(int dev); +int getDeviceCount(); +int getDevice(); +int setDevice(int dev); +int getGPUMultiProcessors(); +int getGPUMemoryBusWidth(); +int getGPUMemoryClockRate(); +int getGPUWarpSize(); +int getGPUMaxThreadsPerBlock(); +int getGPUMaxThreadsPerMP(); +int getGPUMaxRegistersPerBlock(); +void cpyGPUNameString(char *cstring); + + +void cudaSync(); +void cudaReset(); +void gpuClose(); + + +spgpuHandle_t psb_gpuGetHandle(); +void psb_gpuCreateHandle(); +void psb_gpuDestroyHandle(); +cudaStream_t psb_gpuGetStream(); +void psb_gpuSetStream(cudaStream_t stream); + +cublasHandle_t psb_gpuGetCublasHandle(); +void psb_gpuCreateCublasHandle(); +void psb_gpuDestroyCublasHandle(); + + +int allocateInt(void **, int); +int allocateMultiInt(void **, int, int); +int writeInt(void *, int *, int); +int writeMultiInt(void *, int* , int , int ); +int readInt(void *, int *, int); +int readMultiInt(void*, int*, int, int ); +int writeIntFirst(int,void *, int *, int,int); +int readIntFirst(int,void *, int *, int,int); +void freeInt(void *); + +int allocateFloat(void **, int); +int allocateMultiFloat(void **, int, int); +int writeFloat(void *, float *, int); +int writeMultiFloat(void *, float* , int , int ); +int readFloat(void *, float*, int); +int readMultiFloat(void*, float*, int, int ); +int writeFloatFirst(int, void *, float*, int, int); +int readFloatFirst(int, void *, float*, int, int); +void freeFloat(void *); + +int allocateDouble(void **, int); +int allocateMultiDouble(void **, int, int); +int writeDouble(void *, double*, int); +int writeMultiDouble(void *, double* , int , int ); +int readDouble(void *, double*, int); +int readMultiDouble(void*, double*, int, int ); +int writeDoubleFirst(int, void *, double*, int, int); +int readDoubleFirst(int, void *, double*, int, int); +void freeDouble(void *); + +int allocateFloatComplex(void **, int); +int allocateMultiFloatComplex(void **, int, int); +int writeFloatComplex(void *, cuFloatComplex*, int); +int writeMultiFloatComplex(void *, cuFloatComplex* , int , int ); +int readFloatComplex(void *, cuFloatComplex*, int); +int readMultiFloatComplex(void*, cuFloatComplex*, int, int ); +int writeFloatComplexFirst(int, void *, cuFloatComplex*, int, int); +int readFloatComplexFirst(int, void *, cuFloatComplex*, int, int); +void freeFloatComplex(void *); + +int allocateDoubleComplex(void **, int); +int allocateMultiDoubleComplex(void **, int, int); +int writeDoubleComplex(void *, cuDoubleComplex*, int); +int writeMultiDoubleComplex(void *, cuDoubleComplex* , int , int ); +int readDoubleComplex(void *, cuDoubleComplex*, int); +int readMultiDoubleComplex(void*, cuDoubleComplex*, int, int ); +int writeDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int); +int readDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int); +void freeDoubleComplex(void *); + +double etime(); + +#endif + +#endif diff --git a/gpu/cusparse_mod.F90 b/gpu/cusparse_mod.F90 new file mode 100644 index 00000000..4ae16cff --- /dev/null +++ b/gpu/cusparse_mod.F90 @@ -0,0 +1,38 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module cusparse_mod + use base_cusparse_mod + use s_cusparse_mod + use d_cusparse_mod + use c_cusparse_mod + use z_cusparse_mod +end module cusparse_mod diff --git a/gpu/cvectordev.c b/gpu/cvectordev.c new file mode 100644 index 00000000..db55caef --- /dev/null +++ b/gpu/cvectordev.c @@ -0,0 +1,325 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "cvectordev.h" + + +int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(cuFloatComplex)); +} + +int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuFloatComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceFloatComplex(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplexR2",i); + } + return(i); +} + +int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuFloatComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); + } + return(i); +} + +int readMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld) +{ int i; + i = readMultiVecDeviceFloatComplex(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplexR2",i); + } + return(i); +} + +int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + cuFloatComplex beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = make_cuFloatComplex(0.0, 0.0); + else if (dupl == INS_ADD) + beta = make_cuFloatComplex(1.0, 0.0); + else + beta = make_cuFloatComplex(0.0, 0.0); + + spgpuCscat(handle, (cuFloatComplex *) devVecX->v_, n, (cuFloatComplex*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + cuFloatComplex *hv = (cuFloatComplex *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuCgath(handle,hv, n, idx,indexBase, + (cuFloatComplex *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void *deviceIdx, + int hfirst, void* host_values, + int indexBase, cuFloatComplex beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, + indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuFloatComplex beta) +{ int i=0; + cuFloatComplex *hv = (cuFloatComplex *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuCscat(handle, (cuFloatComplex *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, + alpha, (cuFloatComplex *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, + void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, + (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVecX, + cuFloatComplex beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuCaxpby(handle,(cuFloatComplex*)devVecY->v_+pitch*j, n, beta, + (cuFloatComplex*)devVecY->v_+pitch*j, alpha, + (cuFloatComplex*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, + void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCmaxy(handle, (cuFloatComplex*)devVecB->v_, n, alpha, + (cuFloatComplex*)devVecA->v_, + (cuFloatComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB, cuFloatComplex beta, + void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuCmaxypbz(handle, (cuFloatComplex*)devVecZ->v_, n, beta, + (cuFloatComplex*)devVecZ->v_, + alpha, (cuFloatComplex*) devVecA->v_, (cuFloatComplex*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + + +int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCabs(handle, (cuFloatComplex*)devVecB->v_, n, + alpha, (cuFloatComplex*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_gpuGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuCabs(handle, (cuFloatComplex*)devVecA->v_, n, + alpha, (cuFloatComplex*)devVecA->v_); + + return(i); +} + +#endif + diff --git a/gpu/cvectordev.h b/gpu/cvectordev.h new file mode 100644 index 00000000..f58fcca7 --- /dev/null +++ b/gpu/cvectordev.h @@ -0,0 +1,81 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedFloatComplex(void *, void **, int, cuFloatComplex); +int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec); +int writeMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld); +int readMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec); +int readMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld); + +int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceFloatComplex(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, cuFloatComplex beta); +int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, cuFloatComplex beta); + +int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA); +int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY); +int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB, cuFloatComplex beta, void *deviceVecZ); +int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA); +int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, + void *deviceVecA, void *deviceVecB); + + +#endif diff --git a/gpu/d_cusparse_mod.F90 b/gpu/d_cusparse_mod.F90 new file mode 100644 index 00000000..cd8bd52f --- /dev/null +++ b/gpu/d_cusparse_mod.F90 @@ -0,0 +1,305 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module d_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: d_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type d_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: d_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type d_Hmat +#endif + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface CSRGDeviceFree + function d_CSRGDeviceFree(Mat) & + & bind(c,name="d_CSRGDeviceFree") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function d_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function d_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function d_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function d_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatIndexBase + end interface + + interface CSRGDeviceCsrsmAnalysis + function d_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGDeviceCsrsmAnalysis + end interface + + interface CSRGDeviceAlloc + function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="d_CSRGDeviceAlloc") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function d_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function d_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="d_CSRGDeviceGetParms") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function d_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function d_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spsvCSRGDevice") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function d_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spmvCSRGDevice") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function d_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_CSRGHost2Device") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function d_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_CSRGDevice2Host") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function d_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="d_HYBGDeviceAlloc") result(res) + use iso_c_binding + import d_hmat + type(d_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function d_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function d_HYBGDeviceFree(Mat) & + & bind(c,name="d_HYBGDeviceFree") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int) :: res + end function d_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function d_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function d_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function d_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function d_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function d_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="d_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int) :: res + end function d_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function d_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spsvHYBGDevice") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function d_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spmvHYBGDevice") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function d_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_HYBGHost2Device") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_HYBGHost2Device + end interface +#endif + +#endif + +end module d_cusparse_mod diff --git a/gpu/dcusparse.c b/gpu/dcusparse.c new file mode 100644 index 00000000..9659c1f9 --- /dev/null +++ b/gpu/dcusparse.c @@ -0,0 +1,95 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE double +#define CUSPARSE_BASE_TYPE CUDA_R_64F +#define T_CSRGDeviceMat d_CSRGDeviceMat +#define T_Cmat d_Cmat +#define T_spmvCSRGDevice d_spmvCSRGDevice +#define T_spsvCSRGDevice d_spsvCSRGDevice +#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc +#define T_CSRGDeviceFree d_CSRGDeviceFree +#define T_CSRGHost2Device d_CSRGHost2Device +#define T_CSRGDevice2Host d_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseDcsrmv +#define cusparseTcsrsv_solve cusparseDcsrsv_solve +#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis +#define T_HYBGDeviceMat d_HYBGDeviceMat +#define T_Hmat d_Hmat +#define T_HYBGDeviceFree d_HYBGDeviceFree +#define T_spmvHYBGDevice d_spmvHYBGDevice +#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice d_spsvHYBGDevice +#define T_HYBGHost2Device d_HYBGHost2Device +#define cusparseThybmv cusparseDhybmv +#define cusparseThybsv_solve cusparseDhybsv_solve +#define cusparseThybsv_analysis cusparseDhybsv_analysis +#define cusparseTcsr2hyb cusparseDcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve + +#endif + +#include "fcusparse_fct.h" + +#endif diff --git a/gpu/diagdev.c b/gpu/diagdev.c new file mode 100644 index 00000000..64879455 --- /dev/null +++ b/gpu/diagdev.c @@ -0,0 +1,291 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "diagdev.h" +#include +#include +#include +#include +#if defined(HAVE_SPGPU) +//new +DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType) +{ + DiagDeviceParams params; + + params.elementType = elementType; + //numero di elementi di val + params.rows = rows; + params.columns = columns; + params.diags = diags; + + return params; + +} +//new +int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params) +{ + struct DiagDevice *tmp = (struct DiagDevice *)malloc(sizeof(struct DiagDevice)); + int ret=SPGPU_SUCCESS; + *remoteMatrix = (void *)tmp; + + tmp->rows = params->rows; + + tmp->cols = params->columns; + + tmp->diags = params->diags; + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->off), tmp->diags*sizeof(int)); + + /* tmp->baseIndex = params->firstIndex; */ + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +void freeDiagDevice(void* remoteMatrix) +{ + struct DiagDevice *devMat = (struct DiagDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->off); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType) +{ int i; +#ifdef HAVE_SPGPU + DiagDeviceParams p; + + p = getDiagDeviceParams(rows, columns, diags,elementType); + i = allocDiagDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n) +{ int i,fo,fa; + char buf_a[255], buf_o[255],tmp[255]; +#ifdef HAVE_SPGPU + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + // Ex updateFromHost function + /* memset(buf_a,'\0',255); */ + /* memset(buf_o,'\0',255); */ + /* memset(tmp,'\0',255); */ + + /* strcat(buf_a,"mat_"); */ + /* strcat(buf_o,"off_"); */ + /* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */ + /* strcat(buf_a,tmp); */ + /* memset(tmp,'\0',255); */ + /* sprintf(tmp,"%d.dat",devMat->cols); */ + /* strcat(buf_o,tmp); */ + + /* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + /* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + + /* i = write(fa, a, sizeof(double)*devMat->cols*devMat->rows); */ + /* i = write(fo, off, sizeof(int)*devMat->cols); */ + + /* close(fa); */ + /* close(fo); */ + + i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(double)); + i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int)); + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readDiagDeviceDouble(void* deviceMat, double* a, int* off) +{ int i; +#ifdef HAVE_SPGPU + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); + i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +//new +int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /* spgpuDdiagspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */ + + spgpuDdiaspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n) +{ int i,fo,fa; + char buf_a[255], buf_o[255],tmp[255]; +#ifdef HAVE_SPGPU + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + // Ex updateFromHost function + /* memset(buf_a,'\0',255); */ + /* memset(buf_o,'\0',255); */ + /* memset(tmp,'\0',255); */ + + /* strcat(buf_a,"mat_"); */ + /* strcat(buf_o,"off_"); */ + /* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */ + /* strcat(buf_a,tmp); */ + /* memset(tmp,'\0',255); */ + /* sprintf(tmp,"%d.dat",devMat->cols); */ + /* strcat(buf_o,tmp); */ + + /* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + /* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + + /* i = write(fa, a, sizeof(float)*devMat->cols*devMat->rows); */ + /* i = write(fo, off, sizeof(int)*devMat->cols); */ + + /* close(fa); */ + /* close(fo); */ + + i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(float)); + i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int)); + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readDiagDeviceFloat(void* deviceMat, float* a, int* off) +{ int i; +#ifdef HAVE_SPGPU + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); + i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +//new +int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /* spgpuDdiagspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */ + + spgpuSdiaspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +#endif diff --git a/gpu/diagdev.h b/gpu/diagdev.h new file mode 100644 index 00000000..83f38289 --- /dev/null +++ b/gpu/diagdev.h @@ -0,0 +1,95 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _DIAGDEV_H_ +#define _DIAGDEV_H_ + +#ifdef HAVE_SPGPU +#include "cintrf.h" +#include "dia.h" + +struct DiagDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // offset (same size of cM) + int *off; + + int rows; + + int cols; + + int diags; + +}; + +typedef struct DiagDeviceParams +{ + + unsigned int elementType; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + unsigned int diags; + +} DiagDeviceParams; +DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int elementType, unsigned int firstIndex); +int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int elementType, unsigned int firstIndex); +int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params); +void freeDiagDevice(void* remoteMatrix); + +int readDiagDeviceDouble(void* deviceMat, double* a, int* off); +int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n); +int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); + +int readDiagDeviceFloat(void* deviceMat, float* a, int* off); +int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n); +int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); + + + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/diagdev_mod.F90 b/gpu/diagdev_mod.F90 new file mode 100644 index 00000000..cbcc029e --- /dev/null +++ b/gpu/diagdev_mod.F90 @@ -0,0 +1,231 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module diagdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: diagdev_parms + integer(c_int) :: element_type + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: firstIndex + end type diagdev_parms + +#ifdef HAVE_SPGPU + + interface + function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) & + & result(res) bind(c,name='getDiagDeviceParams') + use iso_c_binding + import :: diagdev_parms + type(diagdev_parms) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + end function FgetDiagDeviceParams + end interface + + + interface + function FallocDiagDevice(deviceMat,rows,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocDiagDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocDiagDevice + end interface + + + interface writeDiagDevice + + function writeDiagDeviceFloat(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + real(c_float) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceFloat + + function writeDiagDeviceDouble(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int),value :: n + real(c_double) :: a(n,*) + integer(c_int) :: off(*) + end function writeDiagDeviceDouble + + function writeDiagDeviceFloatComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_float_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceFloatComplex + + function writeDiagDeviceDoubleComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_double_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceDoubleComplex + + end interface + + interface readDiagDevice + + function readDiagDeviceFloat(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceFloat + + function readDiagDeviceDouble(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int),value :: n + real(c_double) :: a(n,*) + integer(c_int) :: off(*) + end function readDiagDeviceDouble + + function readDiagDeviceFloatComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_float_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceFloatComplex + + function readDiagDeviceDoubleComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_double_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceDoubleComplex + + end interface + + interface + subroutine freeDiagDevice(deviceMat) & + & bind(c,name='freeDiagDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeDiagDevice + end interface + + interface + subroutine resetDiagTimer() bind(c,name='resetDiagTimer') + use iso_c_binding + end subroutine resetDiagTimer + end interface + interface + function getDiagTimer() & + & bind(c,name='getDiagTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getDiagTimer + end interface + + + interface + function getDiagDevicePitch(deviceMat) & + & bind(c,name='getDiagDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDiagDevicePitch + end interface + + interface + function getDiagDeviceMaxRowSize(deviceMat) & + & bind(c,name='getDiagDeviceMaxRowSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDiagDeviceMaxRowSize + end interface + + + interface spmvDiagDevice + function spmvDiagDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvDiagDeviceFloat + function spmvDiagDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvDiagDeviceDouble + function spmvDiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvDiagDeviceFloatComplex + function spmvDiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvDiagDeviceDoubleComplex + end interface spmvDiagDevice + +#endif + + +end module diagdev_mod diff --git a/gpu/dnsdev.c b/gpu/dnsdev.c new file mode 100644 index 00000000..fb4d339c --- /dev/null +++ b/gpu/dnsdev.c @@ -0,0 +1,383 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include "dnsdev.h" + +#if defined(HAVE_SPGPU) + +#define PASS_RS 0 + +#define IMIN(a,b) ((a)<(b) ? (a) : (b)) + +DnsDeviceParams getDnsDeviceParams(unsigned int rows, unsigned int columns, + unsigned int elementType, unsigned int firstIndex) +{ + DnsDeviceParams params; + + if (elementType == SPGPU_TYPE_DOUBLE) + { + params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D; + } + else + { + params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S; + } + //For complex? + params.elementType = elementType; + params.rows = rows; + params.columns = columns; + params.firstIndex = firstIndex; + + return params; + +} +//new +int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params) +{ + struct DnsDevice *tmp = (struct DnsDevice *)malloc(sizeof(struct DnsDevice)); + *remoteMatrix = (void *)tmp; + tmp->rows = params->rows; + tmp->columns = params->columns; + tmp->cMPitch = params->pitch; + tmp->pitch= tmp->cMPitch; + tmp->allocsize = (int)tmp->columns * tmp->pitch; + tmp->baseIndex = params->firstIndex; + //fprintf(stderr,"allocDnsDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); + if (params->elementType == SPGPU_TYPE_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + else if (params->elementType == SPGPU_TYPE_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocDnsDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return SPGPU_SUCCESS; +} + +void freeDnsDevice(void* remoteMatrix) +{ + struct DnsDevice *devMat = (struct DnsDevice *) remoteMatrix; + //fprintf(stderr,"freeDnsDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocDnsDevice(void** deviceMat, unsigned int rows, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ int i; +#ifdef HAVE_SPGPU + DnsDeviceParams p; + + p = getDnsDeviceParams(rows, columns, elementType, firstIndex); + i = allocDnsDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, + void *deviceMat, void* deviceX, float *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; +#ifdef HAVE_SPGPU + + cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasSgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasSgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, + void *deviceMat, void* deviceX, double *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; +#ifdef HAVE_SPGPU + + cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasDgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasDgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha, + void *deviceMat, void* deviceX, float complex *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; +#ifdef HAVE_SPGPU + + cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasCgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha, + void *deviceMat, void* deviceX, double complex *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; +#ifdef HAVE_SPGPU + + cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasZgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) +{ int i; +#ifdef HAVE_SPGPU + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int getDnsDevicePitch(void* deviceMat) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; +#ifdef HAVE_SPGPU + i = devMat->pitch; + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +#endif + diff --git a/gpu/dnsdev.h b/gpu/dnsdev.h new file mode 100644 index 00000000..7c8b06c9 --- /dev/null +++ b/gpu/dnsdev.h @@ -0,0 +1,122 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _DNSDEV_H_ +#define _DNSDEV_H_ + +#if defined(HAVE_SPGPU) +#include "cintrf.h" +#include "cuComplex.h" +#include "cublas_v2.h" + + +struct DnsDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + + //matrix size (uncompressed) + int rows; + int columns; + + int pitch; //old + + int cMPitch; + + //allocation size (in elements) + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; +}; + +typedef struct DnsDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} DnsDeviceParams; + +int FallocDnsDevice(void** deviceMat, unsigned int rows, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex); +int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params); +void freeDnsDevice(void* remoteMatrix); + +int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc); +int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc); +int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc); +int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc); + +int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc); +int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc); +int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc); +int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc); + +int spmvDnsDeviceFloat(char transa, int m, int n, int k, + float *alpha, void *deviceMat, void* deviceX, + float *beta, void* deviceY); +int spmvDnsDeviceDouble(char transa, int m, int n, int k, + double *alpha, void *deviceMat, void* deviceX, + double *beta, void* deviceY); +int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, + float complex *alpha, void *deviceMat, void* deviceX, + float complex *beta, void* deviceY); +int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, + double complex *alpha, void *deviceMat, void* deviceX, + double complex *beta, void* deviceY); + +int getDnsDevicePitch(void* deviceMat); + +// sparse Dns matrix-vector product +//int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +//int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/dnsdev_mod.F90 b/gpu/dnsdev_mod.F90 new file mode 100644 index 00000000..8b96b918 --- /dev/null +++ b/gpu/dnsdev_mod.F90 @@ -0,0 +1,275 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module dnsdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: dnsdev_parms + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: maxRowSize + integer(c_int) :: avgRowSize + integer(c_int) :: firstIndex + end type dnsdev_parms + +#ifdef HAVE_SPGPU + + interface + function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) & + & result(res) bind(c,name='getDnsDeviceParams') + use iso_c_binding + import :: dnsdev_parms + type(dnsdev_parms) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + end function FgetDnsDeviceParams + end interface + + + interface + function FallocDnsDevice(deviceMat,rows,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocDnsDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocDnsDevice + end interface + + + interface writeDnsDevice + + function writeDnsDeviceFloat(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_float) :: val(lda,*) + end function writeDnsDeviceFloat + + + function writeDnsDeviceDouble(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_double) :: val(lda,*) + end function writeDnsDeviceDouble + + + function writeDnsDeviceFloatComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceFloatComple') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_float_complex) :: val(lda,*) + end function writeDnsDeviceFloatComplex + + + function writeDnsDeviceDoubleComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_double_complex) :: val(lda,*) + end function writeDnsDeviceDoubleComplex + + end interface + + interface readDnsDevice + + function readDnsDeviceFloat(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_float) :: val(lda,*) + end function readDnsDeviceFloat + + + function readDnsDeviceDouble(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_double) :: val(lda,*) + end function readDnsDeviceDouble + + + function readDnsDeviceFloatComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceFloatComple') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_float_complex) :: val(lda,*) + end function readDnsDeviceFloatComplex + + + function readDnsDeviceDoubleComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_double_complex) :: val(lda,*) + end function readDnsDeviceDoubleComplex + + end interface + + interface + subroutine freeDnsDevice(deviceMat) & + & bind(c,name='freeDnsDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeDnsDevice + end interface + + interface + subroutine resetDnsTimer() bind(c,name='resetDnsTimer') + use iso_c_binding + end subroutine resetDnsTimer + end interface + interface + function getDnsTimer() & + & bind(c,name='getDnsTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getDnsTimer + end interface + + + interface + function getDnsDevicePitch(deviceMat) & + & bind(c,name='getDnsDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDnsDevicePitch + end interface + +!!$ interface csputDnsDeviceFloat +!!$ function dev_csputDnsDeviceFloat(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloat') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceFloat +!!$ end interface +!!$ +!!$ interface csputDnsDeviceDouble +!!$ function dev_csputDnsDeviceDouble(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceDouble') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceDouble +!!$ end interface +!!$ +!!$ interface csputDnsDeviceFloatComplex +!!$ function dev_csputDnsDeviceFloatComplex(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceFloatComplex +!!$ end interface +!!$ +!!$ interface csputDnsDeviceDoubleComplex +!!$ function dev_csputDnsDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceDoubleComplex +!!$ end interface + + interface spmvDnsDevice + function spmvDnsDeviceFloat(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceFloat') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float) :: alpha, beta + end function spmvDnsDeviceFloat + + function spmvDnsDeviceDouble(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceDouble') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double) :: alpha, beta + end function spmvDnsDeviceDouble + + function spmvDnsDeviceFloatComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceFloatComplex') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex) :: alpha, beta + end function spmvDnsDeviceFloatComplex + + function spmvDnsDeviceDoubleComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceDoubleComplex') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex) :: alpha, beta + end function spmvDnsDeviceDoubleComplex + + end interface + +#endif + + +end module dnsdev_mod diff --git a/gpu/dvectordev.c b/gpu/dvectordev.c new file mode 100644 index 00000000..8b020c16 --- /dev/null +++ b/gpu/dvectordev.c @@ -0,0 +1,305 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "dvectordev.h" + + +int registerMappedDouble(void *buff, void **d_p, int n, double dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(double)); +} + +int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(double)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceDouble(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleR2",i); + } + return(i); +} + +int readMultiVecDeviceDouble(void* deviceVec, double* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(double)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i); + } + return(i); +} + +int readMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld) +{ int i; + i = readMultiVecDeviceDouble(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleR2",i); + } + return(i); +} + +int setscalMultiVecDeviceDouble(double val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_); + + return(i); +} + + +int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + double beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0.0; + else if (dupl == INS_ADD) + beta = 1.0; + else + beta = 0.0; + + spgpuDscat(handle, (double *) devVecX->v_, n, (double*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceDouble(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + double *hv = (double *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuDgath(handle,hv, n, idx,indexBase, (double *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, double beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceDouble(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, double beta) +{ int i=0; + double *hv = (double *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuDscat(handle, (double *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + +int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, + alpha, (double *)devVecA->v_); + return(i); +} + +int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, + double beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta, + (double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDmaxy(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_, + (double*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, + void *deviceVecB, double beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuDmaxypbz(handle, (double*)devVecZ->v_, n, beta, (double*)devVecZ->v_, + alpha, (double*) devVecA->v_, (double*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + +int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDabs(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_gpuGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuDabs(handle, (double*)devVecA->v_, n, alpha, (double*)devVecA->v_); + + return(i); +} + + +#endif + diff --git a/gpu/dvectordev.h b/gpu/dvectordev.h new file mode 100644 index 00000000..960958c5 --- /dev/null +++ b/gpu/dvectordev.h @@ -0,0 +1,78 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedDouble(void *, void **, int, double); +int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); +int writeMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld); +int readMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); +int readMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld); + +int setscalMultiVecDeviceDouble(double val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceDouble(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, double beta); +int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, double beta); + +int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA); +int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int amaxMultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); +int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, + void *deviceVecB, double beta, void *deviceVecZ); +int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA); +int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB); + + +#endif diff --git a/gpu/elldev.c b/gpu/elldev.c new file mode 100644 index 00000000..8fd7aeb5 --- /dev/null +++ b/gpu/elldev.c @@ -0,0 +1,773 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include "elldev.h" + +#if defined(HAVE_SPGPU) + +#define PASS_RS 0 + +EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ + EllDeviceParams params; + + if (elementType == SPGPU_TYPE_DOUBLE) + { + params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D; + } + else + { + params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S; + } + //For complex? + params.elementType = elementType; + + params.rows = rows; + params.maxRowSize = maxRowSize; + params.avgRowSize = (nnzeros+rows-1)/rows; + params.columns = columns; + params.firstIndex = firstIndex; + + //params.pitch = computeEllAllocPitch(rows); + + return params; + +} +//new +int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params) +{ + struct EllDevice *tmp = (struct EllDevice *)malloc(sizeof(struct EllDevice)); + *remoteMatrix = (void *)tmp; + tmp->rows = params->rows; + tmp->cMPitch = computeEllAllocPitch(tmp->rows); + tmp->rPPitch = tmp->cMPitch; + tmp->pitch= tmp->cMPitch; + tmp->maxRowSize = params->maxRowSize; + tmp->avgRowSize = params->avgRowSize; + tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch; + //tmp->allocsize = (int)params->maxRowSize * tmp->cMPitch; + allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int)); + tmp->columns = params->columns; + tmp->baseIndex = params->firstIndex; + tmp->dataType = params->elementType; + //fprintf(stderr,"allocEllDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); + if (params->elementType == SPGPU_TYPE_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + else if (params->elementType == SPGPU_TYPE_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return SPGPU_SUCCESS; +} + +//new +void zeroEllDevice(void *remoteMatrix) +{ + struct EllDevice *tmp = (struct EllDevice *) remoteMatrix; + + if (tmp->dataType == SPGPU_TYPE_FLOAT) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float)); + else if (tmp->dataType == SPGPU_TYPE_DOUBLE) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double)); + else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex)); + else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return; +} + + +void freeEllDevice(void* remoteMatrix) +{ + struct EllDevice *devMat = (struct EllDevice *) remoteMatrix; + //fprintf(stderr,"freeEllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->rS); + freeRemoteBuffer(devMat->rP); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ int i; +#ifdef HAVE_SPGPU + EllDeviceParams p; + + p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex); + i = allocEllDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + +void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS, + int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + + for (i=0; icount_ == x->count_, "ERROR: x and y don't share the same number of vectors"); + __assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)"); + __assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)"); +#endif + /*spgpuSellspmv (handle, (float*) y->v_, (float*)y->v_, alpha, + (float*) devMat->cM, devMat->rP, devMat->cMPitch, + devMat->rPPitch, devMat->rS, devMat->rows, + (float*)x->v_, beta, devMat->baseIndex);*/ + sspmdmm_gpu ( (float *)y->v_,y->count_, y->pitch_, (float *)y->v_, alpha, (float *)devMat->cM, devMat->rP, devMat->rS, + devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, + (float *)x->v_, beta, devMat->baseIndex); + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +void +dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, int* rP, + int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + double *x, double beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + for (i=0; iv_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/ + /* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */ + /* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */ + /* devMat->pitch, y->count_, y->pitch_); */ + dspmdmm_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_, + alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, + devMat->maxRowSize, devMat->rows, devMat->pitch, + (double *)x->v_, beta, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +void +cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y, + cuFloatComplex alpha, cuFloatComplex* cM, + int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + cuFloatComplex *x, cuFloatComplex beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + for (i=0; iv_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, + (cuFloatComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +void +zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* cM, + int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + for (i=0; iv_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, + devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + //i = writeEllDevice(deviceMat, (void *) val, ja, irn); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*i = readEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int getEllDevicePitch(void* deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; +#ifdef HAVE_SPGPU + i = devMat->pitch; //old + //i = getPitchEllDevice(deviceMat); + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int getEllDeviceMaxRowSize(void* deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; +#ifdef HAVE_SPGPU + i = devMat->maxRowSize; + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + + +// New copying interface + +int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float *val, void *deviceMat) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + float *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_gpuGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double *val, void *deviceMat) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + double *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_gpuGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float complex *val, void *deviceMat) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + float complex *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_gpuGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double complex *val, void *deviceMat) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + double complex *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_gpuGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + float alpha=1.0; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuSellcsput(handle,alpha,(float *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1); + +#endif + return SPGPU_SUCCESS; +} + +int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + double alpha=1.0; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuDellcsput(handle,alpha,(double *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1); + +#endif + return SPGPU_SUCCESS; +} + + +int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0); + spgpuHandle_t handle=psb_gpuGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuCellcsput(handle,alpha,(cuFloatComplex *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1); + +#endif + return SPGPU_SUCCESS; +} + +int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val) +{ int i; +#ifdef HAVE_SPGPU + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0); + spgpuHandle_t handle=psb_gpuGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuZellcsput(handle,alpha,(cuDoubleComplex *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1); + +#endif + return SPGPU_SUCCESS; +} + +#endif + diff --git a/gpu/elldev.h b/gpu/elldev.h new file mode 100644 index 00000000..6a0814e2 --- /dev/null +++ b/gpu/elldev.h @@ -0,0 +1,183 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _ELLDEV_H_ +#define _ELLDEV_H_ + +#if defined(HAVE_SPGPU) +#include "cintrf.h" +#include "cuComplex.h" +#include "ell.h" + + +struct EllDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // row pointers (same size of cM) + int *rP; + int *diag; + // row size + int *rS; + + //matrix size (uncompressed) + int rows; + int columns; + + int pitch; //old + + int cMPitch; + + int rPPitch; + + int maxRowSize; + int avgRowSize; + + //allocation size (in elements) + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; + /* real/complex, single/double */ + int dataType; + +}; + +typedef struct EllDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + // Largest row size + unsigned int maxRowSize; + unsigned int avgRowSize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} EllDeviceParams; + +int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex); +int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params); +void freeEllDevice(void* remoteMatrix); + +int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag); + +int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag); + +int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); +int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); +int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, + float complex beta, void* deviceY); +int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, + double complex beta, void* deviceY); + + + +int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float *val, void *deviceMat); + +int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double *val, void *deviceMat); + +int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float complex *val, void *deviceMat); + +int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double complex *val, void *deviceMat); + + +void psi_cuda_s_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, float *devVal, + int *idiag, int *rP, float *cM); + +void psi_cuda_d_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, double *devVal, + int *idiag, int *rP, double *cM); + +void psi_cuda_c_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, float complex *devVal, + int *idiag, int *rP, float complex *cM); + +void psi_cuda_z_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, double complex *devVal, + int *idiag, int *rP, double complex *cM); + + +int dev_csputEllDeviceFloat(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceDouble(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val); + +void zeroEllDevice(void* deviceMat); + +int getEllDevicePitch(void* deviceMat); + +// sparse Ell matrix-vector product +//int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +//int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/elldev_mod.F90 b/gpu/elldev_mod.F90 new file mode 100644 index 00000000..49656d19 --- /dev/null +++ b/gpu/elldev_mod.F90 @@ -0,0 +1,326 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module elldev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: elldev_parms + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: maxRowSize + integer(c_int) :: avgRowSize + integer(c_int) :: firstIndex + end type elldev_parms + +#ifdef HAVE_SPGPU + + interface + function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) & + & result(res) bind(c,name='getEllDeviceParams') + use iso_c_binding + import :: elldev_parms + type(elldev_parms) :: res + integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex + end function FgetEllDeviceParams + end interface + + + interface + function FallocEllDevice(deviceMat,rows,maxRowSize,nnzeros,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocEllDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocEllDevice + end interface + + + interface writeEllDevice + + function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_float) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceFloat + + function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_double) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceDouble + + function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_float_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceFloatComplex + + function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_double_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceDoubleComplex + + end interface + + interface readEllDevice + + function readEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_float) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceFloat + + function readEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_double) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceDouble + + function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_float_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceFloatComplex + + function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_double_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceDoubleComplex + + end interface + + interface + subroutine freeEllDevice(deviceMat) & + & bind(c,name='freeEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeEllDevice + end interface + + interface + subroutine zeroEllDevice(deviceMat) & + & bind(c,name='zeroEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine zeroEllDevice + end interface + + interface + subroutine resetEllTimer() bind(c,name='resetEllTimer') + use iso_c_binding + end subroutine resetEllTimer + end interface + interface + function getEllTimer() & + & bind(c,name='getEllTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getEllTimer + end interface + + + interface + function getEllDevicePitch(deviceMat) & + & bind(c,name='getEllDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getEllDevicePitch + end interface + + interface + function getEllDeviceMaxRowSize(deviceMat) & + & bind(c,name='getEllDeviceMaxRowSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getEllDeviceMaxRowSize + end interface + + + interface psi_CopyCooToElg + function psiCopyCooToElgFloat(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgFloat + function psiCopyCooToElgDouble(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgDouble + function psiCopyCooToElgFloatComplex(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgFloatComplex + function psiCopyCooToElgDoubleComplex(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgDoubleComplex + end interface + + interface csputEllDeviceFloat + function dev_csputEllDeviceFloat(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceFloat + end interface + + interface csputEllDeviceDouble + function dev_csputEllDeviceDouble(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceDouble + end interface + + interface csputEllDeviceFloatComplex + function dev_csputEllDeviceFloatComplex(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceFloatComplex + end interface + + interface csputEllDeviceDoubleComplex + function dev_csputEllDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceDoubleComplex + end interface + + interface spmvEllDevice + function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvEllDeviceFloat + function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvEllDeviceDouble + function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvEllDeviceFloatComplex + function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvEllDeviceDoubleComplex + end interface + +#endif + + +end module elldev_mod diff --git a/gpu/fcusparse.c b/gpu/fcusparse.c new file mode 100644 index 00000000..5f0c12d9 --- /dev/null +++ b/gpu/fcusparse.c @@ -0,0 +1,77 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include "cintrf.h" +#include "fcusparse.h" + +static cusparseHandle_t *cusparse_handle=NULL; + + +void setHandle(cusparseHandle_t); + +int FcusparseCreate() +{ + int ret=CUSPARSE_STATUS_SUCCESS; + cusparseHandle_t *handle; + if (cusparse_handle == NULL) { + if ((handle = (cusparseHandle_t *)malloc(sizeof(cusparseHandle_t)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + ret = (int)cusparseCreate(handle); + if (ret == CUSPARSE_STATUS_SUCCESS) + cusparse_handle = handle; + } + return (ret); +} + +int FcusparseDestroy() +{ + int val; + val = (int) cusparseDestroy(*cusparse_handle); + free(cusparse_handle); + cusparse_handle=NULL; + return(val); +} +cusparseHandle_t *getHandle() +{ + if (cusparse_handle == NULL) + FcusparseCreate(); + return(cusparse_handle); +} + + + +#endif diff --git a/gpu/fcusparse.h b/gpu/fcusparse.h new file mode 100644 index 00000000..2bab2aca --- /dev/null +++ b/gpu/fcusparse.h @@ -0,0 +1,70 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef FCUSPARSE_ +#define FCUSPARSE_ + +#ifdef HAVE_SPGPU +#include +#if CUDA_SHORT_VERSION <= 10 +#include +#else +#include +#endif +#include "cintrf.h" + +int FcusparseCreate(); +int FcusparseDestroy(); +cusparseHandle_t *getHandle(); + +#define CHECK_CUDA(func) \ +{ \ + cudaError_t status = (func); \ + if (status != cudaSuccess) { \ + printf("CUDA API failed at line %d with error: %s (%d)\n", \ + __LINE__, cudaGetErrorString(status), status); \ + return EXIT_FAILURE; \ + } \ +} + +#define CHECK_CUSPARSE(func) \ +{ \ + cusparseStatus_t status = (func); \ + if (status != CUSPARSE_STATUS_SUCCESS) { \ + printf("CUSPARSE API failed at line %d with error: %s (%d)\n", \ + __LINE__, cusparseGetErrorString(status), status); \ + return EXIT_FAILURE; \ + } \ +} + +#endif +#endif diff --git a/gpu/fcusparse_fct.h b/gpu/fcusparse_fct.h new file mode 100644 index 00000000..5a3b1ac6 --- /dev/null +++ b/gpu/fcusparse_fct.h @@ -0,0 +1,770 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +typedef struct T_CSRGDeviceMat +{ +#if CUDA_SHORT_VERSION <= 10 + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; +#elif CUDA_VERSION < 11030 + cusparseMatDescr_t descr; + csrsv2Info_t triang; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#else + cusparseSpMatDescr_t descr; + cusparseSpSVDescr_t *spsvDescr; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#endif + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_CSRGDeviceMat; + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Cmat +{ + T_CSRGDeviceMat *mat; +} T_Cmat; + +#if CUDA_SHORT_VERSION <= 10 +typedef struct T_HYBGDeviceMat +{ + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; + cusparseHybMat_t hybA; + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_HYBGDeviceMat; + + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Hmat +{ + T_HYBGDeviceMat *mat; +} T_Hmat; +#endif + +int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); +int T_CSRGDeviceFree(T_Cmat *Mat); + + +int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); + +int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); + +#if CUDA_SHORT_VERSION <= 10 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); +#elif CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#endif + + + +#if CUDA_SHORT_VERSION <= 10 + + +int T_HYBGDeviceFree(T_Hmat *Matrix); +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +#endif + +int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_CSRGDeviceMat *cMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); + TYPE ealpha=alpha, ebeta=beta; +#if CUDA_SHORT_VERSION <= 10 + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + return cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY); +#elif CUDA_VERSION < 11030 + size_t bfsz; + vX=x->v_; + vY=y->v_; +#if 1 + CHECK_CUSPARSE(cusparseCsrmvEx_bufferSize(*my_handle,CUSPARSE_ALG_MERGE_PATH, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz, + (const void *) &ealpha,CUSPARSE_BASE_TYPE, + cMat->descr, + (const void *) cMat->val, + CUSPARSE_BASE_TYPE, + (const int *) cMat->irp, + (const int *) cMat->ja, + (const void *) vX, CUSPARSE_BASE_TYPE, + (const void *) &ebeta, CUSPARSE_BASE_TYPE, + (void *) vY, CUSPARSE_BASE_TYPE, + CUSPARSE_BASE_TYPE, &bfsz)); +#else + bfsz=cMat->nz; +#endif + + if (bfsz > cMat->mvbsize) { + if (cMat->mvbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->mvbuffer)); + cMat->mvbuffer = NULL; + } + CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + cMat->mvbsize = bfsz; + } + CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle, + CUSPARSE_ALG_MERGE_PATH, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz, + (const void *) &ealpha,CUSPARSE_BASE_TYPE, + cMat->descr, + (const void *) cMat->val, CUSPARSE_BASE_TYPE, + (const int *) cMat->irp, (const int *) cMat->ja, + (const void *) vX, CUSPARSE_BASE_TYPE, + (const void *) &ebeta, CUSPARSE_BASE_TYPE, + (void *) vY, CUSPARSE_BASE_TYPE, + CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); + +#else + cusparseDnVecDescr_t vecX, vecY; + size_t bfsz; + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,cMat->descr,vecX,&beta,vecY, + CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, + &bfsz)); + if (bfsz > cMat->mvbsize) { + if (cMat->mvbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->mvbuffer)); + cMat->mvbuffer = NULL; + } + CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + cMat->mvbsize = bfsz; + } + CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,cMat->descr,vecX,&beta,vecY, + CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, + cMat->mvbuffer)); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); +#endif +} + +int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_CSRGDeviceMat *cMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); +#if CUDA_SHORT_VERSION <= 10 + vX=x->v_; + vY=y->v_; + + return cusparseTcsrsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, cMat->triang, + (const TYPE *) vX, (TYPE *) vY); +#elif CUDA_VERSION < 11030 + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE(cusparseTcsrsv2_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, + (const TYPE *) &alpha, + cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, + (const TYPE *) vX, (TYPE *) vY, + CUSPARSE_SOLVE_POLICY_USE_LEVEL, + (void *) cMat->svbuffer)); +#else + cusparseDnVecDescr_t vecX, vecY; + size_t bfsz; + vX=x->v_; + vY=y->v_; + cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,cMat->descr,vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + &bfsz)); + if (bfsz > cMat->svbsize) { + if (cMat->svbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->svbuffer)); + cMat->svbuffer = NULL; + } + CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + cMat->svbsize=bfsz; + } + if (cMat->spsvDescr==NULL) { + CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + cMat->descr, + vecX, vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + cMat->svbuffer)); + } + + CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,cMat->descr,vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr))); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); +#endif +} + +int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) +{ + T_CSRGDeviceMat *cMat; + int nr1=nr, nz1=nz, rc; + cusparseHandle_t *my_handle=getHandle(); + int bfsz; + + if ((nr<0)||(nc<0)||(nz<0)) + return((int) CUSPARSE_STATUS_INVALID_VALUE); + if ((cMat=(T_CSRGDeviceMat *) malloc(sizeof(T_CSRGDeviceMat)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + cMat->m = nr; + cMat->n = nc; + cMat->nz = nz; + if (nr1 == 0) nr1 = 1; + if (nz1 == 0) nz1 = 1; + if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0) + return(rc); +#if CUDA_SHORT_VERSION <= 10 + if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) + return(rc); + if ((rc= cusparseCreateSolveAnalysisInfo(&(cMat->triang))) !=0) + return(rc); +#elif CUDA_VERSION < 11030 + if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) + return(rc); + CHECK_CUSPARSE(cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL)); + CHECK_CUSPARSE(cusparseSetMatDiagType(cMat->descr,CUSPARSE_DIAG_TYPE_NON_UNIT)); + CHECK_CUSPARSE(cusparseSetMatIndexBase(cMat->descr,CUSPARSE_INDEX_BASE_ONE)); + CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&(cMat->triang))); + if (cMat->nz > 0) { + CHECK_CUSPARSE(cusparseTcsrsv2_bufferSize(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, &bfsz)); + } else { + bfsz = 0; + } + + /* if (cMat->svbuffer != NULL) { */ + /* fprintf(stderr,"Calling cudaFree\n"); */ + /* CHECK_CUDA(cudaFree(cMat->svbuffer)); */ + /* cMat->svbuffer = NULL; */ + /* } */ + if (bfsz > 0) { + CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + } else { + cMat->svbuffer=NULL; + } + cMat->svbsize=bfsz; + + cMat->mvbuffer=NULL; + cMat->mvbsize = 0; + + +#else + int64_t rows=nr, cols=nc, nnz=nz; + + CHECK_CUSPARSE(cusparseCreateCsr(&(cMat->descr), + rows, cols, nnz, + (void *) cMat->irp, + (void *) cMat->ja, + (void *) cMat->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ONE, + CUSPARSE_BASE_TYPE) ); + cMat->spsvDescr=NULL; + cMat->mvbuffer=NULL; + cMat->svbuffer=NULL; + cMat->mvbsize=0; + cMat->svbsize=0; +#endif + Matrix->mat = cMat; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDeviceFree(T_Cmat *Matrix) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + freeRemoteBuffer(cMat->irp); + freeRemoteBuffer(cMat->ja); + freeRemoteBuffer(cMat->val); +#if CUDA_SHORT_VERSION <= 10 + cusparseDestroyMatDescr(cMat->descr); + cusparseDestroySolveAnalysisInfo(cMat->triang); +#elif CUDA_VERSION < 11030 + cusparseDestroyMatDescr(cMat->descr); + cusparseDestroyCsrsv2Info(cMat->triang); +#else + cusparseDestroySpMat(cMat->descr); + if (cMat->spsvDescr!=NULL) { + CHECK_CUSPARSE( cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); + free(cMat->spsvDescr); + cMat->spsvDescr=NULL; + } + if (cMat->mvbuffer!=NULL) + CHECK_CUDA( cudaFree(cMat->mvbuffer)); + if (cMat->svbuffer!=NULL) + CHECK_CUDA( cudaFree(cMat->svbuffer)); + cMat->spsvDescr=NULL; + cMat->mvbuffer=NULL; + cMat->svbuffer=NULL; + cMat->mvbsize=0; + cMat->svbsize=0; +#endif + free(cMat); + Matrix->mat = NULL; + } + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDeviceGetParms(T_Cmat *Matrix,int *nr, int *nc, int *nz) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + *nr = cMat->m ; + *nc = cMat->n ; + *nz = cMat->nz ; + return(CUSPARSE_STATUS_SUCCESS); + } else { + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + } +} + +#if CUDA_SHORT_VERSION <= 10 + +int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(cMat->descr,type)); +} + +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Matrix) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + int rc, buffersize; + cusparseHandle_t *my_handle=getHandle(); + cusparseSolveAnalysisInfo_t info; + + rc= (int) cusparseTcsrsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang); + if (rc !=0) { + fprintf(stderr,"From csrsv_analysis: %d\n",rc); + } + return(rc); +} + +#elif CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(cMat->descr,type)); +} + +#else + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseFillMode_t mode=type; + + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CUSPARSE_SPMAT_FILL_MODE, + (const void*) &mode, + sizeof(cusparseFillMode_t))); + return(0); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseDiagType_t cutype=type; + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CUSPARSE_SPMAT_DIAG_TYPE, + (const void*) &cutype, + sizeof(cusparseDiagType_t))); + return(0); +} + +#endif + +int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseHandle_t *my_handle=getHandle(); + + if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp, + (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja, + (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val, + (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); +#if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030) + if (cusparseGetMatType(cMat->descr)== CUSPARSE_MATRIX_TYPE_TRIANGULAR) { + // Why do we need to set TYPE_GENERAL??? cuSPARSE can be misterious sometimes. + cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); + CHECK_CUSPARSE(cusparseTcsrsv2_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, + cMat->svbuffer)); + } +#endif + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; + T_CSRGDeviceMat *cMat = Matrix->mat; + + if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); + + return(CUSPARSE_STATUS_SUCCESS); +} + +#if CUDA_SHORT_VERSION <= 10 +int T_HYBGDeviceFree(T_Hmat *Matrix) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + if (hMat != NULL) { + cusparseDestroyMatDescr(hMat->descr); + cusparseDestroySolveAnalysisInfo(hMat->triang); + cusparseDestroyHybMat(hMat->hybA); + free(hMat); + } + Matrix->mat = NULL; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_HYBGDeviceMat *hMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n,rc; + cusparseMatrixType_t type; + cusparseHandle_t *my_handle=getHandle(); + + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Spmv MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Spmv DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Spmv FillMode: %d\n",rc); */ + /* Dirty trick: apparently hybmv does not accept a triangular + matrix even though it should not make a difference. So + we claim it's general anyway */ + type = cusparseGetMatType(hMat->descr); + rc = cusparseSetMatType(hMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); + if (rc == 0) + rc = (int) cusparseThybmv(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + (const TYPE *) &alpha, hMat->descr, hMat->hybA, + (const TYPE *) vX, (const TYPE *) &beta, + (TYPE *) vY); + if (rc == 0) + rc = cusparseSetMatType(hMat->descr,type); + return(rc); +} + +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz) +{ + T_HYBGDeviceMat *hMat; + int nr1=nr, nz1=nz, rc; + if ((nr<0)||(nc<0)||(nz<0)) + return((int) CUSPARSE_STATUS_INVALID_VALUE); + if ((hMat=(T_HYBGDeviceMat *) malloc(sizeof(T_HYBGDeviceMat)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + hMat->m = nr; + hMat->n = nc; + hMat->nz = nz; + + if ((rc= cusparseCreateMatDescr(&(hMat->descr))) !=0) + return(rc); + if ((rc= cusparseCreateSolveAnalysisInfo(&(hMat->triang))) !=0) + return(rc); + if((rc = cusparseCreateHybMat(&(hMat->hybA))) != 0) + return(rc); + Matrix->mat = hMat; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatType(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(hMat->descr,type)); +} + +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + //beta?? + T_HYBGDeviceMat *hMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + return cusparseThybsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + (const TYPE *) &alpha, hMat->descr, + hMat->hybA, hMat->triang, + (const TYPE *) vX, (TYPE *) vY); +} + +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + cusparseSolveAnalysisInfo_t info; + int rc; + cusparseHandle_t *my_handle=getHandle(); + + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Analysis MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Analysis DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Analysis FillMode: %d\n",rc); */ + rc = (int) cusparseThybsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + hMat->descr, hMat->hybA, hMat->triang); + + if (rc !=0) { + fprintf(stderr,"From csrsv_analysis: %d\n",rc); + } + return(rc); +} + +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; double t1,t2; + int nr1=m, nz1=nz; + T_HYBGDeviceMat *hMat= Matrix->mat; + cusparseHandle_t *my_handle=getHandle(); + + if (nr1 == 0) nr1 = 1; + if (nz1 == 0) nz1 = 1; + if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0) + return(rc); + + if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp, + (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja, + (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val, + (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Conversion MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Conversion DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Conversion FillMode: %d\n",rc); */ + //t1=etime(); + rc = (int) cusparseTcsr2hyb(*my_handle, m, n, + hMat->descr, + (const TYPE *)hMat->val, + (const int *)hMat->irp, (const int *)hMat->ja, + hMat->hybA,0, + CUSPARSE_HYB_PARTITION_AUTO); + + freeRemoteBuffer(hMat->irp); hMat->irp = NULL; + freeRemoteBuffer(hMat->ja); hMat->ja = NULL; + freeRemoteBuffer(hMat->val); hMat->val = NULL; + + //cudaSync(); + //t2 = etime(); + //fprintf(stderr,"Inner call to cusparseTcsr2hyb: %lf\n",(t2-t1)); + if (rc != 0) { + fprintf(stderr,"From csr2hyb: %d\n",rc); + } + return(rc); +} +#endif + diff --git a/gpu/hdiagdev.c b/gpu/hdiagdev.c new file mode 100644 index 00000000..4e640278 --- /dev/null +++ b/gpu/hdiagdev.c @@ -0,0 +1,425 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "hdiagdev.h" +#include +#include +#include +#include +#if defined(HAVE_SPGPU) +#define DEBUG 0 + + +void freeHdiagDevice(void* remoteMatrix) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->hackOffsets); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + + +HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType) +{ + HdiagDeviceParams params; + + params.elementType = elementType; + //numero di elementi di val + params.rows = rows; + params.columns = columns; + params.allocationHeight = allocationHeight; + params.hackSize = hackSize; + params.hackCount = hackCount; + + return params; + +} + +int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params) +{ + struct HdiagDevice *tmp = (struct HdiagDevice *)malloc(sizeof(struct HdiagDevice)); + int ret=SPGPU_SUCCESS; + int *tmpOff = NULL; + + *remoteMatrix = (void *) tmp; +#if DEBUG + fprintf(stderr,"From alloc: %p\n",*remoteMatrix); +#endif + + tmp->rows = params->rows; + + tmp->hackSize = params->hackSize; + + tmp->cols = params->columns; + + tmp->allocationHeight = params->allocationHeight; + + tmp->hackCount = params->hackCount; + + + +#if DEBUG + fprintf(stderr,"hackcount %d allocationHeight %d\n",tmp->hackCount,tmp->allocationHeight); +#endif + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), (tmp->hackCount+1)*sizeof(int)); + + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), tmp->allocationHeight*sizeof(int)); + + /* tmp->baseIndex = params->firstIndex; */ + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType) +{ int i=0; +#ifdef HAVE_SPGPU + HdiagDeviceParams p; + + p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType); + + i = allocHdiagDevice(deviceMat, &p); +#if DEBUG + fprintf(stderr," Falloc %p \n",*deviceMat); +#endif + + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif + + +} + +int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets) +{ int i=0,fo,fa,j,k,p; + char buf_a[255], buf_o[255],tmp[255]; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + i=SPGPU_SUCCESS; + + +#if DEBUG + fprintf(stderr," Write %p \n",devMat); + + fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n", + devMat->allocationHeight,devMat->hackCount); + fprintf(stderr,"HackOffsets: "); + for (j=0; jhackCount+1; j++) + fprintf(stderr," %d",hackOffsets[j]); + fprintf(stderr,"\n"); + fprintf(stderr,"diaOffsets: "); + for (j=0; jallocationHeight; j++) + fprintf(stderr," %d",hdiaOffsets[j]); + fprintf(stderr,"\n"); +#if 1 + fprintf(stderr,"values: \n"); + p=0; + for (j=0; jhackCount; j++){ + fprintf(stderr,"Hack no: %d\n",j+1); + for (k=0; khackSize*(devMat->allocationHeight/devMat->hackCount); k++){ + fprintf(stderr," %d %lf\n",p+1,val[p]); p++; + } + } + fprintf(stderr,"\n"); +#endif +#endif + + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, + (devMat->hackCount+1)*sizeof(int)); + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, + devMat->allocationHeight*sizeof(int)); + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + devMat->allocationHeight*devMat->hackSize*sizeof(double)); + if (i!=0) + fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i); + +#if DEBUG + fprintf(stderr," EndWrite %p \n",devMat); +#endif + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +long long int sizeofHdiagDeviceDouble(void* deviceMat) +{ int i=0,fo,fa; + int *hoff=NULL,*hackoff=NULL; + long long int memsize=0; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + + memsize += (devMat->hackCount+1)*sizeof(int); + memsize += devMat->allocationHeight*sizeof(int); + memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double); + +#endif + return(memsize); +} + + + +int readHdiagDeviceDouble(void* deviceMat, double* a, int* off) +{ int i; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */ + /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ + + + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif +#if DEBUG + fprintf(stderr," First %p \n",devMat); + fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize, + devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM); +#endif + spgpuDhdiaspmv (handle, (double*)y->v_, (double *)y->v_, alpha, + (double *)devMat->cM,devMat->hdiaOffsets, + devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols, + x->v_, beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets) +{ int i=0,fo,fa,j,k,p; + char buf_a[255], buf_o[255],tmp[255]; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + i=SPGPU_SUCCESS; + + +#if DEBUG + fprintf(stderr," Write %p \n",devMat); + + fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n", + devMat->allocationHeight,devMat->hackCount); + fprintf(stderr,"HackOffsets: "); + for (j=0; jhackCount+1; j++) + fprintf(stderr," %d",hackOffsets[j]); + fprintf(stderr,"\n"); + fprintf(stderr,"diaOffsets: "); + for (j=0; jallocationHeight; j++) + fprintf(stderr," %d",hdiaOffsets[j]); + fprintf(stderr,"\n"); +#if 1 + fprintf(stderr,"values: \n"); + p=0; + for (j=0; jhackCount; j++){ + fprintf(stderr,"Hack no: %d\n",j+1); + for (k=0; khackSize*(devMat->allocationHeight/devMat->hackCount); k++){ + fprintf(stderr," %d %lf\n",p+1,val[p]); p++; + } + } + fprintf(stderr,"\n"); +#endif +#endif + + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, + (devMat->hackCount+1)*sizeof(int)); + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, + devMat->allocationHeight*sizeof(int)); + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + devMat->allocationHeight*devMat->hackSize*sizeof(float)); + if (i!=0) + fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i); + +#if DEBUG + fprintf(stderr," EndWrite %p \n",devMat); +#endif + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + +long long int sizeofHdiagDeviceFloat(void* deviceMat) +{ int i=0,fo,fa; + int *hoff=NULL,*hackoff=NULL; + long long int memsize=0; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + + memsize += (devMat->hackCount+1)*sizeof(int); + memsize += devMat->allocationHeight*sizeof(int); + memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float); + +#endif + return(memsize); +} + + + +int readHdiagDeviceFloat(void* deviceMat, float* a, int* off) +{ int i; +#ifdef HAVE_SPGPU + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */ + /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ + + + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif +#if DEBUG + fprintf(stderr," First %p \n",devMat); + fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize, + devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM); +#endif + spgpuShdiaspmv (handle, (float*)y->v_, (float *)y->v_, alpha, + (float *)devMat->cM,devMat->hdiaOffsets, + devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols, + x->v_, beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +#endif diff --git a/gpu/hdiagdev.h b/gpu/hdiagdev.h new file mode 100644 index 00000000..4bce5066 --- /dev/null +++ b/gpu/hdiagdev.h @@ -0,0 +1,111 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _HDIAGDEV_H_ +#define _HDIAGDEV_H_ + +#ifdef HAVE_SPGPU +#include "cintrf.h" +#include "hdia.h" + +struct HdiagDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // offset (same size of cM) + int *hdiaOffsets; + + int *hackOffsets; + + int hackCount; + + int rows; + + int cols; + + + int hackSize; + + int allocationHeight; + +}; + +typedef struct HdiagDeviceParams +{ + + unsigned int elementType; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + unsigned int hackSize; + unsigned int hackCount; + unsigned int allocationHeight; + + +} HdiagDeviceParams; + + + +HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType); + +int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType); + +int allocHdiagDevice(void ** remoteMatrix, HdiagDeviceParams* params); + + +void freeHdiagDevice(void* remoteMatrix); + +int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets); +int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); + +int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets); +int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); + + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/hdiagdev_mod.F90 b/gpu/hdiagdev_mod.F90 new file mode 100644 index 00000000..ad0f7cc5 --- /dev/null +++ b/gpu/hdiagdev_mod.F90 @@ -0,0 +1,203 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module hdiagdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: hdiagdev_parms + integer(c_int) :: element_type + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: hackSize + integer(c_int) :: hackCount + integer(c_int) :: allocationHeight + end type hdiagdev_parms + +#ifdef HAVE_SPGPU + + ! interface computeHdiaHacksCount + ! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, & + ! & diaValues,diaValuesPitch,diags,rows)& + ! & result(res) bind(c,name='computeHdiaHackOffsetsDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! integer(c_int), value :: rows,diags,diaValuesPitch,hackSize,elementType + ! real(c_double) :: diaValues(rows,:) + ! integer(c_int) :: hackOffsets,allocationHeight + ! end function computeHdiaHacksCountDouble + ! end interface computeHdiaHacksCount + + interface + function FgetHdiagDeviceParams(rows, columns, allocationHeight,hackSize, & + & hackCount, elementType) & + & result(res) bind(c,name='getHdiagDeviceParams') + use iso_c_binding + import :: hdiagdev_parms + type(hdiagdev_parms) :: res + integer(c_int), value :: rows,columns,allocationHeight,& + & elementType,hackSize,hackCount + end function FgetHdiagDeviceParams + end interface + + + interface + function FallocHdiagDevice(deviceMat,rows,columns,allocationHeight,& + & hackSize,hackCount,elementType) & + & result(res) bind(c,name='FallocHdiagDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,allocationHeight,hackSize,& + & hackCount,elementType + type(c_ptr) :: deviceMat + end function FallocHdiagDevice + end interface + + + interface + function sizeofHdiagDeviceDouble(deviceMat) & + & result(res) bind(c,name='sizeofHdiagDeviceDouble') + use iso_c_binding + integer(c_long_long) :: res + type(c_ptr), value :: deviceMat + end function sizeofHdiagDeviceDouble + end interface + + interface writeHdiagDevice + + function writeHdiagDeviceFloat(deviceMat,val,hdiaOffsets, hackOffsets) & + & result(res) bind(c,name='writeHdiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: hdiaOffsets(*), hackOffsets(*) + end function writeHdiagDeviceFloat + + function writeHdiagDeviceDouble(deviceMat,val,hdiaOffsets, hackOffsets) & + & result(res) bind(c,name='writeHdiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: hdiaOffsets(*), hackOffsets(*) + end function writeHdiagDeviceDouble + + end interface writeHdiagDevice + +!!$ interface readHdiagDevice +!!$ +!!$ function readHdiagDeviceFloat(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceFloat') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ real(c_float) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceFloat +!!$ +!!$ function readHdiagDeviceDouble(deviceMat,a,off,n) & +!!$ & result(res) bind(c,name='readHdiagDeviceDouble') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int),value :: n +!!$ real(c_double) :: a(n,*) +!!$ integer(c_int) :: off(*) +!!$ end function readHdiagDeviceDouble +!!$ +!!$ function readHdiagDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ complex(c_float_complex) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceFloatComplex +!!$ +!!$ function readHdiagDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ complex(c_double_complex) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceDoubleComplex +!!$ +!!$ end interface readHdiagDevice +!!$ + interface + subroutine freeHdiagDevice(deviceMat) & + & bind(c,name='freeHdiagDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeHdiagDevice + end interface + + + interface spmvHdiagDevice + function spmvHdiagDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHdiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvHdiagDeviceFloat + function spmvHdiagDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHdiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvHdiagDeviceDouble +!!$ function spmvHdiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) & +!!$ & result(res) bind(c,name='spmvHdiagDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat, x, y +!!$ complex(c_float_complex),value :: alpha, beta +!!$ end function spmvHdiagDeviceFloatComplex +!!$ function spmvHdiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & +!!$ & result(res) bind(c,name='spmvHdiagDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat, x, y +!!$ complex(c_double_complex),value :: alpha, beta +!!$ end function spmvHdiagDeviceDoubleComplex + end interface spmvHdiagDevice + +#endif + +end module hdiagdev_mod diff --git a/gpu/hlldev.c b/gpu/hlldev.c new file mode 100644 index 00000000..afff1df1 --- /dev/null +++ b/gpu/hlldev.c @@ -0,0 +1,615 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "hlldev.h" +#if defined(HAVE_SPGPU) +//new +HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, unsigned int elementType, unsigned int firstIndex) +{ + HllDeviceParams params; + + params.elementType = elementType; + params.hackSize = hksize; + //numero di elementi di val + params.allocsize = allocsize; + params.rows = rows; + params.nzt = nzeros; + params.avgNzr = (nzeros+rows-1)/rows; + params.firstIndex = firstIndex; + return params; + +} + +int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros, + int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr) +{ + + + if (mat!=NULL) { + *hackOffsLength = mat->hackOffsLength ; + *hksize = mat->hackSize ; + *nzeros = mat->nzt ; + *allocsize = mat->allocsize ; + *rows = mat->rows ; + *avgnzr = mat->avgNzr ; + *firstIndex = mat->baseIndex ; + return SPGPU_SUCCESS; + } else { + return SPGPU_UNSUPPORTED; + } +} +//new +int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params) +{ + HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice)); + int ret=SPGPU_SUCCESS; + *remoteMatrix = (void *)tmp; + + tmp->hackSize = params->hackSize; + + tmp->allocsize = params->allocsize; + + tmp->rows = params->rows; + tmp->avgNzr = params->avgNzr; + tmp->nzt = params->nzt; + tmp->baseIndex = params->firstIndex; + //fprintf(stderr,"Allocating HLG with %d avgNzr\n",params->avgNzr); + tmp->hackOffsLength = (int)(tmp->rows+tmp->hackSize-1)/tmp->hackSize; + + //printf("hackOffsLength %d\n",tmp->hackOffsLength); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int))); + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +void freeHllDevice(void* remoteMatrix) +{ + HllDevice *devMat = (HllDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->rS); + freeRemoteBuffer(devMat->diag); + freeRemoteBuffer(devMat->rP); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, + unsigned int elementType, unsigned int firstIndex) +{ int i; +#ifdef HAVE_SPGPU + HllDeviceParams p; + + p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex); + i = allocHllDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +#else + return SPGPU_UNSUPPORTED; +#endif +} + + +int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuShellspmv (handle, (float *)y->v_, (float *)y->v_, alpha, (float *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +//new +int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex); + //cudaSync(); + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, + float complex beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU + cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); + cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta)); +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuChellspmv (handle, (cuFloatComplex *)y->v_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, + double complex beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_gpuGetHandle(); + +#ifdef HAVE_SPGPU + cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); + cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta)); +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + + spgpuZhellspmv (handle, (cuDoubleComplex *)y->v_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + //i = writeEllDevice(deviceMat, (void *) val, ja, irn); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = readEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; +#ifdef HAVE_SPGPU + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +// New copy routines. + +int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float *val, void *deviceMat) +{ int i,j; +#ifdef HAVE_SPGPU + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + float *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_gpuGetHandle(); + psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double *val, void *deviceMat) +{ int i,j; +#ifdef HAVE_SPGPU + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + double *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double)); + //fprintf(stderr,"WriteRemoteBuffer val %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer ja %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer irn %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); + //cudaSync(); + //fprintf(stderr," hacksz: %d \n",hacksz); + handle = psb_gpuGetHandle(); + psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float complex *val, void *deviceMat) +{ int i,j; +#ifdef HAVE_SPGPU + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + float complex *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_gpuGetHandle(); + psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + +int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double complex *val, void *deviceMat) +{ int i,j; +#ifdef HAVE_SPGPU + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + double complex *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_gpuGetHandle(); + psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +#else + return SPGPU_UNSUPPORTED; +#endif +} + + + + + +#endif diff --git a/gpu/hlldev.h b/gpu/hlldev.h new file mode 100644 index 00000000..478ad86e --- /dev/null +++ b/gpu/hlldev.h @@ -0,0 +1,161 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _HLLDEV_H_ +#define _HLLDEV_H_ + +#ifdef HAVE_SPGPU +#include "cintrf.h" +#include "hell.h" + + +typedef struct hlldevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // row pointers (same size of cM) + int *rP; + + // row size and diagonal position + int *rS; + int *diag; + + int *hackOffs; + + int rows; + int avgNzr; + int hackOffsLength; + int nzt; + int hackSize; //must be multiple of 32 + + //matrix size (uncompressed) + //int rows; + //int columns; + + //allocation size + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; +} HllDevice; + +typedef struct hlldeviceparams +{ + + unsigned int elementType; + + unsigned int hackSize; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + unsigned int avgNzr; + unsigned int nzt; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + // unsigned int columns; + + unsigned int allocsize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; + +} HllDeviceParams; + + +HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, + unsigned int elementType, unsigned int firstIndex); +int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros, + int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr); +int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, unsigned int elementType, unsigned int firstIndex); +int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params); +void freeHllDevice(void* remoteMatrix); +int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceFloatComplex(void* deviceMat, float complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); + + +int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float *val, void *deviceMat); +int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double *val, void *deviceMat); +int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, + int noffs, int isz, int *irn, + int *hoffs, int *idisp, int *ja, + float complex *val, void *deviceMat); +int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, + int noffs, int isz, int *irn, + int *hoffs, int *idisp, int *ja, + double complex *val, void *deviceMat); + +int psi_cuda_s_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, float *val, + int *idiag, int *rP, float *cM); +int psi_cuda_d_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, double *val, + int *idiag, int *rP, double *cM); +int psi_cuda_c_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, float complex *val, + int *idiag, int *rP, float complex *cM); +int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, double complex *val, + int *idiag, int *rP, double complex *cM); + + +#else +#define CINTRF_UNSUPPORTED -1 +#endif + +#endif diff --git a/gpu/hlldev_mod.F90 b/gpu/hlldev_mod.F90 new file mode 100644 index 00000000..4eaa5ce0 --- /dev/null +++ b/gpu/hlldev_mod.F90 @@ -0,0 +1,273 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module hlldev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: hlldev_parms + integer(c_int) :: element_type + integer(c_int) :: hackSize + integer(c_int) :: rows + integer(c_int) :: avgNzr + integer(c_int) :: allocsize + integer(c_int) :: firstIndex + end type hlldev_parms + +#ifdef HAVE_SPGPU + + interface + function bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex) & + & result(res) bind(c,name='bldHllDeviceParams') + use iso_c_binding + import :: hlldev_parms + type(hlldev_parms) :: res + integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex + end function BldHllDeviceParams + end interface + + interface + function getHllDeviceParams(deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) & + & result(res) bind(c,name='getHllDeviceParams') + use iso_c_binding + import :: hlldev_parms + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + end function GetHllDeviceParams + end interface + + + interface + function FallocHllDevice(deviceMat,hksize,rows, nzeros,allocsize, & + & elementType,firstIndex) & + & result(res) bind(c,name='FallocHllDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocHllDevice + end interface + + + interface writeHllDevice + + function writeHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceFloat + + function writeHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceDouble + + function writeHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceFloatComplex + + function writeHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceDoubleComplex + + end interface + + interface readHllDevice + + function readHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceFloat + + function readHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceDouble + + function readHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceFloatComplex + + function readHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceDoubleComplex + + end interface + + interface + subroutine freeHllDevice(deviceMat) & + & bind(c,name='freeHllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeHllDevice + end interface + + + interface psi_CopyCooToHlg + function psiCopyCooToHlgFloat(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgFloat + function psiCopyCooToHlgDouble(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgDouble + function psiCopyCooToHlgFloatComplex(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgFloatComplex + function psiCopyCooToHlgDoubleComplex(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgDoubleComplex + end interface + + + !interface + ! function getHllDevicePitch(deviceMat) & + ! & bind(c,name='getHllDevicePitch') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getHllDevicePitch + !end interface + + !interface + ! function getHllDeviceMaxRowSize(deviceMat) & + ! & bind(c,name='getHllDeviceMaxRowSize') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getHllDeviceMaxRowSize + !end interface + + interface spmvHllDevice + + function spmvHllDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvHllDeviceFloat + + function spmvHllDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvHllDeviceDouble + + function spmvHllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvHllDeviceFloatComplex + + function spmvHllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvHllDeviceDoubleComplex + + end interface + +#endif + + +end module hlldev_mod diff --git a/gpu/impl/Makefile b/gpu/impl/Makefile new file mode 100755 index 00000000..158066f2 --- /dev/null +++ b/gpu/impl/Makefile @@ -0,0 +1,294 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. +CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR) +LIBNAME=libpsb_gpu.a + +OBJS= \ +psb_d_cp_csrg_from_coo.o \ +psb_d_cp_csrg_from_fmt.o \ +psb_d_cp_elg_from_coo.o \ +psb_d_cp_elg_from_fmt.o \ +psb_s_cp_csrg_from_coo.o \ +psb_s_cp_csrg_from_fmt.o \ +psb_s_csrg_allocate_mnnz.o \ +psb_s_csrg_csmm.o \ +psb_s_csrg_csmv.o \ +psb_s_csrg_mold.o \ +psb_s_csrg_reallocate_nz.o \ +psb_s_csrg_scal.o \ +psb_s_csrg_scals.o \ +psb_s_csrg_from_gpu.o \ +psb_s_csrg_to_gpu.o \ +psb_s_csrg_vect_mv.o \ +psb_s_csrg_inner_vect_sv.o \ +psb_d_csrg_allocate_mnnz.o \ +psb_d_csrg_csmm.o \ +psb_d_csrg_csmv.o \ +psb_d_csrg_mold.o \ +psb_d_csrg_reallocate_nz.o \ +psb_d_csrg_scal.o \ +psb_d_csrg_scals.o \ +psb_d_csrg_from_gpu.o \ +psb_d_csrg_to_gpu.o \ +psb_d_csrg_vect_mv.o \ +psb_d_csrg_inner_vect_sv.o \ +psb_d_elg_allocate_mnnz.o \ +psb_d_elg_asb.o \ +psb_d_elg_csmm.o \ +psb_d_elg_csmv.o \ +psb_d_elg_csput.o \ +psb_d_elg_from_gpu.o \ +psb_d_elg_inner_vect_sv.o \ +psb_d_elg_mold.o \ +psb_d_elg_reallocate_nz.o \ +psb_d_elg_scal.o \ +psb_d_elg_scals.o \ +psb_d_elg_to_gpu.o \ +psb_d_elg_vect_mv.o \ +psb_d_mv_csrg_from_coo.o \ +psb_d_mv_csrg_from_fmt.o \ +psb_d_mv_elg_from_coo.o \ +psb_d_mv_elg_from_fmt.o \ +psb_s_mv_csrg_from_coo.o \ +psb_s_mv_csrg_from_fmt.o \ +psb_s_cp_elg_from_coo.o \ +psb_s_cp_elg_from_fmt.o \ +psb_s_elg_allocate_mnnz.o \ +psb_s_elg_asb.o \ +psb_s_elg_csmm.o \ +psb_s_elg_csmv.o \ +psb_s_elg_csput.o \ +psb_s_elg_inner_vect_sv.o \ +psb_s_elg_mold.o \ +psb_s_elg_reallocate_nz.o \ +psb_s_elg_scal.o \ +psb_s_elg_scals.o \ +psb_s_elg_to_gpu.o \ +psb_s_elg_from_gpu.o \ +psb_s_elg_vect_mv.o \ +psb_s_mv_elg_from_coo.o \ +psb_s_mv_elg_from_fmt.o \ +psb_s_cp_hlg_from_fmt.o \ +psb_s_cp_hlg_from_coo.o \ +psb_d_cp_hlg_from_fmt.o \ +psb_d_cp_hlg_from_coo.o \ +psb_d_hlg_allocate_mnnz.o \ +psb_d_hlg_csmm.o \ +psb_d_hlg_csmv.o \ +psb_d_hlg_inner_vect_sv.o \ +psb_d_hlg_mold.o \ +psb_d_hlg_reallocate_nz.o \ +psb_d_hlg_scal.o \ +psb_d_hlg_scals.o \ +psb_d_hlg_from_gpu.o \ +psb_d_hlg_to_gpu.o \ +psb_d_hlg_vect_mv.o \ +psb_s_hlg_allocate_mnnz.o \ +psb_s_hlg_csmm.o \ +psb_s_hlg_csmv.o \ +psb_s_hlg_inner_vect_sv.o \ +psb_s_hlg_mold.o \ +psb_s_hlg_reallocate_nz.o \ +psb_s_hlg_scal.o \ +psb_s_hlg_scals.o \ +psb_s_hlg_from_gpu.o \ +psb_s_hlg_to_gpu.o \ +psb_s_hlg_vect_mv.o \ +psb_s_mv_hlg_from_coo.o \ +psb_s_cp_hlg_from_coo.o \ +psb_s_mv_hlg_from_fmt.o \ +psb_d_mv_hlg_from_coo.o \ +psb_d_cp_hlg_from_coo.o \ +psb_d_mv_hlg_from_fmt.o \ +psb_s_hybg_allocate_mnnz.o \ +psb_s_hybg_csmm.o \ +psb_s_hybg_csmv.o \ +psb_s_hybg_reallocate_nz.o \ +psb_s_hybg_scal.o \ +psb_s_hybg_scals.o \ +psb_s_hybg_to_gpu.o \ +psb_s_hybg_vect_mv.o \ +psb_s_hybg_inner_vect_sv.o \ +psb_s_cp_hybg_from_coo.o \ +psb_s_cp_hybg_from_fmt.o \ +psb_s_mv_hybg_from_fmt.o \ +psb_s_mv_hybg_from_coo.o \ +psb_s_hybg_mold.o \ +psb_d_hybg_allocate_mnnz.o \ +psb_d_hybg_csmm.o \ +psb_d_hybg_csmv.o \ +psb_d_hybg_reallocate_nz.o \ +psb_d_hybg_scal.o \ +psb_d_hybg_scals.o \ +psb_d_hybg_to_gpu.o \ +psb_d_hybg_vect_mv.o \ +psb_d_hybg_inner_vect_sv.o \ +psb_d_cp_hybg_from_coo.o \ +psb_d_cp_hybg_from_fmt.o \ +psb_d_mv_hybg_from_fmt.o \ +psb_d_mv_hybg_from_coo.o \ +psb_d_hybg_mold.o \ +psb_z_cp_csrg_from_coo.o \ +psb_z_cp_csrg_from_fmt.o \ +psb_z_cp_elg_from_coo.o \ +psb_z_cp_elg_from_fmt.o \ +psb_c_cp_csrg_from_coo.o \ +psb_c_cp_csrg_from_fmt.o \ +psb_c_csrg_allocate_mnnz.o \ +psb_c_csrg_csmm.o \ +psb_c_csrg_csmv.o \ +psb_c_csrg_mold.o \ +psb_c_csrg_reallocate_nz.o \ +psb_c_csrg_scal.o \ +psb_c_csrg_scals.o \ +psb_c_csrg_from_gpu.o \ +psb_c_csrg_to_gpu.o \ +psb_c_csrg_vect_mv.o \ +psb_c_csrg_inner_vect_sv.o \ +psb_z_csrg_allocate_mnnz.o \ +psb_z_csrg_csmm.o \ +psb_z_csrg_csmv.o \ +psb_z_csrg_mold.o \ +psb_z_csrg_reallocate_nz.o \ +psb_z_csrg_scal.o \ +psb_z_csrg_scals.o \ +psb_z_csrg_from_gpu.o \ +psb_z_csrg_to_gpu.o \ +psb_z_csrg_vect_mv.o \ +psb_z_csrg_inner_vect_sv.o \ +psb_z_elg_allocate_mnnz.o \ +psb_z_elg_asb.o \ +psb_z_elg_csmm.o \ +psb_z_elg_csmv.o \ +psb_z_elg_csput.o \ +psb_z_elg_inner_vect_sv.o \ +psb_z_elg_mold.o \ +psb_z_elg_reallocate_nz.o \ +psb_z_elg_scal.o \ +psb_z_elg_scals.o \ +psb_z_elg_to_gpu.o \ +psb_z_elg_from_gpu.o \ +psb_z_elg_vect_mv.o \ +psb_z_mv_csrg_from_coo.o \ +psb_z_mv_csrg_from_fmt.o \ +psb_z_mv_elg_from_coo.o \ +psb_z_mv_elg_from_fmt.o \ +psb_c_mv_csrg_from_coo.o \ +psb_c_mv_csrg_from_fmt.o \ +psb_c_cp_elg_from_coo.o \ +psb_c_cp_elg_from_fmt.o \ +psb_c_elg_allocate_mnnz.o \ +psb_c_elg_asb.o \ +psb_c_elg_csmm.o \ +psb_c_elg_csmv.o \ +psb_c_elg_csput.o \ +psb_c_elg_inner_vect_sv.o \ +psb_c_elg_mold.o \ +psb_c_elg_reallocate_nz.o \ +psb_c_elg_scal.o \ +psb_c_elg_scals.o \ +psb_c_elg_to_gpu.o \ +psb_c_elg_from_gpu.o \ +psb_c_elg_vect_mv.o \ +psb_c_mv_elg_from_coo.o \ +psb_c_mv_elg_from_fmt.o \ +psb_c_cp_hlg_from_fmt.o \ +psb_c_cp_hlg_from_coo.o \ +psb_z_cp_hlg_from_fmt.o \ +psb_z_cp_hlg_from_coo.o \ +psb_z_hlg_allocate_mnnz.o \ +psb_z_hlg_csmm.o \ +psb_z_hlg_csmv.o \ +psb_z_hlg_inner_vect_sv.o \ +psb_z_hlg_mold.o \ +psb_z_hlg_reallocate_nz.o \ +psb_z_hlg_scal.o \ +psb_z_hlg_scals.o \ +psb_z_hlg_from_gpu.o \ +psb_z_hlg_to_gpu.o \ +psb_z_hlg_vect_mv.o \ +psb_c_hlg_allocate_mnnz.o \ +psb_c_hlg_csmm.o \ +psb_c_hlg_csmv.o \ +psb_c_hlg_inner_vect_sv.o \ +psb_c_hlg_mold.o \ +psb_c_hlg_reallocate_nz.o \ +psb_c_hlg_scal.o \ +psb_c_hlg_scals.o \ +psb_c_hlg_from_gpu.o \ +psb_c_hlg_to_gpu.o \ +psb_c_hlg_vect_mv.o \ +psb_c_mv_hlg_from_coo.o \ +psb_c_cp_hlg_from_coo.o \ +psb_c_mv_hlg_from_fmt.o \ +psb_z_mv_hlg_from_coo.o \ +psb_z_cp_hlg_from_coo.o \ +psb_z_mv_hlg_from_fmt.o \ +psb_c_hybg_allocate_mnnz.o \ +psb_c_hybg_csmm.o \ +psb_c_hybg_csmv.o \ +psb_c_hybg_reallocate_nz.o \ +psb_c_hybg_scal.o \ +psb_c_hybg_scals.o \ +psb_c_hybg_to_gpu.o \ +psb_c_hybg_vect_mv.o \ +psb_c_hybg_inner_vect_sv.o \ +psb_c_cp_hybg_from_coo.o \ +psb_c_cp_hybg_from_fmt.o \ +psb_c_mv_hybg_from_fmt.o \ +psb_c_mv_hybg_from_coo.o \ +psb_c_hybg_mold.o \ +psb_z_hybg_allocate_mnnz.o \ +psb_z_hybg_csmm.o \ +psb_z_hybg_csmv.o \ +psb_z_hybg_reallocate_nz.o \ +psb_z_hybg_scal.o \ +psb_z_hybg_scals.o \ +psb_z_hybg_to_gpu.o \ +psb_z_hybg_vect_mv.o \ +psb_z_hybg_inner_vect_sv.o \ +psb_z_cp_hybg_from_coo.o \ +psb_z_cp_hybg_from_fmt.o \ +psb_z_mv_hybg_from_fmt.o \ +psb_z_mv_hybg_from_coo.o \ +psb_z_hybg_mold.o \ +psb_d_cp_diag_from_coo.o \ +psb_d_mv_diag_from_coo.o \ +psb_d_diag_to_gpu.o \ +psb_d_diag_csmv.o \ +psb_d_diag_mold.o \ +psb_d_diag_vect_mv.o \ +psb_d_cp_hdiag_from_coo.o \ +psb_d_mv_hdiag_from_coo.o \ +psb_d_hdiag_to_gpu.o \ +psb_d_hdiag_csmv.o \ +psb_d_hdiag_mold.o \ +psb_d_hdiag_vect_mv.o \ +psb_s_cp_hdiag_from_coo.o \ +psb_s_mv_hdiag_from_coo.o \ +psb_s_hdiag_to_gpu.o \ +psb_s_hdiag_csmv.o \ +psb_s_hdiag_mold.o \ +psb_s_hdiag_vect_mv.o \ +psb_s_dnsg_mat_impl.o \ +psb_d_dnsg_mat_impl.o \ +psb_c_dnsg_mat_impl.o \ +psb_z_dnsg_mat_impl.o + + +objs: $(OBJS) +lib: objs + ar cur ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) diff --git a/gpu/impl/psb_c_cp_csrg_from_coo.F90 b/gpu/impl/psb_c_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..9ab3b7f0 --- /dev/null +++ b/gpu/impl/psb_c_cp_csrg_from_coo.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cp_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_coo +#else + use psb_c_csrg_mat_mod +#endif + implicit none + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_csrg_from_coo diff --git a/gpu/impl/psb_c_cp_csrg_from_fmt.F90 b/gpu/impl/psb_c_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..5229244f --- /dev/null +++ b/gpu/impl/psb_c_cp_csrg_from_fmt.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_fmt +#else + use psb_c_csrg_mat_mod +#endif + !use iso_c_binding + implicit none + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_c_cp_csrg_from_fmt diff --git a/gpu/impl/psb_c_cp_diag_from_coo.F90 b/gpu/impl/psb_c_cp_diag_from_coo.F90 new file mode 100644 index 00000000..8d196891 --- /dev/null +++ b/gpu/impl/psb_c_cp_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_cp_diag_from_coo +#else + use psb_c_diag_mat_mod +#endif + implicit none + + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_c_dia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_diag_from_coo diff --git a/gpu/impl/psb_c_cp_elg_from_coo.F90 b/gpu/impl/psb_c_cp_elg_from_coo.F90 new file mode 100644 index 00000000..95193c13 --- /dev/null +++ b/gpu/impl/psb_c_cp_elg_from_coo.F90 @@ -0,0 +1,184 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_coo + use psi_ext_util_mod + use psb_gpu_env_mod +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ +#ifdef HAVE_SPGPU + hacksize = max(1,psb_gpu_WarpSize()) +#else + hacksize = 1 +#endif + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + +#ifdef HAVE_SPGPU + call psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() +#else + + call psi_c_convert_ell_from_coo(a,b,info,hacksize=hacksize) + call a%set_host() +#endif + + else + call b%cp_to_coo(tmp,info) +#ifdef HAVE_SPGPU + call psi_c_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() +#else + + call psi_c_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) + call a%set_host() +#endif + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_c_count_ell_from_coo + +end subroutine psb_c_cp_elg_from_coo diff --git a/gpu/impl/psb_c_cp_elg_from_fmt.F90 b/gpu/impl/psb_c_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..e8be8a8d --- /dev/null +++ b/gpu/impl/psb_c_cp_elg_from_fmt.F90 @@ -0,0 +1,101 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_fmt +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_c_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cp_elg_from_fmt diff --git a/gpu/impl/psb_c_cp_hdiag_from_coo.F90 b/gpu/impl/psb_c_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..f0ec00ad --- /dev/null +++ b/gpu/impl/psb_c_cp_hdiag_from_coo.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_cp_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_c_hdiag_mat_mod +#endif + implicit none + + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hdiag_from_coo diff --git a/gpu/impl/psb_c_cp_hlg_from_coo.F90 b/gpu/impl/psb_c_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..cf305592 --- /dev/null +++ b/gpu/impl/psb_c_cp_hlg_from_coo.F90 @@ -0,0 +1,198 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_coo +#else + use psb_c_hlg_mat_mod +#endif + implicit none + + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() +#ifdef HAVE_SPGPU + hksz = max(1,psb_gpu_WarpSize()) +#else + hksz = psi_get_hksz() +#endif + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_c_cp_hlg_from_coo diff --git a/gpu/impl/psb_c_cp_hlg_from_fmt.F90 b/gpu/impl/psb_c_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..559c501c --- /dev/null +++ b/gpu/impl/psb_c_cp_hlg_from_fmt.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_fmt +#else + use psb_c_hlg_mat_mod +#endif + implicit none + + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_hll_sparse_mat%cp_from_fmt(b,info) +#ifdef HAVE_SPGPU + if (info == 0) call a%to_gpu(info) +#endif + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hlg_from_fmt diff --git a/gpu/impl/psb_c_cp_hybg_from_coo.F90 b/gpu/impl/psb_c_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..00a7d4ee --- /dev/null +++ b/gpu/impl/psb_c_cp_hybg_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cp_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_coo +#else + use psb_c_hybg_mat_mod +#endif + implicit none + + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hybg_from_coo +#endif diff --git a/gpu/impl/psb_c_cp_hybg_from_fmt.F90 b/gpu/impl/psb_c_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..643abf99 --- /dev/null +++ b/gpu/impl/psb_c_cp_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_fmt +#else + use psb_c_hybg_mat_mod +#endif + implicit none + + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_c_cp_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_c_csrg_allocate_mnnz.F90 b/gpu/impl/psb_c_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..2183ee63 --- /dev/null +++ b/gpu/impl/psb_c_csrg_allocate_mnnz.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_allocate_mnnz +#else + use psb_c_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_allocate_mnnz diff --git a/gpu/impl/psb_c_csrg_csmm.F90 b/gpu/impl/psb_c_csrg_csmm.F90 new file mode 100644 index 00000000..cef5d288 --- /dev/null +++ b/gpu/impl/psb_c_csrg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_csmm +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_csrg_csmv +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_csrg_from_gpu +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() +#endif + +end subroutine psb_c_csrg_from_gpu diff --git a/gpu/impl/psb_c_csrg_inner_vect_sv.F90 b/gpu/impl/psb_c_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..39938752 --- /dev/null +++ b/gpu/impl/psb_c_csrg_inner_vect_sv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_inner_vect_sv +#else + use psb_c_csrg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_inner_vect_sv diff --git a/gpu/impl/psb_c_csrg_mold.F90 b/gpu/impl/psb_c_csrg_mold.F90 new file mode 100644 index 00000000..8b1b616a --- /dev/null +++ b/gpu/impl/psb_c_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_mold(a,b,info) + + use psb_base_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_mold + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_mold diff --git a/gpu/impl/psb_c_csrg_reallocate_nz.F90 b/gpu/impl/psb_c_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..e9db4128 --- /dev/null +++ b/gpu/impl/psb_c_csrg_reallocate_nz.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_reallocate_nz +#else + use psb_c_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_c_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_reallocate_nz diff --git a/gpu/impl/psb_c_csrg_scal.F90 b/gpu/impl/psb_c_csrg_scal.F90 new file mode 100644 index 00000000..f183a822 --- /dev/null +++ b/gpu/impl/psb_c_csrg_scal.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scal +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_c_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_scal diff --git a/gpu/impl/psb_c_csrg_scals.F90 b/gpu/impl/psb_c_csrg_scals.F90 new file mode 100644 index 00000000..13f0d707 --- /dev/null +++ b/gpu/impl/psb_c_csrg_scals.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scals +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_c_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_csrg_scals diff --git a/gpu/impl/psb_c_csrg_to_gpu.F90 b/gpu/impl/psb_c_csrg_to_gpu.F90 new file mode 100644 index 00000000..a04f1bab --- /dev/null +++ b/gpu/impl/psb_c_csrg_to_gpu.F90 @@ -0,0 +1,325 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_to_gpu +#else + use psb_c_csrg_mat_mod +#endif + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_spk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if CUDA_SHORT_VERSION <= 10 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if +#endif + +end subroutine psb_c_csrg_to_gpu diff --git a/gpu/impl/psb_c_csrg_vect_mv.F90 b/gpu/impl/psb_c_csrg_vect_mv.F90 new file mode 100644 index 00000000..0feb03fd --- /dev/null +++ b/gpu/impl/psb_c_csrg_vect_mv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_vect_mv +#else + use psb_c_csrg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_csrg_vect_mv diff --git a/gpu/impl/psb_c_diag_csmv.F90 b/gpu/impl/psb_c_diag_csmv.F90 new file mode 100644 index 00000000..05ca102f --- /dev/null +++ b/gpu/impl/psb_c_diag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_csmv +#else + use psb_c_diag_mat_mod +#endif + implicit none + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_diag_mold + implicit none + class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_diag_mold diff --git a/gpu/impl/psb_c_diag_to_gpu.F90 b/gpu/impl/psb_c_diag_to_gpu.F90 new file mode 100644 index 00000000..a60fc741 --- /dev/null +++ b/gpu/impl/psb_c_diag_to_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_diag_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_to_gpu +#else + use psb_c_diag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_c_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d +#ifdef HAVE_SPGPU + type(diagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_float) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_float) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_c_diag_to_gpu diff --git a/gpu/impl/psb_c_diag_vect_mv.F90 b/gpu/impl/psb_c_diag_vect_mv.F90 new file mode 100644 index 00000000..e680a737 --- /dev/null +++ b/gpu/impl/psb_c_diag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_vect_mv +#else + use psb_c_diag_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_diag_vect_mv diff --git a/gpu/impl/psb_c_dnsg_mat_impl.F90 b/gpu/impl/psb_c_dnsg_mat_impl.F90 new file mode 100644 index 00000000..b70f383a --- /dev/null +++ b/gpu/impl/psb_c_dnsg_mat_impl.F90 @@ -0,0 +1,461 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_vect_mv +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + class(psb_c_dnsg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + complex(psb_spk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='c_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_dnsg_vect_mv + + +subroutine psb_c_dnsg_mold(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_mold +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end interface + + +subroutine psb_c_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_to_gpu +#else + use psb_c_dnsg_mat_mod +#endif + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='c_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_SPGPU + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_complex_float,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_dnsg_to_gpu + + + +subroutine psb_c_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_coo +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_c_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_c_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cp_dnsg_from_coo + +subroutine psb_c_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_fmt +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_c_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cp_dnsg_from_fmt + + + +subroutine psb_c_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_coo +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='c_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_mv_dnsg_from_coo + + +subroutine psb_c_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_c_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_fmt +#else + use psb_c_dnsg_mat_mod +#endif + implicit none + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_c_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_c_mv_dnsg_from_fmt diff --git a/gpu/impl/psb_c_elg_allocate_mnnz.F90 b/gpu/impl/psb_c_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..ac9e654f --- /dev/null +++ b/gpu/impl/psb_c_elg_allocate_mnnz.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_allocate_mnnz +#else + use psb_c_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize +#else + ld = m +#endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_allocate_mnnz diff --git a/gpu/impl/psb_c_elg_asb.f90 b/gpu/impl/psb_c_elg_asb.f90 new file mode 100644 index 00000000..f2a8c641 --- /dev/null +++ b/gpu/impl/psb_c_elg_asb.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_asb(a) + + use psb_base_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_asb + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_asb diff --git a/gpu/impl/psb_c_elg_csmm.F90 b/gpu/impl/psb_c_elg_csmm.F90 new file mode 100644 index 00000000..5d355d88 --- /dev/null +++ b/gpu/impl/psb_c_elg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csmm +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_elg_csmv +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_elg_csput_a +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_csput_a + + + +subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding +#ifdef HAVE_SPGPU + use elldev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csput_v + use psb_c_gpu_vect_mod +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_gpu) + select type (ja) + class is (psb_i_vect_gpu) + select type (val) + class is (psb_c_vect_gpu) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceFloatComplex(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_c_elg_csput_v diff --git a/gpu/impl/psb_c_elg_from_gpu.F90 b/gpu/impl/psb_c_elg_from_gpu.F90 new file mode 100644 index 00000000..eda65380 --- /dev/null +++ b/gpu/impl/psb_c_elg_from_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_from_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_from_gpu +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_c_elg_from_gpu diff --git a/gpu/impl/psb_c_elg_inner_vect_sv.F90 b/gpu/impl/psb_c_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..97f0f7ff --- /dev/null +++ b/gpu/impl/psb_c_elg_inner_vect_sv.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_inner_vect_sv +#else + use psb_c_elg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_elg_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_inner_vect_sv diff --git a/gpu/impl/psb_c_elg_mold.F90 b/gpu/impl/psb_c_elg_mold.F90 new file mode 100644 index 00000000..17cd2ce2 --- /dev/null +++ b/gpu/impl/psb_c_elg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_mold(a,b,info) + + use psb_base_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_mold + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_mold diff --git a/gpu/impl/psb_c_elg_reallocate_nz.F90 b/gpu/impl/psb_c_elg_reallocate_nz.F90 new file mode 100644 index 00000000..40d94d36 --- /dev/null +++ b/gpu/impl/psb_c_elg_reallocate_nz.F90 @@ -0,0 +1,79 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_reallocate_nz +#else + use psb_c_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_reallocate_nz diff --git a/gpu/impl/psb_c_elg_scal.F90 b/gpu/impl/psb_c_elg_scal.F90 new file mode 100644 index 00000000..63d9907e --- /dev/null +++ b/gpu/impl/psb_c_elg_scal.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scal +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_scal diff --git a/gpu/impl/psb_c_elg_scals.F90 b/gpu/impl/psb_c_elg_scals.F90 new file mode 100644 index 00000000..b954e0a1 --- /dev/null +++ b/gpu/impl/psb_c_elg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scals +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_scals diff --git a/gpu/impl/psb_c_elg_to_gpu.F90 b/gpu/impl/psb_c_elg_to_gpu.F90 new file mode 100644 index 00000000..b967a59b --- /dev/null +++ b/gpu/impl/psb_c_elg_to_gpu.F90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_to_gpu +#else + use psb_c_elg_mat_mod +#endif + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_float,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_float,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_c_elg_to_gpu diff --git a/gpu/impl/psb_c_elg_trim.f90 b/gpu/impl/psb_c_elg_trim.f90 new file mode 100644 index 00000000..bc0c0696 --- /dev/null +++ b/gpu/impl/psb_c_elg_trim.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_trim(a) + + use psb_base_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_trim + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_trim diff --git a/gpu/impl/psb_c_elg_vect_mv.F90 b/gpu/impl/psb_c_elg_vect_mv.F90 new file mode 100644 index 00000000..ec6e5b50 --- /dev/null +++ b/gpu/impl/psb_c_elg_vect_mv.F90 @@ -0,0 +1,131 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_vect_mv +#else + use psb_c_elg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + if (a%is_dev()) call a%sync() + call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_elg_vect_mv diff --git a/gpu/impl/psb_c_hdiag_csmv.F90 b/gpu/impl/psb_c_hdiag_csmv.F90 new file mode 100644 index 00000000..1ba58c6f --- /dev/null +++ b/gpu/impl/psb_c_hdiag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_csmv +#else + use psb_c_hdiag_mat_mod +#endif + implicit none + class(psb_c_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hdiag_mold + implicit none + class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hdiag_mold diff --git a/gpu/impl/psb_c_hdiag_to_gpu.F90 b/gpu/impl/psb_c_hdiag_to_gpu.F90 new file mode 100644 index 00000000..565babe0 --- /dev/null +++ b/gpu/impl/psb_c_hdiag_to_gpu.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdiag_to_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_to_gpu +#else + use psb_c_hdiag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight +#ifdef HAVE_SPGPU + type(hdiagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +#endif + +end subroutine psb_c_hdiag_to_gpu diff --git a/gpu/impl/psb_c_hdiag_vect_mv.F90 b/gpu/impl/psb_c_hdiag_vect_mv.F90 new file mode 100644 index 00000000..a891a274 --- /dev/null +++ b/gpu/impl/psb_c_hdiag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_vect_mv +#else + use psb_c_hdiag_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hdiag_vect_mv diff --git a/gpu/impl/psb_c_hlg_allocate_mnnz.F90 b/gpu/impl/psb_c_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..27e5c0b6 --- /dev/null +++ b/gpu/impl/psb_c_hlg_allocate_mnnz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_allocate_mnnz +#else + use psb_c_hlg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(hlldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_hll_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_allocate_mnnz diff --git a/gpu/impl/psb_c_hlg_csmm.F90 b/gpu/impl/psb_c_hlg_csmm.F90 new file mode 100644 index 00000000..c33b2dde --- /dev/null +++ b/gpu/impl/psb_c_hlg_csmm.F90 @@ -0,0 +1,132 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_csmm +#else + use psb_c_hlg_mat_mod +#endif + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hlg_csmv +#else + use psb_c_hlg_mat_mod +#endif + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hlg_from_gpu +#else + use psb_c_hlg_mat_mod +#endif + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + +#ifdef HAVE_SPGPU + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_c_hlg_from_gpu diff --git a/gpu/impl/psb_c_hlg_inner_vect_sv.F90 b/gpu/impl/psb_c_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..0955d8a1 --- /dev/null +++ b/gpu/impl/psb_c_hlg_inner_vect_sv.F90 @@ -0,0 +1,81 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_inner_vect_sv +#else + use psb_c_hlg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_base_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_inner_vect_sv diff --git a/gpu/impl/psb_c_hlg_mold.F90 b/gpu/impl/psb_c_hlg_mold.F90 new file mode 100644 index 00000000..321111f0 --- /dev/null +++ b/gpu/impl/psb_c_hlg_mold.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_mold(a,b,info) + + use psb_base_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_mold + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_hlg_mold diff --git a/gpu/impl/psb_c_hlg_reallocate_nz.F90 b/gpu/impl/psb_c_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..a27c3f55 --- /dev/null +++ b/gpu/impl/psb_c_hlg_reallocate_nz.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_reallocate_nz +#else + use psb_c_hlg_mat_mod +#endif + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_c_hll_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_reallocate_nz diff --git a/gpu/impl/psb_c_hlg_scal.F90 b/gpu/impl/psb_c_hlg_scal.F90 new file mode 100644 index 00000000..b2c9d30d --- /dev/null +++ b/gpu/impl/psb_c_hlg_scal.F90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scal +#else + use psb_c_hlg_mat_mod +#endif + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_scal diff --git a/gpu/impl/psb_c_hlg_scals.F90 b/gpu/impl/psb_c_hlg_scals.F90 new file mode 100644 index 00000000..af2efb19 --- /dev/null +++ b/gpu/impl/psb_c_hlg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scals +#else + use psb_c_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_hlg_scals diff --git a/gpu/impl/psb_c_hlg_to_gpu.F90 b/gpu/impl/psb_c_hlg_to_gpu.F90 new file mode 100644 index 00000000..0d37bc24 --- /dev/null +++ b/gpu/impl/psb_c_hlg_to_gpu.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_to_gpu +#else + use psb_c_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_float,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_c_hlg_to_gpu diff --git a/gpu/impl/psb_c_hlg_vect_mv.F90 b/gpu/impl/psb_c_hlg_vect_mv.F90 new file mode 100644 index 00000000..bc4e2f56 --- /dev/null +++ b/gpu/impl/psb_c_hlg_vect_mv.F90 @@ -0,0 +1,129 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_vect_mv +#else + use psb_c_hlg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hlg_vect_mv diff --git a/gpu/impl/psb_c_hybg_allocate_mnnz.F90 b/gpu/impl/psb_c_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..5cd57fa2 --- /dev/null +++ b/gpu/impl/psb_c_hybg_allocate_mnnz.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_allocate_mnnz +#else + use psb_c_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_allocate_mnnz +#endif diff --git a/gpu/impl/psb_c_hybg_csmm.F90 b/gpu/impl/psb_c_hybg_csmm.F90 new file mode 100644 index 00000000..7c8bb582 --- /dev/null +++ b/gpu/impl/psb_c_hybg_csmm.F90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_csmm +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hybg_csmv +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_hybg_inner_vect_sv +#else + use psb_c_hybg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=czero)) then + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_inner_vect_sv +#endif diff --git a/gpu/impl/psb_c_hybg_mold.F90 b/gpu/impl/psb_c_hybg_mold.F90 new file mode 100644 index 00000000..54dd24c2 --- /dev/null +++ b/gpu/impl/psb_c_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_mold(a,b,info) + + use psb_base_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_mold + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_mold +#endif diff --git a/gpu/impl/psb_c_hybg_reallocate_nz.F90 b/gpu/impl/psb_c_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..3272b797 --- /dev/null +++ b/gpu/impl/psb_c_hybg_reallocate_nz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_reallocate_nz +#else + use psb_c_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_c_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_reallocate_nz +#endif diff --git a/gpu/impl/psb_c_hybg_scal.F90 b/gpu/impl/psb_c_hybg_scal.F90 new file mode 100644 index 00000000..1019f979 --- /dev/null +++ b/gpu/impl/psb_c_hybg_scal.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scal +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_scal +#endif diff --git a/gpu/impl/psb_c_hybg_scals.F90 b/gpu/impl/psb_c_hybg_scals.F90 new file mode 100644 index 00000000..1d09abbb --- /dev/null +++ b/gpu/impl/psb_c_hybg_scals.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scals +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_c_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_scals +#endif diff --git a/gpu/impl/psb_c_hybg_to_gpu.F90 b/gpu/impl/psb_c_hybg_to_gpu.F90 new file mode 100644 index 00000000..107efba9 --- /dev/null +++ b/gpu/impl/psb_c_hybg_to_gpu.F90 @@ -0,0 +1,154 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_to_gpu +#else + use psb_c_hybg_mat_mod +#endif + implicit none + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_spk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if +#endif + +end subroutine psb_c_hybg_to_gpu +#endif diff --git a/gpu/impl/psb_c_hybg_vect_mv.F90 b/gpu/impl/psb_c_hybg_vect_mv.F90 new file mode 100644 index 00000000..3ed0f7fd --- /dev/null +++ b/gpu/impl/psb_c_hybg_vect_mv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_vect_mv +#else + use psb_c_hybg_mat_mod +#endif + use psb_c_gpu_vect_mod + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_gpu) + select type(yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_hybg_vect_mv +#endif diff --git a/gpu/impl/psb_c_mv_csrg_from_coo.F90 b/gpu/impl/psb_c_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..d2533c2d --- /dev/null +++ b/gpu/impl/psb_c_mv_csrg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_coo +#else + use psb_c_csrg_mat_mod +#endif + implicit none + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_csrg_from_coo diff --git a/gpu/impl/psb_c_mv_csrg_from_fmt.F90 b/gpu/impl/psb_c_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..3e898e8f --- /dev/null +++ b/gpu/impl/psb_c_mv_csrg_from_fmt.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_fmt +#else + use psb_c_csrg_mat_mod +#endif + implicit none + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_c_mv_csrg_from_fmt diff --git a/gpu/impl/psb_c_mv_diag_from_coo.F90 b/gpu/impl/psb_c_mv_diag_from_coo.F90 new file mode 100644 index 00000000..34fe69b7 --- /dev/null +++ b/gpu/impl/psb_c_mv_diag_from_coo.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_c_diag_mat_mod, psb_protect_name => psb_c_mv_diag_from_coo +#else + use psb_c_diag_mat_mod +#endif + + implicit none + + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_diag_from_coo diff --git a/gpu/impl/psb_c_mv_elg_from_coo.F90 b/gpu/impl/psb_c_mv_elg_from_coo.F90 new file mode 100644 index 00000000..acf7e28c --- /dev/null +++ b/gpu/impl/psb_c_mv_elg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_coo +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + + +end subroutine psb_c_mv_elg_from_coo diff --git a/gpu/impl/psb_c_mv_elg_from_fmt.F90 b/gpu/impl/psb_c_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..fb9e3cfe --- /dev/null +++ b/gpu/impl/psb_c_mv_elg_from_fmt.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_fmt +#else + use psb_c_elg_mat_mod +#endif + implicit none + + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_c_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_elg_from_fmt diff --git a/gpu/impl/psb_c_mv_hdiag_from_coo.F90 b/gpu/impl/psb_c_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..1d07bddb --- /dev/null +++ b/gpu/impl/psb_c_mv_hdiag_from_coo.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_mv_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_c_hdiag_mat_mod +#endif + + implicit none + + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hdiag_from_coo diff --git a/gpu/impl/psb_c_mv_hlg_from_coo.F90 b/gpu/impl/psb_c_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..0fa2d72d --- /dev/null +++ b/gpu/impl/psb_c_mv_hlg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_coo +#else + use psb_c_hlg_mat_mod +#endif + implicit none + + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_mv_hlg_from_coo diff --git a/gpu/impl/psb_c_mv_hlg_from_fmt.F90 b/gpu/impl/psb_c_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..0581c7d6 --- /dev/null +++ b/gpu/impl/psb_c_mv_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_fmt +#else + use psb_c_hlg_mat_mod +#endif + implicit none + + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_hlg_from_fmt diff --git a/gpu/impl/psb_c_mv_hybg_from_coo.F90 b/gpu/impl/psb_c_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..7aca6065 --- /dev/null +++ b/gpu/impl/psb_c_mv_hybg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_mv_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_coo +#else + use psb_c_hybg_mat_mod +#endif + implicit none + + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hybg_from_coo +#endif diff --git a/gpu/impl/psb_c_mv_hybg_from_fmt.F90 b/gpu/impl/psb_c_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..41581b85 --- /dev/null +++ b/gpu/impl/psb_c_mv_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_fmt +#else + use psb_c_hybg_mat_mod +#endif + implicit none + + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select +end subroutine psb_c_mv_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_d_cp_csrg_from_coo.F90 b/gpu/impl/psb_d_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..ec00007e --- /dev/null +++ b/gpu/impl/psb_d_cp_csrg_from_coo.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cp_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_coo +#else + use psb_d_csrg_mat_mod +#endif + implicit none + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_csrg_from_coo diff --git a/gpu/impl/psb_d_cp_csrg_from_fmt.F90 b/gpu/impl/psb_d_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..b3aabeed --- /dev/null +++ b/gpu/impl/psb_d_cp_csrg_from_fmt.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_fmt +#else + use psb_d_csrg_mat_mod +#endif + !use iso_c_binding + implicit none + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_d_cp_csrg_from_fmt diff --git a/gpu/impl/psb_d_cp_diag_from_coo.F90 b/gpu/impl/psb_d_cp_diag_from_coo.F90 new file mode 100644 index 00000000..06aff19d --- /dev/null +++ b/gpu/impl/psb_d_cp_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_cp_diag_from_coo +#else + use psb_d_diag_mat_mod +#endif + implicit none + + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_d_dia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_diag_from_coo diff --git a/gpu/impl/psb_d_cp_elg_from_coo.F90 b/gpu/impl/psb_d_cp_elg_from_coo.F90 new file mode 100644 index 00000000..381e4bfb --- /dev/null +++ b/gpu/impl/psb_d_cp_elg_from_coo.F90 @@ -0,0 +1,184 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_coo + use psi_ext_util_mod + use psb_gpu_env_mod +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ +#ifdef HAVE_SPGPU + hacksize = max(1,psb_gpu_WarpSize()) +#else + hacksize = 1 +#endif + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + +#ifdef HAVE_SPGPU + call psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() +#else + + call psi_d_convert_ell_from_coo(a,b,info,hacksize=hacksize) + call a%set_host() +#endif + + else + call b%cp_to_coo(tmp,info) +#ifdef HAVE_SPGPU + call psi_d_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() +#else + + call psi_d_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) + call a%set_host() +#endif + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_d_count_ell_from_coo + +end subroutine psb_d_cp_elg_from_coo diff --git a/gpu/impl/psb_d_cp_elg_from_fmt.F90 b/gpu/impl/psb_d_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..9a6b6d41 --- /dev/null +++ b/gpu/impl/psb_d_cp_elg_from_fmt.F90 @@ -0,0 +1,101 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_fmt +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_d_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_elg_from_fmt diff --git a/gpu/impl/psb_d_cp_hdiag_from_coo.F90 b/gpu/impl/psb_d_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..443452a1 --- /dev/null +++ b/gpu/impl/psb_d_cp_hdiag_from_coo.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_cp_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_d_hdiag_mat_mod +#endif + implicit none + + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hdiag_from_coo diff --git a/gpu/impl/psb_d_cp_hlg_from_coo.F90 b/gpu/impl/psb_d_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..02855fef --- /dev/null +++ b/gpu/impl/psb_d_cp_hlg_from_coo.F90 @@ -0,0 +1,198 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_coo +#else + use psb_d_hlg_mat_mod +#endif + implicit none + + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() +#ifdef HAVE_SPGPU + hksz = max(1,psb_gpu_WarpSize()) +#else + hksz = psi_get_hksz() +#endif + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_d_cp_hlg_from_coo diff --git a/gpu/impl/psb_d_cp_hlg_from_fmt.F90 b/gpu/impl/psb_d_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..133fbb32 --- /dev/null +++ b/gpu/impl/psb_d_cp_hlg_from_fmt.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_fmt +#else + use psb_d_hlg_mat_mod +#endif + implicit none + + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_hll_sparse_mat%cp_from_fmt(b,info) +#ifdef HAVE_SPGPU + if (info == 0) call a%to_gpu(info) +#endif + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hlg_from_fmt diff --git a/gpu/impl/psb_d_cp_hybg_from_coo.F90 b/gpu/impl/psb_d_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..a74409cb --- /dev/null +++ b/gpu/impl/psb_d_cp_hybg_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cp_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_coo +#else + use psb_d_hybg_mat_mod +#endif + implicit none + + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hybg_from_coo +#endif diff --git a/gpu/impl/psb_d_cp_hybg_from_fmt.F90 b/gpu/impl/psb_d_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..91d59060 --- /dev/null +++ b/gpu/impl/psb_d_cp_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_fmt +#else + use psb_d_hybg_mat_mod +#endif + implicit none + + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_d_cp_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_d_csrg_allocate_mnnz.F90 b/gpu/impl/psb_d_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..7d2d4470 --- /dev/null +++ b/gpu/impl/psb_d_csrg_allocate_mnnz.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_allocate_mnnz +#else + use psb_d_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_allocate_mnnz diff --git a/gpu/impl/psb_d_csrg_csmm.F90 b/gpu/impl/psb_d_csrg_csmm.F90 new file mode 100644 index 00000000..59c8343e --- /dev/null +++ b/gpu/impl/psb_d_csrg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_csmm +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_csrg_csmv +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_csrg_from_gpu +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() +#endif + +end subroutine psb_d_csrg_from_gpu diff --git a/gpu/impl/psb_d_csrg_inner_vect_sv.F90 b/gpu/impl/psb_d_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..016d63d6 --- /dev/null +++ b/gpu/impl/psb_d_csrg_inner_vect_sv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_inner_vect_sv +#else + use psb_d_csrg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_inner_vect_sv diff --git a/gpu/impl/psb_d_csrg_mold.F90 b/gpu/impl/psb_d_csrg_mold.F90 new file mode 100644 index 00000000..d7288868 --- /dev/null +++ b/gpu/impl/psb_d_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_mold(a,b,info) + + use psb_base_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_mold + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_mold diff --git a/gpu/impl/psb_d_csrg_reallocate_nz.F90 b/gpu/impl/psb_d_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..083091f5 --- /dev/null +++ b/gpu/impl/psb_d_csrg_reallocate_nz.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_reallocate_nz +#else + use psb_d_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_d_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_reallocate_nz diff --git a/gpu/impl/psb_d_csrg_scal.F90 b/gpu/impl/psb_d_csrg_scal.F90 new file mode 100644 index 00000000..60dbaecd --- /dev/null +++ b/gpu/impl/psb_d_csrg_scal.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scal +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_d_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_scal diff --git a/gpu/impl/psb_d_csrg_scals.F90 b/gpu/impl/psb_d_csrg_scals.F90 new file mode 100644 index 00000000..6d4a1f40 --- /dev/null +++ b/gpu/impl/psb_d_csrg_scals.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scals +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_d_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_csrg_scals diff --git a/gpu/impl/psb_d_csrg_to_gpu.F90 b/gpu/impl/psb_d_csrg_to_gpu.F90 new file mode 100644 index 00000000..eb5d3942 --- /dev/null +++ b/gpu/impl/psb_d_csrg_to_gpu.F90 @@ -0,0 +1,325 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_to_gpu +#else + use psb_d_csrg_mat_mod +#endif + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_dpk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if CUDA_SHORT_VERSION <= 10 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if +#endif + +end subroutine psb_d_csrg_to_gpu diff --git a/gpu/impl/psb_d_csrg_vect_mv.F90 b/gpu/impl/psb_d_csrg_vect_mv.F90 new file mode 100644 index 00000000..f7124bbb --- /dev/null +++ b/gpu/impl/psb_d_csrg_vect_mv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_vect_mv +#else + use psb_d_csrg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_csrg_vect_mv diff --git a/gpu/impl/psb_d_diag_csmv.F90 b/gpu/impl/psb_d_diag_csmv.F90 new file mode 100644 index 00000000..af9ad2db --- /dev/null +++ b/gpu/impl/psb_d_diag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_csmv +#else + use psb_d_diag_mat_mod +#endif + implicit none + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_diag_mold + implicit none + class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_diag_mold diff --git a/gpu/impl/psb_d_diag_to_gpu.F90 b/gpu/impl/psb_d_diag_to_gpu.F90 new file mode 100644 index 00000000..de244124 --- /dev/null +++ b/gpu/impl/psb_d_diag_to_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_diag_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_to_gpu +#else + use psb_d_diag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d +#ifdef HAVE_SPGPU + type(diagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_double) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_double) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_d_diag_to_gpu diff --git a/gpu/impl/psb_d_diag_vect_mv.F90 b/gpu/impl/psb_d_diag_vect_mv.F90 new file mode 100644 index 00000000..3f2f5ac6 --- /dev/null +++ b/gpu/impl/psb_d_diag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_vect_mv +#else + use psb_d_diag_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_diag_vect_mv diff --git a/gpu/impl/psb_d_dnsg_mat_impl.F90 b/gpu/impl/psb_d_dnsg_mat_impl.F90 new file mode 100644 index 00000000..a7915898 --- /dev/null +++ b/gpu/impl/psb_d_dnsg_mat_impl.F90 @@ -0,0 +1,461 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_vect_mv +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + class(psb_d_dnsg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + real(psb_dpk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='d_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_dnsg_vect_mv + + +subroutine psb_d_dnsg_mold(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_mold +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end interface + + +subroutine psb_d_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_to_gpu +#else + use psb_d_dnsg_mat_mod +#endif + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='d_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_SPGPU + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_double,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_dnsg_to_gpu + + + +subroutine psb_d_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_coo +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_d_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_d_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cp_dnsg_from_coo + +subroutine psb_d_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_fmt +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_d_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cp_dnsg_from_fmt + + + +subroutine psb_d_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_coo +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='d_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_mv_dnsg_from_coo + + +subroutine psb_d_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_d_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_fmt +#else + use psb_d_dnsg_mat_mod +#endif + implicit none + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_d_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_d_mv_dnsg_from_fmt diff --git a/gpu/impl/psb_d_elg_allocate_mnnz.F90 b/gpu/impl/psb_d_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..105f5617 --- /dev/null +++ b/gpu/impl/psb_d_elg_allocate_mnnz.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_allocate_mnnz +#else + use psb_d_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_double,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize +#else + ld = m +#endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_allocate_mnnz diff --git a/gpu/impl/psb_d_elg_asb.f90 b/gpu/impl/psb_d_elg_asb.f90 new file mode 100644 index 00000000..f80537ef --- /dev/null +++ b/gpu/impl/psb_d_elg_asb.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_asb(a) + + use psb_base_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_asb + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_asb diff --git a/gpu/impl/psb_d_elg_csmm.F90 b/gpu/impl/psb_d_elg_csmm.F90 new file mode 100644 index 00000000..add9c3b2 --- /dev/null +++ b/gpu/impl/psb_d_elg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmm +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_elg_csmv +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_elg_csput_a +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_csput_a + + + +subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding +#ifdef HAVE_SPGPU + use elldev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput_v + use psb_d_gpu_vect_mod +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_gpu) + select type (ja) + class is (psb_i_vect_gpu) + select type (val) + class is (psb_d_vect_gpu) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceDouble(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_d_elg_csput_v diff --git a/gpu/impl/psb_d_elg_from_gpu.F90 b/gpu/impl/psb_d_elg_from_gpu.F90 new file mode 100644 index 00000000..c1da9584 --- /dev/null +++ b/gpu/impl/psb_d_elg_from_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_from_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_from_gpu +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_d_elg_from_gpu diff --git a/gpu/impl/psb_d_elg_inner_vect_sv.F90 b/gpu/impl/psb_d_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..333946bf --- /dev/null +++ b/gpu/impl/psb_d_elg_inner_vect_sv.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_inner_vect_sv +#else + use psb_d_elg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_elg_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_inner_vect_sv diff --git a/gpu/impl/psb_d_elg_mold.F90 b/gpu/impl/psb_d_elg_mold.F90 new file mode 100644 index 00000000..3fd6d071 --- /dev/null +++ b/gpu/impl/psb_d_elg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_mold(a,b,info) + + use psb_base_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_mold + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_mold diff --git a/gpu/impl/psb_d_elg_reallocate_nz.F90 b/gpu/impl/psb_d_elg_reallocate_nz.F90 new file mode 100644 index 00000000..70b3705c --- /dev/null +++ b/gpu/impl/psb_d_elg_reallocate_nz.F90 @@ -0,0 +1,79 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_reallocate_nz +#else + use psb_d_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_reallocate_nz diff --git a/gpu/impl/psb_d_elg_scal.F90 b/gpu/impl/psb_d_elg_scal.F90 new file mode 100644 index 00000000..53ab82d7 --- /dev/null +++ b/gpu/impl/psb_d_elg_scal.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scal +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_scal diff --git a/gpu/impl/psb_d_elg_scals.F90 b/gpu/impl/psb_d_elg_scals.F90 new file mode 100644 index 00000000..f85780ce --- /dev/null +++ b/gpu/impl/psb_d_elg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scals +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_scals diff --git a/gpu/impl/psb_d_elg_to_gpu.F90 b/gpu/impl/psb_d_elg_to_gpu.F90 new file mode 100644 index 00000000..28e61606 --- /dev/null +++ b/gpu/impl/psb_d_elg_to_gpu.F90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_to_gpu +#else + use psb_d_elg_mat_mod +#endif + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_double,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_double,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_d_elg_to_gpu diff --git a/gpu/impl/psb_d_elg_trim.f90 b/gpu/impl/psb_d_elg_trim.f90 new file mode 100644 index 00000000..d2a2047c --- /dev/null +++ b/gpu/impl/psb_d_elg_trim.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_trim(a) + + use psb_base_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_trim + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_trim diff --git a/gpu/impl/psb_d_elg_vect_mv.F90 b/gpu/impl/psb_d_elg_vect_mv.F90 new file mode 100644 index 00000000..e46f84da --- /dev/null +++ b/gpu/impl/psb_d_elg_vect_mv.F90 @@ -0,0 +1,131 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_vect_mv +#else + use psb_d_elg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_elg_vect_mv diff --git a/gpu/impl/psb_d_hdiag_csmv.F90 b/gpu/impl/psb_d_hdiag_csmv.F90 new file mode 100644 index 00000000..6f6bcedf --- /dev/null +++ b/gpu/impl/psb_d_hdiag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_csmv +#else + use psb_d_hdiag_mat_mod +#endif + implicit none + class(psb_d_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hdiag_mold + implicit none + class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hdiag_mold diff --git a/gpu/impl/psb_d_hdiag_to_gpu.F90 b/gpu/impl/psb_d_hdiag_to_gpu.F90 new file mode 100644 index 00000000..fb013586 --- /dev/null +++ b/gpu/impl/psb_d_hdiag_to_gpu.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdiag_to_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_to_gpu +#else + use psb_d_hdiag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight +#ifdef HAVE_SPGPU + type(hdiagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +#endif + +end subroutine psb_d_hdiag_to_gpu diff --git a/gpu/impl/psb_d_hdiag_vect_mv.F90 b/gpu/impl/psb_d_hdiag_vect_mv.F90 new file mode 100644 index 00000000..db7ec9c6 --- /dev/null +++ b/gpu/impl/psb_d_hdiag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_vect_mv +#else + use psb_d_hdiag_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hdiag_vect_mv diff --git a/gpu/impl/psb_d_hlg_allocate_mnnz.F90 b/gpu/impl/psb_d_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..6f327e81 --- /dev/null +++ b/gpu/impl/psb_d_hlg_allocate_mnnz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_allocate_mnnz +#else + use psb_d_hlg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(hlldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_hll_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_allocate_mnnz diff --git a/gpu/impl/psb_d_hlg_csmm.F90 b/gpu/impl/psb_d_hlg_csmm.F90 new file mode 100644 index 00000000..120f3e06 --- /dev/null +++ b/gpu/impl/psb_d_hlg_csmm.F90 @@ -0,0 +1,132 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_csmm +#else + use psb_d_hlg_mat_mod +#endif + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hlg_csmv +#else + use psb_d_hlg_mat_mod +#endif + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hlg_from_gpu +#else + use psb_d_hlg_mat_mod +#endif + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + +#ifdef HAVE_SPGPU + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_d_hlg_from_gpu diff --git a/gpu/impl/psb_d_hlg_inner_vect_sv.F90 b/gpu/impl/psb_d_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..0ad867a3 --- /dev/null +++ b/gpu/impl/psb_d_hlg_inner_vect_sv.F90 @@ -0,0 +1,81 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_inner_vect_sv +#else + use psb_d_hlg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_base_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_inner_vect_sv diff --git a/gpu/impl/psb_d_hlg_mold.F90 b/gpu/impl/psb_d_hlg_mold.F90 new file mode 100644 index 00000000..3ce9f33a --- /dev/null +++ b/gpu/impl/psb_d_hlg_mold.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_mold(a,b,info) + + use psb_base_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_mold + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_hlg_mold diff --git a/gpu/impl/psb_d_hlg_reallocate_nz.F90 b/gpu/impl/psb_d_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..c9fa4771 --- /dev/null +++ b/gpu/impl/psb_d_hlg_reallocate_nz.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_reallocate_nz +#else + use psb_d_hlg_mat_mod +#endif + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_d_hll_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_reallocate_nz diff --git a/gpu/impl/psb_d_hlg_scal.F90 b/gpu/impl/psb_d_hlg_scal.F90 new file mode 100644 index 00000000..b487303d --- /dev/null +++ b/gpu/impl/psb_d_hlg_scal.F90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scal +#else + use psb_d_hlg_mat_mod +#endif + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_scal diff --git a/gpu/impl/psb_d_hlg_scals.F90 b/gpu/impl/psb_d_hlg_scals.F90 new file mode 100644 index 00000000..e3f676e9 --- /dev/null +++ b/gpu/impl/psb_d_hlg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scals +#else + use psb_d_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_hlg_scals diff --git a/gpu/impl/psb_d_hlg_to_gpu.F90 b/gpu/impl/psb_d_hlg_to_gpu.F90 new file mode 100644 index 00000000..5e3b3558 --- /dev/null +++ b/gpu/impl/psb_d_hlg_to_gpu.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_to_gpu +#else + use psb_d_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_double,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_d_hlg_to_gpu diff --git a/gpu/impl/psb_d_hlg_vect_mv.F90 b/gpu/impl/psb_d_hlg_vect_mv.F90 new file mode 100644 index 00000000..cd5e95e5 --- /dev/null +++ b/gpu/impl/psb_d_hlg_vect_mv.F90 @@ -0,0 +1,129 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_vect_mv +#else + use psb_d_hlg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hlg_vect_mv diff --git a/gpu/impl/psb_d_hybg_allocate_mnnz.F90 b/gpu/impl/psb_d_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..1565a719 --- /dev/null +++ b/gpu/impl/psb_d_hybg_allocate_mnnz.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_allocate_mnnz +#else + use psb_d_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_allocate_mnnz +#endif diff --git a/gpu/impl/psb_d_hybg_csmm.F90 b/gpu/impl/psb_d_hybg_csmm.F90 new file mode 100644 index 00000000..abc0e0c2 --- /dev/null +++ b/gpu/impl/psb_d_hybg_csmm.F90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_csmm +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hybg_csmv +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_hybg_inner_vect_sv +#else + use psb_d_hybg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_inner_vect_sv +#endif diff --git a/gpu/impl/psb_d_hybg_mold.F90 b/gpu/impl/psb_d_hybg_mold.F90 new file mode 100644 index 00000000..27390db0 --- /dev/null +++ b/gpu/impl/psb_d_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_mold(a,b,info) + + use psb_base_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_mold + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_mold +#endif diff --git a/gpu/impl/psb_d_hybg_reallocate_nz.F90 b/gpu/impl/psb_d_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..537101e9 --- /dev/null +++ b/gpu/impl/psb_d_hybg_reallocate_nz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_reallocate_nz +#else + use psb_d_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_d_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_reallocate_nz +#endif diff --git a/gpu/impl/psb_d_hybg_scal.F90 b/gpu/impl/psb_d_hybg_scal.F90 new file mode 100644 index 00000000..32ef2da0 --- /dev/null +++ b/gpu/impl/psb_d_hybg_scal.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scal +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_scal +#endif diff --git a/gpu/impl/psb_d_hybg_scals.F90 b/gpu/impl/psb_d_hybg_scals.F90 new file mode 100644 index 00000000..8c38328a --- /dev/null +++ b/gpu/impl/psb_d_hybg_scals.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scals +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_d_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_scals +#endif diff --git a/gpu/impl/psb_d_hybg_to_gpu.F90 b/gpu/impl/psb_d_hybg_to_gpu.F90 new file mode 100644 index 00000000..33bf55b8 --- /dev/null +++ b/gpu/impl/psb_d_hybg_to_gpu.F90 @@ -0,0 +1,154 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_to_gpu +#else + use psb_d_hybg_mat_mod +#endif + implicit none + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_dpk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if +#endif + +end subroutine psb_d_hybg_to_gpu +#endif diff --git a/gpu/impl/psb_d_hybg_vect_mv.F90 b/gpu/impl/psb_d_hybg_vect_mv.F90 new file mode 100644 index 00000000..d9653a48 --- /dev/null +++ b/gpu/impl/psb_d_hybg_vect_mv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_vect_mv +#else + use psb_d_hybg_mat_mod +#endif + use psb_d_gpu_vect_mod + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_gpu) + select type(yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_hybg_vect_mv +#endif diff --git a/gpu/impl/psb_d_mv_csrg_from_coo.F90 b/gpu/impl/psb_d_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..8c59e6d1 --- /dev/null +++ b/gpu/impl/psb_d_mv_csrg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_coo +#else + use psb_d_csrg_mat_mod +#endif + implicit none + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_csrg_from_coo diff --git a/gpu/impl/psb_d_mv_csrg_from_fmt.F90 b/gpu/impl/psb_d_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..30c133e4 --- /dev/null +++ b/gpu/impl/psb_d_mv_csrg_from_fmt.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_fmt +#else + use psb_d_csrg_mat_mod +#endif + implicit none + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_d_mv_csrg_from_fmt diff --git a/gpu/impl/psb_d_mv_diag_from_coo.F90 b/gpu/impl/psb_d_mv_diag_from_coo.F90 new file mode 100644 index 00000000..f37a5523 --- /dev/null +++ b/gpu/impl/psb_d_mv_diag_from_coo.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_d_diag_mat_mod, psb_protect_name => psb_d_mv_diag_from_coo +#else + use psb_d_diag_mat_mod +#endif + + implicit none + + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_diag_from_coo diff --git a/gpu/impl/psb_d_mv_elg_from_coo.F90 b/gpu/impl/psb_d_mv_elg_from_coo.F90 new file mode 100644 index 00000000..73216cfa --- /dev/null +++ b/gpu/impl/psb_d_mv_elg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_coo +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + + +end subroutine psb_d_mv_elg_from_coo diff --git a/gpu/impl/psb_d_mv_elg_from_fmt.F90 b/gpu/impl/psb_d_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..5038c50e --- /dev/null +++ b/gpu/impl/psb_d_mv_elg_from_fmt.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_fmt +#else + use psb_d_elg_mat_mod +#endif + implicit none + + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_d_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_elg_from_fmt diff --git a/gpu/impl/psb_d_mv_hdiag_from_coo.F90 b/gpu/impl/psb_d_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..ee0e983f --- /dev/null +++ b/gpu/impl/psb_d_mv_hdiag_from_coo.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_mv_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_d_hdiag_mat_mod +#endif + + implicit none + + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hdiag_from_coo diff --git a/gpu/impl/psb_d_mv_hlg_from_coo.F90 b/gpu/impl/psb_d_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..fe030415 --- /dev/null +++ b/gpu/impl/psb_d_mv_hlg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_coo +#else + use psb_d_hlg_mat_mod +#endif + implicit none + + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_mv_hlg_from_coo diff --git a/gpu/impl/psb_d_mv_hlg_from_fmt.F90 b/gpu/impl/psb_d_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..e538b017 --- /dev/null +++ b/gpu/impl/psb_d_mv_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_fmt +#else + use psb_d_hlg_mat_mod +#endif + implicit none + + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hlg_from_fmt diff --git a/gpu/impl/psb_d_mv_hybg_from_coo.F90 b/gpu/impl/psb_d_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..4fe76c72 --- /dev/null +++ b/gpu/impl/psb_d_mv_hybg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_mv_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_coo +#else + use psb_d_hybg_mat_mod +#endif + implicit none + + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hybg_from_coo +#endif diff --git a/gpu/impl/psb_d_mv_hybg_from_fmt.F90 b/gpu/impl/psb_d_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..454533d0 --- /dev/null +++ b/gpu/impl/psb_d_mv_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_fmt +#else + use psb_d_hybg_mat_mod +#endif + implicit none + + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select +end subroutine psb_d_mv_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_s_cp_csrg_from_coo.F90 b/gpu/impl/psb_s_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..4a714d41 --- /dev/null +++ b/gpu/impl/psb_s_cp_csrg_from_coo.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cp_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_coo +#else + use psb_s_csrg_mat_mod +#endif + implicit none + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_csrg_from_coo diff --git a/gpu/impl/psb_s_cp_csrg_from_fmt.F90 b/gpu/impl/psb_s_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..962a8c9d --- /dev/null +++ b/gpu/impl/psb_s_cp_csrg_from_fmt.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_fmt +#else + use psb_s_csrg_mat_mod +#endif + !use iso_c_binding + implicit none + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_s_cp_csrg_from_fmt diff --git a/gpu/impl/psb_s_cp_diag_from_coo.F90 b/gpu/impl/psb_s_cp_diag_from_coo.F90 new file mode 100644 index 00000000..6b105ef2 --- /dev/null +++ b/gpu/impl/psb_s_cp_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_cp_diag_from_coo +#else + use psb_s_diag_mat_mod +#endif + implicit none + + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_s_dia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_diag_from_coo diff --git a/gpu/impl/psb_s_cp_elg_from_coo.F90 b/gpu/impl/psb_s_cp_elg_from_coo.F90 new file mode 100644 index 00000000..af8c7d28 --- /dev/null +++ b/gpu/impl/psb_s_cp_elg_from_coo.F90 @@ -0,0 +1,184 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_coo + use psi_ext_util_mod + use psb_gpu_env_mod +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ +#ifdef HAVE_SPGPU + hacksize = max(1,psb_gpu_WarpSize()) +#else + hacksize = 1 +#endif + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + +#ifdef HAVE_SPGPU + call psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() +#else + + call psi_s_convert_ell_from_coo(a,b,info,hacksize=hacksize) + call a%set_host() +#endif + + else + call b%cp_to_coo(tmp,info) +#ifdef HAVE_SPGPU + call psi_s_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() +#else + + call psi_s_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) + call a%set_host() +#endif + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_s_count_ell_from_coo + +end subroutine psb_s_cp_elg_from_coo diff --git a/gpu/impl/psb_s_cp_elg_from_fmt.F90 b/gpu/impl/psb_s_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..c3d973e1 --- /dev/null +++ b/gpu/impl/psb_s_cp_elg_from_fmt.F90 @@ -0,0 +1,101 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_fmt +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_s_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_elg_from_fmt diff --git a/gpu/impl/psb_s_cp_hdiag_from_coo.F90 b/gpu/impl/psb_s_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..0509706d --- /dev/null +++ b/gpu/impl/psb_s_cp_hdiag_from_coo.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_cp_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_s_hdiag_mat_mod +#endif + implicit none + + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hdiag_from_coo diff --git a/gpu/impl/psb_s_cp_hlg_from_coo.F90 b/gpu/impl/psb_s_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..5988c8dd --- /dev/null +++ b/gpu/impl/psb_s_cp_hlg_from_coo.F90 @@ -0,0 +1,198 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_coo +#else + use psb_s_hlg_mat_mod +#endif + implicit none + + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() +#ifdef HAVE_SPGPU + hksz = max(1,psb_gpu_WarpSize()) +#else + hksz = psi_get_hksz() +#endif + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_s_cp_hlg_from_coo diff --git a/gpu/impl/psb_s_cp_hlg_from_fmt.F90 b/gpu/impl/psb_s_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..41c20866 --- /dev/null +++ b/gpu/impl/psb_s_cp_hlg_from_fmt.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_fmt +#else + use psb_s_hlg_mat_mod +#endif + implicit none + + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_hll_sparse_mat%cp_from_fmt(b,info) +#ifdef HAVE_SPGPU + if (info == 0) call a%to_gpu(info) +#endif + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hlg_from_fmt diff --git a/gpu/impl/psb_s_cp_hybg_from_coo.F90 b/gpu/impl/psb_s_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..92dc4a68 --- /dev/null +++ b/gpu/impl/psb_s_cp_hybg_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cp_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_coo +#else + use psb_s_hybg_mat_mod +#endif + implicit none + + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hybg_from_coo +#endif diff --git a/gpu/impl/psb_s_cp_hybg_from_fmt.F90 b/gpu/impl/psb_s_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..53143776 --- /dev/null +++ b/gpu/impl/psb_s_cp_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_fmt +#else + use psb_s_hybg_mat_mod +#endif + implicit none + + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_s_cp_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_s_csrg_allocate_mnnz.F90 b/gpu/impl/psb_s_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..e93452d2 --- /dev/null +++ b/gpu/impl/psb_s_csrg_allocate_mnnz.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_allocate_mnnz +#else + use psb_s_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_allocate_mnnz diff --git a/gpu/impl/psb_s_csrg_csmm.F90 b/gpu/impl/psb_s_csrg_csmm.F90 new file mode 100644 index 00000000..55087053 --- /dev/null +++ b/gpu/impl/psb_s_csrg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_csmm +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_csrg_csmv +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_csrg_from_gpu +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() +#endif + +end subroutine psb_s_csrg_from_gpu diff --git a/gpu/impl/psb_s_csrg_inner_vect_sv.F90 b/gpu/impl/psb_s_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..133a6350 --- /dev/null +++ b/gpu/impl/psb_s_csrg_inner_vect_sv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_inner_vect_sv +#else + use psb_s_csrg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_inner_vect_sv diff --git a/gpu/impl/psb_s_csrg_mold.F90 b/gpu/impl/psb_s_csrg_mold.F90 new file mode 100644 index 00000000..6ac4cc3d --- /dev/null +++ b/gpu/impl/psb_s_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_mold(a,b,info) + + use psb_base_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_mold + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_mold diff --git a/gpu/impl/psb_s_csrg_reallocate_nz.F90 b/gpu/impl/psb_s_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..dd9a50d0 --- /dev/null +++ b/gpu/impl/psb_s_csrg_reallocate_nz.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_reallocate_nz +#else + use psb_s_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_s_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_reallocate_nz diff --git a/gpu/impl/psb_s_csrg_scal.F90 b/gpu/impl/psb_s_csrg_scal.F90 new file mode 100644 index 00000000..5e0fbcf0 --- /dev/null +++ b/gpu/impl/psb_s_csrg_scal.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scal +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_s_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_scal diff --git a/gpu/impl/psb_s_csrg_scals.F90 b/gpu/impl/psb_s_csrg_scals.F90 new file mode 100644 index 00000000..54b299a1 --- /dev/null +++ b/gpu/impl/psb_s_csrg_scals.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scals +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_s_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_csrg_scals diff --git a/gpu/impl/psb_s_csrg_to_gpu.F90 b/gpu/impl/psb_s_csrg_to_gpu.F90 new file mode 100644 index 00000000..f90ae4ea --- /dev/null +++ b/gpu/impl/psb_s_csrg_to_gpu.F90 @@ -0,0 +1,325 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_to_gpu +#else + use psb_s_csrg_mat_mod +#endif + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_spk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if CUDA_SHORT_VERSION <= 10 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if +#endif + +end subroutine psb_s_csrg_to_gpu diff --git a/gpu/impl/psb_s_csrg_vect_mv.F90 b/gpu/impl/psb_s_csrg_vect_mv.F90 new file mode 100644 index 00000000..ff88bf89 --- /dev/null +++ b/gpu/impl/psb_s_csrg_vect_mv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_vect_mv +#else + use psb_s_csrg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_csrg_vect_mv diff --git a/gpu/impl/psb_s_diag_csmv.F90 b/gpu/impl/psb_s_diag_csmv.F90 new file mode 100644 index 00000000..4cf14d12 --- /dev/null +++ b/gpu/impl/psb_s_diag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_csmv +#else + use psb_s_diag_mat_mod +#endif + implicit none + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_diag_mold + implicit none + class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_diag_mold diff --git a/gpu/impl/psb_s_diag_to_gpu.F90 b/gpu/impl/psb_s_diag_to_gpu.F90 new file mode 100644 index 00000000..bb09b127 --- /dev/null +++ b/gpu/impl/psb_s_diag_to_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_diag_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_to_gpu +#else + use psb_s_diag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_s_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d +#ifdef HAVE_SPGPU + type(diagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_float) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_float) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_s_diag_to_gpu diff --git a/gpu/impl/psb_s_diag_vect_mv.F90 b/gpu/impl/psb_s_diag_vect_mv.F90 new file mode 100644 index 00000000..31976247 --- /dev/null +++ b/gpu/impl/psb_s_diag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_vect_mv +#else + use psb_s_diag_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_diag_vect_mv diff --git a/gpu/impl/psb_s_dnsg_mat_impl.F90 b/gpu/impl/psb_s_dnsg_mat_impl.F90 new file mode 100644 index 00000000..13c58985 --- /dev/null +++ b/gpu/impl/psb_s_dnsg_mat_impl.F90 @@ -0,0 +1,461 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_vect_mv +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + class(psb_s_dnsg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + real(psb_spk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='s_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_dnsg_vect_mv + + +subroutine psb_s_dnsg_mold(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_mold +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end interface + + +subroutine psb_s_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_to_gpu +#else + use psb_s_dnsg_mat_mod +#endif + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='s_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_SPGPU + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_float,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_dnsg_to_gpu + + + +subroutine psb_s_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_coo +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_s_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_s_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cp_dnsg_from_coo + +subroutine psb_s_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_fmt +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_s_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cp_dnsg_from_fmt + + + +subroutine psb_s_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_coo +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='s_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_mv_dnsg_from_coo + + +subroutine psb_s_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_s_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_fmt +#else + use psb_s_dnsg_mat_mod +#endif + implicit none + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_s_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_s_mv_dnsg_from_fmt diff --git a/gpu/impl/psb_s_elg_allocate_mnnz.F90 b/gpu/impl/psb_s_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..f3b1d743 --- /dev/null +++ b/gpu/impl/psb_s_elg_allocate_mnnz.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_allocate_mnnz +#else + use psb_s_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_float,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize +#else + ld = m +#endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_allocate_mnnz diff --git a/gpu/impl/psb_s_elg_asb.f90 b/gpu/impl/psb_s_elg_asb.f90 new file mode 100644 index 00000000..190be710 --- /dev/null +++ b/gpu/impl/psb_s_elg_asb.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_asb(a) + + use psb_base_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_asb + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_asb diff --git a/gpu/impl/psb_s_elg_csmm.F90 b/gpu/impl/psb_s_elg_csmm.F90 new file mode 100644 index 00000000..8bda23e3 --- /dev/null +++ b/gpu/impl/psb_s_elg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csmm +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_elg_csmv +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_elg_csput_a +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_s_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_csput_a + + + +subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding +#ifdef HAVE_SPGPU + use elldev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csput_v + use psb_s_gpu_vect_mod +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_gpu) + select type (ja) + class is (psb_i_vect_gpu) + select type (val) + class is (psb_s_vect_gpu) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceFloat(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_s_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_s_elg_csput_v diff --git a/gpu/impl/psb_s_elg_from_gpu.F90 b/gpu/impl/psb_s_elg_from_gpu.F90 new file mode 100644 index 00000000..d043790d --- /dev/null +++ b/gpu/impl/psb_s_elg_from_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_from_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_from_gpu +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_s_elg_from_gpu diff --git a/gpu/impl/psb_s_elg_inner_vect_sv.F90 b/gpu/impl/psb_s_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..83c79cf3 --- /dev/null +++ b/gpu/impl/psb_s_elg_inner_vect_sv.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_inner_vect_sv +#else + use psb_s_elg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_elg_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_inner_vect_sv diff --git a/gpu/impl/psb_s_elg_mold.F90 b/gpu/impl/psb_s_elg_mold.F90 new file mode 100644 index 00000000..a481d605 --- /dev/null +++ b/gpu/impl/psb_s_elg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_mold(a,b,info) + + use psb_base_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_mold + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_mold diff --git a/gpu/impl/psb_s_elg_reallocate_nz.F90 b/gpu/impl/psb_s_elg_reallocate_nz.F90 new file mode 100644 index 00000000..22916852 --- /dev/null +++ b/gpu/impl/psb_s_elg_reallocate_nz.F90 @@ -0,0 +1,79 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_reallocate_nz +#else + use psb_s_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_reallocate_nz diff --git a/gpu/impl/psb_s_elg_scal.F90 b/gpu/impl/psb_s_elg_scal.F90 new file mode 100644 index 00000000..913ae47e --- /dev/null +++ b/gpu/impl/psb_s_elg_scal.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scal +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_scal diff --git a/gpu/impl/psb_s_elg_scals.F90 b/gpu/impl/psb_s_elg_scals.F90 new file mode 100644 index 00000000..8261fc94 --- /dev/null +++ b/gpu/impl/psb_s_elg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scals +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_scals diff --git a/gpu/impl/psb_s_elg_to_gpu.F90 b/gpu/impl/psb_s_elg_to_gpu.F90 new file mode 100644 index 00000000..bf86343b --- /dev/null +++ b/gpu/impl/psb_s_elg_to_gpu.F90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_to_gpu +#else + use psb_s_elg_mat_mod +#endif + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_float,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_float,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_s_elg_to_gpu diff --git a/gpu/impl/psb_s_elg_trim.f90 b/gpu/impl/psb_s_elg_trim.f90 new file mode 100644 index 00000000..f3bd3b2f --- /dev/null +++ b/gpu/impl/psb_s_elg_trim.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_trim(a) + + use psb_base_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_trim + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_trim diff --git a/gpu/impl/psb_s_elg_vect_mv.F90 b/gpu/impl/psb_s_elg_vect_mv.F90 new file mode 100644 index 00000000..f8d297d1 --- /dev/null +++ b/gpu/impl/psb_s_elg_vect_mv.F90 @@ -0,0 +1,131 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_vect_mv +#else + use psb_s_elg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + if (a%is_dev()) call a%sync() + call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_elg_vect_mv diff --git a/gpu/impl/psb_s_hdiag_csmv.F90 b/gpu/impl/psb_s_hdiag_csmv.F90 new file mode 100644 index 00000000..3320901c --- /dev/null +++ b/gpu/impl/psb_s_hdiag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_csmv +#else + use psb_s_hdiag_mat_mod +#endif + implicit none + class(psb_s_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hdiag_mold + implicit none + class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hdiag_mold diff --git a/gpu/impl/psb_s_hdiag_to_gpu.F90 b/gpu/impl/psb_s_hdiag_to_gpu.F90 new file mode 100644 index 00000000..ade1c080 --- /dev/null +++ b/gpu/impl/psb_s_hdiag_to_gpu.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdiag_to_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_to_gpu +#else + use psb_s_hdiag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight +#ifdef HAVE_SPGPU + type(hdiagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +#endif + +end subroutine psb_s_hdiag_to_gpu diff --git a/gpu/impl/psb_s_hdiag_vect_mv.F90 b/gpu/impl/psb_s_hdiag_vect_mv.F90 new file mode 100644 index 00000000..ac261e92 --- /dev/null +++ b/gpu/impl/psb_s_hdiag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_vect_mv +#else + use psb_s_hdiag_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hdiag_vect_mv diff --git a/gpu/impl/psb_s_hlg_allocate_mnnz.F90 b/gpu/impl/psb_s_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..c7e430f1 --- /dev/null +++ b/gpu/impl/psb_s_hlg_allocate_mnnz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_allocate_mnnz +#else + use psb_s_hlg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(hlldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_hll_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_allocate_mnnz diff --git a/gpu/impl/psb_s_hlg_csmm.F90 b/gpu/impl/psb_s_hlg_csmm.F90 new file mode 100644 index 00000000..126b17e6 --- /dev/null +++ b/gpu/impl/psb_s_hlg_csmm.F90 @@ -0,0 +1,132 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_csmm +#else + use psb_s_hlg_mat_mod +#endif + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hlg_csmv +#else + use psb_s_hlg_mat_mod +#endif + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hlg_from_gpu +#else + use psb_s_hlg_mat_mod +#endif + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + +#ifdef HAVE_SPGPU + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_s_hlg_from_gpu diff --git a/gpu/impl/psb_s_hlg_inner_vect_sv.F90 b/gpu/impl/psb_s_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..d545eb02 --- /dev/null +++ b/gpu/impl/psb_s_hlg_inner_vect_sv.F90 @@ -0,0 +1,81 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_inner_vect_sv +#else + use psb_s_hlg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_base_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_inner_vect_sv diff --git a/gpu/impl/psb_s_hlg_mold.F90 b/gpu/impl/psb_s_hlg_mold.F90 new file mode 100644 index 00000000..c5dc4774 --- /dev/null +++ b/gpu/impl/psb_s_hlg_mold.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_mold(a,b,info) + + use psb_base_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_mold + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_hlg_mold diff --git a/gpu/impl/psb_s_hlg_reallocate_nz.F90 b/gpu/impl/psb_s_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..19cd95df --- /dev/null +++ b/gpu/impl/psb_s_hlg_reallocate_nz.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_reallocate_nz +#else + use psb_s_hlg_mat_mod +#endif + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_s_hll_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_reallocate_nz diff --git a/gpu/impl/psb_s_hlg_scal.F90 b/gpu/impl/psb_s_hlg_scal.F90 new file mode 100644 index 00000000..cd389baa --- /dev/null +++ b/gpu/impl/psb_s_hlg_scal.F90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scal +#else + use psb_s_hlg_mat_mod +#endif + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_scal diff --git a/gpu/impl/psb_s_hlg_scals.F90 b/gpu/impl/psb_s_hlg_scals.F90 new file mode 100644 index 00000000..256fac3e --- /dev/null +++ b/gpu/impl/psb_s_hlg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scals +#else + use psb_s_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_hlg_scals diff --git a/gpu/impl/psb_s_hlg_to_gpu.F90 b/gpu/impl/psb_s_hlg_to_gpu.F90 new file mode 100644 index 00000000..139482c2 --- /dev/null +++ b/gpu/impl/psb_s_hlg_to_gpu.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_to_gpu +#else + use psb_s_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_float,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_s_hlg_to_gpu diff --git a/gpu/impl/psb_s_hlg_vect_mv.F90 b/gpu/impl/psb_s_hlg_vect_mv.F90 new file mode 100644 index 00000000..52f322aa --- /dev/null +++ b/gpu/impl/psb_s_hlg_vect_mv.F90 @@ -0,0 +1,129 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_vect_mv +#else + use psb_s_hlg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hlg_vect_mv diff --git a/gpu/impl/psb_s_hybg_allocate_mnnz.F90 b/gpu/impl/psb_s_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..f2b79c77 --- /dev/null +++ b/gpu/impl/psb_s_hybg_allocate_mnnz.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_allocate_mnnz +#else + use psb_s_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_allocate_mnnz +#endif diff --git a/gpu/impl/psb_s_hybg_csmm.F90 b/gpu/impl/psb_s_hybg_csmm.F90 new file mode 100644 index 00000000..9de67633 --- /dev/null +++ b/gpu/impl/psb_s_hybg_csmm.F90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_csmm +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hybg_csmv +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_hybg_inner_vect_sv +#else + use psb_s_hybg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=szero)) then + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_inner_vect_sv +#endif diff --git a/gpu/impl/psb_s_hybg_mold.F90 b/gpu/impl/psb_s_hybg_mold.F90 new file mode 100644 index 00000000..882990c0 --- /dev/null +++ b/gpu/impl/psb_s_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_mold(a,b,info) + + use psb_base_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_mold + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_mold +#endif diff --git a/gpu/impl/psb_s_hybg_reallocate_nz.F90 b/gpu/impl/psb_s_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..46079a92 --- /dev/null +++ b/gpu/impl/psb_s_hybg_reallocate_nz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_reallocate_nz +#else + use psb_s_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_s_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_reallocate_nz +#endif diff --git a/gpu/impl/psb_s_hybg_scal.F90 b/gpu/impl/psb_s_hybg_scal.F90 new file mode 100644 index 00000000..a55a8b2c --- /dev/null +++ b/gpu/impl/psb_s_hybg_scal.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scal +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_scal +#endif diff --git a/gpu/impl/psb_s_hybg_scals.F90 b/gpu/impl/psb_s_hybg_scals.F90 new file mode 100644 index 00000000..ae92166f --- /dev/null +++ b/gpu/impl/psb_s_hybg_scals.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scals +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_s_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_scals +#endif diff --git a/gpu/impl/psb_s_hybg_to_gpu.F90 b/gpu/impl/psb_s_hybg_to_gpu.F90 new file mode 100644 index 00000000..bfb9b261 --- /dev/null +++ b/gpu/impl/psb_s_hybg_to_gpu.F90 @@ -0,0 +1,154 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_to_gpu +#else + use psb_s_hybg_mat_mod +#endif + implicit none + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_spk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if +#endif + +end subroutine psb_s_hybg_to_gpu +#endif diff --git a/gpu/impl/psb_s_hybg_vect_mv.F90 b/gpu/impl/psb_s_hybg_vect_mv.F90 new file mode 100644 index 00000000..5fe102f6 --- /dev/null +++ b/gpu/impl/psb_s_hybg_vect_mv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_vect_mv +#else + use psb_s_hybg_mat_mod +#endif + use psb_s_gpu_vect_mod + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_gpu) + select type(yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_hybg_vect_mv +#endif diff --git a/gpu/impl/psb_s_mv_csrg_from_coo.F90 b/gpu/impl/psb_s_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..01c9db06 --- /dev/null +++ b/gpu/impl/psb_s_mv_csrg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_coo +#else + use psb_s_csrg_mat_mod +#endif + implicit none + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_csrg_from_coo diff --git a/gpu/impl/psb_s_mv_csrg_from_fmt.F90 b/gpu/impl/psb_s_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..0ac28af3 --- /dev/null +++ b/gpu/impl/psb_s_mv_csrg_from_fmt.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_fmt +#else + use psb_s_csrg_mat_mod +#endif + implicit none + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_s_mv_csrg_from_fmt diff --git a/gpu/impl/psb_s_mv_diag_from_coo.F90 b/gpu/impl/psb_s_mv_diag_from_coo.F90 new file mode 100644 index 00000000..f51607e5 --- /dev/null +++ b/gpu/impl/psb_s_mv_diag_from_coo.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_s_diag_mat_mod, psb_protect_name => psb_s_mv_diag_from_coo +#else + use psb_s_diag_mat_mod +#endif + + implicit none + + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_diag_from_coo diff --git a/gpu/impl/psb_s_mv_elg_from_coo.F90 b/gpu/impl/psb_s_mv_elg_from_coo.F90 new file mode 100644 index 00000000..ac153f6c --- /dev/null +++ b/gpu/impl/psb_s_mv_elg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_coo +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + + +end subroutine psb_s_mv_elg_from_coo diff --git a/gpu/impl/psb_s_mv_elg_from_fmt.F90 b/gpu/impl/psb_s_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..9238544c --- /dev/null +++ b/gpu/impl/psb_s_mv_elg_from_fmt.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_fmt +#else + use psb_s_elg_mat_mod +#endif + implicit none + + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_s_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_elg_from_fmt diff --git a/gpu/impl/psb_s_mv_hdiag_from_coo.F90 b/gpu/impl/psb_s_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..dcbcfe4d --- /dev/null +++ b/gpu/impl/psb_s_mv_hdiag_from_coo.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_mv_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_s_hdiag_mat_mod +#endif + + implicit none + + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hdiag_from_coo diff --git a/gpu/impl/psb_s_mv_hlg_from_coo.F90 b/gpu/impl/psb_s_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..dc72a135 --- /dev/null +++ b/gpu/impl/psb_s_mv_hlg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_coo +#else + use psb_s_hlg_mat_mod +#endif + implicit none + + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_mv_hlg_from_coo diff --git a/gpu/impl/psb_s_mv_hlg_from_fmt.F90 b/gpu/impl/psb_s_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..bbe42e4a --- /dev/null +++ b/gpu/impl/psb_s_mv_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_fmt +#else + use psb_s_hlg_mat_mod +#endif + implicit none + + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_hlg_from_fmt diff --git a/gpu/impl/psb_s_mv_hybg_from_coo.F90 b/gpu/impl/psb_s_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..7d3197a8 --- /dev/null +++ b/gpu/impl/psb_s_mv_hybg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_mv_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_coo +#else + use psb_s_hybg_mat_mod +#endif + implicit none + + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hybg_from_coo +#endif diff --git a/gpu/impl/psb_s_mv_hybg_from_fmt.F90 b/gpu/impl/psb_s_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..51d8a2e6 --- /dev/null +++ b/gpu/impl/psb_s_mv_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_fmt +#else + use psb_s_hybg_mat_mod +#endif + implicit none + + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select +end subroutine psb_s_mv_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_z_cp_csrg_from_coo.F90 b/gpu/impl/psb_z_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..c3b0eebd --- /dev/null +++ b/gpu/impl/psb_z_cp_csrg_from_coo.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cp_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_coo +#else + use psb_z_csrg_mat_mod +#endif + implicit none + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_csrg_from_coo diff --git a/gpu/impl/psb_z_cp_csrg_from_fmt.F90 b/gpu/impl/psb_z_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..218d6c7b --- /dev/null +++ b/gpu/impl/psb_z_cp_csrg_from_fmt.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_fmt +#else + use psb_z_csrg_mat_mod +#endif + !use iso_c_binding + implicit none + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_z_cp_csrg_from_fmt diff --git a/gpu/impl/psb_z_cp_diag_from_coo.F90 b/gpu/impl/psb_z_cp_diag_from_coo.F90 new file mode 100644 index 00000000..013e88cd --- /dev/null +++ b/gpu/impl/psb_z_cp_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_cp_diag_from_coo +#else + use psb_z_diag_mat_mod +#endif + implicit none + + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_z_dia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_diag_from_coo diff --git a/gpu/impl/psb_z_cp_elg_from_coo.F90 b/gpu/impl/psb_z_cp_elg_from_coo.F90 new file mode 100644 index 00000000..c9b61a99 --- /dev/null +++ b/gpu/impl/psb_z_cp_elg_from_coo.F90 @@ -0,0 +1,184 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_coo + use psi_ext_util_mod + use psb_gpu_env_mod +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ +#ifdef HAVE_SPGPU + hacksize = max(1,psb_gpu_WarpSize()) +#else + hacksize = 1 +#endif + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + +#ifdef HAVE_SPGPU + call psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() +#else + + call psi_z_convert_ell_from_coo(a,b,info,hacksize=hacksize) + call a%set_host() +#endif + + else + call b%cp_to_coo(tmp,info) +#ifdef HAVE_SPGPU + call psi_z_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() +#else + + call psi_z_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) + call a%set_host() +#endif + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_z_count_ell_from_coo + +end subroutine psb_z_cp_elg_from_coo diff --git a/gpu/impl/psb_z_cp_elg_from_fmt.F90 b/gpu/impl/psb_z_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..23468b8a --- /dev/null +++ b/gpu/impl/psb_z_cp_elg_from_fmt.F90 @@ -0,0 +1,101 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_fmt +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_z_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cp_elg_from_fmt diff --git a/gpu/impl/psb_z_cp_hdiag_from_coo.F90 b/gpu/impl/psb_z_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..b44c2854 --- /dev/null +++ b/gpu/impl/psb_z_cp_hdiag_from_coo.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_cp_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_z_hdiag_mat_mod +#endif + implicit none + + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hdiag_from_coo diff --git a/gpu/impl/psb_z_cp_hlg_from_coo.F90 b/gpu/impl/psb_z_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..51d0c8e6 --- /dev/null +++ b/gpu/impl/psb_z_cp_hlg_from_coo.F90 @@ -0,0 +1,198 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_coo +#else + use psb_z_hlg_mat_mod +#endif + implicit none + + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() +#ifdef HAVE_SPGPU + hksz = max(1,psb_gpu_WarpSize()) +#else + hksz = psi_get_hksz() +#endif + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_z_cp_hlg_from_coo diff --git a/gpu/impl/psb_z_cp_hlg_from_fmt.F90 b/gpu/impl/psb_z_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..a6dd5970 --- /dev/null +++ b/gpu/impl/psb_z_cp_hlg_from_fmt.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_fmt +#else + use psb_z_hlg_mat_mod +#endif + implicit none + + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_hll_sparse_mat%cp_from_fmt(b,info) +#ifdef HAVE_SPGPU + if (info == 0) call a%to_gpu(info) +#endif + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hlg_from_fmt diff --git a/gpu/impl/psb_z_cp_hybg_from_coo.F90 b/gpu/impl/psb_z_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..ebb6f60a --- /dev/null +++ b/gpu/impl/psb_z_cp_hybg_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cp_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_coo +#else + use psb_z_hybg_mat_mod +#endif + implicit none + + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hybg_from_coo +#endif diff --git a/gpu/impl/psb_z_cp_hybg_from_fmt.F90 b/gpu/impl/psb_z_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..82f2ac65 --- /dev/null +++ b/gpu/impl/psb_z_cp_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_fmt +#else + use psb_z_hybg_mat_mod +#endif + implicit none + + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_z_cp_hybg_from_fmt +#endif diff --git a/gpu/impl/psb_z_csrg_allocate_mnnz.F90 b/gpu/impl/psb_z_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..8cb2ccb1 --- /dev/null +++ b/gpu/impl/psb_z_csrg_allocate_mnnz.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_allocate_mnnz +#else + use psb_z_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_allocate_mnnz diff --git a/gpu/impl/psb_z_csrg_csmm.F90 b/gpu/impl/psb_z_csrg_csmm.F90 new file mode 100644 index 00000000..eb8a4d7f --- /dev/null +++ b/gpu/impl/psb_z_csrg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_csmm +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_csrg_csmv +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_csrg_from_gpu +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() +#endif + +end subroutine psb_z_csrg_from_gpu diff --git a/gpu/impl/psb_z_csrg_inner_vect_sv.F90 b/gpu/impl/psb_z_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..75d6800b --- /dev/null +++ b/gpu/impl/psb_z_csrg_inner_vect_sv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_inner_vect_sv +#else + use psb_z_csrg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_inner_vect_sv diff --git a/gpu/impl/psb_z_csrg_mold.F90 b/gpu/impl/psb_z_csrg_mold.F90 new file mode 100644 index 00000000..e83deb3f --- /dev/null +++ b/gpu/impl/psb_z_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_mold(a,b,info) + + use psb_base_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_mold + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_mold diff --git a/gpu/impl/psb_z_csrg_reallocate_nz.F90 b/gpu/impl/psb_z_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..c2509c22 --- /dev/null +++ b/gpu/impl/psb_z_csrg_reallocate_nz.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_reallocate_nz +#else + use psb_z_csrg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_z_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_reallocate_nz diff --git a/gpu/impl/psb_z_csrg_scal.F90 b/gpu/impl/psb_z_csrg_scal.F90 new file mode 100644 index 00000000..d8ab0ca3 --- /dev/null +++ b/gpu/impl/psb_z_csrg_scal.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scal +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_z_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_scal diff --git a/gpu/impl/psb_z_csrg_scals.F90 b/gpu/impl/psb_z_csrg_scals.F90 new file mode 100644 index 00000000..3d14998d --- /dev/null +++ b/gpu/impl/psb_z_csrg_scals.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scals +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_z_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_csrg_scals diff --git a/gpu/impl/psb_z_csrg_to_gpu.F90 b/gpu/impl/psb_z_csrg_to_gpu.F90 new file mode 100644 index 00000000..4548935d --- /dev/null +++ b/gpu/impl/psb_z_csrg_to_gpu.F90 @@ -0,0 +1,325 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_to_gpu +#else + use psb_z_csrg_mat_mod +#endif + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_dpk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if CUDA_SHORT_VERSION <= 10 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if +#endif + +end subroutine psb_z_csrg_to_gpu diff --git a/gpu/impl/psb_z_csrg_vect_mv.F90 b/gpu/impl/psb_z_csrg_vect_mv.F90 new file mode 100644 index 00000000..0770d448 --- /dev/null +++ b/gpu/impl/psb_z_csrg_vect_mv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_vect_mv +#else + use psb_z_csrg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_csrg_vect_mv diff --git a/gpu/impl/psb_z_diag_csmv.F90 b/gpu/impl/psb_z_diag_csmv.F90 new file mode 100644 index 00000000..667e1a1f --- /dev/null +++ b/gpu/impl/psb_z_diag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_csmv +#else + use psb_z_diag_mat_mod +#endif + implicit none + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_diag_mold + implicit none + class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_diag_mold diff --git a/gpu/impl/psb_z_diag_to_gpu.F90 b/gpu/impl/psb_z_diag_to_gpu.F90 new file mode 100644 index 00000000..40913624 --- /dev/null +++ b/gpu/impl/psb_z_diag_to_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_diag_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_to_gpu +#else + use psb_z_diag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_z_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d +#ifdef HAVE_SPGPU + type(diagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_double) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_double) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_z_diag_to_gpu diff --git a/gpu/impl/psb_z_diag_vect_mv.F90 b/gpu/impl/psb_z_diag_vect_mv.F90 new file mode 100644 index 00000000..b8946491 --- /dev/null +++ b/gpu/impl/psb_z_diag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_vect_mv +#else + use psb_z_diag_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_diag_vect_mv diff --git a/gpu/impl/psb_z_dnsg_mat_impl.F90 b/gpu/impl/psb_z_dnsg_mat_impl.F90 new file mode 100644 index 00000000..407deaa2 --- /dev/null +++ b/gpu/impl/psb_z_dnsg_mat_impl.F90 @@ -0,0 +1,461 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_vect_mv +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + class(psb_z_dnsg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + complex(psb_dpk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='z_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_dnsg_vect_mv + + +subroutine psb_z_dnsg_mold(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_mold +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end interface + + +subroutine psb_z_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_to_gpu +#else + use psb_z_dnsg_mat_mod +#endif + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='z_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_SPGPU + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_complex_double,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_dnsg_to_gpu + + + +subroutine psb_z_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_coo +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_z_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_z_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cp_dnsg_from_coo + +subroutine psb_z_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_fmt +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_z_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cp_dnsg_from_fmt + + + +subroutine psb_z_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_coo +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='z_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_mv_dnsg_from_coo + + +subroutine psb_z_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_z_gpu_vect_mod +#ifdef HAVE_SPGPU + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_fmt +#else + use psb_z_dnsg_mat_mod +#endif + implicit none + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_z_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_z_mv_dnsg_from_fmt diff --git a/gpu/impl/psb_z_elg_allocate_mnnz.F90 b/gpu/impl/psb_z_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..39d14dd2 --- /dev/null +++ b/gpu/impl/psb_z_elg_allocate_mnnz.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_allocate_mnnz +#else + use psb_z_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_double,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize +#else + ld = m +#endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_allocate_mnnz diff --git a/gpu/impl/psb_z_elg_asb.f90 b/gpu/impl/psb_z_elg_asb.f90 new file mode 100644 index 00000000..515f579a --- /dev/null +++ b/gpu/impl/psb_z_elg_asb.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_asb(a) + + use psb_base_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_asb + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_asb diff --git a/gpu/impl/psb_z_elg_csmm.F90 b/gpu/impl/psb_z_elg_csmm.F90 new file mode 100644 index 00000000..aa27419c --- /dev/null +++ b/gpu/impl/psb_z_elg_csmm.F90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csmm +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_elg_csmv +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_elg_csput_a +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_csput_a + + + +subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding +#ifdef HAVE_SPGPU + use elldev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csput_v + use psb_z_gpu_vect_mod +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_gpu) + select type (ja) + class is (psb_i_vect_gpu) + select type (val) + class is (psb_z_vect_gpu) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceDoubleComplex(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_z_elg_csput_v diff --git a/gpu/impl/psb_z_elg_from_gpu.F90 b/gpu/impl/psb_z_elg_from_gpu.F90 new file mode 100644 index 00000000..e8670cd4 --- /dev/null +++ b/gpu/impl/psb_z_elg_from_gpu.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_from_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_from_gpu +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + +#ifdef HAVE_SPGPU + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_z_elg_from_gpu diff --git a/gpu/impl/psb_z_elg_inner_vect_sv.F90 b/gpu/impl/psb_z_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..66d7eed8 --- /dev/null +++ b/gpu/impl/psb_z_elg_inner_vect_sv.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_inner_vect_sv +#else + use psb_z_elg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_elg_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_inner_vect_sv diff --git a/gpu/impl/psb_z_elg_mold.F90 b/gpu/impl/psb_z_elg_mold.F90 new file mode 100644 index 00000000..1a5ebe54 --- /dev/null +++ b/gpu/impl/psb_z_elg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_mold(a,b,info) + + use psb_base_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_mold + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_mold diff --git a/gpu/impl/psb_z_elg_reallocate_nz.F90 b/gpu/impl/psb_z_elg_reallocate_nz.F90 new file mode 100644 index 00000000..f6bc194f --- /dev/null +++ b/gpu/impl/psb_z_elg_reallocate_nz.F90 @@ -0,0 +1,79 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_reallocate_nz +#else + use psb_z_elg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_reallocate_nz diff --git a/gpu/impl/psb_z_elg_scal.F90 b/gpu/impl/psb_z_elg_scal.F90 new file mode 100644 index 00000000..eed9007a --- /dev/null +++ b/gpu/impl/psb_z_elg_scal.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scal +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_scal diff --git a/gpu/impl/psb_z_elg_scals.F90 b/gpu/impl/psb_z_elg_scals.F90 new file mode 100644 index 00000000..1e3f3682 --- /dev/null +++ b/gpu/impl/psb_z_elg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scals +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_scals diff --git a/gpu/impl/psb_z_elg_to_gpu.F90 b/gpu/impl/psb_z_elg_to_gpu.F90 new file mode 100644 index 00000000..71a5ec66 --- /dev/null +++ b/gpu/impl/psb_z_elg_to_gpu.F90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_to_gpu +#else + use psb_z_elg_mat_mod +#endif + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_double,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_double,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_z_elg_to_gpu diff --git a/gpu/impl/psb_z_elg_trim.f90 b/gpu/impl/psb_z_elg_trim.f90 new file mode 100644 index 00000000..9bd43312 --- /dev/null +++ b/gpu/impl/psb_z_elg_trim.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_trim(a) + + use psb_base_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_trim + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_trim diff --git a/gpu/impl/psb_z_elg_vect_mv.F90 b/gpu/impl/psb_z_elg_vect_mv.F90 new file mode 100644 index 00000000..5cd72e44 --- /dev/null +++ b/gpu/impl/psb_z_elg_vect_mv.F90 @@ -0,0 +1,131 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_vect_mv +#else + use psb_z_elg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_elg_vect_mv diff --git a/gpu/impl/psb_z_hdiag_csmv.F90 b/gpu/impl/psb_z_hdiag_csmv.F90 new file mode 100644 index 00000000..baf730a2 --- /dev/null +++ b/gpu/impl/psb_z_hdiag_csmv.F90 @@ -0,0 +1,136 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_csmv +#else + use psb_z_hdiag_mat_mod +#endif + implicit none + class(psb_z_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hdiag_mold + implicit none + class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hdiag_mold diff --git a/gpu/impl/psb_z_hdiag_to_gpu.F90 b/gpu/impl/psb_z_hdiag_to_gpu.F90 new file mode 100644 index 00000000..622a0141 --- /dev/null +++ b/gpu/impl/psb_z_hdiag_to_gpu.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdiag_to_gpu(a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_to_gpu +#else + use psb_z_hdiag_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight +#ifdef HAVE_SPGPU + type(hdiagdev_parms) :: gpu_parms +#endif + + info = 0 + +#ifdef HAVE_SPGPU + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +#endif + +end subroutine psb_z_hdiag_to_gpu diff --git a/gpu/impl/psb_z_hdiag_vect_mv.F90 b/gpu/impl/psb_z_hdiag_vect_mv.F90 new file mode 100644 index 00000000..3e1c859e --- /dev/null +++ b/gpu/impl/psb_z_hdiag_vect_mv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_vect_mv +#else + use psb_z_hdiag_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hdiag_vect_mv diff --git a/gpu/impl/psb_z_hlg_allocate_mnnz.F90 b/gpu/impl/psb_z_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..e3c05ec1 --- /dev/null +++ b/gpu/impl/psb_z_hlg_allocate_mnnz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_allocate_mnnz +#else + use psb_z_hlg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. +#ifdef HAVE_SPGPU + type(hlldev_parms) :: gpu_parms +#endif + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_hll_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_allocate_mnnz diff --git a/gpu/impl/psb_z_hlg_csmm.F90 b/gpu/impl/psb_z_hlg_csmm.F90 new file mode 100644 index 00000000..3432c177 --- /dev/null +++ b/gpu/impl/psb_z_hlg_csmm.F90 @@ -0,0 +1,132 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_csmm +#else + use psb_z_hlg_mat_mod +#endif + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hlg_csmv +#else + use psb_z_hlg_mat_mod +#endif + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hlg_from_gpu +#else + use psb_z_hlg_mat_mod +#endif + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + +#ifdef HAVE_SPGPU + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() +#endif + +end subroutine psb_z_hlg_from_gpu diff --git a/gpu/impl/psb_z_hlg_inner_vect_sv.F90 b/gpu/impl/psb_z_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..5a7b1031 --- /dev/null +++ b/gpu/impl/psb_z_hlg_inner_vect_sv.F90 @@ -0,0 +1,81 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_inner_vect_sv +#else + use psb_z_hlg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_base_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_inner_vect_sv diff --git a/gpu/impl/psb_z_hlg_mold.F90 b/gpu/impl/psb_z_hlg_mold.F90 new file mode 100644 index 00000000..f9ff0c7a --- /dev/null +++ b/gpu/impl/psb_z_hlg_mold.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_mold(a,b,info) + + use psb_base_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_mold + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_hlg_mold diff --git a/gpu/impl/psb_z_hlg_reallocate_nz.F90 b/gpu/impl/psb_z_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..f3d50626 --- /dev/null +++ b/gpu/impl/psb_z_hlg_reallocate_nz.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_reallocate_nz +#else + use psb_z_hlg_mat_mod +#endif + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_z_hll_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_reallocate_nz diff --git a/gpu/impl/psb_z_hlg_scal.F90 b/gpu/impl/psb_z_hlg_scal.F90 new file mode 100644 index 00000000..8aa85500 --- /dev/null +++ b/gpu/impl/psb_z_hlg_scal.F90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scal +#else + use psb_z_hlg_mat_mod +#endif + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_scal diff --git a/gpu/impl/psb_z_hlg_scals.F90 b/gpu/impl/psb_z_hlg_scals.F90 new file mode 100644 index 00000000..d5689c06 --- /dev/null +++ b/gpu/impl/psb_z_hlg_scals.F90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scals +#else + use psb_z_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_hlg_scals diff --git a/gpu/impl/psb_z_hlg_to_gpu.F90 b/gpu/impl/psb_z_hlg_to_gpu.F90 new file mode 100644 index 00000000..d63aee9c --- /dev/null +++ b/gpu/impl/psb_z_hlg_to_gpu.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_to_gpu +#else + use psb_z_hlg_mat_mod +#endif + use iso_c_binding + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_double,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 +#endif + +end subroutine psb_z_hlg_to_gpu diff --git a/gpu/impl/psb_z_hlg_vect_mv.F90 b/gpu/impl/psb_z_hlg_vect_mv.F90 new file mode 100644 index 00000000..9efefc0a --- /dev/null +++ b/gpu/impl/psb_z_hlg_vect_mv.F90 @@ -0,0 +1,129 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_vect_mv +#else + use psb_z_hlg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if +#else + call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hlg_vect_mv diff --git a/gpu/impl/psb_z_hybg_allocate_mnnz.F90 b/gpu/impl/psb_z_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..2c38c536 --- /dev/null +++ b/gpu/impl/psb_z_hybg_allocate_mnnz.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_allocate_mnnz +#else + use psb_z_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m,n,nz) + +#ifdef HAVE_SPGPU + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_allocate_mnnz +#endif diff --git a/gpu/impl/psb_z_hybg_csmm.F90 b/gpu/impl/psb_z_hybg_csmm.F90 new file mode 100644 index 00000000..5ec9701b --- /dev/null +++ b/gpu/impl/psb_z_hybg_csmm.F90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_csmm +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hybg_csmv +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_hybg_inner_vect_sv +#else + use psb_z_hybg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + +#ifdef HAVE_SPGPU + if (tra.or.(beta/=zzero)) then + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() +#endif + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_inner_vect_sv +#endif diff --git a/gpu/impl/psb_z_hybg_mold.F90 b/gpu/impl/psb_z_hybg_mold.F90 new file mode 100644 index 00000000..3a17dbd2 --- /dev/null +++ b/gpu/impl/psb_z_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_mold(a,b,info) + + use psb_base_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_mold + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_mold +#endif diff --git a/gpu/impl/psb_z_hybg_reallocate_nz.F90 b/gpu/impl/psb_z_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..79d81911 --- /dev/null +++ b/gpu/impl/psb_z_hybg_reallocate_nz.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_reallocate_nz(nz,a) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_reallocate_nz +#else + use psb_z_hybg_mat_mod +#endif + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_z_csr_sparse_mat%reallocate(nz) + +#ifdef HAVE_SPGPU + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_reallocate_nz +#endif diff --git a/gpu/impl/psb_z_hybg_scal.F90 b/gpu/impl/psb_z_hybg_scal.F90 new file mode 100644 index 00000000..c8179bf2 --- /dev/null +++ b/gpu/impl/psb_z_hybg_scal.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_scal(d,a,info,side) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scal +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_scal +#endif diff --git a/gpu/impl/psb_z_hybg_scals.F90 b/gpu/impl/psb_z_hybg_scals.F90 new file mode 100644 index 00000000..3729412d --- /dev/null +++ b/gpu/impl/psb_z_hybg_scals.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_scals(d,a,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scals +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_z_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_scals +#endif diff --git a/gpu/impl/psb_z_hybg_to_gpu.F90 b/gpu/impl/psb_z_hybg_to_gpu.F90 new file mode 100644 index 00000000..4a2a9b1c --- /dev/null +++ b/gpu/impl/psb_z_hybg_to_gpu.F90 @@ -0,0 +1,154 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_to_gpu +#else + use psb_z_hybg_mat_mod +#endif + implicit none + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_dpk_), allocatable :: valdi(:) + + info = 0 + +#ifdef HAVE_SPGPU + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if +#endif + +end subroutine psb_z_hybg_to_gpu +#endif diff --git a/gpu/impl/psb_z_hybg_vect_mv.F90 b/gpu/impl/psb_z_hybg_vect_mv.F90 new file mode 100644 index 00000000..f3b6695e --- /dev/null +++ b/gpu/impl/psb_z_hybg_vect_mv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_vect_mv +#else + use psb_z_hybg_mat_mod +#endif + use psb_z_gpu_vect_mod + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + +#ifdef HAVE_SPGPU + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_gpu) + select type(yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if +#else + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) +#endif + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_hybg_vect_mv +#endif diff --git a/gpu/impl/psb_z_mv_csrg_from_coo.F90 b/gpu/impl/psb_z_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..21771b89 --- /dev/null +++ b/gpu/impl/psb_z_mv_csrg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_csrg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_coo +#else + use psb_z_csrg_mat_mod +#endif + implicit none + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_csrg_from_coo diff --git a/gpu/impl/psb_z_mv_csrg_from_fmt.F90 b/gpu/impl/psb_z_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..31408214 --- /dev/null +++ b/gpu/impl/psb_z_mv_csrg_from_fmt.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_fmt +#else + use psb_z_csrg_mat_mod +#endif + implicit none + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select + +end subroutine psb_z_mv_csrg_from_fmt diff --git a/gpu/impl/psb_z_mv_diag_from_coo.F90 b/gpu/impl/psb_z_mv_diag_from_coo.F90 new file mode 100644 index 00000000..8872c890 --- /dev/null +++ b/gpu/impl/psb_z_mv_diag_from_coo.F90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_diag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use diagdev_mod + use psb_vectordev_mod + use psb_z_diag_mat_mod, psb_protect_name => psb_z_mv_diag_from_coo +#else + use psb_z_diag_mat_mod +#endif + + implicit none + + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_diag_from_coo diff --git a/gpu/impl/psb_z_mv_elg_from_coo.F90 b/gpu/impl/psb_z_mv_elg_from_coo.F90 new file mode 100644 index 00000000..2d78edc6 --- /dev/null +++ b/gpu/impl/psb_z_mv_elg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_elg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_coo +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + + +end subroutine psb_z_mv_elg_from_coo diff --git a/gpu/impl/psb_z_mv_elg_from_fmt.F90 b/gpu/impl/psb_z_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..3bf663b3 --- /dev/null +++ b/gpu/impl/psb_z_mv_elg_from_fmt.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_elg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use elldev_mod + use psb_vectordev_mod + use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_fmt +#else + use psb_z_elg_mat_mod +#endif + implicit none + + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m +#ifdef HAVE_SPGPU + type(elldev_parms) :: gpu_parms +#endif + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_z_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() +#ifdef HAVE_SPGPU + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize +#else + ld = m +#endif + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_elg_from_fmt diff --git a/gpu/impl/psb_z_mv_hdiag_from_coo.F90 b/gpu/impl/psb_z_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..e1df9cc4 --- /dev/null +++ b/gpu/impl/psb_z_mv_hdiag_from_coo.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_mv_hdiag_from_coo + use psb_gpu_env_mod +#else + use psb_z_hdiag_mat_mod +#endif + + implicit none + + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + +#ifdef HAVE_SPGPU + a%hacksize = psb_gpu_WarpSize() +#endif + + call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info) + +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hdiag_from_coo diff --git a/gpu/impl/psb_z_mv_hlg_from_coo.F90 b/gpu/impl/psb_z_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..ce037be2 --- /dev/null +++ b/gpu/impl/psb_z_mv_hlg_from_coo.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hlg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_gpu_env_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_coo +#else + use psb_z_hlg_mat_mod +#endif + implicit none + + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_mv_hlg_from_coo diff --git a/gpu/impl/psb_z_mv_hlg_from_fmt.F90 b/gpu/impl/psb_z_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..4ea1b385 --- /dev/null +++ b/gpu/impl/psb_z_mv_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use hlldev_mod + use psb_vectordev_mod + use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_fmt +#else + use psb_z_hlg_mat_mod +#endif + implicit none + + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_hlg_from_fmt diff --git a/gpu/impl/psb_z_mv_hybg_from_coo.F90 b/gpu/impl/psb_z_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..3424caea --- /dev/null +++ b/gpu/impl/psb_z_mv_hybg_from_coo.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_mv_hybg_from_coo(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_coo +#else + use psb_z_hybg_mat_mod +#endif + implicit none + + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 +#ifdef HAVE_SPGPU + call a%to_gpu(info) + if (info /= 0) goto 9999 +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hybg_from_coo +#endif diff --git a/gpu/impl/psb_z_mv_hybg_from_fmt.F90 b/gpu/impl/psb_z_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..90c35897 --- /dev/null +++ b/gpu/impl/psb_z_mv_hybg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod +#ifdef HAVE_SPGPU + use cusparse_mod + use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_fmt +#else + use psb_z_hybg_mat_mod +#endif + implicit none + + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return +#ifdef HAVE_SPGPU + call a%to_gpu(info) +#endif + end select +end subroutine psb_z_mv_hybg_from_fmt +#endif diff --git a/gpu/ivectordev.c b/gpu/ivectordev.c new file mode 100644 index 00000000..93636465 --- /dev/null +++ b/gpu/ivectordev.c @@ -0,0 +1,182 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "ivectordev.h" + + +int registerMappedInt(void *buff, void **d_p, int n, int dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(int)); +} + +int writeMultiVecDeviceInt(void* deviceVec, int* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(int)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + + return(i); +} + +int writeMultiVecDeviceIntR2(void* deviceVec, int* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceInt(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceIntR2",i); + } + return(i); +} + +int readMultiVecDeviceInt(void* deviceVec, int* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(int)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceInt",i); + } + return(i); +} + +int readMultiVecDeviceIntR2(void* deviceVec, int* hostVec, int ld) +{ int i; + i = readMultiVecDeviceInt(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceIntR2",i); + } + return(i); +} + + +int setscalMultiVecDeviceInt(int val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + int beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0; + else if (dupl == INS_ADD) + beta = 1; + else + beta = 0; + + spgpuIscat(handle, (int *) devVecX->v_, n, (int *)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceInt(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + int *hv = (int *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuIgath(handle,hv, n, idx,indexBase, (int *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, int beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceInt(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, int beta) +{ int i=0; + int *hv = (int *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuIscat(handle, (int *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + +#endif + diff --git a/gpu/ivectordev.h b/gpu/ivectordev.h new file mode 100644 index 00000000..5f7ca974 --- /dev/null +++ b/gpu/ivectordev.h @@ -0,0 +1,64 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedInt(void *, void **, int, int); +int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); +int writeMultiVecDeviceIntR2(void* deviceMultiVec, int* hostMultiVec, int ld); +int readMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); +int readMultiVecDeviceIntR2(void* deviceMultiVec, int* hostMultiVec, int ld); + +int setscalMultiVecDeviceInt(int val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceInt(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, int beta); +int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, int beta); + +#endif diff --git a/gpu/psb_base_vectordev_mod.F90 b/gpu/psb_base_vectordev_mod.F90 new file mode 100644 index 00000000..f8c303d0 --- /dev/null +++ b/gpu/psb_base_vectordev_mod.F90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_base_vectordev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: multivec_dev_parms + integer(c_int) :: count + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: size + end type multivec_dev_parms + +#ifdef HAVE_SPGPU + + + interface + function FallocMultiVecDevice(deviceVec,count,Size,elementType) & + & result(res) bind(c,name='FallocMultiVecDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: count,Size,elementType + type(c_ptr) :: deviceVec + end function FallocMultiVecDevice + end interface + + + interface + subroutine unregisterMapped(buf) & + & bind(c,name='unregisterMapped') + use iso_c_binding + type(c_ptr), value :: buf + end subroutine unregisterMapped + end interface + + interface + subroutine freeMultiVecDevice(deviceVec) & + & bind(c,name='freeMultiVecDevice') + use iso_c_binding + type(c_ptr), value :: deviceVec + end subroutine freeMultiVecDevice + end interface + + interface + function getMultiVecDeviceSize(deviceVec) & + & bind(c,name='getMultiVecDeviceSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDeviceSize + end interface + + interface + function getMultiVecDeviceCount(deviceVec) & + & bind(c,name='getMultiVecDeviceCount') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDeviceCount + end interface + + interface + function getMultiVecDevicePitch(deviceVec) & + & bind(c,name='getMultiVecDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDevicePitch + end interface + +#endif + + +end module psb_base_vectordev_mod diff --git a/gpu/psb_c_csrg_mat_mod.F90 b/gpu/psb_c_csrg_mat_mod.F90 new file mode 100644 index 00000000..203a6dbf --- /dev/null +++ b/gpu/psb_c_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_csrg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_csr_sparse_mat) :: psb_c_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(c_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_csrg_get_fmt + procedure, pass(a) :: sizeof => c_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_c_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_csrg_csmm + procedure, pass(a) :: csmv => psb_c_csrg_csmv + procedure, pass(a) :: scals => psb_c_csrg_scals + procedure, pass(a) :: scalv => psb_c_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_c_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_csrg_from_fmt + procedure, pass(a) :: free => c_csrg_free + procedure, pass(a) :: mold => psb_c_csrg_mold + procedure, pass(a) :: is_host => c_csrg_is_host + procedure, pass(a) :: is_dev => c_csrg_is_dev + procedure, pass(a) :: is_sync => c_csrg_is_sync + procedure, pass(a) :: set_host => c_csrg_set_host + procedure, pass(a) :: set_dev => c_csrg_set_dev + procedure, pass(a) :: set_sync => c_csrg_set_sync + procedure, pass(a) :: sync => c_csrg_sync + procedure, pass(a) :: to_gpu => psb_c_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_c_csrg_from_gpu + final :: c_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_csrg_mold +#endif + end type psb_c_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_csrg_get_nzeros, c_csrg_free, c_csrg_get_fmt, & + & c_csrg_get_size, c_csrg_sizeof, c_csrg_get_nz_row + + + interface + subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrg_vect_mv + end interface + + interface + subroutine psb_c_csrg_reallocate_nz(nz,a) + import :: psb_c_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_c_csrg_reallocate_nz + end interface + + interface + subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_c_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_csrg_allocate_mnnz + end interface + + interface + subroutine psb_c_csrg_mold(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrg_mold + end interface + + interface + subroutine psb_c_csrg_to_gpu(a,info, nzrm) + import :: psb_c_csrg_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_csrg_to_gpu + end interface + + interface + subroutine psb_c_csrg_from_gpu(a,info) + import :: psb_c_csrg_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrg_from_gpu + end interface + + interface + subroutine psb_c_cp_csrg_from_coo(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_csrg_from_coo + end interface + + interface + subroutine psb_c_cp_csrg_from_fmt(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_csrg_from_fmt + end interface + + interface + subroutine psb_c_mv_csrg_from_coo(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_csrg_from_coo + end interface + + interface + subroutine psb_c_mv_csrg_from_fmt(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_csrg_from_fmt + end interface + + interface + subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrg_csmv + end interface + interface + subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_csrg_csmm + end interface + + interface + subroutine psb_c_csrg_scal(d,a,info,side) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_csrg_scal + end interface + + interface + subroutine psb_c_csrg_scals(d,a,info) + import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_csrg_sizeof(a) result(res) + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_csrg_sizeof + + function c_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function c_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine c_csrg_set_host(a) + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_csrg_set_host + + subroutine c_csrg_set_dev(a) + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_csrg_set_dev + + subroutine c_csrg_set_sync(a) + implicit none + class(psb_c_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_csrg_set_sync + + function c_csrg_is_dev(a) result(res) + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_csrg_is_dev + + function c_csrg_is_host(a) result(res) + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_csrg_is_host + + function c_csrg_is_sync(a) result(res) + implicit none + class(psb_c_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_csrg_is_sync + + + subroutine c_csrg_sync(a) + implicit none + class(psb_c_csrg_sparse_mat), target, intent(in) :: a + class(psb_c_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_csrg_sync + + subroutine c_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_csrg_free + + subroutine c_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_c_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine c_csrg_finalize + +#else + interface + subroutine psb_c_csrg_mold(a,b,info) + import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_csrg_mold + end interface + +#endif + +end module psb_c_csrg_mat_mod diff --git a/gpu/psb_c_diag_mat_mod.F90 b/gpu/psb_c_diag_mat_mod.F90 new file mode 100644 index 00000000..a7ab2fbb --- /dev/null +++ b/gpu/psb_c_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_c_dia_mat_mod + + type, extends(psb_c_dia_sparse_mat) :: psb_c_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_diag_get_fmt + procedure, pass(a) :: sizeof => c_diag_sizeof + procedure, pass(a) :: vect_mv => psb_c_diag_vect_mv +! procedure, pass(a) :: csmm => psb_c_diag_csmm + procedure, pass(a) :: csmv => psb_c_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_c_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_c_diag_scals +! procedure, pass(a) :: scalv => psb_c_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_c_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_c_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_c_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_c_mv_diag_from_fmt + procedure, pass(a) :: free => c_diag_free + procedure, pass(a) :: mold => psb_c_diag_mold + procedure, pass(a) :: to_gpu => psb_c_diag_to_gpu + final :: c_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_diag_mold +#endif + end type psb_c_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_diag_get_nzeros, c_diag_free, c_diag_get_fmt, & + & c_diag_get_size, c_diag_sizeof, c_diag_get_nz_row + + + interface + subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_diag_vect_mv + end interface + + interface + subroutine psb_c_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_diag_inner_vect_sv + end interface + + interface + subroutine psb_c_diag_reallocate_nz(nz,a) + import :: psb_c_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_diag_sparse_mat), intent(inout) :: a + end subroutine psb_c_diag_reallocate_nz + end interface + + interface + subroutine psb_c_diag_allocate_mnnz(m,n,a,nz) + import :: psb_c_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_diag_allocate_mnnz + end interface + + interface + subroutine psb_c_diag_mold(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_diag_mold + end interface + + interface + subroutine psb_c_diag_to_gpu(a,info, nzrm) + import :: psb_c_diag_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_diag_to_gpu + end interface + + interface + subroutine psb_c_cp_diag_from_coo(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_diag_from_coo + end interface + + interface + subroutine psb_c_cp_diag_from_fmt(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_diag_from_fmt + end interface + + interface + subroutine psb_c_mv_diag_from_coo(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_diag_from_coo + end interface + + + interface + subroutine psb_c_mv_diag_from_fmt(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_diag_from_fmt + end interface + + interface + subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_diag_csmv + end interface + interface + subroutine psb_c_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_diag_csmm + end interface + + interface + subroutine psb_c_diag_scal(d,a,info, side) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_diag_scal + end interface + + interface + subroutine psb_c_diag_scals(d,a,info) + import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_diag_sizeof(a) result(res) + implicit none + class(psb_c_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_diag_sizeof + + function c_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function c_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_dia_sparse_mat%free() + + return + + end subroutine c_diag_free + + subroutine c_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_c_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_diag_finalize + +#else + + interface + subroutine psb_c_diag_mold(a,b,info) + import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_diag_mold + end interface + +#endif + +end module psb_c_diag_mat_mod diff --git a/gpu/psb_c_dnsg_mat_mod.F90 b/gpu/psb_c_dnsg_mat_mod.F90 new file mode 100644 index 00000000..7fe5fdda --- /dev/null +++ b/gpu/psb_c_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_dnsg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_dns_mat_mod + use dnsdev_mod + + type, extends(psb_c_dns_sparse_mat) :: psb_c_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => c_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_c_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_c_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_c_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_c_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_c_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_c_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_c_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_c_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_c_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_dnsg_from_fmt + procedure, pass(a) :: free => c_dnsg_free + procedure, pass(a) :: mold => psb_c_dnsg_mold + procedure, pass(a) :: to_gpu => psb_c_dnsg_to_gpu + final :: c_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_dnsg_mold +#endif + end type psb_c_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_dnsg_get_nzeros, c_dnsg_free, c_dnsg_get_fmt, & + & c_dnsg_get_size, c_dnsg_get_nz_row + + + interface + subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_c_dnsg_mold(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dnsg_mold + end interface + + interface + subroutine psb_c_dnsg_to_gpu(a,info) + import :: psb_c_dnsg_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dnsg_to_gpu + end interface + + interface + subroutine psb_c_cp_dnsg_from_coo(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dnsg_from_coo + end interface + + interface + subroutine psb_c_cp_dnsg_from_fmt(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_c_mv_dnsg_from_coo(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_c_mv_dnsg_from_fmt(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_c_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_c_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_scal(d,a,info, side) +!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_dnsg_scals(d,a,info) +!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function c_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function c_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_dns_sparse_mat%free() + + return + + end subroutine c_dnsg_free + + subroutine c_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_c_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_dnsg_finalize + +#else + + interface + subroutine psb_c_dnsg_mold(a,b,info) + import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dnsg_mold + end interface + +#endif + +end module psb_c_dnsg_mat_mod diff --git a/gpu/psb_c_elg_mat_mod.F90 b/gpu/psb_c_elg_mat_mod.F90 new file mode 100644 index 00000000..83355b9d --- /dev/null +++ b/gpu/psb_c_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_elg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_ell_sparse_mat) :: psb_c_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_elg_get_fmt + procedure, pass(a) :: sizeof => c_elg_sizeof + procedure, pass(a) :: vect_mv => psb_c_elg_vect_mv + procedure, pass(a) :: csmm => psb_c_elg_csmm + procedure, pass(a) :: csmv => psb_c_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_elg_scals + procedure, pass(a) :: scalv => psb_c_elg_scal + procedure, pass(a) :: reallocate_nz => psb_c_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_elg_allocate_mnnz + procedure, pass(a) :: reinit => c_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_elg_from_fmt + procedure, pass(a) :: free => c_elg_free + procedure, pass(a) :: mold => psb_c_elg_mold + procedure, pass(a) :: csput_a => psb_c_elg_csput_a + procedure, pass(a) :: csput_v => psb_c_elg_csput_v + procedure, pass(a) :: is_host => c_elg_is_host + procedure, pass(a) :: is_dev => c_elg_is_dev + procedure, pass(a) :: is_sync => c_elg_is_sync + procedure, pass(a) :: set_host => c_elg_set_host + procedure, pass(a) :: set_dev => c_elg_set_dev + procedure, pass(a) :: set_sync => c_elg_set_sync + procedure, pass(a) :: sync => c_elg_sync + procedure, pass(a) :: from_gpu => psb_c_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_elg_to_gpu + procedure, pass(a) :: asb => psb_c_elg_asb + final :: c_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_elg_mold + procedure, pass(a) :: asb => psb_c_elg_asb +#endif + end type psb_c_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_elg_get_nzeros, c_elg_free, c_elg_get_fmt, & + & c_elg_get_size, c_elg_sizeof, c_elg_get_nz_row, c_elg_sync + + + interface + subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_elg_vect_mv + end interface + + interface + subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_elg_inner_vect_sv + end interface + + interface + subroutine psb_c_elg_reallocate_nz(nz,a) + import :: psb_c_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_elg_reallocate_nz + end interface + + interface + subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) + import :: psb_c_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_elg_allocate_mnnz + end interface + + interface + subroutine psb_c_elg_mold(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_mold + end interface + + interface + subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_csput_a + end interface + + interface + subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& + & psb_i_base_vect_type + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_csput_v + end interface + + interface + subroutine psb_c_elg_from_gpu(a,info) + import :: psb_c_elg_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_from_gpu + end interface + + interface + subroutine psb_c_elg_to_gpu(a,info, nzrm) + import :: psb_c_elg_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_elg_to_gpu + end interface + + interface + subroutine psb_c_cp_elg_from_coo(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_elg_from_coo + end interface + + interface + subroutine psb_c_cp_elg_from_fmt(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_elg_from_fmt + end interface + + interface + subroutine psb_c_mv_elg_from_coo(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_elg_from_coo + end interface + + + interface + subroutine psb_c_mv_elg_from_fmt(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_elg_from_fmt + end interface + + interface + subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_elg_csmv + end interface + interface + subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_elg_csmm + end interface + + interface + subroutine psb_c_elg_scal(d,a,info, side) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_elg_scal + end interface + + interface + subroutine psb_c_elg_scals(d,a,info) + import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_scals + end interface + + interface + subroutine psb_c_elg_asb(a) + import :: psb_c_elg_sparse_mat + class(psb_c_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_elg_sizeof(a) result(res) + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_elg_sizeof + + function c_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function c_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine c_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = czero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine c_elg_reinit + + subroutine c_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine c_elg_free + + subroutine c_elg_sync(a) + implicit none + class(psb_c_elg_sparse_mat), target, intent(in) :: a + class(psb_c_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_elg_sync + + subroutine c_elg_set_host(a) + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_elg_set_host + + subroutine c_elg_set_dev(a) + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_elg_set_dev + + subroutine c_elg_set_sync(a) + implicit none + class(psb_c_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_elg_set_sync + + function c_elg_is_dev(a) result(res) + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_elg_is_dev + + function c_elg_is_host(a) result(res) + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_elg_is_host + + function c_elg_is_sync(a) result(res) + implicit none + class(psb_c_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_elg_is_sync + + subroutine c_elg_finalize(a) + use elldev_mod + implicit none + type(psb_c_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine c_elg_finalize + +#else + + interface + subroutine psb_c_elg_asb(a) + import :: psb_c_elg_sparse_mat + class(psb_c_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_elg_asb + end interface + + interface + subroutine psb_c_elg_mold(a,b,info) + import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_elg_mold + end interface + +#endif + +end module psb_c_elg_mat_mod diff --git a/gpu/psb_c_gpu_vect_mod.F90 b/gpu/psb_c_gpu_vect_mod.F90 new file mode 100644 index 00000000..4c31154f --- /dev/null +++ b/gpu/psb_c_gpu_vect_mod.F90 @@ -0,0 +1,1989 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_vect_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_gpu_vect_mod + use psb_i_vectordev_mod + use psb_c_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_vect_type) :: psb_c_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + complex(c_float_complex), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + complex(c_float_complex), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => c_gpu_get_nrows + procedure, nopass :: get_fmt => c_gpu_get_fmt + + procedure, pass(x) :: all => c_gpu_all + procedure, pass(x) :: zero => c_gpu_zero + procedure, pass(x) :: asb_m => c_gpu_asb_m + procedure, pass(x) :: sync => c_gpu_sync + procedure, pass(x) :: sync_space => c_gpu_sync_space + procedure, pass(x) :: bld_x => c_gpu_bld_x + procedure, pass(x) :: bld_mn => c_gpu_bld_mn + procedure, pass(x) :: free => c_gpu_free + procedure, pass(x) :: ins_a => c_gpu_ins_a + procedure, pass(x) :: ins_v => c_gpu_ins_v + procedure, pass(x) :: is_host => c_gpu_is_host + procedure, pass(x) :: is_dev => c_gpu_is_dev + procedure, pass(x) :: is_sync => c_gpu_is_sync + procedure, pass(x) :: set_host => c_gpu_set_host + procedure, pass(x) :: set_dev => c_gpu_set_dev + procedure, pass(x) :: set_sync => c_gpu_set_sync + procedure, pass(x) :: set_scal => c_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => c_gpu_set_vect + procedure, pass(x) :: gthzv_x => c_gpu_gthzv_x + procedure, pass(y) :: sctb => c_gpu_sctb + procedure, pass(y) :: sctb_x => c_gpu_sctb_x + procedure, pass(x) :: gthzbuf => c_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => c_gpu_sctb_buf + procedure, pass(x) :: new_buffer => c_gpu_new_buffer + procedure, nopass :: device_wait => c_gpu_device_wait + procedure, pass(x) :: free_buffer => c_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => c_gpu_maybe_free_buffer + procedure, pass(x) :: dot_v => c_gpu_dot_v + procedure, pass(x) :: dot_a => c_gpu_dot_a + procedure, pass(y) :: axpby_v => c_gpu_axpby_v + procedure, pass(y) :: axpby_a => c_gpu_axpby_a + procedure, pass(y) :: mlt_v => c_gpu_mlt_v + procedure, pass(y) :: mlt_a => c_gpu_mlt_a + procedure, pass(z) :: mlt_a_2 => c_gpu_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => c_gpu_mlt_v_2 + procedure, pass(x) :: scal => c_gpu_scal + procedure, pass(x) :: nrm2 => c_gpu_nrm2 + procedure, pass(x) :: amax => c_gpu_amax + procedure, pass(x) :: asum => c_gpu_asum + procedure, pass(x) :: absval1 => c_gpu_absval1 + procedure, pass(x) :: absval2 => c_gpu_absval2 + + final :: c_gpu_vect_finalize +#endif + end type psb_c_vect_gpu + + public :: psb_c_vect_gpu_ + private :: constructor + interface psb_c_vect_gpu_ + module procedure constructor + end interface psb_c_vect_gpu_ + +contains + + function constructor(x) result(this) + complex(psb_spk_) :: x(:) + type(psb_c_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine c_gpu_device_wait() + call psb_cudaSync() + end subroutine c_gpu_device_wait + + subroutine c_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloatComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloatComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceFloatComplex(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(x%dt_buf,y,n) + + end select + + end subroutine c_gpu_gthzv_x + + subroutine c_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_c_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceFloatComplex(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readFloatComplex(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine c_gpu_gthzbuf + + subroutine c_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + complex(psb_spk_) :: beta, x(:) + class(psb_c_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_c_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine c_gpu_sctb + + subroutine c_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_spk_) :: beta, x(:) + class(psb_c_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloatComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeFloatComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloatComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeFloatComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatComplex(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine c_gpu_sctb_x + + subroutine c_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_spk_) :: beta + class(psb_c_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeFloatComplex(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeFloatComplex(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceFloatComplex(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine c_gpu_sctb_buf + + + subroutine c_gpu_bld_x(x,this) + use psb_base_mod + complex(psb_spk_), intent(in) :: this(:) + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine c_gpu_bld_x + + subroutine c_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'c_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine c_gpu_bld_mn + + subroutine c_gpu_set_host(x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine c_gpu_set_host + + subroutine c_gpu_set_dev(x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine c_gpu_set_dev + + subroutine c_gpu_set_sync(x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine c_gpu_set_sync + + function c_gpu_is_dev(x) result(res) + implicit none + class(psb_c_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function c_gpu_is_dev + + function c_gpu_is_host(x) result(res) + implicit none + class(psb_c_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function c_gpu_is_host + + function c_gpu_is_sync(x) result(res) + implicit none + class(psb_c_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function c_gpu_is_sync + + + function c_gpu_get_nrows(x) result(res) + implicit none + class(psb_c_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function c_gpu_get_nrows + + function c_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'cGPU' + end function c_gpu_get_fmt + + subroutine c_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_c_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine c_gpu_all + + subroutine c_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=czero + call x%set_host() + end subroutine c_gpu_zero + + subroutine c_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_c_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'c_gpu_sync') + end if + + end subroutine c_gpu_sync + + subroutine c_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine c_gpu_free + + subroutine c_gpu_set_scal(x,val,first,last) + class(psb_c_vect_gpu), intent(inout) :: x + complex(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine c_gpu_set_scal +!!$ +!!$ subroutine c_gpu_set_vect(x,val) +!!$ class(psb_c_vect_gpu), intent(inout) :: x +!!$ complex(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_c_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine c_gpu_set_vect + + + + function c_gpu_dot_v(n,x,y) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + complex(psb_spk_), external :: ddot + integer(psb_ipk_) :: info + + res = czero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_c_vect + ! + select type(yy => y) + type is (psb_c_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_c_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'c_gpu_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function c_gpu_dot_v + + function c_gpu_dot_a(n,x,y) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + complex(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + complex(psb_spk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function c_gpu_dot_a + + subroutine c_gpu_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_vect_gpu), intent(inout) :: y + complex(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_c_vect_gpu) + ! Do something different here + if ((beta /= czero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + type is (psb_c_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_c_vect_gpu) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,cone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine c_gpu_mlt_v + + subroutine c_gpu_mlt_a(x, y, info) + use psi_serial_mod + implicit none + complex(psb_spk_), intent(in) :: x(:) + class(psb_c_vect_gpu), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_c_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine c_gpu_mlt_a + + subroutine c_gpu_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + complex(psb_spk_), intent(in) :: alpha,beta + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(in) :: y(:) + class(psb_c_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine c_gpu_mlt_a_2 + + subroutine c_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + complex(psb_spk_), intent(in) :: alpha,beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_c_vect_gpu) + select type (yy => y) + type is (psb_c_vect_gpu) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= czero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= czero).and.(z%is_dev())) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= czero).and.(z%is_dev())) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine c_gpu_mlt_v_2 + + subroutine c_gpu_scal(alpha, x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + complex(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine c_gpu_scal + + + function c_gpu_nrm2(n,x) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_gpu_nrm2 + + function c_gpu_amax(n,x) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_gpu_amax + + function c_gpu_asum(n,x) result(res) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_gpu_asum + + subroutine c_gpu_absval1(x) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,cone,x%deviceVect) + + end subroutine c_gpu_absval1 + + subroutine c_gpu_absval2(x,y) + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_c_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,cone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_c_base_vect_type%absval(y) + end select + end subroutine c_gpu_absval2 + + + subroutine c_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine c_gpu_vect_finalize + + subroutine c_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_c_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_c_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceFloatComplex(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine c_gpu_ins_v + + subroutine c_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_c_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_c_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine c_gpu_ins_a + +#endif + +end module psb_c_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_c_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_multivect_mod + use psb_c_base_multivect_mod + + use psb_i_multivect_mod +#ifdef HAVE_SPGPU + use psb_i_gpu_multivect_mod + use psb_c_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_multivect_type) :: psb_c_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => c_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => c_gpu_multi_get_ncols + procedure, nopass :: get_fmt => c_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => c_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => c_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => c_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => c_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => c_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => c_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => c_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => c_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => c_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => c_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => c_gpu_multi_amax +!!$ procedure, pass(x) :: asum => c_gpu_multi_asum + procedure, pass(x) :: all => c_gpu_multi_all + procedure, pass(x) :: zero => c_gpu_multi_zero + procedure, pass(x) :: asb => c_gpu_multi_asb + procedure, pass(x) :: sync => c_gpu_multi_sync + procedure, pass(x) :: sync_space => c_gpu_multi_sync_space + procedure, pass(x) :: bld_x => c_gpu_multi_bld_x + procedure, pass(x) :: bld_n => c_gpu_multi_bld_n + procedure, pass(x) :: free => c_gpu_multi_free + procedure, pass(x) :: ins => c_gpu_multi_ins + procedure, pass(x) :: is_host => c_gpu_multi_is_host + procedure, pass(x) :: is_dev => c_gpu_multi_is_dev + procedure, pass(x) :: is_sync => c_gpu_multi_is_sync + procedure, pass(x) :: set_host => c_gpu_multi_set_host + procedure, pass(x) :: set_dev => c_gpu_multi_set_dev + procedure, pass(x) :: set_sync => c_gpu_multi_set_sync + procedure, pass(x) :: set_scal => c_gpu_multi_set_scal + procedure, pass(x) :: set_vect => c_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => c_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => c_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => c_gpu_multi_sctb_x + final :: c_gpu_multi_vect_finalize +#endif + end type psb_c_multivect_gpu + + public :: psb_c_multivect_gpu + private :: constructor + interface psb_c_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + complex(psb_spk_) :: x(:,:) + type(psb_c_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine c_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_spk_) :: y(:) +!!$ class(psb_c_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine c_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine c_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ complex(psb_spk_) :: beta, x(:) +!!$ class(psb_c_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_c_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine c_gpu_multi_sctb +!!$ +!!$ subroutine c_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_spk_) :: beta, x(:) +!!$ class(psb_c_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine c_gpu_multi_sctb_x + + + subroutine c_gpu_multi_bld_x(x,this) + use psb_base_mod + complex(psb_spk_), intent(in) :: this(:,:) + class(psb_c_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine c_gpu_multi_bld_x + + subroutine c_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'c_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine c_gpu_multi_bld_n + + + subroutine c_gpu_multi_set_host(x) + implicit none + class(psb_c_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine c_gpu_multi_set_host + + subroutine c_gpu_multi_set_dev(x) + implicit none + class(psb_c_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine c_gpu_multi_set_dev + + subroutine c_gpu_multi_set_sync(x) + implicit none + class(psb_c_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine c_gpu_multi_set_sync + + function c_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function c_gpu_multi_is_dev + + function c_gpu_multi_is_host(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function c_gpu_multi_is_host + + function c_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function c_gpu_multi_is_sync + + + function c_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function c_gpu_multi_get_nrows + + function c_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_c_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function c_gpu_multi_get_ncols + + function c_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'cGPU' + end function c_gpu_multi_get_fmt + +!!$ function c_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_spk_) :: res +!!$ complex(psb_spk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_c_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_c_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_c_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'c_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function c_gpu_multi_dot_v +!!$ +!!$ function c_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ complex(psb_spk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_spk_) :: res +!!$ complex(psb_spk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function c_gpu_multi_dot_a +!!$ +!!$ subroutine c_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_c_base_multivect_type), intent(inout) :: x +!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ complex(psb_spk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_c_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_c_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_c_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_c_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine c_gpu_multi_mlt_v +!!$ +!!$ subroutine c_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: x(:) +!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_c_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine c_gpu_multi_mlt_a +!!$ +!!$ subroutine c_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha,beta +!!$ complex(psb_spk_), intent(in) :: x(:) +!!$ complex(psb_spk_), intent(in) :: y(:) +!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine c_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine c_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha,beta +!!$ class(psb_c_base_multivect_type), intent(inout) :: x +!!$ class(psb_c_base_multivect_type), intent(inout) :: y +!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_c_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_c_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine c_gpu_multi_mlt_v_2 + + + subroutine c_gpu_multi_set_scal(x,val) + class(psb_c_multivect_gpu), intent(inout) :: x + complex(psb_spk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_c_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine c_gpu_multi_set_scal + + subroutine c_gpu_multi_set_vect(x,val) + class(psb_c_multivect_gpu), intent(inout) :: x + complex(psb_spk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_c_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine c_gpu_multi_set_vect + + + +!!$ subroutine c_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ complex(psb_spk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_c_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine c_gpu_multi_scal +!!$ +!!$ +!!$ function c_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function c_gpu_multi_nrm2 +!!$ +!!$ function c_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function c_gpu_multi_amax +!!$ +!!$ function c_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function c_gpu_multi_asum + + subroutine c_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=czero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine c_gpu_multi_all + + subroutine c_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_c_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine c_gpu_multi_zero + + subroutine c_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_c_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc c_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => c_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_c_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_c_hdiag_csmm + procedure, pass(a) :: csmv => psb_c_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_c_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_c_hdiag_scals + ! procedure, pass(a) :: scalv => psb_c_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_c_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_c_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdiag_from_fmt + procedure, pass(a) :: free => c_hdiag_free + procedure, pass(a) :: mold => psb_c_hdiag_mold + procedure, pass(a) :: to_gpu => psb_c_hdiag_to_gpu + final :: c_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_hdiag_mold +#endif + end type psb_c_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_hdiag_get_nzeros, c_hdiag_free, c_hdiag_get_fmt, & + & c_hdiag_get_size, c_hdiag_sizeof, c_hdiag_get_nz_row + + + interface + subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_c_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_c_hdiag_mold(a,b,info) + import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdiag_mold + end interface + + interface + subroutine psb_c_hdiag_to_gpu(a,info) + import :: psb_c_hdiag_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdiag_to_gpu + end interface + + interface + subroutine psb_c_cp_hdiag_from_coo(a,b,info) + import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_c_mv_hdiag_from_coo(a,b,info) + import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_c_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_c_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdiag_scal(d,a,info, side) +!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdiag_scals(d,a,info) +!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function c_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function c_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hdia_sparse_mat%free() + + return + + end subroutine c_hdiag_free + + subroutine c_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_c_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hdia_sparse_mat%free() + + return + end subroutine c_hdiag_finalize + +#else + + interface + subroutine psb_c_hdiag_mold(a,b,info) + import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdiag_mold + end interface + +#endif + +end module psb_c_hdiag_mat_mod diff --git a/gpu/psb_c_hlg_mat_mod.F90 b/gpu/psb_c_hlg_mat_mod.F90 new file mode 100644 index 00000000..9236a202 --- /dev/null +++ b/gpu/psb_c_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_hlg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_hll_sparse_mat) :: psb_c_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_hlg_get_fmt + procedure, pass(a) :: sizeof => c_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_c_hlg_vect_mv + procedure, pass(a) :: csmm => psb_c_hlg_csmm + procedure, pass(a) :: csmv => psb_c_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_hlg_scals + procedure, pass(a) :: scalv => psb_c_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_c_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_hlg_from_fmt + procedure, pass(a) :: free => c_hlg_free + procedure, pass(a) :: mold => psb_c_hlg_mold + procedure, pass(a) :: is_host => c_hlg_is_host + procedure, pass(a) :: is_dev => c_hlg_is_dev + procedure, pass(a) :: is_sync => c_hlg_is_sync + procedure, pass(a) :: set_host => c_hlg_set_host + procedure, pass(a) :: set_dev => c_hlg_set_dev + procedure, pass(a) :: set_sync => c_hlg_set_sync + procedure, pass(a) :: sync => c_hlg_sync + procedure, pass(a) :: from_gpu => psb_c_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_hlg_to_gpu + final :: c_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_hlg_mold +#endif + end type psb_c_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_hlg_get_nzeros, c_hlg_free, c_hlg_get_fmt, & + & c_hlg_get_size, c_hlg_sizeof, c_hlg_get_nz_row + + + interface + subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hlg_vect_mv + end interface + + interface + subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hlg_inner_vect_sv + end interface + + interface + subroutine psb_c_hlg_reallocate_nz(nz,a) + import :: psb_c_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_c_hlg_reallocate_nz + end interface + + interface + subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_c_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hlg_allocate_mnnz + end interface + + interface + subroutine psb_c_hlg_mold(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hlg_mold + end interface + + interface + subroutine psb_c_hlg_from_gpu(a,info) + import :: psb_c_hlg_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hlg_from_gpu + end interface + + interface + subroutine psb_c_hlg_to_gpu(a,info, nzrm) + import :: psb_c_hlg_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_hlg_to_gpu + end interface + + interface + subroutine psb_c_cp_hlg_from_coo(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hlg_from_coo + end interface + + interface + subroutine psb_c_cp_hlg_from_fmt(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hlg_from_fmt + end interface + + interface + subroutine psb_c_mv_hlg_from_coo(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hlg_from_coo + end interface + + + interface + subroutine psb_c_mv_hlg_from_fmt(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hlg_from_fmt + end interface + + interface + subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hlg_csmv + end interface + interface + subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hlg_csmm + end interface + + interface + subroutine psb_c_hlg_scal(d,a,info, side) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_hlg_scal + end interface + + interface + subroutine psb_c_hlg_scals(d,a,info) + import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hlg_sizeof(a) result(res) + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_hlg_sizeof + + function c_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function c_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hll_sparse_mat%free() + + return + + end subroutine c_hlg_free + + + subroutine c_hlg_sync(a) + implicit none + class(psb_c_hlg_sparse_mat), target, intent(in) :: a + class(psb_c_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_hlg_sync + + subroutine c_hlg_set_host(a) + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_hlg_set_host + + subroutine c_hlg_set_dev(a) + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_hlg_set_dev + + subroutine c_hlg_set_sync(a) + implicit none + class(psb_c_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_hlg_set_sync + + function c_hlg_is_dev(a) result(res) + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_hlg_is_dev + + function c_hlg_is_host(a) result(res) + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_hlg_is_host + + function c_hlg_is_sync(a) result(res) + implicit none + class(psb_c_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_hlg_is_sync + + + subroutine c_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_c_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_hlg_finalize + +#else + + interface + subroutine psb_c_hlg_mold(a,b,info) + import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hlg_mold + end interface + +#endif + +end module psb_c_hlg_mat_mod diff --git a/gpu/psb_c_hybg_mat_mod.F90 b/gpu/psb_c_hybg_mat_mod.F90 new file mode 100644 index 00000000..d5c605ec --- /dev/null +++ b/gpu/psb_c_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_c_hybg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + type, extends(psb_c_csr_sparse_mat) :: psb_c_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(c_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => c_hybg_get_fmt + procedure, pass(a) :: sizeof => c_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_c_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_hybg_csmm + procedure, pass(a) :: csmv => psb_c_hybg_csmv + procedure, pass(a) :: scals => psb_c_hybg_scals + procedure, pass(a) :: scalv => psb_c_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_c_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_hybg_from_fmt + procedure, pass(a) :: free => c_hybg_free + procedure, pass(a) :: mold => psb_c_hybg_mold + procedure, pass(a) :: to_gpu => psb_c_hybg_to_gpu + final :: c_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_hybg_mold +#endif + end type psb_c_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_hybg_get_nzeros, c_hybg_free, c_hybg_get_fmt, & + & c_hybg_get_size, c_hybg_sizeof, c_hybg_get_nz_row + + + interface + subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hybg_inner_vect_sv + end interface + + interface + subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hybg_vect_mv + end interface + + interface + subroutine psb_c_hybg_reallocate_nz(nz,a) + import :: psb_c_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_c_hybg_reallocate_nz + end interface + + interface + subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_c_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hybg_allocate_mnnz + end interface + + interface + subroutine psb_c_hybg_mold(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hybg_mold + end interface + + interface + subroutine psb_c_hybg_to_gpu(a,info, nzrm) + import :: psb_c_hybg_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_hybg_to_gpu + end interface + + interface + subroutine psb_c_cp_hybg_from_coo(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hybg_from_coo + end interface + + interface + subroutine psb_c_cp_hybg_from_fmt(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hybg_from_fmt + end interface + + interface + subroutine psb_c_mv_hybg_from_coo(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hybg_from_coo + end interface + + interface + subroutine psb_c_mv_hybg_from_fmt(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hybg_from_fmt + end interface + + interface + subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hybg_csmv + end interface + interface + subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hybg_csmm + end interface + + interface + subroutine psb_c_hybg_scal(d,a,info,side) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_hybg_scal + end interface + + interface + subroutine psb_c_hybg_scals(d,a,info) + import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hybg_sizeof(a) result(res) + implicit none + class(psb_c_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_hybg_sizeof + + function c_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function c_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_hybg_free + + subroutine c_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_c_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine c_hybg_finalize + +#else + + interface + subroutine psb_c_hybg_mold(a,b,info) + import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hybg_mold + end interface + +#endif + +end module psb_c_hybg_mat_mod +#endif diff --git a/gpu/psb_c_vectordev_mod.F90 b/gpu/psb_c_vectordev_mod.F90 new file mode 100644 index 00000000..f3c243a6 --- /dev/null +++ b/gpu/psb_c_vectordev_mod.F90 @@ -0,0 +1,390 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedFloatComplex(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + complex(c_float_complex), value :: dummy + end function registerMappedFloatComplex + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceFloatComplex(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + complex(c_float_complex) :: hostVec(*) + end function writeMultiVecDeviceFloatComplex + function writeMultiVecDeviceFloatComplexR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceFloatComplexR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + complex(c_float_complex) :: hostVec(ld,*) + end function writeMultiVecDeviceFloatComplexR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceFloatComplex(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + complex(c_float_complex) :: hostVec(*) + end function readMultiVecDeviceFloatComplex + function readMultiVecDeviceFloatComplexR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceFloatComplexR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + complex(c_float_complex) :: hostVec(ld,*) + end function readMultiVecDeviceFloatComplexR2 + end interface + + interface allocateFloatComplex + function allocateFloatComplex(didx,n) & + & result(res) bind(c,name='allocateFloatComplex') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateFloatComplex + function allocateMultiFloatComplex(didx,m,n) & + & result(res) bind(c,name='allocateMultiFloatComplex') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiFloatComplex + end interface + + interface writeFloatComplex + function writeFloatComplex(didx,hidx,n) & + & result(res) bind(c,name='writeFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(*) + integer(c_int),value :: n + end function writeFloatComplex + function writeFloatComplexFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeFloatComplexFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeFloatComplexFirst + function writeMultiFloatComplex(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiFloatComplex + end interface + + interface readFloatComplex + function readFloatComplex(didx,hidx,n) & + & result(res) bind(c,name='readFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(*) + integer(c_int),value :: n + end function readFloatComplex + function readFloatComplexFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readFloatComplexFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readFloatComplexFirst + function readMultiFloatComplex(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_float_complex) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiFloatComplex + end interface + + interface + subroutine freeFloatComplex(didx) & + & bind(c,name='freeFloatComplex') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeFloatComplex + end interface + + + interface setScalDevice + function setScalMultiVecDeviceFloatComplex(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + complex(c_float_complex), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceFloatComplex + end interface + + interface + function geinsMultiVecDeviceFloatComplex(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceFloatComplex + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceFloatComplex + end interface + + interface + function igathMultiVecDeviceFloatComplexVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceFloatComplexVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceFloatComplexVecIdx + end interface + + interface + function iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + complex(c_float_complex),value :: beta + end function iscatMultiVecDeviceFloatComplex + end interface + + interface + function iscatMultiVecDeviceFloatComplexVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceFloatComplexVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + complex(c_float_complex),value :: beta + end function iscatMultiVecDeviceFloatComplexVecIdx + end interface + + + interface scalMultiVecDevice + function scalMultiVecDeviceFloatComplex(alpha,deviceVecA) & + & result(val) bind(c,name='scalMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + complex(c_float_complex), value :: alpha + type(c_ptr), value :: deviceVecA + end function scalMultiVecDeviceFloatComplex + end interface + + interface dotMultiVecDevice + function dotMultiVecDeviceFloatComplex(res, n,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + complex(c_float_complex) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotMultiVecDeviceFloatComplex + end interface + + interface nrm2MultiVecDeviceComplex + function nrm2MultiVecDeviceFloatComplex(res,n,deviceVecA) & + & result(val) bind(c,name='nrm2MultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function nrm2MultiVecDeviceFloatComplex + end interface + + interface amaxMultiVecDeviceComplex + function amaxMultiVecDeviceFloatComplex(res,n,deviceVecA) & + & result(val) bind(c,name='amaxMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function amaxMultiVecDeviceFloatComplex + end interface + + interface asumMultiVecDeviceComplex + function asumMultiVecDeviceFloatComplex(res,n,deviceVecA) & + & result(val) bind(c,name='asumMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function asumMultiVecDeviceFloatComplex + end interface + + + interface axpbyMultiVecDevice + function axpbyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,beta,deviceVecB) & + & result(res) bind(c,name='axpbyMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyMultiVecDeviceFloatComplex + end interface + + interface axyMultiVecDevice + function axyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='axyMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function axyMultiVecDeviceFloatComplex + end interface + + interface axybzMultiVecDevice + function axybzMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB,beta,deviceVecZ) & + & result(res) bind(c,name='axybzMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB,deviceVecZ + end function axybzMultiVecDeviceFloatComplex + end interface + + + interface absMultiVecDevice + function absMultiVecDeviceFloatComplex(n,alpha,deviceVecA) & + & result(res) bind(c,name='absMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha + type(c_ptr), value :: deviceVecA + end function absMultiVecDeviceFloatComplex + function absMultiVecDeviceFloatComplex2(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='absMultiVecDeviceFloatComplex2') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_float_complex), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function absMultiVecDeviceFloatComplex2 + end interface + + interface inner_register + module procedure inner_registerFloatComplex + end interface + + interface inner_unregister + module procedure inner_unregisterFloatComplex + end interface + +contains + + + function inner_registerFloatComplex(buffer,dval) result(res) + complex(c_float_complex), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + complex(c_float_complex) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerFloatComplex + + subroutine inner_unregisterFloatComplex(buffer) + complex(c_float_complex), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterFloatComplex + +#endif + +end module psb_c_vectordev_mod diff --git a/gpu/psb_d_csrg_mat_mod.F90 b/gpu/psb_d_csrg_mat_mod.F90 new file mode 100644 index 00000000..177c7440 --- /dev/null +++ b/gpu/psb_d_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_csrg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(d_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_csrg_get_fmt + procedure, pass(a) :: sizeof => d_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_d_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_csrg_csmm + procedure, pass(a) :: csmv => psb_d_csrg_csmv + procedure, pass(a) :: scals => psb_d_csrg_scals + procedure, pass(a) :: scalv => psb_d_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_d_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_csrg_from_fmt + procedure, pass(a) :: free => d_csrg_free + procedure, pass(a) :: mold => psb_d_csrg_mold + procedure, pass(a) :: is_host => d_csrg_is_host + procedure, pass(a) :: is_dev => d_csrg_is_dev + procedure, pass(a) :: is_sync => d_csrg_is_sync + procedure, pass(a) :: set_host => d_csrg_set_host + procedure, pass(a) :: set_dev => d_csrg_set_dev + procedure, pass(a) :: set_sync => d_csrg_set_sync + procedure, pass(a) :: sync => d_csrg_sync + procedure, pass(a) :: to_gpu => psb_d_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_d_csrg_from_gpu + final :: d_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_csrg_mold +#endif + end type psb_d_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_csrg_get_nzeros, d_csrg_free, d_csrg_get_fmt, & + & d_csrg_get_size, d_csrg_sizeof, d_csrg_get_nz_row + + + interface + subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrg_vect_mv + end interface + + interface + subroutine psb_d_csrg_reallocate_nz(nz,a) + import :: psb_d_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_d_csrg_reallocate_nz + end interface + + interface + subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_d_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_csrg_allocate_mnnz + end interface + + interface + subroutine psb_d_csrg_mold(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrg_mold + end interface + + interface + subroutine psb_d_csrg_to_gpu(a,info, nzrm) + import :: psb_d_csrg_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_csrg_to_gpu + end interface + + interface + subroutine psb_d_csrg_from_gpu(a,info) + import :: psb_d_csrg_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrg_from_gpu + end interface + + interface + subroutine psb_d_cp_csrg_from_coo(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_csrg_from_coo + end interface + + interface + subroutine psb_d_cp_csrg_from_fmt(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_csrg_from_fmt + end interface + + interface + subroutine psb_d_mv_csrg_from_coo(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_csrg_from_coo + end interface + + interface + subroutine psb_d_mv_csrg_from_fmt(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_csrg_from_fmt + end interface + + interface + subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrg_csmv + end interface + interface + subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_csrg_csmm + end interface + + interface + subroutine psb_d_csrg_scal(d,a,info,side) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_csrg_scal + end interface + + interface + subroutine psb_d_csrg_scals(d,a,info) + import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_csrg_sizeof(a) result(res) + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_csrg_sizeof + + function d_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function d_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine d_csrg_set_host(a) + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_csrg_set_host + + subroutine d_csrg_set_dev(a) + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_csrg_set_dev + + subroutine d_csrg_set_sync(a) + implicit none + class(psb_d_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_csrg_set_sync + + function d_csrg_is_dev(a) result(res) + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_csrg_is_dev + + function d_csrg_is_host(a) result(res) + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_csrg_is_host + + function d_csrg_is_sync(a) result(res) + implicit none + class(psb_d_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_csrg_is_sync + + + subroutine d_csrg_sync(a) + implicit none + class(psb_d_csrg_sparse_mat), target, intent(in) :: a + class(psb_d_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_csrg_sync + + subroutine d_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_csrg_free + + subroutine d_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_d_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine d_csrg_finalize + +#else + interface + subroutine psb_d_csrg_mold(a,b,info) + import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_csrg_mold + end interface + +#endif + +end module psb_d_csrg_mat_mod diff --git a/gpu/psb_d_diag_mat_mod.F90 b/gpu/psb_d_diag_mat_mod.F90 new file mode 100644 index 00000000..564f7a13 --- /dev/null +++ b/gpu/psb_d_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_d_dia_mat_mod + + type, extends(psb_d_dia_sparse_mat) :: psb_d_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => d_diag_get_fmt + procedure, pass(a) :: sizeof => d_diag_sizeof + procedure, pass(a) :: vect_mv => psb_d_diag_vect_mv +! procedure, pass(a) :: csmm => psb_d_diag_csmm + procedure, pass(a) :: csmv => psb_d_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_d_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_d_diag_scals +! procedure, pass(a) :: scalv => psb_d_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_d_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_d_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_d_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_d_mv_diag_from_fmt + procedure, pass(a) :: free => d_diag_free + procedure, pass(a) :: mold => psb_d_diag_mold + procedure, pass(a) :: to_gpu => psb_d_diag_to_gpu + final :: d_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_diag_mold +#endif + end type psb_d_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_diag_get_nzeros, d_diag_free, d_diag_get_fmt, & + & d_diag_get_size, d_diag_sizeof, d_diag_get_nz_row + + + interface + subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_diag_vect_mv + end interface + + interface + subroutine psb_d_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_diag_inner_vect_sv + end interface + + interface + subroutine psb_d_diag_reallocate_nz(nz,a) + import :: psb_d_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_diag_sparse_mat), intent(inout) :: a + end subroutine psb_d_diag_reallocate_nz + end interface + + interface + subroutine psb_d_diag_allocate_mnnz(m,n,a,nz) + import :: psb_d_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_diag_allocate_mnnz + end interface + + interface + subroutine psb_d_diag_mold(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_diag_mold + end interface + + interface + subroutine psb_d_diag_to_gpu(a,info, nzrm) + import :: psb_d_diag_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_diag_to_gpu + end interface + + interface + subroutine psb_d_cp_diag_from_coo(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_diag_from_coo + end interface + + interface + subroutine psb_d_cp_diag_from_fmt(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_diag_from_fmt + end interface + + interface + subroutine psb_d_mv_diag_from_coo(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_diag_from_coo + end interface + + + interface + subroutine psb_d_mv_diag_from_fmt(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_diag_from_fmt + end interface + + interface + subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_diag_csmv + end interface + interface + subroutine psb_d_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_diag_csmm + end interface + + interface + subroutine psb_d_diag_scal(d,a,info, side) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_diag_scal + end interface + + interface + subroutine psb_d_diag_scals(d,a,info) + import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_diag_sizeof(a) result(res) + implicit none + class(psb_d_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_dp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_diag_sizeof + + function d_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function d_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_dia_sparse_mat%free() + + return + + end subroutine d_diag_free + + subroutine d_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_d_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_diag_finalize + +#else + + interface + subroutine psb_d_diag_mold(a,b,info) + import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_diag_mold + end interface + +#endif + +end module psb_d_diag_mat_mod diff --git a/gpu/psb_d_dnsg_mat_mod.F90 b/gpu/psb_d_dnsg_mat_mod.F90 new file mode 100644 index 00000000..966c2311 --- /dev/null +++ b/gpu/psb_d_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_dnsg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_dns_mat_mod + use dnsdev_mod + + type, extends(psb_d_dns_sparse_mat) :: psb_d_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => d_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => d_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_d_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_d_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_d_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_d_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_d_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_d_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_d_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_d_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_d_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_dnsg_from_fmt + procedure, pass(a) :: free => d_dnsg_free + procedure, pass(a) :: mold => psb_d_dnsg_mold + procedure, pass(a) :: to_gpu => psb_d_dnsg_to_gpu + final :: d_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_dnsg_mold +#endif + end type psb_d_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_dnsg_get_nzeros, d_dnsg_free, d_dnsg_get_fmt, & + & d_dnsg_get_size, d_dnsg_get_nz_row + + + interface + subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_d_dnsg_mold(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dnsg_mold + end interface + + interface + subroutine psb_d_dnsg_to_gpu(a,info) + import :: psb_d_dnsg_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dnsg_to_gpu + end interface + + interface + subroutine psb_d_cp_dnsg_from_coo(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dnsg_from_coo + end interface + + interface + subroutine psb_d_cp_dnsg_from_fmt(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_d_mv_dnsg_from_coo(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_d_mv_dnsg_from_fmt(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_d_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_d_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_scal(d,a,info, side) +!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_dnsg_scals(d,a,info) +!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function d_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function d_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_dns_sparse_mat%free() + + return + + end subroutine d_dnsg_free + + subroutine d_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_d_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_dnsg_finalize + +#else + + interface + subroutine psb_d_dnsg_mold(a,b,info) + import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dnsg_mold + end interface + +#endif + +end module psb_d_dnsg_mat_mod diff --git a/gpu/psb_d_elg_mat_mod.F90 b/gpu/psb_d_elg_mat_mod.F90 new file mode 100644 index 00000000..eac7bb36 --- /dev/null +++ b/gpu/psb_d_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_elg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_ell_sparse_mat) :: psb_d_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_elg_get_fmt + procedure, pass(a) :: sizeof => d_elg_sizeof + procedure, pass(a) :: vect_mv => psb_d_elg_vect_mv + procedure, pass(a) :: csmm => psb_d_elg_csmm + procedure, pass(a) :: csmv => psb_d_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_elg_scals + procedure, pass(a) :: scalv => psb_d_elg_scal + procedure, pass(a) :: reallocate_nz => psb_d_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_elg_allocate_mnnz + procedure, pass(a) :: reinit => d_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_elg_from_fmt + procedure, pass(a) :: free => d_elg_free + procedure, pass(a) :: mold => psb_d_elg_mold + procedure, pass(a) :: csput_a => psb_d_elg_csput_a + procedure, pass(a) :: csput_v => psb_d_elg_csput_v + procedure, pass(a) :: is_host => d_elg_is_host + procedure, pass(a) :: is_dev => d_elg_is_dev + procedure, pass(a) :: is_sync => d_elg_is_sync + procedure, pass(a) :: set_host => d_elg_set_host + procedure, pass(a) :: set_dev => d_elg_set_dev + procedure, pass(a) :: set_sync => d_elg_set_sync + procedure, pass(a) :: sync => d_elg_sync + procedure, pass(a) :: from_gpu => psb_d_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_elg_to_gpu + procedure, pass(a) :: asb => psb_d_elg_asb + final :: d_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_elg_mold + procedure, pass(a) :: asb => psb_d_elg_asb +#endif + end type psb_d_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_elg_get_nzeros, d_elg_free, d_elg_get_fmt, & + & d_elg_get_size, d_elg_sizeof, d_elg_get_nz_row, d_elg_sync + + + interface + subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_vect_mv + end interface + + interface + subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_inner_vect_sv + end interface + + interface + subroutine psb_d_elg_reallocate_nz(nz,a) + import :: psb_d_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_elg_reallocate_nz + end interface + + interface + subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) + import :: psb_d_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_elg_allocate_mnnz + end interface + + interface + subroutine psb_d_elg_mold(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_mold + end interface + + interface + subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_csput_a + end interface + + interface + subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& + & psb_i_base_vect_type + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_csput_v + end interface + + interface + subroutine psb_d_elg_from_gpu(a,info) + import :: psb_d_elg_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_from_gpu + end interface + + interface + subroutine psb_d_elg_to_gpu(a,info, nzrm) + import :: psb_d_elg_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_elg_to_gpu + end interface + + interface + subroutine psb_d_cp_elg_from_coo(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_elg_from_coo + end interface + + interface + subroutine psb_d_cp_elg_from_fmt(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_elg_from_fmt + end interface + + interface + subroutine psb_d_mv_elg_from_coo(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_elg_from_coo + end interface + + + interface + subroutine psb_d_mv_elg_from_fmt(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_elg_from_fmt + end interface + + interface + subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_csmv + end interface + interface + subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_elg_csmm + end interface + + interface + subroutine psb_d_elg_scal(d,a,info, side) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_elg_scal + end interface + + interface + subroutine psb_d_elg_scals(d,a,info) + import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_scals + end interface + + interface + subroutine psb_d_elg_asb(a) + import :: psb_d_elg_sparse_mat + class(psb_d_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_elg_sizeof(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_elg_sizeof + + function d_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function d_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine d_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = dzero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine d_elg_reinit + + subroutine d_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine d_elg_free + + subroutine d_elg_sync(a) + implicit none + class(psb_d_elg_sparse_mat), target, intent(in) :: a + class(psb_d_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_elg_sync + + subroutine d_elg_set_host(a) + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_elg_set_host + + subroutine d_elg_set_dev(a) + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_elg_set_dev + + subroutine d_elg_set_sync(a) + implicit none + class(psb_d_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_elg_set_sync + + function d_elg_is_dev(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_elg_is_dev + + function d_elg_is_host(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_elg_is_host + + function d_elg_is_sync(a) result(res) + implicit none + class(psb_d_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_elg_is_sync + + subroutine d_elg_finalize(a) + use elldev_mod + implicit none + type(psb_d_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine d_elg_finalize + +#else + + interface + subroutine psb_d_elg_asb(a) + import :: psb_d_elg_sparse_mat + class(psb_d_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_elg_asb + end interface + + interface + subroutine psb_d_elg_mold(a,b,info) + import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_elg_mold + end interface + +#endif + +end module psb_d_elg_mat_mod diff --git a/gpu/psb_d_gpu_vect_mod.F90 b/gpu/psb_d_gpu_vect_mod.F90 new file mode 100644 index 00000000..cd3757c3 --- /dev/null +++ b/gpu/psb_d_gpu_vect_mod.F90 @@ -0,0 +1,1989 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_d_vect_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_gpu_vect_mod + use psb_i_vectordev_mod + use psb_d_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_vect_type) :: psb_d_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + real(c_double), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => d_gpu_get_nrows + procedure, nopass :: get_fmt => d_gpu_get_fmt + + procedure, pass(x) :: all => d_gpu_all + procedure, pass(x) :: zero => d_gpu_zero + procedure, pass(x) :: asb_m => d_gpu_asb_m + procedure, pass(x) :: sync => d_gpu_sync + procedure, pass(x) :: sync_space => d_gpu_sync_space + procedure, pass(x) :: bld_x => d_gpu_bld_x + procedure, pass(x) :: bld_mn => d_gpu_bld_mn + procedure, pass(x) :: free => d_gpu_free + procedure, pass(x) :: ins_a => d_gpu_ins_a + procedure, pass(x) :: ins_v => d_gpu_ins_v + procedure, pass(x) :: is_host => d_gpu_is_host + procedure, pass(x) :: is_dev => d_gpu_is_dev + procedure, pass(x) :: is_sync => d_gpu_is_sync + procedure, pass(x) :: set_host => d_gpu_set_host + procedure, pass(x) :: set_dev => d_gpu_set_dev + procedure, pass(x) :: set_sync => d_gpu_set_sync + procedure, pass(x) :: set_scal => d_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => d_gpu_set_vect + procedure, pass(x) :: gthzv_x => d_gpu_gthzv_x + procedure, pass(y) :: sctb => d_gpu_sctb + procedure, pass(y) :: sctb_x => d_gpu_sctb_x + procedure, pass(x) :: gthzbuf => d_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => d_gpu_sctb_buf + procedure, pass(x) :: new_buffer => d_gpu_new_buffer + procedure, nopass :: device_wait => d_gpu_device_wait + procedure, pass(x) :: free_buffer => d_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => d_gpu_maybe_free_buffer + procedure, pass(x) :: dot_v => d_gpu_dot_v + procedure, pass(x) :: dot_a => d_gpu_dot_a + procedure, pass(y) :: axpby_v => d_gpu_axpby_v + procedure, pass(y) :: axpby_a => d_gpu_axpby_a + procedure, pass(y) :: mlt_v => d_gpu_mlt_v + procedure, pass(y) :: mlt_a => d_gpu_mlt_a + procedure, pass(z) :: mlt_a_2 => d_gpu_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => d_gpu_mlt_v_2 + procedure, pass(x) :: scal => d_gpu_scal + procedure, pass(x) :: nrm2 => d_gpu_nrm2 + procedure, pass(x) :: amax => d_gpu_amax + procedure, pass(x) :: asum => d_gpu_asum + procedure, pass(x) :: absval1 => d_gpu_absval1 + procedure, pass(x) :: absval2 => d_gpu_absval2 + + final :: d_gpu_vect_finalize +#endif + end type psb_d_vect_gpu + + public :: psb_d_vect_gpu_ + private :: constructor + interface psb_d_vect_gpu_ + module procedure constructor + end interface psb_d_vect_gpu_ + +contains + + function constructor(x) result(this) + real(psb_dpk_) :: x(:) + type(psb_d_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine d_gpu_device_wait() + call psb_cudaSync() + end subroutine d_gpu_device_wait + + subroutine d_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDouble(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDouble(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDouble(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDouble(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDouble(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceDouble(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDouble(x%dt_buf,y,n) + + end select + + end subroutine d_gpu_gthzv_x + + subroutine d_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_d_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readDouble(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceDouble(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readDouble(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine d_gpu_gthzbuf + + subroutine d_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_d_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine d_gpu_sctb + + subroutine d_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDouble(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDouble(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeDouble(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDouble(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDouble(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeDouble(y%dt_buf,x,n) + info = iscatMultiVecDeviceDouble(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine d_gpu_sctb_x + + subroutine d_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta + class(psb_d_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeDouble(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeDouble(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceDouble(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine d_gpu_sctb_buf + + + subroutine d_gpu_bld_x(x,this) + use psb_base_mod + real(psb_dpk_), intent(in) :: this(:) + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine d_gpu_bld_x + + subroutine d_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'d_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine d_gpu_bld_mn + + subroutine d_gpu_set_host(x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine d_gpu_set_host + + subroutine d_gpu_set_dev(x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine d_gpu_set_dev + + subroutine d_gpu_set_sync(x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine d_gpu_set_sync + + function d_gpu_is_dev(x) result(res) + implicit none + class(psb_d_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function d_gpu_is_dev + + function d_gpu_is_host(x) result(res) + implicit none + class(psb_d_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function d_gpu_is_host + + function d_gpu_is_sync(x) result(res) + implicit none + class(psb_d_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function d_gpu_is_sync + + + function d_gpu_get_nrows(x) result(res) + implicit none + class(psb_d_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function d_gpu_get_nrows + + function d_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'dGPU' + end function d_gpu_get_fmt + + subroutine d_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_d_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine d_gpu_all + + subroutine d_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine d_gpu_zero + + subroutine d_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_d_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'d_gpu_sync') + end if + + end subroutine d_gpu_sync + + subroutine d_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine d_gpu_free + + subroutine d_gpu_set_scal(x,val,first,last) + class(psb_d_vect_gpu), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine d_gpu_set_scal +!!$ +!!$ subroutine d_gpu_set_vect(x,val) +!!$ class(psb_d_vect_gpu), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_d_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine d_gpu_set_vect + + + + function d_gpu_dot_v(n,x,y) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + + res = dzero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_d_vect + ! + select type(yy => y) + type is (psb_d_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_d_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'d_gpu_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function d_gpu_dot_v + + function d_gpu_dot_a(n,x,y) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function d_gpu_dot_a + + subroutine d_gpu_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_gpu), intent(inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_d_vect_gpu) + ! Do something different here + if ((beta /= dzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + type is (psb_d_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_d_vect_gpu) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine d_gpu_mlt_v + + subroutine d_gpu_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_gpu), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_d_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine d_gpu_mlt_a + + subroutine d_gpu_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: alpha,beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(in) :: y(:) + class(psb_d_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine d_gpu_mlt_a_2 + + subroutine d_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_dpk_), intent(in) :: alpha,beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_d_vect_gpu) + select type (yy => y) + type is (psb_d_vect_gpu) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= dzero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= dzero).and.(z%is_dev())) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= dzero).and.(z%is_dev())) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine d_gpu_mlt_v_2 + + subroutine d_gpu_scal(alpha, x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + real(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine d_gpu_scal + + + function d_gpu_nrm2(n,x) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDevice(res,n,x%deviceVect) + + end function d_gpu_nrm2 + + function d_gpu_amax(n,x) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDevice(res,n,x%deviceVect) + + end function d_gpu_amax + + function d_gpu_asum(n,x) result(res) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDevice(res,n,x%deviceVect) + + end function d_gpu_asum + + subroutine d_gpu_absval1(x) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,done,x%deviceVect) + + end subroutine d_gpu_absval1 + + subroutine d_gpu_absval2(x,y) + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_d_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,done,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_d_base_vect_type%absval(y) + end select + end subroutine d_gpu_absval2 + + + subroutine d_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine d_gpu_vect_finalize + + subroutine d_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_d_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_d_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceDouble(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine d_gpu_ins_v + + subroutine d_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_d_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_d_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine d_gpu_ins_a + +#endif + +end module psb_d_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_d_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_d_multivect_mod + use psb_d_base_multivect_mod + + use psb_i_multivect_mod +#ifdef HAVE_SPGPU + use psb_i_gpu_multivect_mod + use psb_d_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_multivect_type) :: psb_d_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => d_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => d_gpu_multi_get_ncols + procedure, nopass :: get_fmt => d_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => d_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => d_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => d_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => d_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => d_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => d_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => d_gpu_multi_amax +!!$ procedure, pass(x) :: asum => d_gpu_multi_asum + procedure, pass(x) :: all => d_gpu_multi_all + procedure, pass(x) :: zero => d_gpu_multi_zero + procedure, pass(x) :: asb => d_gpu_multi_asb + procedure, pass(x) :: sync => d_gpu_multi_sync + procedure, pass(x) :: sync_space => d_gpu_multi_sync_space + procedure, pass(x) :: bld_x => d_gpu_multi_bld_x + procedure, pass(x) :: bld_n => d_gpu_multi_bld_n + procedure, pass(x) :: free => d_gpu_multi_free + procedure, pass(x) :: ins => d_gpu_multi_ins + procedure, pass(x) :: is_host => d_gpu_multi_is_host + procedure, pass(x) :: is_dev => d_gpu_multi_is_dev + procedure, pass(x) :: is_sync => d_gpu_multi_is_sync + procedure, pass(x) :: set_host => d_gpu_multi_set_host + procedure, pass(x) :: set_dev => d_gpu_multi_set_dev + procedure, pass(x) :: set_sync => d_gpu_multi_set_sync + procedure, pass(x) :: set_scal => d_gpu_multi_set_scal + procedure, pass(x) :: set_vect => d_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => d_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => d_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => d_gpu_multi_sctb_x + final :: d_gpu_multi_vect_finalize +#endif + end type psb_d_multivect_gpu + + public :: psb_d_multivect_gpu + private :: constructor + interface psb_d_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + real(psb_dpk_) :: x(:,:) + type(psb_d_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine d_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_dpk_) :: y(:) +!!$ class(psb_d_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine d_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine d_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_d_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine d_gpu_multi_sctb +!!$ +!!$ subroutine d_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine d_gpu_multi_sctb_x + + + subroutine d_gpu_multi_bld_x(x,this) + use psb_base_mod + real(psb_dpk_), intent(in) :: this(:,:) + class(psb_d_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine d_gpu_multi_bld_x + + subroutine d_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'d_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine d_gpu_multi_bld_n + + + subroutine d_gpu_multi_set_host(x) + implicit none + class(psb_d_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine d_gpu_multi_set_host + + subroutine d_gpu_multi_set_dev(x) + implicit none + class(psb_d_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine d_gpu_multi_set_dev + + subroutine d_gpu_multi_set_sync(x) + implicit none + class(psb_d_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine d_gpu_multi_set_sync + + function d_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function d_gpu_multi_is_dev + + function d_gpu_multi_is_host(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function d_gpu_multi_is_host + + function d_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function d_gpu_multi_is_sync + + + function d_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function d_gpu_multi_get_nrows + + function d_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_d_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function d_gpu_multi_get_ncols + + function d_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'dGPU' + end function d_gpu_multi_get_fmt + +!!$ function d_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_d_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_d_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_d_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'d_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function d_gpu_multi_dot_v +!!$ +!!$ function d_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function d_gpu_multi_dot_a +!!$ +!!$ subroutine d_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_d_base_multivect_type), intent(inout) :: x +!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ real(psb_dpk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_d_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_d_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_d_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_d_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine d_gpu_multi_mlt_v +!!$ +!!$ subroutine d_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_d_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine d_gpu_multi_mlt_a +!!$ +!!$ subroutine d_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha,beta +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine d_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine d_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha,beta +!!$ class(psb_d_base_multivect_type), intent(inout) :: x +!!$ class(psb_d_base_multivect_type), intent(inout) :: y +!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_d_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_d_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine d_gpu_multi_mlt_v_2 + + + subroutine d_gpu_multi_set_scal(x,val) + class(psb_d_multivect_gpu), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_d_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine d_gpu_multi_set_scal + + subroutine d_gpu_multi_set_vect(x,val) + class(psb_d_multivect_gpu), intent(inout) :: x + real(psb_dpk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_d_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine d_gpu_multi_set_vect + + + +!!$ subroutine d_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ real(psb_dpk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_d_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine d_gpu_multi_scal +!!$ +!!$ +!!$ function d_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function d_gpu_multi_nrm2 +!!$ +!!$ function d_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function d_gpu_multi_amax +!!$ +!!$ function d_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function d_gpu_multi_asum + + subroutine d_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=dzero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine d_gpu_multi_all + + subroutine d_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_d_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine d_gpu_multi_zero + + subroutine d_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_d_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc d_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => d_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_d_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_d_hdiag_csmm + procedure, pass(a) :: csmv => psb_d_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_d_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_d_hdiag_scals + ! procedure, pass(a) :: scalv => psb_d_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_d_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdiag_from_fmt + procedure, pass(a) :: free => d_hdiag_free + procedure, pass(a) :: mold => psb_d_hdiag_mold + procedure, pass(a) :: to_gpu => psb_d_hdiag_to_gpu + final :: d_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_hdiag_mold +#endif + end type psb_d_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_hdiag_get_nzeros, d_hdiag_free, d_hdiag_get_fmt, & + & d_hdiag_get_size, d_hdiag_sizeof, d_hdiag_get_nz_row + + + interface + subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_d_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_d_hdiag_mold(a,b,info) + import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdiag_mold + end interface + + interface + subroutine psb_d_hdiag_to_gpu(a,info) + import :: psb_d_hdiag_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdiag_to_gpu + end interface + + interface + subroutine psb_d_cp_hdiag_from_coo(a,b,info) + import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_d_mv_hdiag_from_coo(a,b,info) + import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_d_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_d_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdiag_scal(d,a,info, side) +!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdiag_scals(d,a,info) +!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function d_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function d_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hdia_sparse_mat%free() + + return + + end subroutine d_hdiag_free + + subroutine d_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_d_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hdia_sparse_mat%free() + + return + end subroutine d_hdiag_finalize + +#else + + interface + subroutine psb_d_hdiag_mold(a,b,info) + import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdiag_mold + end interface + +#endif + +end module psb_d_hdiag_mat_mod diff --git a/gpu/psb_d_hlg_mat_mod.F90 b/gpu/psb_d_hlg_mat_mod.F90 new file mode 100644 index 00000000..756d13aa --- /dev/null +++ b/gpu/psb_d_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_hlg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_hll_sparse_mat) :: psb_d_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_hlg_get_fmt + procedure, pass(a) :: sizeof => d_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_d_hlg_vect_mv + procedure, pass(a) :: csmm => psb_d_hlg_csmm + procedure, pass(a) :: csmv => psb_d_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_hlg_scals + procedure, pass(a) :: scalv => psb_d_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_d_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_hlg_from_fmt + procedure, pass(a) :: free => d_hlg_free + procedure, pass(a) :: mold => psb_d_hlg_mold + procedure, pass(a) :: is_host => d_hlg_is_host + procedure, pass(a) :: is_dev => d_hlg_is_dev + procedure, pass(a) :: is_sync => d_hlg_is_sync + procedure, pass(a) :: set_host => d_hlg_set_host + procedure, pass(a) :: set_dev => d_hlg_set_dev + procedure, pass(a) :: set_sync => d_hlg_set_sync + procedure, pass(a) :: sync => d_hlg_sync + procedure, pass(a) :: from_gpu => psb_d_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_hlg_to_gpu + final :: d_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_hlg_mold +#endif + end type psb_d_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_hlg_get_nzeros, d_hlg_free, d_hlg_get_fmt, & + & d_hlg_get_size, d_hlg_sizeof, d_hlg_get_nz_row + + + interface + subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hlg_vect_mv + end interface + + interface + subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hlg_inner_vect_sv + end interface + + interface + subroutine psb_d_hlg_reallocate_nz(nz,a) + import :: psb_d_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_d_hlg_reallocate_nz + end interface + + interface + subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_d_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hlg_allocate_mnnz + end interface + + interface + subroutine psb_d_hlg_mold(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hlg_mold + end interface + + interface + subroutine psb_d_hlg_from_gpu(a,info) + import :: psb_d_hlg_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hlg_from_gpu + end interface + + interface + subroutine psb_d_hlg_to_gpu(a,info, nzrm) + import :: psb_d_hlg_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_hlg_to_gpu + end interface + + interface + subroutine psb_d_cp_hlg_from_coo(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hlg_from_coo + end interface + + interface + subroutine psb_d_cp_hlg_from_fmt(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hlg_from_fmt + end interface + + interface + subroutine psb_d_mv_hlg_from_coo(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hlg_from_coo + end interface + + + interface + subroutine psb_d_mv_hlg_from_fmt(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hlg_from_fmt + end interface + + interface + subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hlg_csmv + end interface + interface + subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hlg_csmm + end interface + + interface + subroutine psb_d_hlg_scal(d,a,info, side) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_hlg_scal + end interface + + interface + subroutine psb_d_hlg_scals(d,a,info) + import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hlg_sizeof(a) result(res) + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_hlg_sizeof + + function d_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function d_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hll_sparse_mat%free() + + return + + end subroutine d_hlg_free + + + subroutine d_hlg_sync(a) + implicit none + class(psb_d_hlg_sparse_mat), target, intent(in) :: a + class(psb_d_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_hlg_sync + + subroutine d_hlg_set_host(a) + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_hlg_set_host + + subroutine d_hlg_set_dev(a) + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_hlg_set_dev + + subroutine d_hlg_set_sync(a) + implicit none + class(psb_d_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_hlg_set_sync + + function d_hlg_is_dev(a) result(res) + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_hlg_is_dev + + function d_hlg_is_host(a) result(res) + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_hlg_is_host + + function d_hlg_is_sync(a) result(res) + implicit none + class(psb_d_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_hlg_is_sync + + + subroutine d_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_d_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_hlg_finalize + +#else + + interface + subroutine psb_d_hlg_mold(a,b,info) + import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hlg_mold + end interface + +#endif + +end module psb_d_hlg_mat_mod diff --git a/gpu/psb_d_hybg_mat_mod.F90 b/gpu/psb_d_hybg_mat_mod.F90 new file mode 100644 index 00000000..d764daa7 --- /dev/null +++ b/gpu/psb_d_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_d_hybg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + type, extends(psb_d_csr_sparse_mat) :: psb_d_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(d_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => d_hybg_get_fmt + procedure, pass(a) :: sizeof => d_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_d_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_hybg_csmm + procedure, pass(a) :: csmv => psb_d_hybg_csmv + procedure, pass(a) :: scals => psb_d_hybg_scals + procedure, pass(a) :: scalv => psb_d_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_d_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_hybg_from_fmt + procedure, pass(a) :: free => d_hybg_free + procedure, pass(a) :: mold => psb_d_hybg_mold + procedure, pass(a) :: to_gpu => psb_d_hybg_to_gpu + final :: d_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_hybg_mold +#endif + end type psb_d_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_hybg_get_nzeros, d_hybg_free, d_hybg_get_fmt, & + & d_hybg_get_size, d_hybg_sizeof, d_hybg_get_nz_row + + + interface + subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hybg_inner_vect_sv + end interface + + interface + subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hybg_vect_mv + end interface + + interface + subroutine psb_d_hybg_reallocate_nz(nz,a) + import :: psb_d_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_d_hybg_reallocate_nz + end interface + + interface + subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_d_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hybg_allocate_mnnz + end interface + + interface + subroutine psb_d_hybg_mold(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hybg_mold + end interface + + interface + subroutine psb_d_hybg_to_gpu(a,info, nzrm) + import :: psb_d_hybg_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_hybg_to_gpu + end interface + + interface + subroutine psb_d_cp_hybg_from_coo(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hybg_from_coo + end interface + + interface + subroutine psb_d_cp_hybg_from_fmt(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hybg_from_fmt + end interface + + interface + subroutine psb_d_mv_hybg_from_coo(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hybg_from_coo + end interface + + interface + subroutine psb_d_mv_hybg_from_fmt(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hybg_from_fmt + end interface + + interface + subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hybg_csmv + end interface + interface + subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hybg_csmm + end interface + + interface + subroutine psb_d_hybg_scal(d,a,info,side) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_hybg_scal + end interface + + interface + subroutine psb_d_hybg_scals(d,a,info) + import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hybg_sizeof(a) result(res) + implicit none + class(psb_d_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_hybg_sizeof + + function d_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function d_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_hybg_free + + subroutine d_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_d_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine d_hybg_finalize + +#else + + interface + subroutine psb_d_hybg_mold(a,b,info) + import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hybg_mold + end interface + +#endif + +end module psb_d_hybg_mat_mod +#endif diff --git a/gpu/psb_d_vectordev_mod.F90 b/gpu/psb_d_vectordev_mod.F90 new file mode 100644 index 00000000..cda0d9d7 --- /dev/null +++ b/gpu/psb_d_vectordev_mod.F90 @@ -0,0 +1,390 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedDouble(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + real(c_double), value :: dummy + end function registerMappedDouble + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceDouble(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + real(c_double) :: hostVec(*) + end function writeMultiVecDeviceDouble + function writeMultiVecDeviceDoubleR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceDoubleR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + real(c_double) :: hostVec(ld,*) + end function writeMultiVecDeviceDoubleR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceDouble(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + real(c_double) :: hostVec(*) + end function readMultiVecDeviceDouble + function readMultiVecDeviceDoubleR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceDoubleR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + real(c_double) :: hostVec(ld,*) + end function readMultiVecDeviceDoubleR2 + end interface + + interface allocateDouble + function allocateDouble(didx,n) & + & result(res) bind(c,name='allocateDouble') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateDouble + function allocateMultiDouble(didx,m,n) & + & result(res) bind(c,name='allocateMultiDouble') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiDouble + end interface + + interface writeDouble + function writeDouble(didx,hidx,n) & + & result(res) bind(c,name='writeDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(*) + integer(c_int),value :: n + end function writeDouble + function writeDoubleFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeDoubleFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeDoubleFirst + function writeMultiDouble(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiDouble + end interface + + interface readDouble + function readDouble(didx,hidx,n) & + & result(res) bind(c,name='readDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(*) + integer(c_int),value :: n + end function readDouble + function readDoubleFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readDoubleFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readDoubleFirst + function readMultiDouble(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_double) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiDouble + end interface + + interface + subroutine freeDouble(didx) & + & bind(c,name='freeDouble') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeDouble + end interface + + + interface setScalDevice + function setScalMultiVecDeviceDouble(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + real(c_double), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceDouble + end interface + + interface + function geinsMultiVecDeviceDouble(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceDouble + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceDouble(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceDouble + end interface + + interface + function igathMultiVecDeviceDoubleVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceDoubleVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceDoubleVecIdx + end interface + + interface + function iscatMultiVecDeviceDouble(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + real(c_double),value :: beta + end function iscatMultiVecDeviceDouble + end interface + + interface + function iscatMultiVecDeviceDoubleVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceDoubleVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + real(c_double),value :: beta + end function iscatMultiVecDeviceDoubleVecIdx + end interface + + + interface scalMultiVecDevice + function scalMultiVecDeviceDouble(alpha,deviceVecA) & + & result(val) bind(c,name='scalMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + real(c_double), value :: alpha + type(c_ptr), value :: deviceVecA + end function scalMultiVecDeviceDouble + end interface + + interface dotMultiVecDevice + function dotMultiVecDeviceDouble(res, n,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotMultiVecDeviceDouble + end interface + + interface nrm2MultiVecDevice + function nrm2MultiVecDeviceDouble(res,n,deviceVecA) & + & result(val) bind(c,name='nrm2MultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function nrm2MultiVecDeviceDouble + end interface + + interface amaxMultiVecDevice + function amaxMultiVecDeviceDouble(res,n,deviceVecA) & + & result(val) bind(c,name='amaxMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function amaxMultiVecDeviceDouble + end interface + + interface asumMultiVecDevice + function asumMultiVecDeviceDouble(res,n,deviceVecA) & + & result(val) bind(c,name='asumMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function asumMultiVecDeviceDouble + end interface + + + interface axpbyMultiVecDevice + function axpbyMultiVecDeviceDouble(n,alpha,deviceVecA,beta,deviceVecB) & + & result(res) bind(c,name='axpbyMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyMultiVecDeviceDouble + end interface + + interface axyMultiVecDevice + function axyMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='axyMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function axyMultiVecDeviceDouble + end interface + + interface axybzMultiVecDevice + function axybzMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB,beta,deviceVecZ) & + & result(res) bind(c,name='axybzMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB,deviceVecZ + end function axybzMultiVecDeviceDouble + end interface + + + interface absMultiVecDevice + function absMultiVecDeviceDouble(n,alpha,deviceVecA) & + & result(res) bind(c,name='absMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha + type(c_ptr), value :: deviceVecA + end function absMultiVecDeviceDouble + function absMultiVecDeviceDouble2(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='absMultiVecDeviceDouble2') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_double), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function absMultiVecDeviceDouble2 + end interface + + interface inner_register + module procedure inner_registerDouble + end interface + + interface inner_unregister + module procedure inner_unregisterDouble + end interface + +contains + + + function inner_registerDouble(buffer,dval) result(res) + real(c_double), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + real(c_double) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerDouble + + subroutine inner_unregisterDouble(buffer) + real(c_double), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterDouble + +#endif + +end module psb_d_vectordev_mod diff --git a/gpu/psb_gpu_env_mod.F90 b/gpu/psb_gpu_env_mod.F90 new file mode 100644 index 00000000..0473f4ac --- /dev/null +++ b/gpu/psb_gpu_env_mod.F90 @@ -0,0 +1,340 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_gpu_env_mod + use psb_const_mod + use iso_c_binding + use base_cusparse_mod +! interface psb_gpu_init +! module procedure psb_gpu_init +! end interface +#if defined(HAVE_CUDA) + use core_mod + + interface + function psb_gpuGetHandle() & + & result(res) bind(c,name='psb_gpuGetHandle') + use iso_c_binding + type(c_ptr) :: res + end function psb_gpuGetHandle + end interface + + interface + function psb_gpuGetStream() & + & result(res) bind(c,name='psb_gpuGetStream') + use iso_c_binding + type(c_ptr) :: res + end function psb_gpuGetStream + end interface + + interface + function psb_C_gpu_init(dev) & + & result(res) bind(c,name='gpuInit') + use iso_c_binding + integer(c_int),value :: dev + integer(c_int) :: res + end function psb_C_gpu_init + end interface + + interface + function psb_cuda_getDeviceCount() & + & result(res) bind(c,name='getDeviceCount') + use iso_c_binding + integer(c_int) :: res + end function psb_cuda_getDeviceCount + end interface + + interface + function psb_cuda_getDevice() & + & result(res) bind(c,name='getDevice') + use iso_c_binding + integer(c_int) :: res + end function psb_cuda_getDevice + end interface + + interface + function psb_cuda_setDevice(dev) & + & result(res) bind(c,name='setDevice') + use iso_c_binding + integer(c_int), value :: dev + integer(c_int) :: res + end function psb_cuda_setDevice + end interface + + + interface + subroutine psb_gpuCreateHandle() & + & bind(c,name='psb_gpuCreateHandle') + use iso_c_binding + end subroutine psb_gpuCreateHandle + end interface + + interface + subroutine psb_gpuSetStream(handle,stream) & + & bind(c,name='psb_gpuSetStream') + use iso_c_binding + type(c_ptr), value :: handle, stream + end subroutine psb_gpuSetStream + end interface + + interface + subroutine psb_gpuDestroyHandle() & + & bind(c,name='psb_gpuDestroyHandle') + use iso_c_binding + end subroutine psb_gpuDestroyHandle + end interface + + interface + subroutine psb_cudaReset() & + & bind(c,name='cudaReset') + use iso_c_binding + end subroutine psb_cudaReset + end interface + + interface + subroutine psb_gpuClose() & + & bind(c,name='gpuClose') + use iso_c_binding + end subroutine psb_gpuClose + end interface +#endif + + interface + function psb_C_DeviceHasUVA() & + & result(res) bind(c,name='DeviceHasUVA') + use iso_c_binding + integer(c_int) :: res + end function psb_C_DeviceHasUVA + end interface + + interface + function psb_C_get_MultiProcessors() & + & result(res) bind(c,name='getGPUMultiProcessors') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MultiProcessors + function psb_C_get_MemoryBusWidth() & + & result(res) bind(c,name='getGPUMemoryBusWidth') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MemoryBusWidth + function psb_C_get_MemoryClockRate() & + & result(res) bind(c,name='getGPUMemoryClockRate') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MemoryClockRate + function psb_C_get_WarpSize() & + & result(res) bind(c,name='getGPUWarpSize') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_WarpSize + function psb_C_get_MaxThreadsPerMP() & + & result(res) bind(c,name='getGPUMaxThreadsPerMP') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MaxThreadsPerMP + function psb_C_get_MaxRegistersPerBlock() & + & result(res) bind(c,name='getGPUMaxRegistersPerBlock') + use iso_c_binding + integer(c_int) :: res + end function psb_C_get_MaxRegistersPerBlock + end interface + interface + subroutine psb_C_cpy_NameString(cstring) & + & bind(c,name='cpyGPUNameString') + use iso_c_binding + character(c_char) :: cstring(*) + end subroutine psb_C_cpy_NameString + end interface + + logical, private :: gpu_do_maybe_free_buffer = .false. + +Contains + + function psb_gpu_get_maybe_free_buffer() result(res) + logical :: res + res = gpu_do_maybe_free_buffer + end function psb_gpu_get_maybe_free_buffer + + subroutine psb_gpu_set_maybe_free_buffer(val) + logical, intent(in) :: val + gpu_do_maybe_free_buffer = val + end subroutine psb_gpu_set_maybe_free_buffer + + ! !!!!!!!!!!!!!!!!!!!!!! + ! + ! Environment handling + ! + ! !!!!!!!!!!!!!!!!!!!!!! + + + subroutine psb_gpu_init(ctxt,dev) + use psb_penv_mod + use psb_const_mod + use psb_error_mod + type(psb_ctxt_type), intent(in) :: ctxt + integer, intent(in), optional :: dev + + integer :: np, npavail, iam, info, count, dev_ + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) +#if defined (HAVE_CUDA) +#if defined(SERIAL_MPI) + iam = 0 +#else + call psb_info(ctxt,iam,np) +#endif + + count = psb_cuda_getDeviceCount() + + if (present(dev)) then + info = psb_C_gpu_init(dev) + else + if (count >0) then + dev_ = mod(iam,count) + else + dev_ = 0 + end if + info = psb_C_gpu_init(dev_) + end if + if (info == 0) info = initFcusparse() + if (info /= 0) then + call psb_errpush(psb_err_internal_error_,'psb_gpu_init') + goto 9999 + end if + call psb_gpuCreateHandle() +#endif + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(ctxt,err_act) + + return + + end subroutine psb_gpu_init + + + subroutine psb_gpu_DeviceSync() +#if defined(HAVE_CUDA) + call psb_cudaSync() +#endif + end subroutine psb_gpu_DeviceSync + + function psb_gpu_getDeviceCount() result(res) + integer :: res +#if defined(HAVE_CUDA) + res = psb_cuda_getDeviceCount() +#else + res = 0 +#endif + end function psb_gpu_getDeviceCount + + subroutine psb_gpu_exit() + integer :: res + res = closeFcusparse() + call psb_gpuClose() + call psb_cudaReset() + end subroutine psb_gpu_exit + + function psb_gpu_DeviceHasUVA() result(res) + logical :: res + res = (psb_C_DeviceHasUVA() == 1) + end function psb_gpu_DeviceHasUVA + + function psb_gpu_MultiProcessors() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MultiProcessors() + end function psb_gpu_MultiProcessors + + function psb_gpu_MaxRegistersPerBlock() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MaxRegistersPerBlock() + end function psb_gpu_MaxRegistersPerBlock + + function psb_gpu_MaxThreadsPerMP() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MaxThreadsPerMP() + end function psb_gpu_MaxThreadsPerMP + + function psb_gpu_WarpSize() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_WarpSize() + end function psb_gpu_WarpSize + + function psb_gpu_MemoryClockRate() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MemoryClockRate() + end function psb_gpu_MemoryClockRate + + function psb_gpu_MemoryBusWidth() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MemoryBusWidth() + end function psb_gpu_MemoryBusWidth + + function psb_gpu_MemoryPeakBandwidth() result(res) + real(psb_dpk_) :: res + ! Formula here: 2*ClockRate(KHz)*BusWidth(bit) + ! normalization: bit/byte, KHz/MHz + ! output: MBytes/s + res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate() + end function psb_gpu_MemoryPeakBandwidth + + function psb_gpu_DeviceName() result(res) + character(len=256) :: res + character :: cstring(256) + call psb_C_cpy_NameString(cstring) + call stringc2f(cstring,res) + end function psb_gpu_DeviceName + + + subroutine stringc2f(cstring,fstring) + character(c_char) :: cstring(*) + character(len=*) :: fstring + integer :: i + + i = 1 + do + if (cstring(i) == c_null_char) exit + if (i > len(fstring)) exit + fstring(i:i) = cstring(i) + i = i + 1 + end do + do + if (i > len(fstring)) exit + fstring(i:i) = " " + i = i + 1 + end do + return + end subroutine stringc2f + +end module psb_gpu_env_mod diff --git a/gpu/psb_gpu_mod.F90 b/gpu/psb_gpu_mod.F90 new file mode 100644 index 00000000..7eba8062 --- /dev/null +++ b/gpu/psb_gpu_mod.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_gpu_mod + use psb_const_mod + use psb_gpu_env_mod + + use psb_i_gpu_vect_mod + use psb_s_gpu_vect_mod + use psb_d_gpu_vect_mod + use psb_c_gpu_vect_mod + use psb_z_gpu_vect_mod + + use psb_i_gpu_multivect_mod + use psb_s_gpu_multivect_mod + use psb_d_gpu_multivect_mod + use psb_c_gpu_multivect_mod + use psb_z_gpu_multivect_mod + + use psb_d_ell_mat_mod + use psb_d_elg_mat_mod + use psb_s_ell_mat_mod + use psb_s_elg_mat_mod + use psb_z_ell_mat_mod + use psb_z_elg_mat_mod + use psb_c_ell_mat_mod + use psb_c_elg_mat_mod + + use psb_s_hll_mat_mod + use psb_s_hlg_mat_mod + use psb_d_hll_mat_mod + use psb_d_hlg_mat_mod + use psb_c_hll_mat_mod + use psb_c_hlg_mat_mod + use psb_z_hll_mat_mod + use psb_z_hlg_mat_mod + + use psb_s_csrg_mat_mod + use psb_d_csrg_mat_mod + use psb_c_csrg_mat_mod + use psb_z_csrg_mat_mod +#if CUDA_SHORT_VERSION <= 10 + use psb_s_hybg_mat_mod + use psb_d_hybg_mat_mod + use psb_c_hybg_mat_mod + use psb_z_hybg_mat_mod +#endif + use psb_d_diag_mat_mod + use psb_d_hdiag_mat_mod + + use psb_s_dnsg_mat_mod + use psb_d_dnsg_mat_mod + use psb_c_dnsg_mat_mod + use psb_z_dnsg_mat_mod + + use psb_s_hdiag_mat_mod + ! use psb_s_diag_mat_mod + +end module psb_gpu_mod + diff --git a/gpu/psb_i_csrg_mat_mod.F90 b/gpu/psb_i_csrg_mat_mod.F90 new file mode 100644 index 00000000..de25370f --- /dev/null +++ b/gpu/psb_i_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_csrg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_csr_sparse_mat) :: psb_i_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(i_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_csrg_get_fmt + procedure, pass(a) :: sizeof => i_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_i_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_csrg_csmm + procedure, pass(a) :: csmv => psb_i_csrg_csmv + procedure, pass(a) :: scals => psb_i_csrg_scals + procedure, pass(a) :: scalv => psb_i_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_i_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_csrg_from_fmt + procedure, pass(a) :: free => i_csrg_free + procedure, pass(a) :: mold => psb_i_csrg_mold + procedure, pass(a) :: is_host => i_csrg_is_host + procedure, pass(a) :: is_dev => i_csrg_is_dev + procedure, pass(a) :: is_sync => i_csrg_is_sync + procedure, pass(a) :: set_host => i_csrg_set_host + procedure, pass(a) :: set_dev => i_csrg_set_dev + procedure, pass(a) :: set_sync => i_csrg_set_sync + procedure, pass(a) :: sync => i_csrg_sync + procedure, pass(a) :: to_gpu => psb_i_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_i_csrg_from_gpu + final :: i_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_csrg_mold +#endif + end type psb_i_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_csrg_get_nzeros, i_csrg_free, i_csrg_get_fmt, & + & i_csrg_get_size, i_csrg_sizeof, i_csrg_get_nz_row + + + interface + subroutine psb_i_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_i_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_csrg_vect_mv + end interface + + interface + subroutine psb_i_csrg_reallocate_nz(nz,a) + import :: psb_i_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_i_csrg_reallocate_nz + end interface + + interface + subroutine psb_i_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_i_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_csrg_allocate_mnnz + end interface + + interface + subroutine psb_i_csrg_mold(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_csrg_mold + end interface + + interface + subroutine psb_i_csrg_to_gpu(a,info, nzrm) + import :: psb_i_csrg_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_csrg_to_gpu + end interface + + interface + subroutine psb_i_csrg_from_gpu(a,info) + import :: psb_i_csrg_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_csrg_from_gpu + end interface + + interface + subroutine psb_i_cp_csrg_from_coo(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_csrg_from_coo + end interface + + interface + subroutine psb_i_cp_csrg_from_fmt(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_csrg_from_fmt + end interface + + interface + subroutine psb_i_mv_csrg_from_coo(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_csrg_from_coo + end interface + + interface + subroutine psb_i_mv_csrg_from_fmt(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_csrg_from_fmt + end interface + + interface + subroutine psb_i_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_csrg_csmv + end interface + interface + subroutine psb_i_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_csrg_csmm + end interface + + interface + subroutine psb_i_csrg_scal(d,a,info,side) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_csrg_scal + end interface + + interface + subroutine psb_i_csrg_scals(d,a,info) + import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_csrg_sizeof(a) result(res) + implicit none + class(psb_i_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_csrg_sizeof + + function i_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function i_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine i_csrg_set_host(a) + implicit none + class(psb_i_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_csrg_set_host + + subroutine i_csrg_set_dev(a) + implicit none + class(psb_i_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_csrg_set_dev + + subroutine i_csrg_set_sync(a) + implicit none + class(psb_i_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_csrg_set_sync + + function i_csrg_is_dev(a) result(res) + implicit none + class(psb_i_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_csrg_is_dev + + function i_csrg_is_host(a) result(res) + implicit none + class(psb_i_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_csrg_is_host + + function i_csrg_is_sync(a) result(res) + implicit none + class(psb_i_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_csrg_is_sync + + + subroutine i_csrg_sync(a) + implicit none + class(psb_i_csrg_sparse_mat), target, intent(in) :: a + class(psb_i_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_csrg_sync + + subroutine i_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_csrg_free + + subroutine i_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_i_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine i_csrg_finalize + +#else + interface + subroutine psb_i_csrg_mold(a,b,info) + import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_csrg_mold + end interface + +#endif + +end module psb_i_csrg_mat_mod diff --git a/gpu/psb_i_diag_mat_mod.F90 b/gpu/psb_i_diag_mat_mod.F90 new file mode 100644 index 00000000..3559c09a --- /dev/null +++ b/gpu/psb_i_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_i_dia_mat_mod + + type, extends(psb_i_dia_sparse_mat) :: psb_i_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => i_diag_get_fmt + procedure, pass(a) :: sizeof => i_diag_sizeof + procedure, pass(a) :: vect_mv => psb_i_diag_vect_mv +! procedure, pass(a) :: csmm => psb_i_diag_csmm + procedure, pass(a) :: csmv => psb_i_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_i_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_i_diag_scals +! procedure, pass(a) :: scalv => psb_i_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_i_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_i_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_i_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_i_mv_diag_from_fmt + procedure, pass(a) :: free => i_diag_free + procedure, pass(a) :: mold => psb_i_diag_mold + procedure, pass(a) :: to_gpu => psb_i_diag_to_gpu + final :: i_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_diag_mold +#endif + end type psb_i_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_diag_get_nzeros, i_diag_free, i_diag_get_fmt, & + & i_diag_get_size, i_diag_sizeof, i_diag_get_nz_row + + + interface + subroutine psb_i_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_diag_vect_mv + end interface + + interface + subroutine psb_i_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_diag_inner_vect_sv + end interface + + interface + subroutine psb_i_diag_reallocate_nz(nz,a) + import :: psb_i_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_diag_sparse_mat), intent(inout) :: a + end subroutine psb_i_diag_reallocate_nz + end interface + + interface + subroutine psb_i_diag_allocate_mnnz(m,n,a,nz) + import :: psb_i_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_diag_allocate_mnnz + end interface + + interface + subroutine psb_i_diag_mold(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_diag_mold + end interface + + interface + subroutine psb_i_diag_to_gpu(a,info, nzrm) + import :: psb_i_diag_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_diag_to_gpu + end interface + + interface + subroutine psb_i_cp_diag_from_coo(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_diag_from_coo + end interface + + interface + subroutine psb_i_cp_diag_from_fmt(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_diag_from_fmt + end interface + + interface + subroutine psb_i_mv_diag_from_coo(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_diag_from_coo + end interface + + + interface + subroutine psb_i_mv_diag_from_fmt(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_diag_from_fmt + end interface + + interface + subroutine psb_i_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_diag_csmv + end interface + interface + subroutine psb_i_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_diag_csmm + end interface + + interface + subroutine psb_i_diag_scal(d,a,info, side) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_diag_scal + end interface + + interface + subroutine psb_i_diag_scals(d,a,info) + import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_diag_sizeof(a) result(res) + implicit none + class(psb_i_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_int * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_diag_sizeof + + function i_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function i_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_dia_sparse_mat%free() + + return + + end subroutine i_diag_free + + subroutine i_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_i_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_diag_finalize + +#else + + interface + subroutine psb_i_diag_mold(a,b,info) + import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_diag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_diag_mold + end interface + +#endif + +end module psb_i_diag_mat_mod diff --git a/gpu/psb_i_dnsg_mat_mod.F90 b/gpu/psb_i_dnsg_mat_mod.F90 new file mode 100644 index 00000000..978996ae --- /dev/null +++ b/gpu/psb_i_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_dnsg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_dns_mat_mod + use dnsdev_mod + + type, extends(psb_i_dns_sparse_mat) :: psb_i_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => i_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => i_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_i_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_i_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_i_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_i_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_i_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_i_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_i_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_i_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_i_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_dnsg_from_fmt + procedure, pass(a) :: free => i_dnsg_free + procedure, pass(a) :: mold => psb_i_dnsg_mold + procedure, pass(a) :: to_gpu => psb_i_dnsg_to_gpu + final :: i_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_dnsg_mold +#endif + end type psb_i_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_dnsg_get_nzeros, i_dnsg_free, i_dnsg_get_fmt, & + & i_dnsg_get_size, i_dnsg_get_nz_row + + + interface + subroutine psb_i_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_i_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta +!!$ class(psb_i_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_i_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_i_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_i_dnsg_mold(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_dnsg_mold + end interface + + interface + subroutine psb_i_dnsg_to_gpu(a,info) + import :: psb_i_dnsg_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_dnsg_to_gpu + end interface + + interface + subroutine psb_i_cp_dnsg_from_coo(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_dnsg_from_coo + end interface + + interface + subroutine psb_i_cp_dnsg_from_fmt(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_i_mv_dnsg_from_coo(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_i_mv_dnsg_from_fmt(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_i_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:) +!!$ integer(psb_ipk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_i_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) +!!$ integer(psb_ipk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_dnsg_scal(d,a,info, side) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_i_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_dnsg_scals(d,a,info) +!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function i_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function i_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_dns_sparse_mat%free() + + return + + end subroutine i_dnsg_free + + subroutine i_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_i_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_dnsg_finalize + +#else + + interface + subroutine psb_i_dnsg_mold(a,b,info) + import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_dnsg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_dnsg_mold + end interface + +#endif + +end module psb_i_dnsg_mat_mod diff --git a/gpu/psb_i_elg_mat_mod.F90 b/gpu/psb_i_elg_mat_mod.F90 new file mode 100644 index 00000000..afc71662 --- /dev/null +++ b/gpu/psb_i_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_elg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_ell_sparse_mat) :: psb_i_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_elg_get_fmt + procedure, pass(a) :: sizeof => i_elg_sizeof + procedure, pass(a) :: vect_mv => psb_i_elg_vect_mv + procedure, pass(a) :: csmm => psb_i_elg_csmm + procedure, pass(a) :: csmv => psb_i_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_elg_scals + procedure, pass(a) :: scalv => psb_i_elg_scal + procedure, pass(a) :: reallocate_nz => psb_i_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_elg_allocate_mnnz + procedure, pass(a) :: reinit => i_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_elg_from_fmt + procedure, pass(a) :: free => i_elg_free + procedure, pass(a) :: mold => psb_i_elg_mold + procedure, pass(a) :: csput_a => psb_i_elg_csput_a + procedure, pass(a) :: csput_v => psb_i_elg_csput_v + procedure, pass(a) :: is_host => i_elg_is_host + procedure, pass(a) :: is_dev => i_elg_is_dev + procedure, pass(a) :: is_sync => i_elg_is_sync + procedure, pass(a) :: set_host => i_elg_set_host + procedure, pass(a) :: set_dev => i_elg_set_dev + procedure, pass(a) :: set_sync => i_elg_set_sync + procedure, pass(a) :: sync => i_elg_sync + procedure, pass(a) :: from_gpu => psb_i_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_elg_to_gpu + procedure, pass(a) :: asb => psb_i_elg_asb + final :: i_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_elg_mold + procedure, pass(a) :: asb => psb_i_elg_asb +#endif + end type psb_i_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_elg_get_nzeros, i_elg_free, i_elg_get_fmt, & + & i_elg_get_size, i_elg_sizeof, i_elg_get_nz_row, i_elg_sync + + + interface + subroutine psb_i_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_elg_vect_mv + end interface + + interface + subroutine psb_i_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_elg_inner_vect_sv + end interface + + interface + subroutine psb_i_elg_reallocate_nz(nz,a) + import :: psb_i_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_elg_reallocate_nz + end interface + + interface + subroutine psb_i_elg_allocate_mnnz(m,n,a,nz) + import :: psb_i_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_elg_allocate_mnnz + end interface + + interface + subroutine psb_i_elg_mold(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_mold + end interface + + interface + subroutine psb_i_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_csput_a + end interface + + interface + subroutine psb_i_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& + & psb_i_base_vect_type + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_csput_v + end interface + + interface + subroutine psb_i_elg_from_gpu(a,info) + import :: psb_i_elg_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_from_gpu + end interface + + interface + subroutine psb_i_elg_to_gpu(a,info, nzrm) + import :: psb_i_elg_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_elg_to_gpu + end interface + + interface + subroutine psb_i_cp_elg_from_coo(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_elg_from_coo + end interface + + interface + subroutine psb_i_cp_elg_from_fmt(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_elg_from_fmt + end interface + + interface + subroutine psb_i_mv_elg_from_coo(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_elg_from_coo + end interface + + + interface + subroutine psb_i_mv_elg_from_fmt(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_elg_from_fmt + end interface + + interface + subroutine psb_i_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_elg_csmv + end interface + interface + subroutine psb_i_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_elg_csmm + end interface + + interface + subroutine psb_i_elg_scal(d,a,info, side) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_elg_scal + end interface + + interface + subroutine psb_i_elg_scals(d,a,info) + import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_scals + end interface + + interface + subroutine psb_i_elg_asb(a) + import :: psb_i_elg_sparse_mat + class(psb_i_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_elg_sizeof(a) result(res) + implicit none + class(psb_i_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_elg_sizeof + + function i_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function i_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine i_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = izero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine i_elg_reinit + + subroutine i_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine i_elg_free + + subroutine i_elg_sync(a) + implicit none + class(psb_i_elg_sparse_mat), target, intent(in) :: a + class(psb_i_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_elg_sync + + subroutine i_elg_set_host(a) + implicit none + class(psb_i_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_elg_set_host + + subroutine i_elg_set_dev(a) + implicit none + class(psb_i_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_elg_set_dev + + subroutine i_elg_set_sync(a) + implicit none + class(psb_i_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_elg_set_sync + + function i_elg_is_dev(a) result(res) + implicit none + class(psb_i_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_elg_is_dev + + function i_elg_is_host(a) result(res) + implicit none + class(psb_i_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_elg_is_host + + function i_elg_is_sync(a) result(res) + implicit none + class(psb_i_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_elg_is_sync + + subroutine i_elg_finalize(a) + use elldev_mod + implicit none + type(psb_i_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine i_elg_finalize + +#else + + interface + subroutine psb_i_elg_asb(a) + import :: psb_i_elg_sparse_mat + class(psb_i_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_elg_asb + end interface + + interface + subroutine psb_i_elg_mold(a,b,info) + import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_elg_mold + end interface + +#endif + +end module psb_i_elg_mat_mod diff --git a/gpu/psb_i_gpu_vect_mod.F90 b/gpu/psb_i_gpu_vect_mod.F90 new file mode 100644 index 00000000..ca4950a0 --- /dev/null +++ b/gpu/psb_i_gpu_vect_mod.F90 @@ -0,0 +1,1671 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_vect_type) :: psb_i_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + integer(c_int), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + integer(c_int), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => i_gpu_get_nrows + procedure, nopass :: get_fmt => i_gpu_get_fmt + + procedure, pass(x) :: all => i_gpu_all + procedure, pass(x) :: zero => i_gpu_zero + procedure, pass(x) :: asb_m => i_gpu_asb_m + procedure, pass(x) :: sync => i_gpu_sync + procedure, pass(x) :: sync_space => i_gpu_sync_space + procedure, pass(x) :: bld_x => i_gpu_bld_x + procedure, pass(x) :: bld_mn => i_gpu_bld_mn + procedure, pass(x) :: free => i_gpu_free + procedure, pass(x) :: ins_a => i_gpu_ins_a + procedure, pass(x) :: ins_v => i_gpu_ins_v + procedure, pass(x) :: is_host => i_gpu_is_host + procedure, pass(x) :: is_dev => i_gpu_is_dev + procedure, pass(x) :: is_sync => i_gpu_is_sync + procedure, pass(x) :: set_host => i_gpu_set_host + procedure, pass(x) :: set_dev => i_gpu_set_dev + procedure, pass(x) :: set_sync => i_gpu_set_sync + procedure, pass(x) :: set_scal => i_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => i_gpu_set_vect + procedure, pass(x) :: gthzv_x => i_gpu_gthzv_x + procedure, pass(y) :: sctb => i_gpu_sctb + procedure, pass(y) :: sctb_x => i_gpu_sctb_x + procedure, pass(x) :: gthzbuf => i_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => i_gpu_sctb_buf + procedure, pass(x) :: new_buffer => i_gpu_new_buffer + procedure, nopass :: device_wait => i_gpu_device_wait + procedure, pass(x) :: free_buffer => i_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_gpu_maybe_free_buffer + + final :: i_gpu_vect_finalize +#endif + end type psb_i_vect_gpu + + public :: psb_i_vect_gpu_ + private :: constructor + interface psb_i_vect_gpu_ + module procedure constructor + end interface psb_i_vect_gpu_ + +contains + + function constructor(x) result(this) + integer(psb_ipk_) :: x(:) + type(psb_i_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine i_gpu_device_wait() + call psb_cudaSync() + end subroutine i_gpu_device_wait + + subroutine i_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeInt(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateInt(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readInt(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeInt(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateInt(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceInt(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readInt(x%dt_buf,y,n) + + end select + + end subroutine i_gpu_gthzv_x + + subroutine i_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_i_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readInt(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceInt(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readInt(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine i_gpu_gthzbuf + + subroutine i_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_i_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine i_gpu_sctb + + subroutine i_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeInt(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateInt(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeInt(y%dt_buf,x,n) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeInt(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateInt(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeInt(y%dt_buf,x,n) + info = iscatMultiVecDeviceInt(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine i_gpu_sctb_x + + subroutine i_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta + class(psb_i_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeInt(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeInt(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceInt(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine i_gpu_sctb_buf + + + subroutine i_gpu_bld_x(x,this) + use psb_base_mod + integer(psb_ipk_), intent(in) :: this(:) + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine i_gpu_bld_x + + subroutine i_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'i_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine i_gpu_bld_mn + + subroutine i_gpu_set_host(x) + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine i_gpu_set_host + + subroutine i_gpu_set_dev(x) + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine i_gpu_set_dev + + subroutine i_gpu_set_sync(x) + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine i_gpu_set_sync + + function i_gpu_is_dev(x) result(res) + implicit none + class(psb_i_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_gpu_is_dev + + function i_gpu_is_host(x) result(res) + implicit none + class(psb_i_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_gpu_is_host + + function i_gpu_is_sync(x) result(res) + implicit none + class(psb_i_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_gpu_is_sync + + + function i_gpu_get_nrows(x) result(res) + implicit none + class(psb_i_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function i_gpu_get_nrows + + function i_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iGPU' + end function i_gpu_get_fmt + + subroutine i_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine i_gpu_all + + subroutine i_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=izero + call x%set_host() + end subroutine i_gpu_zero + + subroutine i_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_i_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'i_gpu_sync') + end if + + end subroutine i_gpu_sync + + subroutine i_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine i_gpu_free + + subroutine i_gpu_set_scal(x,val,first,last) + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine i_gpu_set_scal +!!$ +!!$ subroutine i_gpu_set_vect(x,val) +!!$ class(psb_i_vect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_i_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine i_gpu_set_vect + + + + subroutine i_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine i_gpu_vect_finalize + + subroutine i_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_i_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceInt(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine i_gpu_ins_v + + subroutine i_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_i_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_i_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine i_gpu_ins_a + +#endif + +end module psb_i_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_i_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_multivect_mod + use psb_i_base_multivect_mod + +#ifdef HAVE_SPGPU + use psb_i_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_multivect_type) :: psb_i_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => i_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => i_gpu_multi_get_ncols + procedure, nopass :: get_fmt => i_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => i_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => i_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => i_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => i_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => i_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => i_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => i_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => i_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => i_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => i_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => i_gpu_multi_amax +!!$ procedure, pass(x) :: asum => i_gpu_multi_asum + procedure, pass(x) :: all => i_gpu_multi_all + procedure, pass(x) :: zero => i_gpu_multi_zero + procedure, pass(x) :: asb => i_gpu_multi_asb + procedure, pass(x) :: sync => i_gpu_multi_sync + procedure, pass(x) :: sync_space => i_gpu_multi_sync_space + procedure, pass(x) :: bld_x => i_gpu_multi_bld_x + procedure, pass(x) :: bld_n => i_gpu_multi_bld_n + procedure, pass(x) :: free => i_gpu_multi_free + procedure, pass(x) :: ins => i_gpu_multi_ins + procedure, pass(x) :: is_host => i_gpu_multi_is_host + procedure, pass(x) :: is_dev => i_gpu_multi_is_dev + procedure, pass(x) :: is_sync => i_gpu_multi_is_sync + procedure, pass(x) :: set_host => i_gpu_multi_set_host + procedure, pass(x) :: set_dev => i_gpu_multi_set_dev + procedure, pass(x) :: set_sync => i_gpu_multi_set_sync + procedure, pass(x) :: set_scal => i_gpu_multi_set_scal + procedure, pass(x) :: set_vect => i_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => i_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => i_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => i_gpu_multi_sctb_x + final :: i_gpu_multi_vect_finalize +#endif + end type psb_i_multivect_gpu + + public :: psb_i_multivect_gpu + private :: constructor + interface psb_i_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + integer(psb_ipk_) :: x(:,:) + type(psb_i_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine i_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ integer(psb_ipk_) :: y(:) +!!$ class(psb_i_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine i_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine i_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ integer(psb_ipk_) :: beta, x(:) +!!$ class(psb_i_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_i_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine i_gpu_multi_sctb +!!$ +!!$ subroutine i_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ integer(psb_ipk_) :: beta, x(:) +!!$ class(psb_i_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine i_gpu_multi_sctb_x + + + subroutine i_gpu_multi_bld_x(x,this) + use psb_base_mod + integer(psb_ipk_), intent(in) :: this(:,:) + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine i_gpu_multi_bld_x + + subroutine i_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'i_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine i_gpu_multi_bld_n + + + subroutine i_gpu_multi_set_host(x) + implicit none + class(psb_i_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine i_gpu_multi_set_host + + subroutine i_gpu_multi_set_dev(x) + implicit none + class(psb_i_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine i_gpu_multi_set_dev + + subroutine i_gpu_multi_set_sync(x) + implicit none + class(psb_i_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine i_gpu_multi_set_sync + + function i_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_gpu_multi_is_dev + + function i_gpu_multi_is_host(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_gpu_multi_is_host + + function i_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_gpu_multi_is_sync + + + function i_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function i_gpu_multi_get_nrows + + function i_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_i_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function i_gpu_multi_get_ncols + + function i_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iGPU' + end function i_gpu_multi_get_fmt + +!!$ function i_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_i_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_i_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_i_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'i_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function i_gpu_multi_dot_v +!!$ +!!$ function i_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function i_gpu_multi_dot_a +!!$ +!!$ subroutine i_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_i_base_multivect_type), intent(inout) :: x +!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_i_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_i_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_i_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_i_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine i_gpu_multi_mlt_v +!!$ +!!$ subroutine i_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: x(:) +!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_i_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine i_gpu_multi_mlt_a +!!$ +!!$ subroutine i_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: alpha,beta +!!$ integer(psb_ipk_), intent(in) :: x(:) +!!$ integer(psb_ipk_), intent(in) :: y(:) +!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine i_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine i_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: alpha,beta +!!$ class(psb_i_base_multivect_type), intent(inout) :: x +!!$ class(psb_i_base_multivect_type), intent(inout) :: y +!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_i_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_i_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine i_gpu_multi_mlt_v_2 + + + subroutine i_gpu_multi_set_scal(x,val) + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_i_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine i_gpu_multi_set_scal + + subroutine i_gpu_multi_set_vect(x,val) + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_i_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine i_gpu_multi_set_vect + + + +!!$ subroutine i_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_i_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine i_gpu_multi_scal +!!$ +!!$ +!!$ function i_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function i_gpu_multi_nrm2 +!!$ +!!$ function i_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function i_gpu_multi_amax +!!$ +!!$ function i_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function i_gpu_multi_asum + + subroutine i_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=izero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine i_gpu_multi_all + + subroutine i_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_i_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine i_gpu_multi_zero + + subroutine i_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_i_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc i_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => i_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_i_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_i_hdiag_csmm + procedure, pass(a) :: csmv => psb_i_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_i_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_i_hdiag_scals + ! procedure, pass(a) :: scalv => psb_i_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_i_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_i_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_i_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_i_mv_hdiag_from_fmt + procedure, pass(a) :: free => i_hdiag_free + procedure, pass(a) :: mold => psb_i_hdiag_mold + procedure, pass(a) :: to_gpu => psb_i_hdiag_to_gpu + final :: i_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_hdiag_mold +#endif + end type psb_i_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_hdiag_get_nzeros, i_hdiag_free, i_hdiag_get_fmt, & + & i_hdiag_get_size, i_hdiag_sizeof, i_hdiag_get_nz_row + + + interface + subroutine psb_i_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_i_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta +!!$ class(psb_i_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_i_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_i_hdiag_mold(a,b,info) + import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hdiag_mold + end interface + + interface + subroutine psb_i_hdiag_to_gpu(a,info) + import :: psb_i_hdiag_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hdiag_to_gpu + end interface + + interface + subroutine psb_i_cp_hdiag_from_coo(a,b,info) + import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_i_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_i_mv_hdiag_from_coo(a,b,info) + import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_i_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_i_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_i_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) +!!$ integer(psb_ipk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_hdiag_scal(d,a,info, side) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_i_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_hdiag_scals(d,a,info) +!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function i_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function i_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hdia_sparse_mat%free() + + return + + end subroutine i_hdiag_free + + subroutine i_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_i_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hdia_sparse_mat%free() + + return + end subroutine i_hdiag_finalize + +#else + + interface + subroutine psb_i_hdiag_mold(a,b,info) + import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hdiag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hdiag_mold + end interface + +#endif + +end module psb_i_hdiag_mat_mod diff --git a/gpu/psb_i_hlg_mat_mod.F90 b/gpu/psb_i_hlg_mat_mod.F90 new file mode 100644 index 00000000..92917d47 --- /dev/null +++ b/gpu/psb_i_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_hlg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_hll_sparse_mat) :: psb_i_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_hlg_get_fmt + procedure, pass(a) :: sizeof => i_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_i_hlg_vect_mv + procedure, pass(a) :: csmm => psb_i_hlg_csmm + procedure, pass(a) :: csmv => psb_i_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_hlg_scals + procedure, pass(a) :: scalv => psb_i_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_i_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_hlg_from_fmt + procedure, pass(a) :: free => i_hlg_free + procedure, pass(a) :: mold => psb_i_hlg_mold + procedure, pass(a) :: is_host => i_hlg_is_host + procedure, pass(a) :: is_dev => i_hlg_is_dev + procedure, pass(a) :: is_sync => i_hlg_is_sync + procedure, pass(a) :: set_host => i_hlg_set_host + procedure, pass(a) :: set_dev => i_hlg_set_dev + procedure, pass(a) :: set_sync => i_hlg_set_sync + procedure, pass(a) :: sync => i_hlg_sync + procedure, pass(a) :: from_gpu => psb_i_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_hlg_to_gpu + final :: i_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_hlg_mold +#endif + end type psb_i_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_hlg_get_nzeros, i_hlg_free, i_hlg_get_fmt, & + & i_hlg_get_size, i_hlg_sizeof, i_hlg_get_nz_row + + + interface + subroutine psb_i_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hlg_vect_mv + end interface + + interface + subroutine psb_i_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hlg_inner_vect_sv + end interface + + interface + subroutine psb_i_hlg_reallocate_nz(nz,a) + import :: psb_i_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_i_hlg_reallocate_nz + end interface + + interface + subroutine psb_i_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_i_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_hlg_allocate_mnnz + end interface + + interface + subroutine psb_i_hlg_mold(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hlg_mold + end interface + + interface + subroutine psb_i_hlg_from_gpu(a,info) + import :: psb_i_hlg_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hlg_from_gpu + end interface + + interface + subroutine psb_i_hlg_to_gpu(a,info, nzrm) + import :: psb_i_hlg_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_hlg_to_gpu + end interface + + interface + subroutine psb_i_cp_hlg_from_coo(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hlg_from_coo + end interface + + interface + subroutine psb_i_cp_hlg_from_fmt(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hlg_from_fmt + end interface + + interface + subroutine psb_i_mv_hlg_from_coo(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hlg_from_coo + end interface + + + interface + subroutine psb_i_mv_hlg_from_fmt(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hlg_from_fmt + end interface + + interface + subroutine psb_i_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hlg_csmv + end interface + interface + subroutine psb_i_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hlg_csmm + end interface + + interface + subroutine psb_i_hlg_scal(d,a,info, side) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_hlg_scal + end interface + + interface + subroutine psb_i_hlg_scals(d,a,info) + import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_hlg_sizeof(a) result(res) + implicit none + class(psb_i_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_hlg_sizeof + + function i_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function i_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hll_sparse_mat%free() + + return + + end subroutine i_hlg_free + + + subroutine i_hlg_sync(a) + implicit none + class(psb_i_hlg_sparse_mat), target, intent(in) :: a + class(psb_i_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_hlg_sync + + subroutine i_hlg_set_host(a) + implicit none + class(psb_i_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_hlg_set_host + + subroutine i_hlg_set_dev(a) + implicit none + class(psb_i_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_hlg_set_dev + + subroutine i_hlg_set_sync(a) + implicit none + class(psb_i_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_hlg_set_sync + + function i_hlg_is_dev(a) result(res) + implicit none + class(psb_i_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_hlg_is_dev + + function i_hlg_is_host(a) result(res) + implicit none + class(psb_i_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_hlg_is_host + + function i_hlg_is_sync(a) result(res) + implicit none + class(psb_i_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_hlg_is_sync + + + subroutine i_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_i_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_hlg_finalize + +#else + + interface + subroutine psb_i_hlg_mold(a,b,info) + import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hlg_mold + end interface + +#endif + +end module psb_i_hlg_mat_mod diff --git a/gpu/psb_i_hybg_mat_mod.F90 b/gpu/psb_i_hybg_mat_mod.F90 new file mode 100644 index 00000000..9e682365 --- /dev/null +++ b/gpu/psb_i_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_i_hybg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use cusparse_mod + + type, extends(psb_i_csr_sparse_mat) :: psb_i_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(i_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => i_hybg_get_fmt + procedure, pass(a) :: sizeof => i_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_i_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_hybg_csmm + procedure, pass(a) :: csmv => psb_i_hybg_csmv + procedure, pass(a) :: scals => psb_i_hybg_scals + procedure, pass(a) :: scalv => psb_i_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_i_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_mv_hybg_from_fmt + procedure, pass(a) :: free => i_hybg_free + procedure, pass(a) :: mold => psb_i_hybg_mold + procedure, pass(a) :: to_gpu => psb_i_hybg_to_gpu + final :: i_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_hybg_mold +#endif + end type psb_i_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_hybg_get_nzeros, i_hybg_free, i_hybg_get_fmt, & + & i_hybg_get_size, i_hybg_sizeof, i_hybg_get_nz_row + + + interface + subroutine psb_i_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hybg_inner_vect_sv + end interface + + interface + subroutine psb_i_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hybg_vect_mv + end interface + + interface + subroutine psb_i_hybg_reallocate_nz(nz,a) + import :: psb_i_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_i_hybg_reallocate_nz + end interface + + interface + subroutine psb_i_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_i_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_hybg_allocate_mnnz + end interface + + interface + subroutine psb_i_hybg_mold(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hybg_mold + end interface + + interface + subroutine psb_i_hybg_to_gpu(a,info, nzrm) + import :: psb_i_hybg_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_hybg_to_gpu + end interface + + interface + subroutine psb_i_cp_hybg_from_coo(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hybg_from_coo + end interface + + interface + subroutine psb_i_cp_hybg_from_fmt(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cp_hybg_from_fmt + end interface + + interface + subroutine psb_i_mv_hybg_from_coo(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hybg_from_coo + end interface + + interface + subroutine psb_i_mv_hybg_from_fmt(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_mv_hybg_from_fmt + end interface + + interface + subroutine psb_i_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hybg_csmv + end interface + interface + subroutine psb_i_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_hybg_csmm + end interface + + interface + subroutine psb_i_hybg_scal(d,a,info,side) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_hybg_scal + end interface + + interface + subroutine psb_i_hybg_scals(d,a,info) + import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_hybg_sizeof(a) result(res) + implicit none + class(psb_i_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_hybg_sizeof + + function i_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function i_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_hybg_free + + subroutine i_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_i_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine i_hybg_finalize + +#else + + interface + subroutine psb_i_hybg_mold(a,b,info) + import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_hybg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_hybg_mold + end interface + +#endif + +end module psb_i_hybg_mat_mod +#endif diff --git a/gpu/psb_i_vectordev_mod.F90 b/gpu/psb_i_vectordev_mod.F90 new file mode 100644 index 00000000..9998d355 --- /dev/null +++ b/gpu/psb_i_vectordev_mod.F90 @@ -0,0 +1,283 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedInt(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + integer(c_int), value :: dummy + end function registerMappedInt + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceInt(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int) :: hostVec(*) + end function writeMultiVecDeviceInt + function writeMultiVecDeviceIntR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceIntR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + integer(c_int) :: hostVec(ld,*) + end function writeMultiVecDeviceIntR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceInt(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int) :: hostVec(*) + end function readMultiVecDeviceInt + function readMultiVecDeviceIntR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceIntR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + integer(c_int) :: hostVec(ld,*) + end function readMultiVecDeviceIntR2 + end interface + + interface allocateInt + function allocateInt(didx,n) & + & result(res) bind(c,name='allocateInt') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateInt + function allocateMultiInt(didx,m,n) & + & result(res) bind(c,name='allocateMultiInt') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiInt + end interface + + interface writeInt + function writeInt(didx,hidx,n) & + & result(res) bind(c,name='writeInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(*) + integer(c_int),value :: n + end function writeInt + function writeIntFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeIntFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeIntFirst + function writeMultiInt(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiInt + end interface + + interface readInt + function readInt(didx,hidx,n) & + & result(res) bind(c,name='readInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(*) + integer(c_int),value :: n + end function readInt + function readIntFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readIntFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readIntFirst + function readMultiInt(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + integer(c_int) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiInt + end interface + + interface + subroutine freeInt(didx) & + & bind(c,name='freeInt') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeInt + end interface + + + interface setScalDevice + function setScalMultiVecDeviceInt(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + integer(c_int), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceInt + end interface + + interface + function geinsMultiVecDeviceInt(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceInt + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceInt(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceInt + end interface + + interface + function igathMultiVecDeviceIntVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceIntVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceIntVecIdx + end interface + + interface + function iscatMultiVecDeviceInt(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceInt') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + integer(c_int),value :: beta + end function iscatMultiVecDeviceInt + end interface + + interface + function iscatMultiVecDeviceIntVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceIntVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + integer(c_int),value :: beta + end function iscatMultiVecDeviceIntVecIdx + end interface + + + + interface inner_register + module procedure inner_registerInt + end interface + + interface inner_unregister + module procedure inner_unregisterInt + end interface + +contains + + + function inner_registerInt(buffer,dval) result(res) + integer(c_int), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + integer(c_int) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerInt + + subroutine inner_unregisterInt(buffer) + integer(c_int), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterInt + +#endif + +end module psb_i_vectordev_mod diff --git a/gpu/psb_s_csrg_mat_mod.F90 b/gpu/psb_s_csrg_mat_mod.F90 new file mode 100644 index 00000000..cface9f5 --- /dev/null +++ b/gpu/psb_s_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_csrg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_csr_sparse_mat) :: psb_s_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(s_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_csrg_get_fmt + procedure, pass(a) :: sizeof => s_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_s_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_csrg_csmm + procedure, pass(a) :: csmv => psb_s_csrg_csmv + procedure, pass(a) :: scals => psb_s_csrg_scals + procedure, pass(a) :: scalv => psb_s_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_s_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_csrg_from_fmt + procedure, pass(a) :: free => s_csrg_free + procedure, pass(a) :: mold => psb_s_csrg_mold + procedure, pass(a) :: is_host => s_csrg_is_host + procedure, pass(a) :: is_dev => s_csrg_is_dev + procedure, pass(a) :: is_sync => s_csrg_is_sync + procedure, pass(a) :: set_host => s_csrg_set_host + procedure, pass(a) :: set_dev => s_csrg_set_dev + procedure, pass(a) :: set_sync => s_csrg_set_sync + procedure, pass(a) :: sync => s_csrg_sync + procedure, pass(a) :: to_gpu => psb_s_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_s_csrg_from_gpu + final :: s_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_csrg_mold +#endif + end type psb_s_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_csrg_get_nzeros, s_csrg_free, s_csrg_get_fmt, & + & s_csrg_get_size, s_csrg_sizeof, s_csrg_get_nz_row + + + interface + subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrg_vect_mv + end interface + + interface + subroutine psb_s_csrg_reallocate_nz(nz,a) + import :: psb_s_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_s_csrg_reallocate_nz + end interface + + interface + subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_s_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_csrg_allocate_mnnz + end interface + + interface + subroutine psb_s_csrg_mold(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrg_mold + end interface + + interface + subroutine psb_s_csrg_to_gpu(a,info, nzrm) + import :: psb_s_csrg_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_csrg_to_gpu + end interface + + interface + subroutine psb_s_csrg_from_gpu(a,info) + import :: psb_s_csrg_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrg_from_gpu + end interface + + interface + subroutine psb_s_cp_csrg_from_coo(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_csrg_from_coo + end interface + + interface + subroutine psb_s_cp_csrg_from_fmt(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_csrg_from_fmt + end interface + + interface + subroutine psb_s_mv_csrg_from_coo(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_csrg_from_coo + end interface + + interface + subroutine psb_s_mv_csrg_from_fmt(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_csrg_from_fmt + end interface + + interface + subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrg_csmv + end interface + interface + subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_csrg_csmm + end interface + + interface + subroutine psb_s_csrg_scal(d,a,info,side) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_csrg_scal + end interface + + interface + subroutine psb_s_csrg_scals(d,a,info) + import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_csrg_sizeof(a) result(res) + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_csrg_sizeof + + function s_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function s_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine s_csrg_set_host(a) + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_csrg_set_host + + subroutine s_csrg_set_dev(a) + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_csrg_set_dev + + subroutine s_csrg_set_sync(a) + implicit none + class(psb_s_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_csrg_set_sync + + function s_csrg_is_dev(a) result(res) + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_csrg_is_dev + + function s_csrg_is_host(a) result(res) + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_csrg_is_host + + function s_csrg_is_sync(a) result(res) + implicit none + class(psb_s_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_csrg_is_sync + + + subroutine s_csrg_sync(a) + implicit none + class(psb_s_csrg_sparse_mat), target, intent(in) :: a + class(psb_s_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_csrg_sync + + subroutine s_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_csrg_free + + subroutine s_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_s_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine s_csrg_finalize + +#else + interface + subroutine psb_s_csrg_mold(a,b,info) + import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_csrg_mold + end interface + +#endif + +end module psb_s_csrg_mat_mod diff --git a/gpu/psb_s_diag_mat_mod.F90 b/gpu/psb_s_diag_mat_mod.F90 new file mode 100644 index 00000000..1ed54f88 --- /dev/null +++ b/gpu/psb_s_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_s_dia_mat_mod + + type, extends(psb_s_dia_sparse_mat) :: psb_s_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => s_diag_get_fmt + procedure, pass(a) :: sizeof => s_diag_sizeof + procedure, pass(a) :: vect_mv => psb_s_diag_vect_mv +! procedure, pass(a) :: csmm => psb_s_diag_csmm + procedure, pass(a) :: csmv => psb_s_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_s_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_s_diag_scals +! procedure, pass(a) :: scalv => psb_s_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_s_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_s_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_s_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_s_mv_diag_from_fmt + procedure, pass(a) :: free => s_diag_free + procedure, pass(a) :: mold => psb_s_diag_mold + procedure, pass(a) :: to_gpu => psb_s_diag_to_gpu + final :: s_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_diag_mold +#endif + end type psb_s_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_diag_get_nzeros, s_diag_free, s_diag_get_fmt, & + & s_diag_get_size, s_diag_sizeof, s_diag_get_nz_row + + + interface + subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_diag_vect_mv + end interface + + interface + subroutine psb_s_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_diag_inner_vect_sv + end interface + + interface + subroutine psb_s_diag_reallocate_nz(nz,a) + import :: psb_s_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_diag_sparse_mat), intent(inout) :: a + end subroutine psb_s_diag_reallocate_nz + end interface + + interface + subroutine psb_s_diag_allocate_mnnz(m,n,a,nz) + import :: psb_s_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_diag_allocate_mnnz + end interface + + interface + subroutine psb_s_diag_mold(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_diag_mold + end interface + + interface + subroutine psb_s_diag_to_gpu(a,info, nzrm) + import :: psb_s_diag_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_diag_to_gpu + end interface + + interface + subroutine psb_s_cp_diag_from_coo(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_diag_from_coo + end interface + + interface + subroutine psb_s_cp_diag_from_fmt(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_diag_from_fmt + end interface + + interface + subroutine psb_s_mv_diag_from_coo(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_diag_from_coo + end interface + + + interface + subroutine psb_s_mv_diag_from_fmt(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_diag_from_fmt + end interface + + interface + subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_diag_csmv + end interface + interface + subroutine psb_s_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_diag_csmm + end interface + + interface + subroutine psb_s_diag_scal(d,a,info, side) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_diag_scal + end interface + + interface + subroutine psb_s_diag_scals(d,a,info) + import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_diag_sizeof(a) result(res) + implicit none + class(psb_s_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_sp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_diag_sizeof + + function s_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function s_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_dia_sparse_mat%free() + + return + + end subroutine s_diag_free + + subroutine s_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_s_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_diag_finalize + +#else + + interface + subroutine psb_s_diag_mold(a,b,info) + import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_diag_mold + end interface + +#endif + +end module psb_s_diag_mat_mod diff --git a/gpu/psb_s_dnsg_mat_mod.F90 b/gpu/psb_s_dnsg_mat_mod.F90 new file mode 100644 index 00000000..1c531463 --- /dev/null +++ b/gpu/psb_s_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_dnsg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_dns_mat_mod + use dnsdev_mod + + type, extends(psb_s_dns_sparse_mat) :: psb_s_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => s_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => s_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_s_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_s_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_s_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_s_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_s_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_s_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_s_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_s_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_s_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_dnsg_from_fmt + procedure, pass(a) :: free => s_dnsg_free + procedure, pass(a) :: mold => psb_s_dnsg_mold + procedure, pass(a) :: to_gpu => psb_s_dnsg_to_gpu + final :: s_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_dnsg_mold +#endif + end type psb_s_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_dnsg_get_nzeros, s_dnsg_free, s_dnsg_get_fmt, & + & s_dnsg_get_size, s_dnsg_get_nz_row + + + interface + subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_s_dnsg_mold(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dnsg_mold + end interface + + interface + subroutine psb_s_dnsg_to_gpu(a,info) + import :: psb_s_dnsg_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dnsg_to_gpu + end interface + + interface + subroutine psb_s_cp_dnsg_from_coo(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dnsg_from_coo + end interface + + interface + subroutine psb_s_cp_dnsg_from_fmt(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_s_mv_dnsg_from_coo(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_s_mv_dnsg_from_fmt(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_s_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_s_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_scal(d,a,info, side) +!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_dnsg_scals(d,a,info) +!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function s_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function s_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_dns_sparse_mat%free() + + return + + end subroutine s_dnsg_free + + subroutine s_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_s_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_dnsg_finalize + +#else + + interface + subroutine psb_s_dnsg_mold(a,b,info) + import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dnsg_mold + end interface + +#endif + +end module psb_s_dnsg_mat_mod diff --git a/gpu/psb_s_elg_mat_mod.F90 b/gpu/psb_s_elg_mat_mod.F90 new file mode 100644 index 00000000..5c4eae9b --- /dev/null +++ b/gpu/psb_s_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_elg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_ell_sparse_mat) :: psb_s_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_elg_get_fmt + procedure, pass(a) :: sizeof => s_elg_sizeof + procedure, pass(a) :: vect_mv => psb_s_elg_vect_mv + procedure, pass(a) :: csmm => psb_s_elg_csmm + procedure, pass(a) :: csmv => psb_s_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_elg_scals + procedure, pass(a) :: scalv => psb_s_elg_scal + procedure, pass(a) :: reallocate_nz => psb_s_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_elg_allocate_mnnz + procedure, pass(a) :: reinit => s_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_elg_from_fmt + procedure, pass(a) :: free => s_elg_free + procedure, pass(a) :: mold => psb_s_elg_mold + procedure, pass(a) :: csput_a => psb_s_elg_csput_a + procedure, pass(a) :: csput_v => psb_s_elg_csput_v + procedure, pass(a) :: is_host => s_elg_is_host + procedure, pass(a) :: is_dev => s_elg_is_dev + procedure, pass(a) :: is_sync => s_elg_is_sync + procedure, pass(a) :: set_host => s_elg_set_host + procedure, pass(a) :: set_dev => s_elg_set_dev + procedure, pass(a) :: set_sync => s_elg_set_sync + procedure, pass(a) :: sync => s_elg_sync + procedure, pass(a) :: from_gpu => psb_s_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_elg_to_gpu + procedure, pass(a) :: asb => psb_s_elg_asb + final :: s_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_elg_mold + procedure, pass(a) :: asb => psb_s_elg_asb +#endif + end type psb_s_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_elg_get_nzeros, s_elg_free, s_elg_get_fmt, & + & s_elg_get_size, s_elg_sizeof, s_elg_get_nz_row, s_elg_sync + + + interface + subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_elg_vect_mv + end interface + + interface + subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_elg_inner_vect_sv + end interface + + interface + subroutine psb_s_elg_reallocate_nz(nz,a) + import :: psb_s_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_elg_reallocate_nz + end interface + + interface + subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) + import :: psb_s_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_elg_allocate_mnnz + end interface + + interface + subroutine psb_s_elg_mold(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_mold + end interface + + interface + subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_csput_a + end interface + + interface + subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& + & psb_i_base_vect_type + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_csput_v + end interface + + interface + subroutine psb_s_elg_from_gpu(a,info) + import :: psb_s_elg_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_from_gpu + end interface + + interface + subroutine psb_s_elg_to_gpu(a,info, nzrm) + import :: psb_s_elg_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_elg_to_gpu + end interface + + interface + subroutine psb_s_cp_elg_from_coo(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_elg_from_coo + end interface + + interface + subroutine psb_s_cp_elg_from_fmt(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_elg_from_fmt + end interface + + interface + subroutine psb_s_mv_elg_from_coo(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_elg_from_coo + end interface + + + interface + subroutine psb_s_mv_elg_from_fmt(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_elg_from_fmt + end interface + + interface + subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_elg_csmv + end interface + interface + subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_elg_csmm + end interface + + interface + subroutine psb_s_elg_scal(d,a,info, side) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_elg_scal + end interface + + interface + subroutine psb_s_elg_scals(d,a,info) + import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_scals + end interface + + interface + subroutine psb_s_elg_asb(a) + import :: psb_s_elg_sparse_mat + class(psb_s_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_elg_sizeof(a) result(res) + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_elg_sizeof + + function s_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function s_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine s_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = szero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine s_elg_reinit + + subroutine s_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine s_elg_free + + subroutine s_elg_sync(a) + implicit none + class(psb_s_elg_sparse_mat), target, intent(in) :: a + class(psb_s_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_elg_sync + + subroutine s_elg_set_host(a) + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_elg_set_host + + subroutine s_elg_set_dev(a) + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_elg_set_dev + + subroutine s_elg_set_sync(a) + implicit none + class(psb_s_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_elg_set_sync + + function s_elg_is_dev(a) result(res) + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_elg_is_dev + + function s_elg_is_host(a) result(res) + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_elg_is_host + + function s_elg_is_sync(a) result(res) + implicit none + class(psb_s_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_elg_is_sync + + subroutine s_elg_finalize(a) + use elldev_mod + implicit none + type(psb_s_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine s_elg_finalize + +#else + + interface + subroutine psb_s_elg_asb(a) + import :: psb_s_elg_sparse_mat + class(psb_s_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_elg_asb + end interface + + interface + subroutine psb_s_elg_mold(a,b,info) + import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_elg_mold + end interface + +#endif + +end module psb_s_elg_mat_mod diff --git a/gpu/psb_s_gpu_vect_mod.F90 b/gpu/psb_s_gpu_vect_mod.F90 new file mode 100644 index 00000000..1371db53 --- /dev/null +++ b/gpu/psb_s_gpu_vect_mod.F90 @@ -0,0 +1,1989 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_vect_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_gpu_vect_mod + use psb_i_vectordev_mod + use psb_s_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_vect_type) :: psb_s_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + real(c_float), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + real(c_float), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => s_gpu_get_nrows + procedure, nopass :: get_fmt => s_gpu_get_fmt + + procedure, pass(x) :: all => s_gpu_all + procedure, pass(x) :: zero => s_gpu_zero + procedure, pass(x) :: asb_m => s_gpu_asb_m + procedure, pass(x) :: sync => s_gpu_sync + procedure, pass(x) :: sync_space => s_gpu_sync_space + procedure, pass(x) :: bld_x => s_gpu_bld_x + procedure, pass(x) :: bld_mn => s_gpu_bld_mn + procedure, pass(x) :: free => s_gpu_free + procedure, pass(x) :: ins_a => s_gpu_ins_a + procedure, pass(x) :: ins_v => s_gpu_ins_v + procedure, pass(x) :: is_host => s_gpu_is_host + procedure, pass(x) :: is_dev => s_gpu_is_dev + procedure, pass(x) :: is_sync => s_gpu_is_sync + procedure, pass(x) :: set_host => s_gpu_set_host + procedure, pass(x) :: set_dev => s_gpu_set_dev + procedure, pass(x) :: set_sync => s_gpu_set_sync + procedure, pass(x) :: set_scal => s_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => s_gpu_set_vect + procedure, pass(x) :: gthzv_x => s_gpu_gthzv_x + procedure, pass(y) :: sctb => s_gpu_sctb + procedure, pass(y) :: sctb_x => s_gpu_sctb_x + procedure, pass(x) :: gthzbuf => s_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => s_gpu_sctb_buf + procedure, pass(x) :: new_buffer => s_gpu_new_buffer + procedure, nopass :: device_wait => s_gpu_device_wait + procedure, pass(x) :: free_buffer => s_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => s_gpu_maybe_free_buffer + procedure, pass(x) :: dot_v => s_gpu_dot_v + procedure, pass(x) :: dot_a => s_gpu_dot_a + procedure, pass(y) :: axpby_v => s_gpu_axpby_v + procedure, pass(y) :: axpby_a => s_gpu_axpby_a + procedure, pass(y) :: mlt_v => s_gpu_mlt_v + procedure, pass(y) :: mlt_a => s_gpu_mlt_a + procedure, pass(z) :: mlt_a_2 => s_gpu_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => s_gpu_mlt_v_2 + procedure, pass(x) :: scal => s_gpu_scal + procedure, pass(x) :: nrm2 => s_gpu_nrm2 + procedure, pass(x) :: amax => s_gpu_amax + procedure, pass(x) :: asum => s_gpu_asum + procedure, pass(x) :: absval1 => s_gpu_absval1 + procedure, pass(x) :: absval2 => s_gpu_absval2 + + final :: s_gpu_vect_finalize +#endif + end type psb_s_vect_gpu + + public :: psb_s_vect_gpu_ + private :: constructor + interface psb_s_vect_gpu_ + module procedure constructor + end interface psb_s_vect_gpu_ + +contains + + function constructor(x) result(this) + real(psb_spk_) :: x(:) + type(psb_s_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine s_gpu_device_wait() + call psb_cudaSync() + end subroutine s_gpu_device_wait + + subroutine s_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloat(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloat(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloat(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloat(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloat(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceFloat(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloat(x%dt_buf,y,n) + + end select + + end subroutine s_gpu_gthzv_x + + subroutine s_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_s_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readFloat(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceFloat(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readFloat(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine s_gpu_gthzbuf + + subroutine s_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_s_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine s_gpu_sctb + + subroutine s_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloat(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloat(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeFloat(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloat(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloat(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeFloat(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloat(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine s_gpu_sctb_x + + subroutine s_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta + class(psb_s_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeFloat(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeFloat(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceFloat(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine s_gpu_sctb_buf + + + subroutine s_gpu_bld_x(x,this) + use psb_base_mod + real(psb_spk_), intent(in) :: this(:) + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine s_gpu_bld_x + + subroutine s_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'s_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine s_gpu_bld_mn + + subroutine s_gpu_set_host(x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine s_gpu_set_host + + subroutine s_gpu_set_dev(x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine s_gpu_set_dev + + subroutine s_gpu_set_sync(x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine s_gpu_set_sync + + function s_gpu_is_dev(x) result(res) + implicit none + class(psb_s_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_gpu_is_dev + + function s_gpu_is_host(x) result(res) + implicit none + class(psb_s_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_gpu_is_host + + function s_gpu_is_sync(x) result(res) + implicit none + class(psb_s_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_gpu_is_sync + + + function s_gpu_get_nrows(x) result(res) + implicit none + class(psb_s_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function s_gpu_get_nrows + + function s_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'sGPU' + end function s_gpu_get_fmt + + subroutine s_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_s_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine s_gpu_all + + subroutine s_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=szero + call x%set_host() + end subroutine s_gpu_zero + + subroutine s_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_s_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'s_gpu_sync') + end if + + end subroutine s_gpu_sync + + subroutine s_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine s_gpu_free + + subroutine s_gpu_set_scal(x,val,first,last) + class(psb_s_vect_gpu), intent(inout) :: x + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine s_gpu_set_scal +!!$ +!!$ subroutine s_gpu_set_vect(x,val) +!!$ class(psb_s_vect_gpu), intent(inout) :: x +!!$ real(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_s_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine s_gpu_set_vect + + + + function s_gpu_dot_v(n,x,y) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: ddot + integer(psb_ipk_) :: info + + res = szero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_s_vect + ! + select type(yy => y) + type is (psb_s_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_s_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'s_gpu_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function s_gpu_dot_v + + function s_gpu_dot_a(n,x,y) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + real(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function s_gpu_dot_a + + subroutine s_gpu_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_gpu), intent(inout) :: y + real(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_s_vect_gpu) + ! Do something different here + if ((beta /= szero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + type is (psb_s_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_s_vect_gpu) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,sone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine s_gpu_mlt_v + + subroutine s_gpu_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_spk_), intent(in) :: x(:) + class(psb_s_vect_gpu), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_s_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine s_gpu_mlt_a + + subroutine s_gpu_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + real(psb_spk_), intent(in) :: alpha,beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(in) :: y(:) + class(psb_s_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine s_gpu_mlt_a_2 + + subroutine s_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_spk_), intent(in) :: alpha,beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_s_vect_gpu) + select type (yy => y) + type is (psb_s_vect_gpu) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= szero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= szero).and.(z%is_dev())) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= szero).and.(z%is_dev())) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine s_gpu_mlt_v_2 + + subroutine s_gpu_scal(alpha, x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + real(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine s_gpu_scal + + + function s_gpu_nrm2(n,x) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDevice(res,n,x%deviceVect) + + end function s_gpu_nrm2 + + function s_gpu_amax(n,x) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDevice(res,n,x%deviceVect) + + end function s_gpu_amax + + function s_gpu_asum(n,x) result(res) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDevice(res,n,x%deviceVect) + + end function s_gpu_asum + + subroutine s_gpu_absval1(x) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,sone,x%deviceVect) + + end subroutine s_gpu_absval1 + + subroutine s_gpu_absval2(x,y) + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_s_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,sone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_s_base_vect_type%absval(y) + end select + end subroutine s_gpu_absval2 + + + subroutine s_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine s_gpu_vect_finalize + + subroutine s_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_s_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_s_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceFloat(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine s_gpu_ins_v + + subroutine s_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_s_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_s_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine s_gpu_ins_a + +#endif + +end module psb_s_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_s_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_multivect_mod + use psb_s_base_multivect_mod + + use psb_i_multivect_mod +#ifdef HAVE_SPGPU + use psb_i_gpu_multivect_mod + use psb_s_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_multivect_type) :: psb_s_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => s_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => s_gpu_multi_get_ncols + procedure, nopass :: get_fmt => s_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => s_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => s_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => s_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => s_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => s_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => s_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => s_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => s_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => s_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => s_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => s_gpu_multi_amax +!!$ procedure, pass(x) :: asum => s_gpu_multi_asum + procedure, pass(x) :: all => s_gpu_multi_all + procedure, pass(x) :: zero => s_gpu_multi_zero + procedure, pass(x) :: asb => s_gpu_multi_asb + procedure, pass(x) :: sync => s_gpu_multi_sync + procedure, pass(x) :: sync_space => s_gpu_multi_sync_space + procedure, pass(x) :: bld_x => s_gpu_multi_bld_x + procedure, pass(x) :: bld_n => s_gpu_multi_bld_n + procedure, pass(x) :: free => s_gpu_multi_free + procedure, pass(x) :: ins => s_gpu_multi_ins + procedure, pass(x) :: is_host => s_gpu_multi_is_host + procedure, pass(x) :: is_dev => s_gpu_multi_is_dev + procedure, pass(x) :: is_sync => s_gpu_multi_is_sync + procedure, pass(x) :: set_host => s_gpu_multi_set_host + procedure, pass(x) :: set_dev => s_gpu_multi_set_dev + procedure, pass(x) :: set_sync => s_gpu_multi_set_sync + procedure, pass(x) :: set_scal => s_gpu_multi_set_scal + procedure, pass(x) :: set_vect => s_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => s_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => s_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => s_gpu_multi_sctb_x + final :: s_gpu_multi_vect_finalize +#endif + end type psb_s_multivect_gpu + + public :: psb_s_multivect_gpu + private :: constructor + interface psb_s_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + real(psb_spk_) :: x(:,:) + type(psb_s_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine s_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_spk_) :: y(:) +!!$ class(psb_s_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine s_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine s_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ real(psb_spk_) :: beta, x(:) +!!$ class(psb_s_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_s_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine s_gpu_multi_sctb +!!$ +!!$ subroutine s_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_spk_) :: beta, x(:) +!!$ class(psb_s_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine s_gpu_multi_sctb_x + + + subroutine s_gpu_multi_bld_x(x,this) + use psb_base_mod + real(psb_spk_), intent(in) :: this(:,:) + class(psb_s_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine s_gpu_multi_bld_x + + subroutine s_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'s_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine s_gpu_multi_bld_n + + + subroutine s_gpu_multi_set_host(x) + implicit none + class(psb_s_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine s_gpu_multi_set_host + + subroutine s_gpu_multi_set_dev(x) + implicit none + class(psb_s_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine s_gpu_multi_set_dev + + subroutine s_gpu_multi_set_sync(x) + implicit none + class(psb_s_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine s_gpu_multi_set_sync + + function s_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_gpu_multi_is_dev + + function s_gpu_multi_is_host(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_gpu_multi_is_host + + function s_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_gpu_multi_is_sync + + + function s_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function s_gpu_multi_get_nrows + + function s_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_s_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function s_gpu_multi_get_ncols + + function s_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'sGPU' + end function s_gpu_multi_get_fmt + +!!$ function s_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ real(psb_spk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_s_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_s_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_s_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'s_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function s_gpu_multi_dot_v +!!$ +!!$ function s_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ real(psb_spk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ real(psb_spk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function s_gpu_multi_dot_a +!!$ +!!$ subroutine s_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_s_base_multivect_type), intent(inout) :: x +!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ real(psb_spk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_s_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_s_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_s_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_s_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine s_gpu_multi_mlt_v +!!$ +!!$ subroutine s_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: x(:) +!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_s_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine s_gpu_multi_mlt_a +!!$ +!!$ subroutine s_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: alpha,beta +!!$ real(psb_spk_), intent(in) :: x(:) +!!$ real(psb_spk_), intent(in) :: y(:) +!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine s_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine s_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: alpha,beta +!!$ class(psb_s_base_multivect_type), intent(inout) :: x +!!$ class(psb_s_base_multivect_type), intent(inout) :: y +!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_s_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_s_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine s_gpu_multi_mlt_v_2 + + + subroutine s_gpu_multi_set_scal(x,val) + class(psb_s_multivect_gpu), intent(inout) :: x + real(psb_spk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_s_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine s_gpu_multi_set_scal + + subroutine s_gpu_multi_set_vect(x,val) + class(psb_s_multivect_gpu), intent(inout) :: x + real(psb_spk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_s_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine s_gpu_multi_set_vect + + + +!!$ subroutine s_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ real(psb_spk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_s_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine s_gpu_multi_scal +!!$ +!!$ +!!$ function s_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function s_gpu_multi_nrm2 +!!$ +!!$ function s_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function s_gpu_multi_amax +!!$ +!!$ function s_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function s_gpu_multi_asum + + subroutine s_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=szero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine s_gpu_multi_all + + subroutine s_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_s_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine s_gpu_multi_zero + + subroutine s_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_s_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc s_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => s_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_s_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_s_hdiag_csmm + procedure, pass(a) :: csmv => psb_s_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_s_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_s_hdiag_scals + ! procedure, pass(a) :: scalv => psb_s_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_s_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_s_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdiag_from_fmt + procedure, pass(a) :: free => s_hdiag_free + procedure, pass(a) :: mold => psb_s_hdiag_mold + procedure, pass(a) :: to_gpu => psb_s_hdiag_to_gpu + final :: s_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_hdiag_mold +#endif + end type psb_s_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_hdiag_get_nzeros, s_hdiag_free, s_hdiag_get_fmt, & + & s_hdiag_get_size, s_hdiag_sizeof, s_hdiag_get_nz_row + + + interface + subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_s_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_s_hdiag_mold(a,b,info) + import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdiag_mold + end interface + + interface + subroutine psb_s_hdiag_to_gpu(a,info) + import :: psb_s_hdiag_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdiag_to_gpu + end interface + + interface + subroutine psb_s_cp_hdiag_from_coo(a,b,info) + import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_s_mv_hdiag_from_coo(a,b,info) + import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_s_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_s_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdiag_scal(d,a,info, side) +!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdiag_scals(d,a,info) +!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function s_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function s_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hdia_sparse_mat%free() + + return + + end subroutine s_hdiag_free + + subroutine s_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_s_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hdia_sparse_mat%free() + + return + end subroutine s_hdiag_finalize + +#else + + interface + subroutine psb_s_hdiag_mold(a,b,info) + import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdiag_mold + end interface + +#endif + +end module psb_s_hdiag_mat_mod diff --git a/gpu/psb_s_hlg_mat_mod.F90 b/gpu/psb_s_hlg_mat_mod.F90 new file mode 100644 index 00000000..8f896e4b --- /dev/null +++ b/gpu/psb_s_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_hlg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_hll_sparse_mat) :: psb_s_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_hlg_get_fmt + procedure, pass(a) :: sizeof => s_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_s_hlg_vect_mv + procedure, pass(a) :: csmm => psb_s_hlg_csmm + procedure, pass(a) :: csmv => psb_s_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_hlg_scals + procedure, pass(a) :: scalv => psb_s_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_s_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_hlg_from_fmt + procedure, pass(a) :: free => s_hlg_free + procedure, pass(a) :: mold => psb_s_hlg_mold + procedure, pass(a) :: is_host => s_hlg_is_host + procedure, pass(a) :: is_dev => s_hlg_is_dev + procedure, pass(a) :: is_sync => s_hlg_is_sync + procedure, pass(a) :: set_host => s_hlg_set_host + procedure, pass(a) :: set_dev => s_hlg_set_dev + procedure, pass(a) :: set_sync => s_hlg_set_sync + procedure, pass(a) :: sync => s_hlg_sync + procedure, pass(a) :: from_gpu => psb_s_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_hlg_to_gpu + final :: s_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_hlg_mold +#endif + end type psb_s_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_hlg_get_nzeros, s_hlg_free, s_hlg_get_fmt, & + & s_hlg_get_size, s_hlg_sizeof, s_hlg_get_nz_row + + + interface + subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hlg_vect_mv + end interface + + interface + subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hlg_inner_vect_sv + end interface + + interface + subroutine psb_s_hlg_reallocate_nz(nz,a) + import :: psb_s_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_s_hlg_reallocate_nz + end interface + + interface + subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_s_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hlg_allocate_mnnz + end interface + + interface + subroutine psb_s_hlg_mold(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hlg_mold + end interface + + interface + subroutine psb_s_hlg_from_gpu(a,info) + import :: psb_s_hlg_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hlg_from_gpu + end interface + + interface + subroutine psb_s_hlg_to_gpu(a,info, nzrm) + import :: psb_s_hlg_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_hlg_to_gpu + end interface + + interface + subroutine psb_s_cp_hlg_from_coo(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hlg_from_coo + end interface + + interface + subroutine psb_s_cp_hlg_from_fmt(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hlg_from_fmt + end interface + + interface + subroutine psb_s_mv_hlg_from_coo(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hlg_from_coo + end interface + + + interface + subroutine psb_s_mv_hlg_from_fmt(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hlg_from_fmt + end interface + + interface + subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hlg_csmv + end interface + interface + subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hlg_csmm + end interface + + interface + subroutine psb_s_hlg_scal(d,a,info, side) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_hlg_scal + end interface + + interface + subroutine psb_s_hlg_scals(d,a,info) + import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hlg_sizeof(a) result(res) + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_hlg_sizeof + + function s_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function s_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hll_sparse_mat%free() + + return + + end subroutine s_hlg_free + + + subroutine s_hlg_sync(a) + implicit none + class(psb_s_hlg_sparse_mat), target, intent(in) :: a + class(psb_s_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_hlg_sync + + subroutine s_hlg_set_host(a) + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_hlg_set_host + + subroutine s_hlg_set_dev(a) + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_hlg_set_dev + + subroutine s_hlg_set_sync(a) + implicit none + class(psb_s_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_hlg_set_sync + + function s_hlg_is_dev(a) result(res) + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_hlg_is_dev + + function s_hlg_is_host(a) result(res) + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_hlg_is_host + + function s_hlg_is_sync(a) result(res) + implicit none + class(psb_s_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_hlg_is_sync + + + subroutine s_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_s_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_hlg_finalize + +#else + + interface + subroutine psb_s_hlg_mold(a,b,info) + import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hlg_mold + end interface + +#endif + +end module psb_s_hlg_mat_mod diff --git a/gpu/psb_s_hybg_mat_mod.F90 b/gpu/psb_s_hybg_mat_mod.F90 new file mode 100644 index 00000000..5a8e0e5d --- /dev/null +++ b/gpu/psb_s_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_s_hybg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use cusparse_mod + + type, extends(psb_s_csr_sparse_mat) :: psb_s_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(s_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => s_hybg_get_fmt + procedure, pass(a) :: sizeof => s_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_s_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_hybg_csmm + procedure, pass(a) :: csmv => psb_s_hybg_csmv + procedure, pass(a) :: scals => psb_s_hybg_scals + procedure, pass(a) :: scalv => psb_s_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_s_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_hybg_from_fmt + procedure, pass(a) :: free => s_hybg_free + procedure, pass(a) :: mold => psb_s_hybg_mold + procedure, pass(a) :: to_gpu => psb_s_hybg_to_gpu + final :: s_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_hybg_mold +#endif + end type psb_s_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_hybg_get_nzeros, s_hybg_free, s_hybg_get_fmt, & + & s_hybg_get_size, s_hybg_sizeof, s_hybg_get_nz_row + + + interface + subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hybg_inner_vect_sv + end interface + + interface + subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hybg_vect_mv + end interface + + interface + subroutine psb_s_hybg_reallocate_nz(nz,a) + import :: psb_s_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_s_hybg_reallocate_nz + end interface + + interface + subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_s_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hybg_allocate_mnnz + end interface + + interface + subroutine psb_s_hybg_mold(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hybg_mold + end interface + + interface + subroutine psb_s_hybg_to_gpu(a,info, nzrm) + import :: psb_s_hybg_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_hybg_to_gpu + end interface + + interface + subroutine psb_s_cp_hybg_from_coo(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hybg_from_coo + end interface + + interface + subroutine psb_s_cp_hybg_from_fmt(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hybg_from_fmt + end interface + + interface + subroutine psb_s_mv_hybg_from_coo(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hybg_from_coo + end interface + + interface + subroutine psb_s_mv_hybg_from_fmt(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hybg_from_fmt + end interface + + interface + subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hybg_csmv + end interface + interface + subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hybg_csmm + end interface + + interface + subroutine psb_s_hybg_scal(d,a,info,side) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_hybg_scal + end interface + + interface + subroutine psb_s_hybg_scals(d,a,info) + import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hybg_sizeof(a) result(res) + implicit none + class(psb_s_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_hybg_sizeof + + function s_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function s_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_hybg_free + + subroutine s_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_s_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine s_hybg_finalize + +#else + + interface + subroutine psb_s_hybg_mold(a,b,info) + import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hybg_mold + end interface + +#endif + +end module psb_s_hybg_mat_mod +#endif diff --git a/gpu/psb_s_vectordev_mod.F90 b/gpu/psb_s_vectordev_mod.F90 new file mode 100644 index 00000000..a7319b95 --- /dev/null +++ b/gpu/psb_s_vectordev_mod.F90 @@ -0,0 +1,390 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedFloat(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + real(c_float), value :: dummy + end function registerMappedFloat + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceFloat(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + real(c_float) :: hostVec(*) + end function writeMultiVecDeviceFloat + function writeMultiVecDeviceFloatR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceFloatR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + real(c_float) :: hostVec(ld,*) + end function writeMultiVecDeviceFloatR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceFloat(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + real(c_float) :: hostVec(*) + end function readMultiVecDeviceFloat + function readMultiVecDeviceFloatR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceFloatR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + real(c_float) :: hostVec(ld,*) + end function readMultiVecDeviceFloatR2 + end interface + + interface allocateFloat + function allocateFloat(didx,n) & + & result(res) bind(c,name='allocateFloat') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateFloat + function allocateMultiFloat(didx,m,n) & + & result(res) bind(c,name='allocateMultiFloat') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiFloat + end interface + + interface writeFloat + function writeFloat(didx,hidx,n) & + & result(res) bind(c,name='writeFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(*) + integer(c_int),value :: n + end function writeFloat + function writeFloatFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeFloatFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeFloatFirst + function writeMultiFloat(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiFloat + end interface + + interface readFloat + function readFloat(didx,hidx,n) & + & result(res) bind(c,name='readFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(*) + integer(c_int),value :: n + end function readFloat + function readFloatFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readFloatFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readFloatFirst + function readMultiFloat(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + real(c_float) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiFloat + end interface + + interface + subroutine freeFloat(didx) & + & bind(c,name='freeFloat') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeFloat + end interface + + + interface setScalDevice + function setScalMultiVecDeviceFloat(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + real(c_float), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceFloat + end interface + + interface + function geinsMultiVecDeviceFloat(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceFloat + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceFloat(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceFloat + end interface + + interface + function igathMultiVecDeviceFloatVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceFloatVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceFloatVecIdx + end interface + + interface + function iscatMultiVecDeviceFloat(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + real(c_float),value :: beta + end function iscatMultiVecDeviceFloat + end interface + + interface + function iscatMultiVecDeviceFloatVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceFloatVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + real(c_float),value :: beta + end function iscatMultiVecDeviceFloatVecIdx + end interface + + + interface scalMultiVecDevice + function scalMultiVecDeviceFloat(alpha,deviceVecA) & + & result(val) bind(c,name='scalMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + real(c_float), value :: alpha + type(c_ptr), value :: deviceVecA + end function scalMultiVecDeviceFloat + end interface + + interface dotMultiVecDevice + function dotMultiVecDeviceFloat(res, n,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotMultiVecDeviceFloat + end interface + + interface nrm2MultiVecDevice + function nrm2MultiVecDeviceFloat(res,n,deviceVecA) & + & result(val) bind(c,name='nrm2MultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function nrm2MultiVecDeviceFloat + end interface + + interface amaxMultiVecDevice + function amaxMultiVecDeviceFloat(res,n,deviceVecA) & + & result(val) bind(c,name='amaxMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function amaxMultiVecDeviceFloat + end interface + + interface asumMultiVecDevice + function asumMultiVecDeviceFloat(res,n,deviceVecA) & + & result(val) bind(c,name='asumMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_float) :: res + type(c_ptr), value :: deviceVecA + end function asumMultiVecDeviceFloat + end interface + + + interface axpbyMultiVecDevice + function axpbyMultiVecDeviceFloat(n,alpha,deviceVecA,beta,deviceVecB) & + & result(res) bind(c,name='axpbyMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyMultiVecDeviceFloat + end interface + + interface axyMultiVecDevice + function axyMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='axyMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function axyMultiVecDeviceFloat + end interface + + interface axybzMultiVecDevice + function axybzMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB,beta,deviceVecZ) & + & result(res) bind(c,name='axybzMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB,deviceVecZ + end function axybzMultiVecDeviceFloat + end interface + + + interface absMultiVecDevice + function absMultiVecDeviceFloat(n,alpha,deviceVecA) & + & result(res) bind(c,name='absMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha + type(c_ptr), value :: deviceVecA + end function absMultiVecDeviceFloat + function absMultiVecDeviceFloat2(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='absMultiVecDeviceFloat2') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + real(c_float), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function absMultiVecDeviceFloat2 + end interface + + interface inner_register + module procedure inner_registerFloat + end interface + + interface inner_unregister + module procedure inner_unregisterFloat + end interface + +contains + + + function inner_registerFloat(buffer,dval) result(res) + real(c_float), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + real(c_float) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerFloat + + subroutine inner_unregisterFloat(buffer) + real(c_float), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterFloat + +#endif + +end module psb_s_vectordev_mod diff --git a/gpu/psb_vectordev_mod.f90 b/gpu/psb_vectordev_mod.f90 new file mode 100644 index 00000000..1316d458 --- /dev/null +++ b/gpu/psb_vectordev_mod.f90 @@ -0,0 +1,8 @@ +module psb_vectordev_mod + use psb_base_vectordev_mod + use psb_s_vectordev_mod + use psb_d_vectordev_mod + use psb_c_vectordev_mod + use psb_z_vectordev_mod + use psb_i_vectordev_mod +end module psb_vectordev_mod diff --git a/gpu/psb_z_csrg_mat_mod.F90 b/gpu/psb_z_csrg_mat_mod.F90 new file mode 100644 index 00000000..14df1124 --- /dev/null +++ b/gpu/psb_z_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_csrg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_csr_sparse_mat) :: psb_z_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(z_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_csrg_get_fmt + procedure, pass(a) :: sizeof => z_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_z_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_csrg_csmm + procedure, pass(a) :: csmv => psb_z_csrg_csmv + procedure, pass(a) :: scals => psb_z_csrg_scals + procedure, pass(a) :: scalv => psb_z_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_z_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_csrg_from_fmt + procedure, pass(a) :: free => z_csrg_free + procedure, pass(a) :: mold => psb_z_csrg_mold + procedure, pass(a) :: is_host => z_csrg_is_host + procedure, pass(a) :: is_dev => z_csrg_is_dev + procedure, pass(a) :: is_sync => z_csrg_is_sync + procedure, pass(a) :: set_host => z_csrg_set_host + procedure, pass(a) :: set_dev => z_csrg_set_dev + procedure, pass(a) :: set_sync => z_csrg_set_sync + procedure, pass(a) :: sync => z_csrg_sync + procedure, pass(a) :: to_gpu => psb_z_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_z_csrg_from_gpu + final :: z_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_csrg_mold +#endif + end type psb_z_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_csrg_get_nzeros, z_csrg_free, z_csrg_get_fmt, & + & z_csrg_get_size, z_csrg_sizeof, z_csrg_get_nz_row + + + interface + subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrg_vect_mv + end interface + + interface + subroutine psb_z_csrg_reallocate_nz(nz,a) + import :: psb_z_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_z_csrg_reallocate_nz + end interface + + interface + subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_z_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_csrg_allocate_mnnz + end interface + + interface + subroutine psb_z_csrg_mold(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrg_mold + end interface + + interface + subroutine psb_z_csrg_to_gpu(a,info, nzrm) + import :: psb_z_csrg_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_csrg_to_gpu + end interface + + interface + subroutine psb_z_csrg_from_gpu(a,info) + import :: psb_z_csrg_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrg_from_gpu + end interface + + interface + subroutine psb_z_cp_csrg_from_coo(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_csrg_from_coo + end interface + + interface + subroutine psb_z_cp_csrg_from_fmt(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_csrg_from_fmt + end interface + + interface + subroutine psb_z_mv_csrg_from_coo(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_csrg_from_coo + end interface + + interface + subroutine psb_z_mv_csrg_from_fmt(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_csrg_from_fmt + end interface + + interface + subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrg_csmv + end interface + interface + subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_csrg_csmm + end interface + + interface + subroutine psb_z_csrg_scal(d,a,info,side) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_csrg_scal + end interface + + interface + subroutine psb_z_csrg_scals(d,a,info) + import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_csrg_sizeof(a) result(res) + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_csrg_sizeof + + function z_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function z_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine z_csrg_set_host(a) + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_csrg_set_host + + subroutine z_csrg_set_dev(a) + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_csrg_set_dev + + subroutine z_csrg_set_sync(a) + implicit none + class(psb_z_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_csrg_set_sync + + function z_csrg_is_dev(a) result(res) + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_csrg_is_dev + + function z_csrg_is_host(a) result(res) + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_csrg_is_host + + function z_csrg_is_sync(a) result(res) + implicit none + class(psb_z_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_csrg_is_sync + + + subroutine z_csrg_sync(a) + implicit none + class(psb_z_csrg_sparse_mat), target, intent(in) :: a + class(psb_z_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_csrg_sync + + subroutine z_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_csrg_free + + subroutine z_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_z_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine z_csrg_finalize + +#else + interface + subroutine psb_z_csrg_mold(a,b,info) + import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_csrg_mold + end interface + +#endif + +end module psb_z_csrg_mat_mod diff --git a/gpu/psb_z_diag_mat_mod.F90 b/gpu/psb_z_diag_mat_mod.F90 new file mode 100644 index 00000000..986d75d9 --- /dev/null +++ b/gpu/psb_z_diag_mat_mod.F90 @@ -0,0 +1,308 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_z_dia_mat_mod + + type, extends(psb_z_dia_sparse_mat) :: psb_z_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_diag_get_fmt + procedure, pass(a) :: sizeof => z_diag_sizeof + procedure, pass(a) :: vect_mv => psb_z_diag_vect_mv +! procedure, pass(a) :: csmm => psb_z_diag_csmm + procedure, pass(a) :: csmv => psb_z_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_z_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_z_diag_scals +! procedure, pass(a) :: scalv => psb_z_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_z_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_z_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_z_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_z_mv_diag_from_fmt + procedure, pass(a) :: free => z_diag_free + procedure, pass(a) :: mold => psb_z_diag_mold + procedure, pass(a) :: to_gpu => psb_z_diag_to_gpu + final :: z_diag_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_diag_mold +#endif + end type psb_z_diag_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_diag_get_nzeros, z_diag_free, z_diag_get_fmt, & + & z_diag_get_size, z_diag_sizeof, z_diag_get_nz_row + + + interface + subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_diag_vect_mv + end interface + + interface + subroutine psb_z_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_diag_inner_vect_sv + end interface + + interface + subroutine psb_z_diag_reallocate_nz(nz,a) + import :: psb_z_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_diag_sparse_mat), intent(inout) :: a + end subroutine psb_z_diag_reallocate_nz + end interface + + interface + subroutine psb_z_diag_allocate_mnnz(m,n,a,nz) + import :: psb_z_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_diag_allocate_mnnz + end interface + + interface + subroutine psb_z_diag_mold(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_diag_mold + end interface + + interface + subroutine psb_z_diag_to_gpu(a,info, nzrm) + import :: psb_z_diag_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_diag_to_gpu + end interface + + interface + subroutine psb_z_cp_diag_from_coo(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_diag_from_coo + end interface + + interface + subroutine psb_z_cp_diag_from_fmt(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_diag_from_fmt + end interface + + interface + subroutine psb_z_mv_diag_from_coo(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_diag_from_coo + end interface + + + interface + subroutine psb_z_mv_diag_from_fmt(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_diag_from_fmt + end interface + + interface + subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_diag_csmv + end interface + interface + subroutine psb_z_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_diag_csmm + end interface + + interface + subroutine psb_z_diag_scal(d,a,info, side) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_diag_scal + end interface + + interface + subroutine psb_z_diag_scals(d,a,info) + import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_diag_sizeof(a) result(res) + implicit none + class(psb_z_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_diag_sizeof + + function z_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function z_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_dia_sparse_mat%free() + + return + + end subroutine z_diag_free + + subroutine z_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_z_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_diag_finalize + +#else + + interface + subroutine psb_z_diag_mold(a,b,info) + import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_diag_mold + end interface + +#endif + +end module psb_z_diag_mat_mod diff --git a/gpu/psb_z_dnsg_mat_mod.F90 b/gpu/psb_z_dnsg_mat_mod.F90 new file mode 100644 index 00000000..6a3d4369 --- /dev/null +++ b/gpu/psb_z_dnsg_mat_mod.F90 @@ -0,0 +1,294 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_dnsg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_dns_mat_mod + use dnsdev_mod + + type, extends(psb_z_dns_sparse_mat) :: psb_z_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => z_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_z_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_z_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_z_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_z_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_z_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_z_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_z_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_z_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_z_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_dnsg_from_fmt + procedure, pass(a) :: free => z_dnsg_free + procedure, pass(a) :: mold => psb_z_dnsg_mold + procedure, pass(a) :: to_gpu => psb_z_dnsg_to_gpu + final :: z_dnsg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_dnsg_mold +#endif + end type psb_z_dnsg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_dnsg_get_nzeros, z_dnsg_free, z_dnsg_get_fmt, & + & z_dnsg_get_size, z_dnsg_get_nz_row + + + interface + subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_z_dnsg_mold(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dnsg_mold + end interface + + interface + subroutine psb_z_dnsg_to_gpu(a,info) + import :: psb_z_dnsg_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dnsg_to_gpu + end interface + + interface + subroutine psb_z_cp_dnsg_from_coo(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dnsg_from_coo + end interface + + interface + subroutine psb_z_cp_dnsg_from_fmt(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_z_mv_dnsg_from_coo(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_z_mv_dnsg_from_fmt(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_z_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_z_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_scal(d,a,info, side) +!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_dnsg_scals(d,a,info) +!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function z_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function z_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_dns_sparse_mat%free() + + return + + end subroutine z_dnsg_free + + subroutine z_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_z_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_dnsg_finalize + +#else + + interface + subroutine psb_z_dnsg_mold(a,b,info) + import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dnsg_mold + end interface + +#endif + +end module psb_z_dnsg_mat_mod diff --git a/gpu/psb_z_elg_mat_mod.F90 b/gpu/psb_z_elg_mat_mod.F90 new file mode 100644 index 00000000..cf9e479c --- /dev/null +++ b/gpu/psb_z_elg_mat_mod.F90 @@ -0,0 +1,483 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_elg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_ell_mat_mod + use psb_i_gpu_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_ell_sparse_mat) :: psb_z_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_elg_get_fmt + procedure, pass(a) :: sizeof => z_elg_sizeof + procedure, pass(a) :: vect_mv => psb_z_elg_vect_mv + procedure, pass(a) :: csmm => psb_z_elg_csmm + procedure, pass(a) :: csmv => psb_z_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_elg_scals + procedure, pass(a) :: scalv => psb_z_elg_scal + procedure, pass(a) :: reallocate_nz => psb_z_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_elg_allocate_mnnz + procedure, pass(a) :: reinit => z_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_elg_from_fmt + procedure, pass(a) :: free => z_elg_free + procedure, pass(a) :: mold => psb_z_elg_mold + procedure, pass(a) :: csput_a => psb_z_elg_csput_a + procedure, pass(a) :: csput_v => psb_z_elg_csput_v + procedure, pass(a) :: is_host => z_elg_is_host + procedure, pass(a) :: is_dev => z_elg_is_dev + procedure, pass(a) :: is_sync => z_elg_is_sync + procedure, pass(a) :: set_host => z_elg_set_host + procedure, pass(a) :: set_dev => z_elg_set_dev + procedure, pass(a) :: set_sync => z_elg_set_sync + procedure, pass(a) :: sync => z_elg_sync + procedure, pass(a) :: from_gpu => psb_z_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_elg_to_gpu + procedure, pass(a) :: asb => psb_z_elg_asb + final :: z_elg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_elg_mold + procedure, pass(a) :: asb => psb_z_elg_asb +#endif + end type psb_z_elg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_elg_get_nzeros, z_elg_free, z_elg_get_fmt, & + & z_elg_get_size, z_elg_sizeof, z_elg_get_nz_row, z_elg_sync + + + interface + subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_elg_vect_mv + end interface + + interface + subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_elg_inner_vect_sv + end interface + + interface + subroutine psb_z_elg_reallocate_nz(nz,a) + import :: psb_z_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_elg_reallocate_nz + end interface + + interface + subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) + import :: psb_z_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_elg_allocate_mnnz + end interface + + interface + subroutine psb_z_elg_mold(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_mold + end interface + + interface + subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_csput_a + end interface + + interface + subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& + & psb_i_base_vect_type + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_csput_v + end interface + + interface + subroutine psb_z_elg_from_gpu(a,info) + import :: psb_z_elg_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_from_gpu + end interface + + interface + subroutine psb_z_elg_to_gpu(a,info, nzrm) + import :: psb_z_elg_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_elg_to_gpu + end interface + + interface + subroutine psb_z_cp_elg_from_coo(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_elg_from_coo + end interface + + interface + subroutine psb_z_cp_elg_from_fmt(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_elg_from_fmt + end interface + + interface + subroutine psb_z_mv_elg_from_coo(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_elg_from_coo + end interface + + + interface + subroutine psb_z_mv_elg_from_fmt(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_elg_from_fmt + end interface + + interface + subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_elg_csmv + end interface + interface + subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_elg_csmm + end interface + + interface + subroutine psb_z_elg_scal(d,a,info, side) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_elg_scal + end interface + + interface + subroutine psb_z_elg_scals(d,a,info) + import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_scals + end interface + + interface + subroutine psb_z_elg_asb(a) + import :: psb_z_elg_sparse_mat + class(psb_z_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_elg_sizeof(a) result(res) + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_elg_sizeof + + function z_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function z_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine z_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = zzero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine z_elg_reinit + + subroutine z_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine z_elg_free + + subroutine z_elg_sync(a) + implicit none + class(psb_z_elg_sparse_mat), target, intent(in) :: a + class(psb_z_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_elg_sync + + subroutine z_elg_set_host(a) + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_elg_set_host + + subroutine z_elg_set_dev(a) + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_elg_set_dev + + subroutine z_elg_set_sync(a) + implicit none + class(psb_z_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_elg_set_sync + + function z_elg_is_dev(a) result(res) + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_elg_is_dev + + function z_elg_is_host(a) result(res) + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_elg_is_host + + function z_elg_is_sync(a) result(res) + implicit none + class(psb_z_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_elg_is_sync + + subroutine z_elg_finalize(a) + use elldev_mod + implicit none + type(psb_z_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine z_elg_finalize + +#else + + interface + subroutine psb_z_elg_asb(a) + import :: psb_z_elg_sparse_mat + class(psb_z_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_elg_asb + end interface + + interface + subroutine psb_z_elg_mold(a,b,info) + import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_elg_mold + end interface + +#endif + +end module psb_z_elg_mat_mod diff --git a/gpu/psb_z_gpu_vect_mod.F90 b/gpu/psb_z_gpu_vect_mod.F90 new file mode 100644 index 00000000..ca5ac922 --- /dev/null +++ b/gpu/psb_z_gpu_vect_mod.F90 @@ -0,0 +1,1989 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_gpu_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_vect_mod + use psb_i_vect_mod +#ifdef HAVE_SPGPU + use psb_gpu_env_mod + use psb_i_gpu_vect_mod + use psb_i_vectordev_mod + use psb_z_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_vect_type) :: psb_z_vect_gpu +#ifdef HAVE_SPGPU + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + complex(c_double_complex), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + complex(c_double_complex), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => z_gpu_get_nrows + procedure, nopass :: get_fmt => z_gpu_get_fmt + + procedure, pass(x) :: all => z_gpu_all + procedure, pass(x) :: zero => z_gpu_zero + procedure, pass(x) :: asb_m => z_gpu_asb_m + procedure, pass(x) :: sync => z_gpu_sync + procedure, pass(x) :: sync_space => z_gpu_sync_space + procedure, pass(x) :: bld_x => z_gpu_bld_x + procedure, pass(x) :: bld_mn => z_gpu_bld_mn + procedure, pass(x) :: free => z_gpu_free + procedure, pass(x) :: ins_a => z_gpu_ins_a + procedure, pass(x) :: ins_v => z_gpu_ins_v + procedure, pass(x) :: is_host => z_gpu_is_host + procedure, pass(x) :: is_dev => z_gpu_is_dev + procedure, pass(x) :: is_sync => z_gpu_is_sync + procedure, pass(x) :: set_host => z_gpu_set_host + procedure, pass(x) :: set_dev => z_gpu_set_dev + procedure, pass(x) :: set_sync => z_gpu_set_sync + procedure, pass(x) :: set_scal => z_gpu_set_scal +!!$ procedure, pass(x) :: set_vect => z_gpu_set_vect + procedure, pass(x) :: gthzv_x => z_gpu_gthzv_x + procedure, pass(y) :: sctb => z_gpu_sctb + procedure, pass(y) :: sctb_x => z_gpu_sctb_x + procedure, pass(x) :: gthzbuf => z_gpu_gthzbuf + procedure, pass(y) :: sctb_buf => z_gpu_sctb_buf + procedure, pass(x) :: new_buffer => z_gpu_new_buffer + procedure, nopass :: device_wait => z_gpu_device_wait + procedure, pass(x) :: free_buffer => z_gpu_free_buffer + procedure, pass(x) :: maybe_free_buffer => z_gpu_maybe_free_buffer + procedure, pass(x) :: dot_v => z_gpu_dot_v + procedure, pass(x) :: dot_a => z_gpu_dot_a + procedure, pass(y) :: axpby_v => z_gpu_axpby_v + procedure, pass(y) :: axpby_a => z_gpu_axpby_a + procedure, pass(y) :: mlt_v => z_gpu_mlt_v + procedure, pass(y) :: mlt_a => z_gpu_mlt_a + procedure, pass(z) :: mlt_a_2 => z_gpu_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => z_gpu_mlt_v_2 + procedure, pass(x) :: scal => z_gpu_scal + procedure, pass(x) :: nrm2 => z_gpu_nrm2 + procedure, pass(x) :: amax => z_gpu_amax + procedure, pass(x) :: asum => z_gpu_asum + procedure, pass(x) :: absval1 => z_gpu_absval1 + procedure, pass(x) :: absval2 => z_gpu_absval2 + + final :: z_gpu_vect_finalize +#endif + end type psb_z_vect_gpu + + public :: psb_z_vect_gpu_ + private :: constructor + interface psb_z_vect_gpu_ + module procedure constructor + end interface psb_z_vect_gpu_ + +contains + + function constructor(x) result(this) + complex(psb_dpk_) :: x(:) + type(psb_z_vect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + +#ifdef HAVE_SPGPU + + subroutine z_gpu_device_wait() + call psb_cudaSync() + end subroutine z_gpu_device_wait + + subroutine z_gpu_new_buffer(n,x,info) + use psb_realloc_mod + use psb_gpu_env_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_gpu_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDoubleComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDoubleComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplex(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(x%dt_buf,y,n) + + end select + + end subroutine z_gpu_gthzv_x + + subroutine z_gpu_gthzbuf(i,n,idx,x) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_z_vect_gpu) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_gpu_DeviceHasUVA()) then + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplex(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readDoubleComplex(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine z_gpu_gthzbuf + + subroutine z_gpu_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_gpu) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_z_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine z_gpu_sctb + + subroutine z_gpu_sctb_x(i,n,idx,x,beta,y) + use psb_gpu_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_gpu) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_gpu) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_gpu_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDoubleComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeDoubleComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDoubleComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeDoubleComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleComplex(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine z_gpu_sctb_x + + subroutine z_gpu_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_gpu_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta + class(psb_z_vect_gpu) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_gpu) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_gpu_DeviceHasUVA()) then + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeDoubleComplex(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeDoubleComplex(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceDoubleComplex(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine z_gpu_sctb_buf + + + subroutine z_gpu_bld_x(x,this) + use psb_base_mod + complex(psb_dpk_), intent(in) :: this(:) + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_gpu_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine z_gpu_bld_x + + subroutine z_gpu_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'z_gpu_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine z_gpu_bld_mn + + subroutine z_gpu_set_host(x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine z_gpu_set_host + + subroutine z_gpu_set_dev(x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine z_gpu_set_dev + + subroutine z_gpu_set_sync(x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine z_gpu_set_sync + + function z_gpu_is_dev(x) result(res) + implicit none + class(psb_z_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_gpu_is_dev + + function z_gpu_is_host(x) result(res) + implicit none + class(psb_z_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_gpu_is_host + + function z_gpu_is_sync(x) result(res) + implicit none + class(psb_z_vect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_gpu_is_sync + + + function z_gpu_get_nrows(x) result(res) + implicit none + class(psb_z_vect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function z_gpu_get_nrows + + function z_gpu_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'zGPU' + end function z_gpu_get_fmt + + subroutine z_gpu_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_z_vect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_gpu_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine z_gpu_all + + subroutine z_gpu_zero(x) + use psi_serial_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=zzero + call x%set_host() + end subroutine z_gpu_zero + + subroutine z_gpu_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_z_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'z_gpu_sync') + end if + + end subroutine z_gpu_sync + + subroutine z_gpu_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine z_gpu_free + + subroutine z_gpu_set_scal(x,val,first,last) + class(psb_z_vect_gpu), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + if (x%is_host()) call x%sync() + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine z_gpu_set_scal +!!$ +!!$ subroutine z_gpu_set_vect(x,val) +!!$ class(psb_z_vect_gpu), intent(inout) :: x +!!$ complex(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_z_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine z_gpu_set_vect + + + + function z_gpu_dot_v(n,x,y) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + + res = zzero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_z_vect + ! + select type(yy => y) + type is (psb_z_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_z_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'z_gpu_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function z_gpu_dot_v + + function z_gpu_dot_a(n,x,y) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + complex(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function z_gpu_dot_a + + subroutine z_gpu_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_gpu), intent(inout) :: y + complex(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_z_vect_gpu) + ! Do something different here + if ((beta /= zzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + type is (psb_z_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_z_vect_gpu) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,zone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine z_gpu_mlt_v + + subroutine z_gpu_mlt_a(x, y, info) + use psi_serial_mod + implicit none + complex(psb_dpk_), intent(in) :: x(:) + class(psb_z_vect_gpu), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_z_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine z_gpu_mlt_a + + subroutine z_gpu_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha,beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(in) :: y(:) + class(psb_z_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine z_gpu_mlt_a_2 + + subroutine z_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha,beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_gpu), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_z_vect_gpu) + select type (yy => y) + type is (psb_z_vect_gpu) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= zzero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= zzero).and.(z%is_dev())) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= zzero).and.(z%is_dev())) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine z_gpu_mlt_v_2 + + subroutine z_gpu_scal(alpha, x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + complex(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine z_gpu_scal + + + function z_gpu_nrm2(n,x) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_gpu_nrm2 + + function z_gpu_amax(n,x) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_gpu_amax + + function z_gpu_asum(n,x) result(res) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_gpu_asum + + subroutine z_gpu_absval1(x) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,zone,x%deviceVect) + + end subroutine z_gpu_absval1 + + subroutine z_gpu_absval2(x,y) + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_z_vect_gpu) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,zone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_z_base_vect_type%absval(y) + end select + end subroutine z_gpu_absval2 + + + subroutine z_gpu_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine z_gpu_vect_finalize + + subroutine z_gpu_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_z_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_gpu + + info = 0 + if (psb_errstatus_fatal()) return + + done_gpu = .false. + select type(virl => irl) + class is (psb_i_vect_gpu) + select type(vval => val) + class is (psb_z_vect_gpu) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceDoubleComplex(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_gpu=.true. + end select + end select + + if (.not.done_gpu) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'gpu_vect_ins') + return + end if + + end subroutine z_gpu_ins_v + + subroutine z_gpu_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_z_vect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_z_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine z_gpu_ins_a + +#endif + +end module psb_z_gpu_vect_mod + + +! +! Multivectors +! + + + +module psb_z_gpu_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_multivect_mod + use psb_z_base_multivect_mod + + use psb_i_multivect_mod +#ifdef HAVE_SPGPU + use psb_i_gpu_multivect_mod + use psb_z_vectordev_mod +#endif + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_multivect_type) :: psb_z_multivect_gpu +#ifdef HAVE_SPGPU + + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => z_gpu_multi_get_nrows + procedure, pass(x) :: get_ncols => z_gpu_multi_get_ncols + procedure, nopass :: get_fmt => z_gpu_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => z_gpu_multi_dot_v +!!$ procedure, pass(x) :: dot_a => z_gpu_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => z_gpu_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => z_gpu_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => z_gpu_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => z_gpu_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => z_gpu_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => z_gpu_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => z_gpu_multi_scal +!!$ procedure, pass(x) :: nrm2 => z_gpu_multi_nrm2 +!!$ procedure, pass(x) :: amax => z_gpu_multi_amax +!!$ procedure, pass(x) :: asum => z_gpu_multi_asum + procedure, pass(x) :: all => z_gpu_multi_all + procedure, pass(x) :: zero => z_gpu_multi_zero + procedure, pass(x) :: asb => z_gpu_multi_asb + procedure, pass(x) :: sync => z_gpu_multi_sync + procedure, pass(x) :: sync_space => z_gpu_multi_sync_space + procedure, pass(x) :: bld_x => z_gpu_multi_bld_x + procedure, pass(x) :: bld_n => z_gpu_multi_bld_n + procedure, pass(x) :: free => z_gpu_multi_free + procedure, pass(x) :: ins => z_gpu_multi_ins + procedure, pass(x) :: is_host => z_gpu_multi_is_host + procedure, pass(x) :: is_dev => z_gpu_multi_is_dev + procedure, pass(x) :: is_sync => z_gpu_multi_is_sync + procedure, pass(x) :: set_host => z_gpu_multi_set_host + procedure, pass(x) :: set_dev => z_gpu_multi_set_dev + procedure, pass(x) :: set_sync => z_gpu_multi_set_sync + procedure, pass(x) :: set_scal => z_gpu_multi_set_scal + procedure, pass(x) :: set_vect => z_gpu_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => z_gpu_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => z_gpu_multi_sctb +!!$ procedure, pass(y) :: sctb_x => z_gpu_multi_sctb_x + final :: z_gpu_multi_vect_finalize +#endif + end type psb_z_multivect_gpu + + public :: psb_z_multivect_gpu + private :: constructor + interface psb_z_multivect_gpu + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + complex(psb_dpk_) :: x(:,:) + type(psb_z_multivect_gpu) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + +#ifdef HAVE_SPGPU + +!!$ subroutine z_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_dpk_) :: y(:) +!!$ class(psb_z_multivect_gpu) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine z_gpu_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine z_gpu_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ complex(psb_dpk_) :: beta, x(:) +!!$ class(psb_z_multivect_gpu) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_z_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine z_gpu_multi_sctb +!!$ +!!$ subroutine z_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_dpk_) :: beta, x(:) +!!$ class(psb_z_multivect_gpu) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_gpu) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine z_gpu_multi_sctb_x + + + subroutine z_gpu_multi_bld_x(x,this) + use psb_base_mod + complex(psb_dpk_), intent(in) :: this(:,:) + class(psb_z_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_gpu_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine z_gpu_multi_bld_x + + subroutine z_gpu_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_gpu), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'z_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine z_gpu_multi_bld_n + + + subroutine z_gpu_multi_set_host(x) + implicit none + class(psb_z_multivect_gpu), intent(inout) :: x + + x%state = is_host + end subroutine z_gpu_multi_set_host + + subroutine z_gpu_multi_set_dev(x) + implicit none + class(psb_z_multivect_gpu), intent(inout) :: x + + x%state = is_dev + end subroutine z_gpu_multi_set_dev + + subroutine z_gpu_multi_set_sync(x) + implicit none + class(psb_z_multivect_gpu), intent(inout) :: x + + x%state = is_sync + end subroutine z_gpu_multi_set_sync + + function z_gpu_multi_is_dev(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_gpu_multi_is_dev + + function z_gpu_multi_is_host(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_gpu_multi_is_host + + function z_gpu_multi_is_sync(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_gpu_multi_is_sync + + + function z_gpu_multi_get_nrows(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function z_gpu_multi_get_nrows + + function z_gpu_multi_get_ncols(x) result(res) + implicit none + class(psb_z_multivect_gpu), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function z_gpu_multi_get_ncols + + function z_gpu_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'zGPU' + end function z_gpu_multi_get_fmt + +!!$ function z_gpu_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_dpk_) :: res +!!$ complex(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_z_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_z_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_z_multivect_gpu) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'z_gpu_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function z_gpu_multi_dot_v +!!$ +!!$ function z_gpu_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ complex(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_dpk_) :: res +!!$ complex(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function z_gpu_multi_dot_a +!!$ +!!$ subroutine z_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_z_base_multivect_type), intent(inout) :: x +!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ complex(psb_dpk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_z_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_z_multivect_gpu) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_z_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_z_multivect_gpu) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine z_gpu_multi_mlt_v +!!$ +!!$ subroutine z_gpu_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_z_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine z_gpu_multi_mlt_a +!!$ +!!$ subroutine z_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha,beta +!!$ complex(psb_dpk_), intent(in) :: x(:) +!!$ complex(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine z_gpu_multi_mlt_a_2 +!!$ +!!$ subroutine z_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha,beta +!!$ class(psb_z_base_multivect_type), intent(inout) :: x +!!$ class(psb_z_base_multivect_type), intent(inout) :: y +!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_z_multivect_gpu) +!!$ select type (yy => y) +!!$ type is (psb_z_multivect_gpu) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine z_gpu_multi_mlt_v_2 + + + subroutine z_gpu_multi_set_scal(x,val) + class(psb_z_multivect_gpu), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_z_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine z_gpu_multi_set_scal + + subroutine z_gpu_multi_set_vect(x,val) + class(psb_z_multivect_gpu), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_z_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine z_gpu_multi_set_vect + + + +!!$ subroutine z_gpu_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ complex(psb_dpk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_z_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine z_gpu_multi_scal +!!$ +!!$ +!!$ function z_gpu_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function z_gpu_multi_nrm2 +!!$ +!!$ function z_gpu_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function z_gpu_multi_amax +!!$ +!!$ function z_gpu_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function z_gpu_multi_asum + + subroutine z_gpu_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_gpu), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=zzero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_gpu_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine z_gpu_multi_all + + subroutine z_gpu_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_z_multivect_gpu), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine z_gpu_multi_zero + + subroutine z_gpu_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_gpu), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_z_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc z_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => z_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_z_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_z_hdiag_csmm + procedure, pass(a) :: csmv => psb_z_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_z_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_z_hdiag_scals + ! procedure, pass(a) :: scalv => psb_z_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_z_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_z_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdiag_from_fmt + procedure, pass(a) :: free => z_hdiag_free + procedure, pass(a) :: mold => psb_z_hdiag_mold + procedure, pass(a) :: to_gpu => psb_z_hdiag_to_gpu + final :: z_hdiag_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_hdiag_mold +#endif + end type psb_z_hdiag_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_hdiag_get_nzeros, z_hdiag_free, z_hdiag_get_fmt, & + & z_hdiag_get_size, z_hdiag_sizeof, z_hdiag_get_nz_row + + + interface + subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_z_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_z_hdiag_mold(a,b,info) + import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdiag_mold + end interface + + interface + subroutine psb_z_hdiag_to_gpu(a,info) + import :: psb_z_hdiag_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdiag_to_gpu + end interface + + interface + subroutine psb_z_cp_hdiag_from_coo(a,b,info) + import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_z_mv_hdiag_from_coo(a,b,info) + import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_z_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_z_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdiag_scal(d,a,info, side) +!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdiag_scals(d,a,info) +!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function z_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function z_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hdia_sparse_mat%free() + + return + + end subroutine z_hdiag_free + + subroutine z_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_z_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hdia_sparse_mat%free() + + return + end subroutine z_hdiag_finalize + +#else + + interface + subroutine psb_z_hdiag_mold(a,b,info) + import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdiag_mold + end interface + +#endif + +end module psb_z_hdiag_mat_mod diff --git a/gpu/psb_z_hlg_mat_mod.F90 b/gpu/psb_z_hlg_mat_mod.F90 new file mode 100644 index 00000000..09d490b3 --- /dev/null +++ b/gpu/psb_z_hlg_mat_mod.F90 @@ -0,0 +1,398 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_hlg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_hll_sparse_mat) :: psb_z_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! +#ifdef HAVE_SPGPU + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_hlg_get_fmt + procedure, pass(a) :: sizeof => z_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_z_hlg_vect_mv + procedure, pass(a) :: csmm => psb_z_hlg_csmm + procedure, pass(a) :: csmv => psb_z_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_hlg_scals + procedure, pass(a) :: scalv => psb_z_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_z_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_hlg_from_fmt + procedure, pass(a) :: free => z_hlg_free + procedure, pass(a) :: mold => psb_z_hlg_mold + procedure, pass(a) :: is_host => z_hlg_is_host + procedure, pass(a) :: is_dev => z_hlg_is_dev + procedure, pass(a) :: is_sync => z_hlg_is_sync + procedure, pass(a) :: set_host => z_hlg_set_host + procedure, pass(a) :: set_dev => z_hlg_set_dev + procedure, pass(a) :: set_sync => z_hlg_set_sync + procedure, pass(a) :: sync => z_hlg_sync + procedure, pass(a) :: from_gpu => psb_z_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_hlg_to_gpu + final :: z_hlg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_hlg_mold +#endif + end type psb_z_hlg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_hlg_get_nzeros, z_hlg_free, z_hlg_get_fmt, & + & z_hlg_get_size, z_hlg_sizeof, z_hlg_get_nz_row + + + interface + subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hlg_vect_mv + end interface + + interface + subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hlg_inner_vect_sv + end interface + + interface + subroutine psb_z_hlg_reallocate_nz(nz,a) + import :: psb_z_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_z_hlg_reallocate_nz + end interface + + interface + subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_z_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hlg_allocate_mnnz + end interface + + interface + subroutine psb_z_hlg_mold(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hlg_mold + end interface + + interface + subroutine psb_z_hlg_from_gpu(a,info) + import :: psb_z_hlg_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hlg_from_gpu + end interface + + interface + subroutine psb_z_hlg_to_gpu(a,info, nzrm) + import :: psb_z_hlg_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_hlg_to_gpu + end interface + + interface + subroutine psb_z_cp_hlg_from_coo(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hlg_from_coo + end interface + + interface + subroutine psb_z_cp_hlg_from_fmt(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hlg_from_fmt + end interface + + interface + subroutine psb_z_mv_hlg_from_coo(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hlg_from_coo + end interface + + + interface + subroutine psb_z_mv_hlg_from_fmt(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hlg_from_fmt + end interface + + interface + subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hlg_csmv + end interface + interface + subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hlg_csmm + end interface + + interface + subroutine psb_z_hlg_scal(d,a,info, side) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_hlg_scal + end interface + + interface + subroutine psb_z_hlg_scals(d,a,info) + import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hlg_sizeof(a) result(res) + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_hlg_sizeof + + function z_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function z_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hll_sparse_mat%free() + + return + + end subroutine z_hlg_free + + + subroutine z_hlg_sync(a) + implicit none + class(psb_z_hlg_sparse_mat), target, intent(in) :: a + class(psb_z_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_hlg_sync + + subroutine z_hlg_set_host(a) + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_hlg_set_host + + subroutine z_hlg_set_dev(a) + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_hlg_set_dev + + subroutine z_hlg_set_sync(a) + implicit none + class(psb_z_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_hlg_set_sync + + function z_hlg_is_dev(a) result(res) + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_hlg_is_dev + + function z_hlg_is_host(a) result(res) + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_hlg_is_host + + function z_hlg_is_sync(a) result(res) + implicit none + class(psb_z_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_hlg_is_sync + + + subroutine z_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_z_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_hlg_finalize + +#else + + interface + subroutine psb_z_hlg_mold(a,b,info) + import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hlg_mold + end interface + +#endif + +end module psb_z_hlg_mat_mod diff --git a/gpu/psb_z_hybg_mat_mod.F90 b/gpu/psb_z_hybg_mat_mod.F90 new file mode 100644 index 00000000..465677e3 --- /dev/null +++ b/gpu/psb_z_hybg_mat_mod.F90 @@ -0,0 +1,306 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_z_hybg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + type, extends(psb_z_csr_sparse_mat) :: psb_z_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(z_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => z_hybg_get_fmt + procedure, pass(a) :: sizeof => z_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_z_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_hybg_csmm + procedure, pass(a) :: csmv => psb_z_hybg_csmv + procedure, pass(a) :: scals => psb_z_hybg_scals + procedure, pass(a) :: scalv => psb_z_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_z_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_hybg_from_fmt + procedure, pass(a) :: free => z_hybg_free + procedure, pass(a) :: mold => psb_z_hybg_mold + procedure, pass(a) :: to_gpu => psb_z_hybg_to_gpu + final :: z_hybg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_hybg_mold +#endif + end type psb_z_hybg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_hybg_get_nzeros, z_hybg_free, z_hybg_get_fmt, & + & z_hybg_get_size, z_hybg_sizeof, z_hybg_get_nz_row + + + interface + subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hybg_inner_vect_sv + end interface + + interface + subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hybg_vect_mv + end interface + + interface + subroutine psb_z_hybg_reallocate_nz(nz,a) + import :: psb_z_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_z_hybg_reallocate_nz + end interface + + interface + subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_z_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hybg_allocate_mnnz + end interface + + interface + subroutine psb_z_hybg_mold(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hybg_mold + end interface + + interface + subroutine psb_z_hybg_to_gpu(a,info, nzrm) + import :: psb_z_hybg_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_hybg_to_gpu + end interface + + interface + subroutine psb_z_cp_hybg_from_coo(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hybg_from_coo + end interface + + interface + subroutine psb_z_cp_hybg_from_fmt(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hybg_from_fmt + end interface + + interface + subroutine psb_z_mv_hybg_from_coo(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hybg_from_coo + end interface + + interface + subroutine psb_z_mv_hybg_from_fmt(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hybg_from_fmt + end interface + + interface + subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hybg_csmv + end interface + interface + subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hybg_csmm + end interface + + interface + subroutine psb_z_hybg_scal(d,a,info,side) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_hybg_scal + end interface + + interface + subroutine psb_z_hybg_scals(d,a,info) + import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hybg_sizeof(a) result(res) + implicit none + class(psb_z_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_hybg_sizeof + + function z_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function z_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_hybg_free + + subroutine z_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_z_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine z_hybg_finalize + +#else + + interface + subroutine psb_z_hybg_mold(a,b,info) + import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hybg_mold + end interface + +#endif + +end module psb_z_hybg_mat_mod +#endif diff --git a/gpu/psb_z_vectordev_mod.F90 b/gpu/psb_z_vectordev_mod.F90 new file mode 100644 index 00000000..58c43a43 --- /dev/null +++ b/gpu/psb_z_vectordev_mod.F90 @@ -0,0 +1,390 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_vectordev_mod + + use psb_base_vectordev_mod + +#ifdef HAVE_SPGPU + + interface registerMapped + function registerMappedDoubleComplex(buf,d_p,n,dummy) & + & result(res) bind(c,name='registerMappedDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: buf + type(c_ptr) :: d_p + integer(c_int),value :: n + complex(c_double_complex), value :: dummy + end function registerMappedDoubleComplex + end interface + + interface writeMultiVecDevice + function writeMultiVecDeviceDoubleComplex(deviceVec,hostVec) & + & result(res) bind(c,name='writeMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + complex(c_double_complex) :: hostVec(*) + end function writeMultiVecDeviceDoubleComplex + function writeMultiVecDeviceDoubleComplexR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='writeMultiVecDeviceDoubleComplexR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + complex(c_double_complex) :: hostVec(ld,*) + end function writeMultiVecDeviceDoubleComplexR2 + end interface + + interface readMultiVecDevice + function readMultiVecDeviceDoubleComplex(deviceVec,hostVec) & + & result(res) bind(c,name='readMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + complex(c_double_complex) :: hostVec(*) + end function readMultiVecDeviceDoubleComplex + function readMultiVecDeviceDoubleComplexR2(deviceVec,hostVec,ld) & + & result(res) bind(c,name='readMultiVecDeviceDoubleComplexR2') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int), value :: ld + complex(c_double_complex) :: hostVec(ld,*) + end function readMultiVecDeviceDoubleComplexR2 + end interface + + interface allocateDoubleComplex + function allocateDoubleComplex(didx,n) & + & result(res) bind(c,name='allocateDoubleComplex') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: n + integer(c_int) :: res + end function allocateDoubleComplex + function allocateMultiDoubleComplex(didx,m,n) & + & result(res) bind(c,name='allocateMultiDoubleComplex') + use iso_c_binding + type(c_ptr) :: didx + integer(c_int),value :: m,n + integer(c_int) :: res + end function allocateMultiDoubleComplex + end interface + + interface writeDoubleComplex + function writeDoubleComplex(didx,hidx,n) & + & result(res) bind(c,name='writeDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(*) + integer(c_int),value :: n + end function writeDoubleComplex + function writeDoubleComplexFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='writeDoubleComplexFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function writeDoubleComplexFirst + function writeMultiDoubleComplex(didx,hidx,m,n) & + & result(res) bind(c,name='writeMultiDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(m,*) + integer(c_int),value :: m,n + end function writeMultiDoubleComplex + end interface + + interface readDoubleComplex + function readDoubleComplex(didx,hidx,n) & + & result(res) bind(c,name='readDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(*) + integer(c_int),value :: n + end function readDoubleComplex + function readDoubleComplexFirst(first,didx,hidx,n,IndexBase) & + & result(res) bind(c,name='readDoubleComplexFirst') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(*) + integer(c_int),value :: n, first, IndexBase + end function readDoubleComplexFirst + function readMultiDoubleComplex(didx,hidx,m,n) & + & result(res) bind(c,name='readMultiDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: didx + complex(c_double_complex) :: hidx(m,*) + integer(c_int),value :: m,n + end function readMultiDoubleComplex + end interface + + interface + subroutine freeDoubleComplex(didx) & + & bind(c,name='freeDoubleComplex') + use iso_c_binding + type(c_ptr), value :: didx + end subroutine freeDoubleComplex + end interface + + + interface setScalDevice + function setScalMultiVecDeviceDoubleComplex(val, first, last, & + & indexBase, deviceVecX) result(res) & + & bind(c,name='setscalMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: first,last,indexbase + complex(c_double_complex), value :: val + type(c_ptr), value :: deviceVecX + end function setScalMultiVecDeviceDoubleComplex + end interface + + interface + function geinsMultiVecDeviceDoubleComplex(n,deviceVecIrl,deviceVecVal,& + & dupl,indexbase,deviceVecX) & + & result(res) bind(c,name='geinsMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n, dupl,indexbase + type(c_ptr), value :: deviceVecIrl, deviceVecVal, deviceVecX + end function geinsMultiVecDeviceDoubleComplex + end interface + + ! New gather functions + + interface + function igathMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceDoubleComplex + end interface + + interface + function igathMultiVecDeviceDoubleComplexVecIdx(deviceVec, vectorId, n, first, idx, & + & hfirst, hostVec, indexBase) & + & result(res) bind(c,name='igathMultiVecDeviceDoubleComplexVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value:: vectorId + integer(c_int),value:: first, n, hfirst + type(c_ptr),value :: idx + type(c_ptr),value :: hostVec + integer(c_int),value:: indexBase + end function igathMultiVecDeviceDoubleComplexVecIdx + end interface + + interface + function iscatMultiVecDeviceDoubleComplex(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + complex(c_double_complex),value :: beta + end function iscatMultiVecDeviceDoubleComplex + end interface + + interface + function iscatMultiVecDeviceDoubleComplexVecIdx(deviceVec, vectorId, & + & first, n, idx, hfirst, hostVec, indexBase, beta) & + & result(res) bind(c,name='iscatMultiVecDeviceDoubleComplexVecIdx') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceVec + integer(c_int),value :: vectorId + integer(c_int),value :: first, n, hfirst + type(c_ptr), value :: idx + type(c_ptr), value :: hostVec + integer(c_int),value :: indexBase + complex(c_double_complex),value :: beta + end function iscatMultiVecDeviceDoubleComplexVecIdx + end interface + + + interface scalMultiVecDevice + function scalMultiVecDeviceDoubleComplex(alpha,deviceVecA) & + & result(val) bind(c,name='scalMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + complex(c_double_complex), value :: alpha + type(c_ptr), value :: deviceVecA + end function scalMultiVecDeviceDoubleComplex + end interface + + interface dotMultiVecDevice + function dotMultiVecDeviceDoubleComplex(res, n,deviceVecA,deviceVecB) & + & result(val) bind(c,name='dotMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + complex(c_double_complex) :: res + type(c_ptr), value :: deviceVecA, deviceVecB + end function dotMultiVecDeviceDoubleComplex + end interface + + interface nrm2MultiVecDeviceComplex + function nrm2MultiVecDeviceDoubleComplex(res,n,deviceVecA) & + & result(val) bind(c,name='nrm2MultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function nrm2MultiVecDeviceDoubleComplex + end interface + + interface amaxMultiVecDeviceComplex + function amaxMultiVecDeviceDoubleComplex(res,n,deviceVecA) & + & result(val) bind(c,name='amaxMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function amaxMultiVecDeviceDoubleComplex + end interface + + interface asumMultiVecDeviceComplex + function asumMultiVecDeviceDoubleComplex(res,n,deviceVecA) & + & result(val) bind(c,name='asumMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: val + integer(c_int), value :: n + real(c_double) :: res + type(c_ptr), value :: deviceVecA + end function asumMultiVecDeviceDoubleComplex + end interface + + + interface axpbyMultiVecDevice + function axpbyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,beta,deviceVecB) & + & result(res) bind(c,name='axpbyMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB + end function axpbyMultiVecDeviceDoubleComplex + end interface + + interface axyMultiVecDevice + function axyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='axyMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function axyMultiVecDeviceDoubleComplex + end interface + + interface axybzMultiVecDevice + function axybzMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB,beta,deviceVecZ) & + & result(res) bind(c,name='axybzMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha, beta + type(c_ptr), value :: deviceVecA, deviceVecB,deviceVecZ + end function axybzMultiVecDeviceDoubleComplex + end interface + + + interface absMultiVecDevice + function absMultiVecDeviceDoubleComplex(n,alpha,deviceVecA) & + & result(res) bind(c,name='absMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha + type(c_ptr), value :: deviceVecA + end function absMultiVecDeviceDoubleComplex + function absMultiVecDeviceDoubleComplex2(n,alpha,deviceVecA,deviceVecB) & + & result(res) bind(c,name='absMultiVecDeviceDoubleComplex2') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + complex(c_double_complex), value :: alpha + type(c_ptr), value :: deviceVecA, deviceVecB + end function absMultiVecDeviceDoubleComplex2 + end interface + + interface inner_register + module procedure inner_registerDoubleComplex + end interface + + interface inner_unregister + module procedure inner_unregisterDoubleComplex + end interface + +contains + + + function inner_registerDoubleComplex(buffer,dval) result(res) + complex(c_double_complex), allocatable, target :: buffer(:) + type(c_ptr) :: dval + integer(c_int) :: res + complex(c_double_complex) :: dummy + res = registerMapped(c_loc(buffer),dval,size(buffer), dummy) + end function inner_registerDoubleComplex + + subroutine inner_unregisterDoubleComplex(buffer) + complex(c_double_complex), allocatable, target :: buffer(:) + + call unregisterMapped(c_loc(buffer)) + end subroutine inner_unregisterDoubleComplex + +#endif + +end module psb_z_vectordev_mod diff --git a/gpu/s_cusparse_mod.F90 b/gpu/s_cusparse_mod.F90 new file mode 100644 index 00000000..6e628fa1 --- /dev/null +++ b/gpu/s_cusparse_mod.F90 @@ -0,0 +1,305 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module s_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: s_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type s_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: s_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type s_Hmat +#endif + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface CSRGDeviceFree + function s_CSRGDeviceFree(Mat) & + & bind(c,name="s_CSRGDeviceFree") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: res + end function s_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function s_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="s_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function s_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="s_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function s_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="s_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function s_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="s_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_CSRGDeviceSetMatIndexBase + end interface + + interface CSRGDeviceCsrsmAnalysis + function s_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="s_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: res + end function s_CSRGDeviceCsrsmAnalysis + end interface + + interface CSRGDeviceAlloc + function s_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="s_CSRGDeviceAlloc") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function s_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function s_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="s_CSRGDeviceGetParms") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function s_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function s_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="s_spsvCSRGDevice") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_float), value :: alpha,beta + integer(c_int) :: res + end function s_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function s_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="s_spmvCSRGDevice") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_float), value :: alpha,beta + integer(c_int) :: res + end function s_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function s_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="s_CSRGHost2Device") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_float) :: val(*) + integer(c_int) :: res + end function s_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function s_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="s_CSRGDevice2Host") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_float) :: val(*) + integer(c_int) :: res + end function s_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function s_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="s_HYBGDeviceAlloc") result(res) + use iso_c_binding + import s_hmat + type(s_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function s_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function s_HYBGDeviceFree(Mat) & + & bind(c,name="s_HYBGDeviceFree") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int) :: res + end function s_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function s_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="s_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function s_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="s_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function s_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="s_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function s_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="s_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function s_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function s_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="s_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int) :: res + end function s_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function s_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="s_spsvHYBGDevice") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_float), value :: alpha,beta + integer(c_int) :: res + end function s_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function s_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="s_spmvHYBGDevice") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_float), value :: alpha,beta + integer(c_int) :: res + end function s_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function s_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="s_HYBGHost2Device") result(res) + use iso_c_binding + import s_Hmat + type(s_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_float) :: val(*) + integer(c_int) :: res + end function s_HYBGHost2Device + end interface +#endif + +#endif + +end module s_cusparse_mod diff --git a/gpu/scusparse.c b/gpu/scusparse.c new file mode 100644 index 00000000..70a0cbd7 --- /dev/null +++ b/gpu/scusparse.c @@ -0,0 +1,95 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + +/* Single precision real */ +#define TYPE float +#define CUSPARSE_BASE_TYPE CUDA_R_32F +#define T_CSRGDeviceMat s_CSRGDeviceMat +#define T_Cmat s_Cmat +#define T_spmvCSRGDevice s_spmvCSRGDevice +#define T_spsvCSRGDevice s_spsvCSRGDevice +#define T_CSRGDeviceAlloc s_CSRGDeviceAlloc +#define T_CSRGDeviceFree s_CSRGDeviceFree +#define T_CSRGHost2Device s_CSRGHost2Device +#define T_CSRGDevice2Host s_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode s_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType s_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms s_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis s_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseScsrmv +#define cusparseTcsrsv_solve cusparseScsrsv_solve +#define cusparseTcsrsv_analysis cusparseScsrsv_analysis + +#define T_HYBGDeviceMat s_HYBGDeviceMat +#define T_Hmat s_Hmat +#define T_HYBGDeviceFree s_HYBGDeviceFree +#define T_spmvHYBGDevice s_spmvHYBGDevice +#define T_HYBGDeviceAlloc s_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType s_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase s_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType s_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode s_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis s_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice s_spsvHYBGDevice +#define T_HYBGHost2Device s_HYBGHost2Device +#define cusparseThybmv cusparseShybmv +#define cusparseThybsv_solve cusparseShybsv_solve +#define cusparseThybsv_analysis cusparseShybsv_analysis +#define cusparseTcsr2hyb cusparseScsr2hyb + + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis s_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#endif + +#include "fcusparse_fct.h" + +#endif diff --git a/gpu/svectordev.c b/gpu/svectordev.c new file mode 100644 index 00000000..d193a4d8 --- /dev/null +++ b/gpu/svectordev.c @@ -0,0 +1,304 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "svectordev.h" + + +int registerMappedFloat(void *buff, void **d_p, int n, float dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(float)); +} + +int writeMultiVecDeviceFloat(void* deviceVec, float* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(float)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceFloatR2(void* deviceVec, float* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceFloat(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatR2",i); + } + return(i); +} + +int readMultiVecDeviceFloat(void* deviceVec, float* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(float)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); + } + return(i); +} + +int readMultiVecDeviceFloatR2(void* deviceVec, float* hostVec, int ld) +{ int i; + i = readMultiVecDeviceFloat(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatR2",i); + } + return(i); +} + +int setscalMultiVecDeviceFloat(float val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + float beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0.0; + else if (dupl == INS_ADD) + beta = 1.0; + else + beta = 0.0; + + spgpuSscat(handle, (float *) devVecX->v_, n, (float*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceFloat(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + float *hv = (float *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuSgath(handle,hv, n, idx,indexBase, (float *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, float beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceFloat(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, float beta) +{ int i=0; + float *hv = (float *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuSscat(handle, (float *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, + alpha, (float *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, + float beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuSaxpby(handle,(float*)devVecY->v_+pitch*j, n, beta, + (float*)devVecY->v_+pitch*j, alpha,(float*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSmaxy(handle, (float*)devVecB->v_, n, alpha, (float*)devVecA->v_, + (float*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, + void *deviceVecB, float beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuSmaxypbz(handle, (float*)devVecZ->v_, n, beta, (float*)devVecZ->v_, + alpha, (float*) devVecA->v_, (float*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + +int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSabs(handle, (float*)devVecB->v_, n, alpha, (float*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_gpuGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuSabs(handle, (float*)devVecA->v_, n, alpha, (float*)devVecA->v_); + + return(i); +} + +#endif + diff --git a/gpu/svectordev.h b/gpu/svectordev.h new file mode 100644 index 00000000..1fd4fd11 --- /dev/null +++ b/gpu/svectordev.h @@ -0,0 +1,78 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedFloat(void *, void **, int, float); +int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); +int writeMultiVecDeviceFloatR2(void* deviceMultiVec, float* hostMultiVec, int ld); +int readMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); +int readMultiVecDeviceFloatR2(void* deviceMultiVec, float* hostMultiVec, int ld); + +int setscalMultiVecDeviceFloat(float val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceFloat(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, float beta); +int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, float beta); + +int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA); +int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int amaxMultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); +int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, + void *deviceVecB, float beta, void *deviceVecZ); +int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA); +int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, void *deviceVecB); + + +#endif diff --git a/gpu/vectordev.c b/gpu/vectordev.c new file mode 100644 index 00000000..2b22a8a6 --- /dev/null +++ b/gpu/vectordev.c @@ -0,0 +1,198 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +//new +MultiVectorDeviceParams getMultiVectorDeviceParams(unsigned int count, unsigned int size, + unsigned int elementType) +{ + struct MultiVectorDeviceParams params; + + if (count == 1) + params.pitch = size; + else + if (elementType == SPGPU_TYPE_INT) + { + //fprintf(stderr,"Getting parms for a DOUBLE vector\n"); + params.pitch = (((size*sizeof(int) + 255)/256)*256)/sizeof(int); + } + else if (elementType == SPGPU_TYPE_DOUBLE) + { + //fprintf(stderr,"Getting parms for a DOUBLE vector\n"); + params.pitch = (((size*sizeof(double) + 255)/256)*256)/sizeof(double); + } + else if (elementType == SPGPU_TYPE_FLOAT) + { + params.pitch = (((size*sizeof(float) + 255)/256)*256)/sizeof(float); + } + else if (elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + params.pitch = (((size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); + } + else if (elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + params.pitch = (((size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); + } + else + params.pitch = 0; + + params.elementType = elementType; + + params.count = count; + params.size = size; + + return params; + +} +//new +int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *params) +{ + if (params->pitch == 0) + return SPGPU_UNSUPPORTED; // Unsupported params + + struct MultiVectDevice *tmp = (struct MultiVectDevice *)malloc(sizeof(struct MultiVectDevice)); + *remoteMultiVec = (void *)tmp; + tmp->size_ = params->size; + tmp->count_ = params->count; + + if (params->elementType == SPGPU_TYPE_INT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (((params->size*sizeof(int) + 255)/256)*256)/sizeof(int); + //fprintf(stderr,"Allocating an INT vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (((params->size*sizeof(float) + 255)/256)*256)/sizeof(float); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(double) + 255)/256)*256)/sizeof(double); + //fprintf(stderr,"Allocating a DOUBLE vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return SPGPU_SUCCESS; // Success +} + + +int unregisterMapped(void *buff) +{ + return unregisterMappedMemory(buff); +} + +void freeMultiVecDevice(void* deviceVec) +{ + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // fprintf(stderr,"freeMultiVecDevice\n"); + if (devVec != NULL) { + //fprintf(stderr,"Before freeMultiVecDevice% ld\n",devVec->pitch_*devVec->count_*sizeof(double)); + freeRemoteBuffer(devVec->v_); + free(deviceVec); + } +} + +int FallocMultiVecDevice(void** deviceMultiVec, unsigned int count, + unsigned int size, unsigned int elementType) +{ int i; + struct MultiVectorDeviceParams p; + + p = getMultiVectorDeviceParams(count, size, elementType); + i = allocMultiVecDevice(deviceMultiVec, &p); + //cudaSync(); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d, %d %d \n","FallocMultiVecDevice",i, count, size); + } + return(i); +} + +int getMultiVecDeviceSize(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->size_; + return(i); +} + +int getMultiVecDeviceCount(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->count_; + return(i); +} + +int getMultiVecDevicePitch(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->pitch_; + return(i); +} + +#endif + diff --git a/gpu/vectordev.h b/gpu/vectordev.h new file mode 100644 index 00000000..9739c01b --- /dev/null +++ b/gpu/vectordev.h @@ -0,0 +1,90 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include "cuda_runtime.h" +//#include "common.h" +#include "cintrf.h" +#include + +struct MultiVectDevice +{ + // number of vectors + int count_; + + //number of elements for a single vector + int size_; + + //pithc in number of elements + int pitch_; + + // Vectors in device memory (single allocation) + void *v_; +}; + +typedef struct MultiVectorDeviceParams +{ + // number on vectors + unsigned int count; //1 for a simple vector + + // The resulting allocation will be pitch*s*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Size of a single vector (in number of elements). + unsigned int size; +} MultiVectorDeviceParams; + + +#define INS_OVERWRITE 0 +#define INS_ADD 1 + + +int unregisterMapped(void *); + +MultiVectorDeviceParams getMultiVectorDeviceParams(unsigned int count, + unsigned int size, + unsigned int elementType); + +int FallocMultiVecDevice(void** deviceMultiVec, unsigned count, + unsigned int size, unsigned int elementType); +void freeMultiVecDevice(void* deviceVec); +int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *params); +int getMultiVecDeviceSize(void* deviceVec); +int getMultiVecDeviceCount(void* deviceVec); +int getMultiVecDevicePitch(void* deviceVec); + +#endif diff --git a/gpu/z_cusparse_mod.F90 b/gpu/z_cusparse_mod.F90 new file mode 100644 index 00000000..020f1de5 --- /dev/null +++ b/gpu/z_cusparse_mod.F90 @@ -0,0 +1,305 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module z_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: z_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type z_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: z_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type z_Hmat +#endif + + +#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) + + interface CSRGDeviceFree + function z_CSRGDeviceFree(Mat) & + & bind(c,name="z_CSRGDeviceFree") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function z_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function z_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function z_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function z_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatIndexBase + end interface + + interface CSRGDeviceCsrsmAnalysis + function z_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGDeviceCsrsmAnalysis + end interface + + interface CSRGDeviceAlloc + function z_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="z_CSRGDeviceAlloc") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function z_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function z_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="z_CSRGDeviceGetParms") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function z_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function z_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spsvCSRGDevice") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function z_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spmvCSRGDevice") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function z_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_CSRGHost2Device") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function z_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_CSRGDevice2Host") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function z_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="z_HYBGDeviceAlloc") result(res) + use iso_c_binding + import z_hmat + type(z_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function z_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function z_HYBGDeviceFree(Mat) & + & bind(c,name="z_HYBGDeviceFree") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int) :: res + end function z_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function z_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function z_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function z_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function z_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function z_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="z_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int) :: res + end function z_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function z_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spsvHYBGDevice") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function z_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spmvHYBGDevice") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function z_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_HYBGHost2Device") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_HYBGHost2Device + end interface +#endif + +#endif + +end module z_cusparse_mod diff --git a/gpu/zcusparse.c b/gpu/zcusparse.c new file mode 100644 index 00000000..3991359a --- /dev/null +++ b/gpu/zcusparse.c @@ -0,0 +1,94 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#ifdef HAVE_SPGPU +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + +/* Double precision complex */ +#define TYPE double complex +#define CUSPARSE_BASE_TYPE CUDA_C_64F +#define T_CSRGDeviceMat z_CSRGDeviceMat +#define T_Cmat z_Cmat +#define T_spmvCSRGDevice z_spmvCSRGDevice +#define T_spsvCSRGDevice z_spsvCSRGDevice +#define T_CSRGDeviceAlloc z_CSRGDeviceAlloc +#define T_CSRGDeviceFree z_CSRGDeviceFree +#define T_CSRGHost2Device z_CSRGHost2Device +#define T_CSRGDevice2Host z_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode z_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType z_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms z_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis z_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseZcsrmv +#define cusparseTcsrsv_solve cusparseZcsrsv_solve +#define cusparseTcsrsv_analysis cusparseZcsrsv_analysis +#define T_HYBGDeviceMat z_HYBGDeviceMat +#define T_Hmat z_Hmat +#define T_HYBGDeviceFree z_HYBGDeviceFree +#define T_spmvHYBGDevice z_spmvHYBGDevice +#define T_HYBGDeviceAlloc z_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType z_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase z_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType z_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode z_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis z_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice z_spsvHYBGDevice +#define T_HYBGHost2Device z_HYBGHost2Device +#define cusparseThybmv cusparseZhybmv +#define cusparseThybsv_solve cusparseZhybsv_solve +#define cusparseThybsv_analysis cusparseZhybsv_analysis +#define cusparseTcsr2hyb cusparseZcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis z_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#endif + +#include "fcusparse_fct.h" + + +#endif diff --git a/gpu/zvectordev.c b/gpu/zvectordev.c new file mode 100644 index 00000000..c245719f --- /dev/null +++ b/gpu/zvectordev.c @@ -0,0 +1,321 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#if defined(HAVE_SPGPU) +//#include "utils.h" +//#include "common.h" +#include "zvectordev.h" + + +int registerMappedDoubleComplex(void *buff, void **d_p, int n, cuDoubleComplex dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(cuDoubleComplex)); +} + +int writeMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceDoubleComplexR2(void* deviceVec, cuDoubleComplex* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceDoubleComplex(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleComplexR2",i); + } + return(i); +} + +int readMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplex",i); + } + return(i); +} + +int readMultiVecDeviceDoubleComplexR2(void* deviceVec, cuDoubleComplex* hostVec, int ld) +{ int i; + i = readMultiVecDeviceDoubleComplex(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplexR2",i); + } + return(i); +} + +int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + cuDoubleComplex beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + if (dupl == INS_OVERWRITE) + beta = make_cuDoubleComplex(0.0, 0.0); + else if (dupl == INS_ADD) + beta = make_cuDoubleComplex(1.0, 0.0); + else + beta = make_cuDoubleComplex(0.0, 0.0); + + spgpuZscat(handle, (cuDoubleComplex *) devVecX->v_, n, (cuDoubleComplex*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, + first, (void*) devIdx->v_, + hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + cuDoubleComplex *hv = (cuDoubleComplex *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuZgath(handle,hv, n, idx,indexBase, + (cuDoubleComplex *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void *deviceIdx, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta) +{ int i=0; + cuDoubleComplex *hv = (cuDoubleComplex *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_gpuGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuZscat(handle, (cuDoubleComplex *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_gpuGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, + alpha, (cuDoubleComplex *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + + spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_, + (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, + cuDoubleComplex beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_gpuGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuZaxpby(handle,(cuDoubleComplex*)devVecY->v_+pitch*j, n, beta, + (cuDoubleComplex*)devVecY->v_+pitch*j, alpha, + (cuDoubleComplex*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_gpuGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZmaxy(handle, (cuDoubleComplex*)devVecB->v_, n, alpha, + (cuDoubleComplex*)devVecA->v_, + (cuDoubleComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB, cuDoubleComplex beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuZmaxypbz(handle, (cuDoubleComplex*)devVecZ->v_, n, beta, + (cuDoubleComplex*)devVecZ->v_, + alpha, (cuDoubleComplex*) devVecA->v_, (cuDoubleComplex*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + + +int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_gpuGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZabs(handle, (cuDoubleComplex*)devVecB->v_, n, + alpha, (cuDoubleComplex*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_gpuGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuZabs(handle, (cuDoubleComplex*)devVecA->v_, n, + alpha, (cuDoubleComplex*)devVecA->v_); + + return(i); +} + +#endif + diff --git a/gpu/zvectordev.h b/gpu/zvectordev.h new file mode 100644 index 00000000..ca3c966e --- /dev/null +++ b/gpu/zvectordev.h @@ -0,0 +1,91 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +#if defined(HAVE_SPGPU) +//#include "utils.h" +#include +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex); +int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); +int writeMultiVecDeviceDoubleComplexR2(void* deviceMultiVec, + cuDoubleComplex* hostMultiVec, int ld); +int readMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); +int readMultiVecDeviceDoubleComplexR2(void* deviceMultiVec, + cuDoubleComplex* hostMultiVec, int ld); +int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceDoubleComplex(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, + int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, + int n, int first, + void *deviceIdx, int hfirst, + void* host_values, + int indexBase, cuDoubleComplex beta); +int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta); + +int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA); +int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, + void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, + cuDoubleComplex beta, void* devVecY); +int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB, cuDoubleComplex beta, + void *deviceVecZ); +int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA); +int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB); + + +#endif From d0d4e458771e50c4af63ef8e1d1f9184a0d46b60 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 29 Sep 2023 13:59:40 +0200 Subject: [PATCH 006/116] Fix for I gpu types from template: use psb_sizeof_ip --- gpu/psb_i_csrg_mat_mod.F90 | 2 +- gpu/psb_i_diag_mat_mod.F90 | 2 +- gpu/psb_i_elg_mat_mod.F90 | 2 +- gpu/psb_i_hlg_mat_mod.F90 | 2 +- gpu/psb_i_hybg_mat_mod.F90 | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gpu/psb_i_csrg_mat_mod.F90 b/gpu/psb_i_csrg_mat_mod.F90 index de25370f..9a4a3852 100644 --- a/gpu/psb_i_csrg_mat_mod.F90 +++ b/gpu/psb_i_csrg_mat_mod.F90 @@ -258,7 +258,7 @@ contains integer(psb_epk_) :: res if (a%is_dev()) call a%sync() res = 8 - res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%val) res = res + psb_sizeof_ip * size(a%irp) res = res + psb_sizeof_ip * size(a%ja) ! Should we account for the shadow data structure diff --git a/gpu/psb_i_diag_mat_mod.F90 b/gpu/psb_i_diag_mat_mod.F90 index 3559c09a..b54ee8d5 100644 --- a/gpu/psb_i_diag_mat_mod.F90 +++ b/gpu/psb_i_diag_mat_mod.F90 @@ -236,7 +236,7 @@ contains integer(psb_epk_) :: res res = 8 - res = res + psb_sizeof_int * size(a%data) + res = res + psb_sizeof_ip * size(a%data) res = res + psb_sizeof_ip * size(a%offset) ! Should we account for the shadow data structure diff --git a/gpu/psb_i_elg_mat_mod.F90 b/gpu/psb_i_elg_mat_mod.F90 index afc71662..a421e611 100644 --- a/gpu/psb_i_elg_mat_mod.F90 +++ b/gpu/psb_i_elg_mat_mod.F90 @@ -294,7 +294,7 @@ contains if (a%is_dev()) call a%sync() res = 8 - res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%val) res = res + psb_sizeof_ip * size(a%irn) res = res + psb_sizeof_ip * size(a%idiag) res = res + psb_sizeof_ip * size(a%ja) diff --git a/gpu/psb_i_hlg_mat_mod.F90 b/gpu/psb_i_hlg_mat_mod.F90 index 92917d47..2ec881ce 100644 --- a/gpu/psb_i_hlg_mat_mod.F90 +++ b/gpu/psb_i_hlg_mat_mod.F90 @@ -260,7 +260,7 @@ contains if (a%is_dev()) call a%sync() res = 8 - res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%val) res = res + psb_sizeof_ip * size(a%irn) res = res + psb_sizeof_ip * size(a%idiag) res = res + psb_sizeof_ip * size(a%hkoffs) diff --git a/gpu/psb_i_hybg_mat_mod.F90 b/gpu/psb_i_hybg_mat_mod.F90 index 9e682365..388a8801 100644 --- a/gpu/psb_i_hybg_mat_mod.F90 +++ b/gpu/psb_i_hybg_mat_mod.F90 @@ -236,7 +236,7 @@ contains class(psb_i_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 - res = res + psb_sizeof_int * size(a%val) + res = res + psb_sizeof_ip * size(a%val) res = res + psb_sizeof_ip * size(a%irp) res = res + psb_sizeof_ip * size(a%ja) ! Should we account for the shadow data structure From 81e9121c910213c82ff1af15de0dbabd27e738d2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 2 Oct 2023 13:08:56 +0200 Subject: [PATCH 007/116] Add GPULDLIBS into Make.inc (and fix configry) --- Make.inc.in | 2 ++ configure | 3 +++ configure.ac | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Make.inc.in b/Make.inc.in index e28e9bee..f0b21f91 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -69,6 +69,7 @@ CBINDLIBNAME=libpsb_cbind.a GPUD=@GPUD@ GPULD=@GPULD@ +LGPU=@LGPU@ SPGPUDIR=@SPGPU_DIR@ SPGPU_INCDIR=@SPGPU_INCDIR@ @@ -92,3 +93,4 @@ CUDEFINES=@CUDEFINES@ @PSBLASRULES@ +PSBGPULDLIBS=$(LGPU) $(SPGPU_LIBS) $(CUDA_LIBS) $(LIBS) \ No newline at end of file diff --git a/configure b/configure index 5c3444b3..d2fa80d2 100755 --- a/configure +++ b/configure @@ -653,6 +653,7 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS +LGPU GPULD GPUD CUDEFINES @@ -11020,6 +11021,7 @@ CPPFLAGS="$SAVE_CPPFLAGS" if test "x$pac_cv_have_spgpu" == "xyes" ; then GPUD=gpud; GPULD=gpuld; + LGPU="-lpsb_gpu"; EXTRALDLIBS="-lstdc++"; fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $GPUD $GPULD" >&5 @@ -11161,6 +11163,7 @@ FDEFINES=$(PSBFDEFINES) + ############################################################################### diff --git a/configure.ac b/configure.ac index 4b7f82e1..cb23c6ac 100755 --- a/configure.ac +++ b/configure.ac @@ -801,7 +801,8 @@ CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); PAC_CHECK_SPGPU() if test "x$pac_cv_have_spgpu" == "xyes" ; then GPUD=gpud; - GPULD=gpuld; + GPULD=gpuld; + LGPU="-lpsb_gpu"; EXTRALDLIBS="-lstdc++"; fi AC_MSG_NOTICE([At this point GPUTARGET is $GPUD $GPULD]) @@ -933,6 +934,7 @@ AC_SUBST(CUDA_NVCC) AC_SUBST(CUDEFINES) AC_SUBST(GPUD) AC_SUBST(GPULD) +AC_SUBST(LGPU) ############################################################################### # the following files will be created by Automake From 273233691545139fae7680debffcf6870d03a1fc Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 16 Oct 2023 16:15:00 +0200 Subject: [PATCH 008/116] Fix gpu/makefile --- gpu/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/Makefile b/gpu/Makefile index 16e9c084..10e17d55 100755 --- a/gpu/Makefile +++ b/gpu/Makefile @@ -118,7 +118,7 @@ cudaobjs: $(FOBJS) ilib: objs $(MAKE) -C impl lib LIBNAME=$(LIBNAME) -cudalib: objs +cudalib: objs ilib $(MAKE) -C CUDA lib LIBNAME=$(LIBNAME) clean: cclean iclean cudaclean From 6aa7987d52240ec9db6b5346cacc7bb80cb78c7b Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 19 Oct 2023 14:22:32 +0200 Subject: [PATCH 009/116] Rename GPU into cuda, and merge SPGPU code. --- Make.inc.in | 14 +- Makefile | 18 +- configure | 210 +-- configure.ac | 40 +- {gpu => cuda}/CUDA/Makefile | 3 +- {gpu => cuda}/CUDA/psi_cuda_CopyCooToElg.cuh | 0 {gpu => cuda}/CUDA/psi_cuda_CopyCooToHlg.cuh | 0 {gpu => cuda}/CUDA/psi_cuda_c_CopyCooToElg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_c_CopyCooToHlg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_common.cuh | 0 {gpu => cuda}/CUDA/psi_cuda_d_CopyCooToElg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_d_CopyCooToHlg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_s_CopyCooToElg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_s_CopyCooToHlg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_z_CopyCooToElg.cu | 0 {gpu => cuda}/CUDA/psi_cuda_z_CopyCooToHlg.cu | 0 {gpu => cuda}/Makefile | 19 +- {gpu => cuda}/base_cusparse_mod.F90 | 0 {gpu => cuda}/c_cusparse_mod.F90 | 0 {gpu => cuda}/ccusparse.c | 0 {gpu => cuda}/cintrf.h | 0 {gpu => cuda}/core_mod.f90 | 0 {gpu => cuda}/cuda_util.c | 0 {gpu => cuda}/cuda_util.h | 0 {gpu => cuda}/cusparse_mod.F90 | 0 {gpu => cuda}/cvectordev.c | 0 {gpu => cuda}/cvectordev.h | 0 {gpu => cuda}/d_cusparse_mod.F90 | 0 {gpu => cuda}/dcusparse.c | 0 {gpu => cuda}/diagdev.c | 0 {gpu => cuda}/diagdev.h | 0 {gpu => cuda}/diagdev_mod.F90 | 0 {gpu => cuda}/dnsdev.c | 0 {gpu => cuda}/dnsdev.h | 0 {gpu => cuda}/dnsdev_mod.F90 | 0 {gpu => cuda}/dvectordev.c | 0 {gpu => cuda}/dvectordev.h | 0 {gpu => cuda}/elldev.c | 0 {gpu => cuda}/elldev.h | 0 {gpu => cuda}/elldev_mod.F90 | 0 {gpu => cuda}/fcusparse.c | 0 {gpu => cuda}/fcusparse.h | 0 {gpu => cuda}/fcusparse_fct.h | 0 {gpu => cuda}/hdiagdev.c | 0 {gpu => cuda}/hdiagdev.h | 0 {gpu => cuda}/hdiagdev_mod.F90 | 0 {gpu => cuda}/hlldev.c | 0 {gpu => cuda}/hlldev.h | 0 {gpu => cuda}/hlldev_mod.F90 | 0 {gpu => cuda}/impl/Makefile | 3 + {gpu => cuda}/impl/psb_c_cp_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_c_cp_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_elg_from_fmt.F90 | 0 .../impl/psb_c_cp_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_c_cp_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_cp_hybg_from_fmt.F90 | 0 .../impl/psb_c_csrg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_csmm.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_from_gpu.F90 | 0 .../impl/psb_c_csrg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_mold.F90 | 0 .../impl/psb_c_csrg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_scal.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_scals.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_csrg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_c_diag_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_diag_mold.F90 | 0 {gpu => cuda}/impl/psb_c_diag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_diag_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_c_dnsg_mat_impl.F90 | 0 .../impl/psb_c_elg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_c_elg_asb.f90 | 0 {gpu => cuda}/impl/psb_c_elg_csmm.F90 | 0 {gpu => cuda}/impl/psb_c_elg_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_elg_csput.F90 | 0 {gpu => cuda}/impl/psb_c_elg_from_gpu.F90 | 0 .../impl/psb_c_elg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_c_elg_mold.F90 | 0 .../impl/psb_c_elg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_c_elg_scal.F90 | 0 {gpu => cuda}/impl/psb_c_elg_scals.F90 | 0 {gpu => cuda}/impl/psb_c_elg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_elg_trim.f90 | 0 {gpu => cuda}/impl/psb_c_elg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_c_hdiag_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_hdiag_mold.F90 | 0 {gpu => cuda}/impl/psb_c_hdiag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_hdiag_vect_mv.F90 | 0 .../impl/psb_c_hlg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_csmm.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_csmv.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_from_gpu.F90 | 0 .../impl/psb_c_hlg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_mold.F90 | 0 .../impl/psb_c_hlg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_scal.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_scals.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_hlg_vect_mv.F90 | 0 .../impl/psb_c_hybg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_csmm.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_csmv.F90 | 0 .../impl/psb_c_hybg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_mold.F90 | 0 .../impl/psb_c_hybg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_scal.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_scals.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_c_hybg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_c_mv_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_c_mv_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_elg_from_fmt.F90 | 0 .../impl/psb_c_mv_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_c_mv_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_c_mv_hybg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_cp_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_cp_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_elg_from_fmt.F90 | 0 .../impl/psb_d_cp_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_cp_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_cp_hybg_from_fmt.F90 | 0 .../impl/psb_d_csrg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_csmm.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_from_gpu.F90 | 0 .../impl/psb_d_csrg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_mold.F90 | 0 .../impl/psb_d_csrg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_scal.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_scals.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_csrg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_d_diag_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_diag_mold.F90 | 0 {gpu => cuda}/impl/psb_d_diag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_diag_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_d_dnsg_mat_impl.F90 | 0 .../impl/psb_d_elg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_d_elg_asb.f90 | 0 {gpu => cuda}/impl/psb_d_elg_csmm.F90 | 0 {gpu => cuda}/impl/psb_d_elg_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_elg_csput.F90 | 0 {gpu => cuda}/impl/psb_d_elg_from_gpu.F90 | 0 .../impl/psb_d_elg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_d_elg_mold.F90 | 0 .../impl/psb_d_elg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_d_elg_scal.F90 | 0 {gpu => cuda}/impl/psb_d_elg_scals.F90 | 0 {gpu => cuda}/impl/psb_d_elg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_elg_trim.f90 | 0 {gpu => cuda}/impl/psb_d_elg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_d_hdiag_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_hdiag_mold.F90 | 0 {gpu => cuda}/impl/psb_d_hdiag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_hdiag_vect_mv.F90 | 0 .../impl/psb_d_hlg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_csmm.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_csmv.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_from_gpu.F90 | 0 .../impl/psb_d_hlg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_mold.F90 | 0 .../impl/psb_d_hlg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_scal.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_scals.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_hlg_vect_mv.F90 | 0 .../impl/psb_d_hybg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_csmm.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_csmv.F90 | 0 .../impl/psb_d_hybg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_mold.F90 | 0 .../impl/psb_d_hybg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_scal.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_scals.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_d_hybg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_d_mv_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_mv_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_elg_from_fmt.F90 | 0 .../impl/psb_d_mv_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_d_mv_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_d_mv_hybg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_cp_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_cp_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_elg_from_fmt.F90 | 0 .../impl/psb_s_cp_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_cp_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_cp_hybg_from_fmt.F90 | 0 .../impl/psb_s_csrg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_csmm.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_from_gpu.F90 | 0 .../impl/psb_s_csrg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_mold.F90 | 0 .../impl/psb_s_csrg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_scal.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_scals.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_csrg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_s_diag_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_diag_mold.F90 | 0 {gpu => cuda}/impl/psb_s_diag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_diag_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_s_dnsg_mat_impl.F90 | 0 .../impl/psb_s_elg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_s_elg_asb.f90 | 0 {gpu => cuda}/impl/psb_s_elg_csmm.F90 | 0 {gpu => cuda}/impl/psb_s_elg_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_elg_csput.F90 | 0 {gpu => cuda}/impl/psb_s_elg_from_gpu.F90 | 0 .../impl/psb_s_elg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_s_elg_mold.F90 | 0 .../impl/psb_s_elg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_s_elg_scal.F90 | 0 {gpu => cuda}/impl/psb_s_elg_scals.F90 | 0 {gpu => cuda}/impl/psb_s_elg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_elg_trim.f90 | 0 {gpu => cuda}/impl/psb_s_elg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_s_hdiag_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_hdiag_mold.F90 | 0 {gpu => cuda}/impl/psb_s_hdiag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_hdiag_vect_mv.F90 | 0 .../impl/psb_s_hlg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_csmm.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_csmv.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_from_gpu.F90 | 0 .../impl/psb_s_hlg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_mold.F90 | 0 .../impl/psb_s_hlg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_scal.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_scals.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_hlg_vect_mv.F90 | 0 .../impl/psb_s_hybg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_csmm.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_csmv.F90 | 0 .../impl/psb_s_hybg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_mold.F90 | 0 .../impl/psb_s_hybg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_scal.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_scals.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_s_hybg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_s_mv_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_mv_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_elg_from_fmt.F90 | 0 .../impl/psb_s_mv_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_s_mv_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_s_mv_hybg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_cp_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_cp_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_elg_from_fmt.F90 | 0 .../impl/psb_z_cp_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_cp_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_cp_hybg_from_fmt.F90 | 0 .../impl/psb_z_csrg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_csmm.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_from_gpu.F90 | 0 .../impl/psb_z_csrg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_mold.F90 | 0 .../impl/psb_z_csrg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_scal.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_scals.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_csrg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_z_diag_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_diag_mold.F90 | 0 {gpu => cuda}/impl/psb_z_diag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_diag_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_z_dnsg_mat_impl.F90 | 0 .../impl/psb_z_elg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_z_elg_asb.f90 | 0 {gpu => cuda}/impl/psb_z_elg_csmm.F90 | 0 {gpu => cuda}/impl/psb_z_elg_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_elg_csput.F90 | 0 {gpu => cuda}/impl/psb_z_elg_from_gpu.F90 | 0 .../impl/psb_z_elg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_z_elg_mold.F90 | 0 .../impl/psb_z_elg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_z_elg_scal.F90 | 0 {gpu => cuda}/impl/psb_z_elg_scals.F90 | 0 {gpu => cuda}/impl/psb_z_elg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_elg_trim.f90 | 0 {gpu => cuda}/impl/psb_z_elg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_z_hdiag_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_hdiag_mold.F90 | 0 {gpu => cuda}/impl/psb_z_hdiag_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_hdiag_vect_mv.F90 | 0 .../impl/psb_z_hlg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_csmm.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_csmv.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_from_gpu.F90 | 0 .../impl/psb_z_hlg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_mold.F90 | 0 .../impl/psb_z_hlg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_scal.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_scals.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_hlg_vect_mv.F90 | 0 .../impl/psb_z_hybg_allocate_mnnz.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_csmm.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_csmv.F90 | 0 .../impl/psb_z_hybg_inner_vect_sv.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_mold.F90 | 0 .../impl/psb_z_hybg_reallocate_nz.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_scal.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_scals.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_to_gpu.F90 | 0 {gpu => cuda}/impl/psb_z_hybg_vect_mv.F90 | 0 {gpu => cuda}/impl/psb_z_mv_csrg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_csrg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_mv_diag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_elg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_elg_from_fmt.F90 | 0 .../impl/psb_z_mv_hdiag_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_hlg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_hlg_from_fmt.F90 | 0 {gpu => cuda}/impl/psb_z_mv_hybg_from_coo.F90 | 0 {gpu => cuda}/impl/psb_z_mv_hybg_from_fmt.F90 | 0 {gpu => cuda}/ivectordev.c | 0 {gpu => cuda}/ivectordev.h | 0 {gpu => cuda}/psb_base_vectordev_mod.F90 | 0 {gpu => cuda}/psb_c_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_c_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_c_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_c_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_c_vectordev_mod.F90 | 0 {gpu => cuda}/psb_d_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_d_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_d_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_d_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_d_vectordev_mod.F90 | 0 {gpu => cuda}/psb_gpu_env_mod.F90 | 0 {gpu => cuda}/psb_gpu_mod.F90 | 0 {gpu => cuda}/psb_i_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_i_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_i_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_i_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_i_vectordev_mod.F90 | 0 {gpu => cuda}/psb_s_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_s_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_s_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_s_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_s_vectordev_mod.F90 | 0 {gpu => cuda}/psb_vectordev_mod.f90 | 0 {gpu => cuda}/psb_z_csrg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_diag_mat_mod.F90 | 0 {gpu => cuda}/psb_z_dnsg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_elg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_gpu_vect_mod.F90 | 0 {gpu => cuda}/psb_z_hdiag_mat_mod.F90 | 0 {gpu => cuda}/psb_z_hlg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_hybg_mat_mod.F90 | 0 {gpu => cuda}/psb_z_vectordev_mod.F90 | 0 {gpu => cuda}/s_cusparse_mod.F90 | 0 {gpu => cuda}/scusparse.c | 0 cuda/spgpu/Makefile | 39 + cuda/spgpu/coo.cpp | 98 ++ cuda/spgpu/coo_conv.h | 40 + cuda/spgpu/core.c | 109 ++ cuda/spgpu/core.h | 185 +++ cuda/spgpu/debug.h | 58 + cuda/spgpu/dia.c | 105 ++ cuda/spgpu/dia.h | 158 +++ cuda/spgpu/dia_conv.h | 66 + cuda/spgpu/ell.c | 202 +++ cuda/spgpu/ell.h | 321 +++++ cuda/spgpu/ell_conv.h | 110 ++ cuda/spgpu/hdia.cpp | 374 +++++ cuda/spgpu/hdia.h | 159 +++ cuda/spgpu/hdia_conv.h | 102 ++ cuda/spgpu/hell.c | 104 ++ cuda/spgpu/hell.h | 186 +++ cuda/spgpu/hell_conv.h | 80 ++ cuda/spgpu/kernels/Makefile | 33 + cuda/spgpu/kernels/abs_base.cuh | 110 ++ cuda/spgpu/kernels/amax_base.cuh | 233 ++++ cuda/spgpu/kernels/asum_base.cuh | 240 ++++ cuda/spgpu/kernels/axy_base.cuh | 210 +++ cuda/spgpu/kernels/cabs.cu | 33 + cuda/spgpu/kernels/camax.cu | 31 + cuda/spgpu/kernels/casum.cu | 31 + cuda/spgpu/kernels/caxpby.cu | 103 ++ cuda/spgpu/kernels/caxy.cu | 31 + cuda/spgpu/kernels/cdot.cu | 160 +++ cuda/spgpu/kernels/cgath.cu | 31 + cuda/spgpu/kernels/cnrm2.cu | 169 +++ cuda/spgpu/kernels/cscal.cu | 31 + cuda/spgpu/kernels/cscat.cu | 31 + cuda/spgpu/kernels/csetscal.cu | 31 + cuda/spgpu/kernels/cudadebug.h | 35 + cuda/spgpu/kernels/cudalang.h | 70 + cuda/spgpu/kernels/dabs.cu | 33 + cuda/spgpu/kernels/damax.cu | 32 + cuda/spgpu/kernels/dasum.cu | 32 + cuda/spgpu/kernels/daxpby.cu | 101 ++ cuda/spgpu/kernels/daxy.cu | 32 + cuda/spgpu/kernels/ddot.cu | 160 +++ cuda/spgpu/kernels/dgath.cu | 31 + cuda/spgpu/kernels/dia_cspmv.cu | 33 + cuda/spgpu/kernels/dia_dspmv.cu | 32 + cuda/spgpu/kernels/dia_spmv_base.cuh | 151 ++ cuda/spgpu/kernels/dia_spmv_base_template.cuh | 217 +++ cuda/spgpu/kernels/dia_sspmv.cu | 32 + cuda/spgpu/kernels/dia_zspmv.cu | 33 + cuda/spgpu/kernels/dnrm2.cu | 157 +++ cuda/spgpu/kernels/dscal.cu | 31 + cuda/spgpu/kernels/dscat.cu | 31 + cuda/spgpu/kernels/dsetscal.cu | 31 + cuda/spgpu/kernels/ell_ccsput.cu | 32 + cuda/spgpu/kernels/ell_cspmv.cu | 33 + cuda/spgpu/kernels/ell_csput_base.cuh | 126 ++ cuda/spgpu/kernels/ell_dcsput.cu | 31 + cuda/spgpu/kernels/ell_dspmv.cu | 32 + cuda/spgpu/kernels/ell_scsput.cu | 31 + cuda/spgpu/kernels/ell_spmv_base.cuh | 154 +++ cuda/spgpu/kernels/ell_spmv_base_nors.cuh | 340 +++++ cuda/spgpu/kernels/ell_spmv_base_template.cuh | 426 ++++++ cuda/spgpu/kernels/ell_sspmv.cu | 32 + cuda/spgpu/kernels/ell_zcsput.cu | 32 + cuda/spgpu/kernels/ell_zspmv.cu | 33 + cuda/spgpu/kernels/gath_base.cuh | 86 ++ cuda/spgpu/kernels/hdia_cspmv.cu | 32 + cuda/spgpu/kernels/hdia_dspmv.cu | 33 + cuda/spgpu/kernels/hdia_spmv_base.cuh | 149 ++ .../spgpu/kernels/hdia_spmv_base_template.cuh | 253 ++++ cuda/spgpu/kernels/hdia_sspmv.cu | 32 + cuda/spgpu/kernels/hdia_zspmv.cu | 33 + cuda/spgpu/kernels/hell_cspmv.cu | 32 + cuda/spgpu/kernels/hell_dspmv.cu | 32 + cuda/spgpu/kernels/hell_spmv_base.cuh | 159 +++ .../spgpu/kernels/hell_spmv_base_template.cuh | 357 +++++ cuda/spgpu/kernels/hell_sspmv.cu | 32 + cuda/spgpu/kernels/hell_zspmv.cu | 33 + cuda/spgpu/kernels/igath.cu | 31 + cuda/spgpu/kernels/iscat.cu | 31 + cuda/spgpu/kernels/isetscal.cu | 31 + cuda/spgpu/kernels/mathbase.cuh | 53 + cuda/spgpu/kernels/sabs.cu | 33 + cuda/spgpu/kernels/samax.cu | 32 + cuda/spgpu/kernels/sasum.cu | 32 + cuda/spgpu/kernels/saxpby.cu | 100 ++ cuda/spgpu/kernels/saxy.cu | 32 + cuda/spgpu/kernels/scal_base.cuh | 83 ++ cuda/spgpu/kernels/scat_base.cuh | 89 ++ cuda/spgpu/kernels/sdot.cu | 175 +++ cuda/spgpu/kernels/setscal_base.cuh | 82 ++ cuda/spgpu/kernels/sgath.cu | 31 + cuda/spgpu/kernels/snrm2.cu | 166 +++ cuda/spgpu/kernels/sscal.cu | 31 + cuda/spgpu/kernels/sscat.cu | 31 + cuda/spgpu/kernels/ssetscal.cu | 31 + cuda/spgpu/kernels/zabs.cu | 33 + cuda/spgpu/kernels/zamax.cu | 31 + cuda/spgpu/kernels/zasum.cu | 31 + cuda/spgpu/kernels/zaxpby.cu | 103 ++ cuda/spgpu/kernels/zaxy.cu | 31 + cuda/spgpu/kernels/zdot.cu | 148 ++ cuda/spgpu/kernels/zgath.cu | 31 + cuda/spgpu/kernels/znrm2.cu | 159 +++ cuda/spgpu/kernels/zscal.cu | 31 + cuda/spgpu/kernels/zscat.cu | 31 + cuda/spgpu/kernels/zsetscal.cu | 31 + cuda/spgpu/vector.h | 1231 +++++++++++++++++ {gpu => cuda}/svectordev.c | 0 {gpu => cuda}/svectordev.h | 0 {gpu => cuda}/vectordev.c | 0 {gpu => cuda}/vectordev.h | 0 {gpu => cuda}/z_cusparse_mod.F90 | 0 {gpu => cuda}/zcusparse.c | 0 {gpu => cuda}/zvectordev.c | 0 {gpu => cuda}/zvectordev.h | 0 test/gpukern/Makefile | 62 + test/gpukern/c_file_spmv.F90 | 491 +++++++ test/gpukern/d_file_spmv.F90 | 496 +++++++ test/gpukern/data_input.f90 | 221 +++ test/gpukern/dpdegenmv.F90 | 997 +++++++++++++ test/gpukern/s_file_spmv.F90 | 496 +++++++ test/gpukern/spdegenmv.F90 | 989 +++++++++++++ test/gpukern/z_file_spmv.F90 | 491 +++++++ 527 files changed, 15146 insertions(+), 232 deletions(-) rename {gpu => cuda}/CUDA/Makefile (93%) rename {gpu => cuda}/CUDA/psi_cuda_CopyCooToElg.cuh (100%) rename {gpu => cuda}/CUDA/psi_cuda_CopyCooToHlg.cuh (100%) rename {gpu => cuda}/CUDA/psi_cuda_c_CopyCooToElg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_c_CopyCooToHlg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_common.cuh (100%) rename {gpu => cuda}/CUDA/psi_cuda_d_CopyCooToElg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_d_CopyCooToHlg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_s_CopyCooToElg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_s_CopyCooToHlg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_z_CopyCooToElg.cu (100%) rename {gpu => cuda}/CUDA/psi_cuda_z_CopyCooToHlg.cu (100%) rename {gpu => cuda}/Makefile (93%) rename {gpu => cuda}/base_cusparse_mod.F90 (100%) rename {gpu => cuda}/c_cusparse_mod.F90 (100%) rename {gpu => cuda}/ccusparse.c (100%) rename {gpu => cuda}/cintrf.h (100%) rename {gpu => cuda}/core_mod.f90 (100%) rename {gpu => cuda}/cuda_util.c (100%) rename {gpu => cuda}/cuda_util.h (100%) rename {gpu => cuda}/cusparse_mod.F90 (100%) rename {gpu => cuda}/cvectordev.c (100%) rename {gpu => cuda}/cvectordev.h (100%) rename {gpu => cuda}/d_cusparse_mod.F90 (100%) rename {gpu => cuda}/dcusparse.c (100%) rename {gpu => cuda}/diagdev.c (100%) rename {gpu => cuda}/diagdev.h (100%) rename {gpu => cuda}/diagdev_mod.F90 (100%) rename {gpu => cuda}/dnsdev.c (100%) rename {gpu => cuda}/dnsdev.h (100%) rename {gpu => cuda}/dnsdev_mod.F90 (100%) rename {gpu => cuda}/dvectordev.c (100%) rename {gpu => cuda}/dvectordev.h (100%) rename {gpu => cuda}/elldev.c (100%) rename {gpu => cuda}/elldev.h (100%) rename {gpu => cuda}/elldev_mod.F90 (100%) rename {gpu => cuda}/fcusparse.c (100%) rename {gpu => cuda}/fcusparse.h (100%) rename {gpu => cuda}/fcusparse_fct.h (100%) rename {gpu => cuda}/hdiagdev.c (100%) rename {gpu => cuda}/hdiagdev.h (100%) rename {gpu => cuda}/hdiagdev_mod.F90 (100%) rename {gpu => cuda}/hlldev.c (100%) rename {gpu => cuda}/hlldev.h (100%) rename {gpu => cuda}/hlldev_mod.F90 (100%) rename {gpu => cuda}/impl/Makefile (97%) rename {gpu => cuda}/impl/psb_c_cp_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_cp_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_csrg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_diag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_diag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_diag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_diag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_dnsg_mat_impl.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_asb.f90 (100%) rename {gpu => cuda}/impl/psb_c_elg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_csput.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_elg_trim.f90 (100%) rename {gpu => cuda}/impl/psb_c_elg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hdiag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hdiag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_hdiag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_hdiag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_hlg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_c_hybg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_c_mv_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_cp_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_csrg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_diag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_diag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_diag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_diag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_dnsg_mat_impl.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_asb.f90 (100%) rename {gpu => cuda}/impl/psb_d_elg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_csput.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_elg_trim.f90 (100%) rename {gpu => cuda}/impl/psb_d_elg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hdiag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hdiag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_hdiag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_hdiag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_hlg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_d_hybg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_d_mv_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_cp_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_csrg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_diag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_diag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_diag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_diag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_dnsg_mat_impl.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_asb.f90 (100%) rename {gpu => cuda}/impl/psb_s_elg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_csput.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_elg_trim.f90 (100%) rename {gpu => cuda}/impl/psb_s_elg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hdiag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hdiag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_hdiag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_hdiag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_hlg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_s_hybg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_s_mv_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_cp_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_csrg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_diag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_diag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_diag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_diag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_dnsg_mat_impl.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_asb.f90 (100%) rename {gpu => cuda}/impl/psb_z_elg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_csput.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_elg_trim.f90 (100%) rename {gpu => cuda}/impl/psb_z_elg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hdiag_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hdiag_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_hdiag_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_hdiag_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_from_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_hlg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_allocate_mnnz.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_csmm.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_csmv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_inner_vect_sv.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_mold.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_reallocate_nz.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_scal.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_scals.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_to_gpu.F90 (100%) rename {gpu => cuda}/impl/psb_z_hybg_vect_mv.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_csrg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_csrg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_diag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_elg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_elg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hdiag_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hlg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hlg_from_fmt.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hybg_from_coo.F90 (100%) rename {gpu => cuda}/impl/psb_z_mv_hybg_from_fmt.F90 (100%) rename {gpu => cuda}/ivectordev.c (100%) rename {gpu => cuda}/ivectordev.h (100%) rename {gpu => cuda}/psb_base_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_c_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_c_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_c_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_d_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_d_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_d_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_gpu_env_mod.F90 (100%) rename {gpu => cuda}/psb_gpu_mod.F90 (100%) rename {gpu => cuda}/psb_i_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_i_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_i_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_s_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_s_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_s_vectordev_mod.F90 (100%) rename {gpu => cuda}/psb_vectordev_mod.f90 (100%) rename {gpu => cuda}/psb_z_csrg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_diag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_dnsg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_elg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_gpu_vect_mod.F90 (100%) rename {gpu => cuda}/psb_z_hdiag_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_hlg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_hybg_mat_mod.F90 (100%) rename {gpu => cuda}/psb_z_vectordev_mod.F90 (100%) rename {gpu => cuda}/s_cusparse_mod.F90 (100%) rename {gpu => cuda}/scusparse.c (100%) create mode 100644 cuda/spgpu/Makefile create mode 100644 cuda/spgpu/coo.cpp create mode 100644 cuda/spgpu/coo_conv.h create mode 100644 cuda/spgpu/core.c create mode 100644 cuda/spgpu/core.h create mode 100644 cuda/spgpu/debug.h create mode 100644 cuda/spgpu/dia.c create mode 100644 cuda/spgpu/dia.h create mode 100644 cuda/spgpu/dia_conv.h create mode 100644 cuda/spgpu/ell.c create mode 100644 cuda/spgpu/ell.h create mode 100644 cuda/spgpu/ell_conv.h create mode 100644 cuda/spgpu/hdia.cpp create mode 100644 cuda/spgpu/hdia.h create mode 100644 cuda/spgpu/hdia_conv.h create mode 100644 cuda/spgpu/hell.c create mode 100644 cuda/spgpu/hell.h create mode 100644 cuda/spgpu/hell_conv.h create mode 100644 cuda/spgpu/kernels/Makefile create mode 100644 cuda/spgpu/kernels/abs_base.cuh create mode 100644 cuda/spgpu/kernels/amax_base.cuh create mode 100644 cuda/spgpu/kernels/asum_base.cuh create mode 100644 cuda/spgpu/kernels/axy_base.cuh create mode 100644 cuda/spgpu/kernels/cabs.cu create mode 100644 cuda/spgpu/kernels/camax.cu create mode 100644 cuda/spgpu/kernels/casum.cu create mode 100644 cuda/spgpu/kernels/caxpby.cu create mode 100644 cuda/spgpu/kernels/caxy.cu create mode 100644 cuda/spgpu/kernels/cdot.cu create mode 100644 cuda/spgpu/kernels/cgath.cu create mode 100644 cuda/spgpu/kernels/cnrm2.cu create mode 100644 cuda/spgpu/kernels/cscal.cu create mode 100644 cuda/spgpu/kernels/cscat.cu create mode 100644 cuda/spgpu/kernels/csetscal.cu create mode 100644 cuda/spgpu/kernels/cudadebug.h create mode 100644 cuda/spgpu/kernels/cudalang.h create mode 100644 cuda/spgpu/kernels/dabs.cu create mode 100644 cuda/spgpu/kernels/damax.cu create mode 100644 cuda/spgpu/kernels/dasum.cu create mode 100644 cuda/spgpu/kernels/daxpby.cu create mode 100644 cuda/spgpu/kernels/daxy.cu create mode 100644 cuda/spgpu/kernels/ddot.cu create mode 100644 cuda/spgpu/kernels/dgath.cu create mode 100644 cuda/spgpu/kernels/dia_cspmv.cu create mode 100644 cuda/spgpu/kernels/dia_dspmv.cu create mode 100644 cuda/spgpu/kernels/dia_spmv_base.cuh create mode 100644 cuda/spgpu/kernels/dia_spmv_base_template.cuh create mode 100644 cuda/spgpu/kernels/dia_sspmv.cu create mode 100644 cuda/spgpu/kernels/dia_zspmv.cu create mode 100644 cuda/spgpu/kernels/dnrm2.cu create mode 100644 cuda/spgpu/kernels/dscal.cu create mode 100644 cuda/spgpu/kernels/dscat.cu create mode 100644 cuda/spgpu/kernels/dsetscal.cu create mode 100644 cuda/spgpu/kernels/ell_ccsput.cu create mode 100644 cuda/spgpu/kernels/ell_cspmv.cu create mode 100644 cuda/spgpu/kernels/ell_csput_base.cuh create mode 100644 cuda/spgpu/kernels/ell_dcsput.cu create mode 100644 cuda/spgpu/kernels/ell_dspmv.cu create mode 100644 cuda/spgpu/kernels/ell_scsput.cu create mode 100644 cuda/spgpu/kernels/ell_spmv_base.cuh create mode 100644 cuda/spgpu/kernels/ell_spmv_base_nors.cuh create mode 100644 cuda/spgpu/kernels/ell_spmv_base_template.cuh create mode 100644 cuda/spgpu/kernels/ell_sspmv.cu create mode 100644 cuda/spgpu/kernels/ell_zcsput.cu create mode 100644 cuda/spgpu/kernels/ell_zspmv.cu create mode 100644 cuda/spgpu/kernels/gath_base.cuh create mode 100644 cuda/spgpu/kernels/hdia_cspmv.cu create mode 100644 cuda/spgpu/kernels/hdia_dspmv.cu create mode 100644 cuda/spgpu/kernels/hdia_spmv_base.cuh create mode 100644 cuda/spgpu/kernels/hdia_spmv_base_template.cuh create mode 100644 cuda/spgpu/kernels/hdia_sspmv.cu create mode 100644 cuda/spgpu/kernels/hdia_zspmv.cu create mode 100644 cuda/spgpu/kernels/hell_cspmv.cu create mode 100644 cuda/spgpu/kernels/hell_dspmv.cu create mode 100644 cuda/spgpu/kernels/hell_spmv_base.cuh create mode 100644 cuda/spgpu/kernels/hell_spmv_base_template.cuh create mode 100644 cuda/spgpu/kernels/hell_sspmv.cu create mode 100644 cuda/spgpu/kernels/hell_zspmv.cu create mode 100644 cuda/spgpu/kernels/igath.cu create mode 100644 cuda/spgpu/kernels/iscat.cu create mode 100644 cuda/spgpu/kernels/isetscal.cu create mode 100644 cuda/spgpu/kernels/mathbase.cuh create mode 100644 cuda/spgpu/kernels/sabs.cu create mode 100644 cuda/spgpu/kernels/samax.cu create mode 100644 cuda/spgpu/kernels/sasum.cu create mode 100644 cuda/spgpu/kernels/saxpby.cu create mode 100644 cuda/spgpu/kernels/saxy.cu create mode 100644 cuda/spgpu/kernels/scal_base.cuh create mode 100644 cuda/spgpu/kernels/scat_base.cuh create mode 100644 cuda/spgpu/kernels/sdot.cu create mode 100644 cuda/spgpu/kernels/setscal_base.cuh create mode 100644 cuda/spgpu/kernels/sgath.cu create mode 100644 cuda/spgpu/kernels/snrm2.cu create mode 100644 cuda/spgpu/kernels/sscal.cu create mode 100644 cuda/spgpu/kernels/sscat.cu create mode 100644 cuda/spgpu/kernels/ssetscal.cu create mode 100644 cuda/spgpu/kernels/zabs.cu create mode 100644 cuda/spgpu/kernels/zamax.cu create mode 100644 cuda/spgpu/kernels/zasum.cu create mode 100644 cuda/spgpu/kernels/zaxpby.cu create mode 100644 cuda/spgpu/kernels/zaxy.cu create mode 100644 cuda/spgpu/kernels/zdot.cu create mode 100644 cuda/spgpu/kernels/zgath.cu create mode 100644 cuda/spgpu/kernels/znrm2.cu create mode 100644 cuda/spgpu/kernels/zscal.cu create mode 100644 cuda/spgpu/kernels/zscat.cu create mode 100644 cuda/spgpu/kernels/zsetscal.cu create mode 100644 cuda/spgpu/vector.h rename {gpu => cuda}/svectordev.c (100%) rename {gpu => cuda}/svectordev.h (100%) rename {gpu => cuda}/vectordev.c (100%) rename {gpu => cuda}/vectordev.h (100%) rename {gpu => cuda}/z_cusparse_mod.F90 (100%) rename {gpu => cuda}/zcusparse.c (100%) rename {gpu => cuda}/zvectordev.c (100%) rename {gpu => cuda}/zvectordev.h (100%) create mode 100755 test/gpukern/Makefile create mode 100644 test/gpukern/c_file_spmv.F90 create mode 100644 test/gpukern/d_file_spmv.F90 create mode 100644 test/gpukern/data_input.f90 create mode 100644 test/gpukern/dpdegenmv.F90 create mode 100644 test/gpukern/s_file_spmv.F90 create mode 100644 test/gpukern/spdegenmv.F90 create mode 100644 test/gpukern/z_file_spmv.F90 diff --git a/Make.inc.in b/Make.inc.in index f0b21f91..ca0fa7f7 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -67,15 +67,15 @@ UTILMODNAME=@UTILMODNAME@ CBINDLIBNAME=libpsb_cbind.a -GPUD=@GPUD@ -GPULD=@GPULD@ -LGPU=@LGPU@ +CUDAD=@CUDAD@ +CUDALD=@CUDALD@ +LCUDA=@LCUDA@ -SPGPUDIR=@SPGPU_DIR@ -SPGPU_INCDIR=@SPGPU_INCDIR@ +#SPGPUDIR=@SPGPU_DIR@ +#SPGPU_INCDIR=@SPGPU_INCDIR@ SPGPU_LIBS=@SPGPU_LIBS@ SPGPU_DEFINES=@SPGPU_DEFINES@ -SPGPU_INCLUDES=@SPGPU_INCLUDES@ +#SPGPU_INCLUDES=@SPGPU_INCLUDES@ CUDA_DIR=@CUDA_DIR@ CUDA_DEFINES=@CUDA_DEFINES@ @@ -93,4 +93,4 @@ CUDEFINES=@CUDEFINES@ @PSBLASRULES@ -PSBGPULDLIBS=$(LGPU) $(SPGPU_LIBS) $(CUDA_LIBS) $(LIBS) \ No newline at end of file +PSBGPULDLIBS=$(LCUDA) $(SPGPU_LIBS) $(CUDA_LIBS) $(PSBLDLIBS) $(LIBS) \ No newline at end of file diff --git a/Makefile b/Makefile index 49879270..95f4cb17 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd extd $(GPUD) libd +all: dirs based precd kryld utild cbindd extd $(CUDAD) libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." @@ -13,18 +13,18 @@ precd: based utild: based kryld: precd extd: based -gpud: extd +cudad: extd cbindd: based precd kryld utild -libd: based precd kryld utild cbindd extd $(GPULD) +libd: based precd kryld utild cbindd extd $(CUDALD) $(MAKE) -C base lib $(MAKE) -C prec lib $(MAKE) -C krylov lib $(MAKE) -C util lib $(MAKE) -C cbind lib $(MAKE) -C ext lib -gpuld: gpud - $(MAKE) -C gpu lib +cudald: cudad + $(MAKE) -C cuda lib based: @@ -39,8 +39,8 @@ cbindd: $(MAKE) -C cbind objs extd: based $(MAKE) -C ext objs -gpud: based extd - $(MAKE) -C gpu objs +cudad: based extd + $(MAKE) -C cuda objs install: all @@ -66,7 +66,7 @@ clean: $(MAKE) -C util clean $(MAKE) -C cbind clean $(MAKE) -C ext clean - $(MAKE) -C gpu clean + $(MAKE) -C cuda clean check: all make check -C test/serial @@ -83,7 +83,7 @@ veryclean: cleanlib cd util && $(MAKE) veryclean cd cbind && $(MAKE) veryclean cd ext && $(MAKE) veryclean - cd gpu && $(MAKE) veryclean + cd cuda && $(MAKE) veryclean cd test/fileread && $(MAKE) clean cd test/pargen && $(MAKE) clean cd test/util && $(MAKE) clean diff --git a/configure b/configure index d2fa80d2..8b217231 100755 --- a/configure +++ b/configure @@ -653,9 +653,9 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS -LGPU -GPULD -GPUD +LCUDA +CUDALD +CUDAD CUDEFINES CUDA_NVCC CUDA_SHORT_VERSION @@ -665,10 +665,7 @@ CUDA_INCLUDES CUDA_DEFINES CUDA_DIR EXTRALDLIBS -SPGPU_INCDIR -SPGPU_INCLUDES SPGPU_DEFINES -SPGPU_DIR SPGPU_LIBS SPGPU_FLAGS METISINCFILE @@ -844,7 +841,6 @@ with_amddir with_amdincdir with_amdlibdir with_cuda -with_spgpu with_cudacc ' ac_precious_vars='build_alias @@ -1545,8 +1541,6 @@ Optional Packages: --with-amdincdir=DIR Specify the directory for AMD includes. --with-amdlibdir=DIR Specify the directory for AMD library. --with-cuda=DIR Specify the directory for CUDA library and includes. - --with-spgpu=DIR Specify the directory for SPGPU library and - includes. --with-cudacc A comma-separated list of CCs to compile to, for example, --with-cudacc=30,35,37,50,60 @@ -10851,181 +10845,16 @@ CPPFLAGS="$SAVE_CPPFLAGS" CUDA_VERSION="$pac_cv_cuda_version"; CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); -SAVE_LIBS="$LIBS" - SAVE_CPPFLAGS="$CPPFLAGS" - if test "x$pac_cv_have_cuda" == "x"; then - -# Check whether --with-cuda was given. -if test ${with_cuda+y} -then : - withval=$with_cuda; pac_cv_cuda_dir=$withval -else $as_nop - pac_cv_cuda_dir='' -fi - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -SAVE_LIBS="$LIBS" -SAVE_CPPFLAGS="$CPPFLAGS" -if test "x$pac_cv_cuda_dir" != "x"; then - CUDA_DIR="$pac_cv_cuda_dir" - LIBS="-L$pac_cv_cuda_dir/lib $LIBS" - CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" - CUDA_DEFINES="-DHAVE_CUDA" - CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" - CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" - if test -f "$pac_cv_cuda_dir/bin/nvcc"; then - CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" - else - CUDA_NVCC="nvcc" - fi -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 -printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } -ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" -if test "x$ac_cv_header_cuda_runtime_h" = xyes -then : - pac_cuda_header_ok=yes -else $as_nop - pac_cuda_header_ok=no; CUDA_INCLUDES="" -fi - - -if test "x$pac_cuda_header_ok" == "xyes" ; then - CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" - LIBS="$CUDA_LIBS -lm $LIBS"; - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 -printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char cudaMemcpy (); -int -main (void) -{ -return cudaMemcpy (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; -else $as_nop - pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 -printf "%s\n" "$pac_cuda_lib_ok" >&6; } - -fi -LIBS="$SAVE_LIBS" -CPPFLAGS="$SAVE_CPPFLAGS" - - fi - if test "x$pac_cv_have_cuda" == "xyes"; then - -# Check whether --with-spgpu was given. -if test ${with_spgpu+y} -then : - withval=$with_spgpu; pac_cv_spgpudir=$withval -else $as_nop - pac_cv_spgpudir='' -fi - - - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - if test "x$pac_cv_spgpudir" != "x"; then - LIBS="-L$pac_cv_spgpudir/lib $LIBS" - GPU_INCLUDES="-I$pac_cv_spgpudir/include" - CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" - GPU_LIBDIR="-L$pac_cv_spgpudir/lib" - fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking spgpu dir $pac_cv_spgpudir" >&5 -printf %s "checking spgpu dir $pac_cv_spgpudir... " >&6; } - ac_fn_c_check_header_compile "$LINENO" "core.h" "ac_cv_header_core_h" "$ac_includes_default" -if test "x$ac_cv_header_core_h" = xyes -then : - pac_gpu_header_ok=yes -else $as_nop - pac_gpu_header_ok=no; GPU_INCLUDES="" -fi - - - if test "x$pac_gpu_header_ok" == "xyes" ; then - GPU_LIBS="-lspgpu $GPU_LIBDIR" - LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for spgpuCreate in $GPU_LIBS" >&5 -printf %s "checking for spgpuCreate in $GPU_LIBS... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -char spgpuCreate (); -int -main (void) -{ -return spgpuCreate (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO" -then : - pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; -else $as_nop - pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS="" -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam \ - conftest$ac_exeext conftest.$ac_ext - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_gpu_lib_ok" >&5 -printf "%s\n" "$pac_gpu_lib_ok" >&6; } - if test "x$pac_cv_have_spgpu" == "xyes" ; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Have found SPGPU" >&5 -printf "%s\n" "$as_me: Have found SPGPU" >&6;} - SPGPULIBNAME="libpsbgpu.a"; - SPGPU_DIR="$pac_cv_spgpudir"; - SPGPU_DEFINES="-DHAVE_SPGPU"; - SPGPU_INCDIR="$SPGPU_DIR/include"; - SPGPU_INCLUDES="-I$SPGPU_INCDIR"; - SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib"; - LGPU=-lpsb_gpu - CUDA_DIR="$pac_cv_cuda_dir"; - CUDA_DEFINES="-DHAVE_CUDA"; - CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" - CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" - FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES"; - CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ; - fi - fi -fi -LIBS="$SAVE_LIBS" -CPPFLAGS="$SAVE_CPPFLAGS" - -if test "x$pac_cv_have_spgpu" == "xyes" ; then - GPUD=gpud; - GPULD=gpuld; - LGPU="-lpsb_gpu"; +if test "x$pac_cv_have_cuda" == "xyes" ; then + SPGPU_DEFINES="-DHAVE_SPGPU -DHAVE_GPU"; + SPGPU_LIBS="-lspgpu"; + CUDAD=cudad; + CUDALD=cudald; + LCUDA="-lpsb_cuda"; EXTRALDLIBS="-lstdc++"; fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $GPUD $GPULD" >&5 -printf "%s\n" "$as_me: At this point GPUTARGET is $GPUD $GPULD" >&6;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $CUDAD $CUDALD" >&5 +printf "%s\n" "$as_me: At this point GPUTARGET is $CUDAD $CUDALD" >&6;} @@ -11041,7 +10870,7 @@ fi if test "x$pac_cv_cudacc" == "x"; then pac_cv_cudacc="30,35,37,50,60"; fi -CUDEFINES=""; +CUDEFINES="--dopt=on"; for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` do CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; @@ -11057,8 +10886,10 @@ fi if test "x$pac_cv_ipk_size" != "x4"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} - GPUD=""; - GPULD=""; + SPGPU_DEFINES=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; CUDEFINES=""; CUDA_INCLUDES=""; CUDA_LIBS=""; @@ -11124,9 +10955,9 @@ UTILLIBNAME=libpsb_util.a PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) -CDEFINES=$(PSBCDEFINES) -FDEFINES=$(PSBFDEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -11159,9 +10990,6 @@ FDEFINES=$(PSBFDEFINES) - - - diff --git a/configure.ac b/configure.ac index cb23c6ac..bfc9b5f9 100755 --- a/configure.ac +++ b/configure.ac @@ -798,20 +798,22 @@ if test "x$pac_cv_have_cuda" == "xyes"; then PAC_CHECK_CUDA_VERSION() CUDA_VERSION="$pac_cv_cuda_version"; CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); -PAC_CHECK_SPGPU() -if test "x$pac_cv_have_spgpu" == "xyes" ; then - GPUD=gpud; - GPULD=gpuld; - LGPU="-lpsb_gpu"; +dnl PAC_CHECK_SPGPU() +if test "x$pac_cv_have_cuda" == "xyes" ; then + SPGPU_DEFINES="-DHAVE_SPGPU -DHAVE_GPU"; + SPGPU_LIBS="-lspgpu"; + CUDAD=cudad; + CUDALD=cudald; + LCUDA="-lpsb_cuda"; EXTRALDLIBS="-lstdc++"; fi -AC_MSG_NOTICE([At this point GPUTARGET is $GPUD $GPULD]) +AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD]) PAC_ARG_WITH_CUDACC() if test "x$pac_cv_cudacc" == "x"; then pac_cv_cudacc="30,35,37,50,60"; fi -CUDEFINES=""; +CUDEFINES="--dopt=on"; for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` do CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; @@ -826,8 +828,10 @@ fi if test "x$pac_cv_ipk_size" != "x4"; then AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) - GPUD=""; - GPULD=""; + SPGPU_DEFINES=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; CUDEFINES=""; CUDA_INCLUDES=""; CUDA_LIBS=""; @@ -893,9 +897,9 @@ AC_SUBST(FINCLUDES) PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) -CDEFINES=$(PSBCDEFINES) -FDEFINES=$(PSBFDEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -919,10 +923,10 @@ AC_SUBST(UTILLIBNAME) AC_SUBST(METISINCFILE) AC_SUBST(SPGPU_FLAGS) AC_SUBST(SPGPU_LIBS) -AC_SUBST(SPGPU_DIR) +dnl AC_SUBST(SPGPU_DIR) AC_SUBST(SPGPU_DEFINES) -AC_SUBST(SPGPU_INCLUDES) -AC_SUBST(SPGPU_INCDIR) +dnl AC_SUBST(SPGPU_INCLUDES) +dnl AC_SUBST(SPGPU_INCDIR) AC_SUBST(EXTRALDLIBS) AC_SUBST(CUDA_DIR) AC_SUBST(CUDA_DEFINES) @@ -932,9 +936,9 @@ AC_SUBST(CUDA_VERSION) AC_SUBST(CUDA_SHORT_VERSION) AC_SUBST(CUDA_NVCC) AC_SUBST(CUDEFINES) -AC_SUBST(GPUD) -AC_SUBST(GPULD) -AC_SUBST(LGPU) +AC_SUBST(CUDAD) +AC_SUBST(CUDALD) +AC_SUBST(LCUDA) ############################################################################### # the following files will be created by Automake diff --git a/gpu/CUDA/Makefile b/cuda/CUDA/Makefile similarity index 93% rename from gpu/CUDA/Makefile rename to cuda/CUDA/Makefile index a1f9d48b..5bdfb935 100644 --- a/gpu/CUDA/Makefile +++ b/cuda/CUDA/Makefile @@ -15,11 +15,10 @@ LDLIBS=$(PSBLDLIBS) # #CCOPT= -g FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). -CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. -I$(INCDIR) LIBNAME=libpsb_gpu.a - CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \ psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \ psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \ diff --git a/gpu/CUDA/psi_cuda_CopyCooToElg.cuh b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh similarity index 100% rename from gpu/CUDA/psi_cuda_CopyCooToElg.cuh rename to cuda/CUDA/psi_cuda_CopyCooToElg.cuh diff --git a/gpu/CUDA/psi_cuda_CopyCooToHlg.cuh b/cuda/CUDA/psi_cuda_CopyCooToHlg.cuh similarity index 100% rename from gpu/CUDA/psi_cuda_CopyCooToHlg.cuh rename to cuda/CUDA/psi_cuda_CopyCooToHlg.cuh diff --git a/gpu/CUDA/psi_cuda_c_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_c_CopyCooToElg.cu rename to cuda/CUDA/psi_cuda_c_CopyCooToElg.cu diff --git a/gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_c_CopyCooToHlg.cu rename to cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu diff --git a/gpu/CUDA/psi_cuda_common.cuh b/cuda/CUDA/psi_cuda_common.cuh similarity index 100% rename from gpu/CUDA/psi_cuda_common.cuh rename to cuda/CUDA/psi_cuda_common.cuh diff --git a/gpu/CUDA/psi_cuda_d_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_d_CopyCooToElg.cu rename to cuda/CUDA/psi_cuda_d_CopyCooToElg.cu diff --git a/gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_d_CopyCooToHlg.cu rename to cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu diff --git a/gpu/CUDA/psi_cuda_s_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_s_CopyCooToElg.cu rename to cuda/CUDA/psi_cuda_s_CopyCooToElg.cu diff --git a/gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_s_CopyCooToHlg.cu rename to cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu diff --git a/gpu/CUDA/psi_cuda_z_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_z_CopyCooToElg.cu rename to cuda/CUDA/psi_cuda_z_CopyCooToElg.cu diff --git a/gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu similarity index 100% rename from gpu/CUDA/psi_cuda_z_CopyCooToHlg.cu rename to cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu diff --git a/gpu/Makefile b/cuda/Makefile similarity index 93% rename from gpu/Makefile rename to cuda/Makefile index 10e17d55..2b0c011a 100755 --- a/gpu/Makefile +++ b/cuda/Makefile @@ -13,8 +13,8 @@ LDLIBS=$(PSBLDLIBS) # #CCOPT= -g FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). -CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -LIBNAME=libpsb_gpu.a +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I$(INCDIR) +LIBNAME=libpsb_cuda.a FOBJS=cusparse_mod.o base_cusparse_mod.o \ @@ -49,11 +49,18 @@ OBJS=$(COBJS) $(FOBJS) lib: objs -objs: $(OBJS) iobjs cudaobjs +objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs /bin/cp -p *$(.mod) $(MODDIR) /bin/cp -p *.h $(INCDIR) -lib: ilib cudalib +spgpuinc: + $(MAKE) -C spgpu includes +spgpuobjs: + $(MAKE) -C spgpu objs +spgpulib: + $(MAKE) -C spgpu lib + +lib: ilib cudalib spgpulib ar cur $(LIBNAME) $(OBJS) /bin/cp -p $(LIBNAME) $(LIBDIR) @@ -121,7 +128,7 @@ ilib: objs cudalib: objs ilib $(MAKE) -C CUDA lib LIBNAME=$(LIBNAME) -clean: cclean iclean cudaclean +clean: cclean iclean cudaclean spgpuclean /bin/rm -f $(FOBJS) *$(.mod) *.a cclean: @@ -130,5 +137,7 @@ iclean: $(MAKE) -C impl clean cudaclean: $(MAKE) -C CUDA clean +spgpuclean: + $(MAKE) -C spgpu clean veryclean: clean diff --git a/gpu/base_cusparse_mod.F90 b/cuda/base_cusparse_mod.F90 similarity index 100% rename from gpu/base_cusparse_mod.F90 rename to cuda/base_cusparse_mod.F90 diff --git a/gpu/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 similarity index 100% rename from gpu/c_cusparse_mod.F90 rename to cuda/c_cusparse_mod.F90 diff --git a/gpu/ccusparse.c b/cuda/ccusparse.c similarity index 100% rename from gpu/ccusparse.c rename to cuda/ccusparse.c diff --git a/gpu/cintrf.h b/cuda/cintrf.h similarity index 100% rename from gpu/cintrf.h rename to cuda/cintrf.h diff --git a/gpu/core_mod.f90 b/cuda/core_mod.f90 similarity index 100% rename from gpu/core_mod.f90 rename to cuda/core_mod.f90 diff --git a/gpu/cuda_util.c b/cuda/cuda_util.c similarity index 100% rename from gpu/cuda_util.c rename to cuda/cuda_util.c diff --git a/gpu/cuda_util.h b/cuda/cuda_util.h similarity index 100% rename from gpu/cuda_util.h rename to cuda/cuda_util.h diff --git a/gpu/cusparse_mod.F90 b/cuda/cusparse_mod.F90 similarity index 100% rename from gpu/cusparse_mod.F90 rename to cuda/cusparse_mod.F90 diff --git a/gpu/cvectordev.c b/cuda/cvectordev.c similarity index 100% rename from gpu/cvectordev.c rename to cuda/cvectordev.c diff --git a/gpu/cvectordev.h b/cuda/cvectordev.h similarity index 100% rename from gpu/cvectordev.h rename to cuda/cvectordev.h diff --git a/gpu/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 similarity index 100% rename from gpu/d_cusparse_mod.F90 rename to cuda/d_cusparse_mod.F90 diff --git a/gpu/dcusparse.c b/cuda/dcusparse.c similarity index 100% rename from gpu/dcusparse.c rename to cuda/dcusparse.c diff --git a/gpu/diagdev.c b/cuda/diagdev.c similarity index 100% rename from gpu/diagdev.c rename to cuda/diagdev.c diff --git a/gpu/diagdev.h b/cuda/diagdev.h similarity index 100% rename from gpu/diagdev.h rename to cuda/diagdev.h diff --git a/gpu/diagdev_mod.F90 b/cuda/diagdev_mod.F90 similarity index 100% rename from gpu/diagdev_mod.F90 rename to cuda/diagdev_mod.F90 diff --git a/gpu/dnsdev.c b/cuda/dnsdev.c similarity index 100% rename from gpu/dnsdev.c rename to cuda/dnsdev.c diff --git a/gpu/dnsdev.h b/cuda/dnsdev.h similarity index 100% rename from gpu/dnsdev.h rename to cuda/dnsdev.h diff --git a/gpu/dnsdev_mod.F90 b/cuda/dnsdev_mod.F90 similarity index 100% rename from gpu/dnsdev_mod.F90 rename to cuda/dnsdev_mod.F90 diff --git a/gpu/dvectordev.c b/cuda/dvectordev.c similarity index 100% rename from gpu/dvectordev.c rename to cuda/dvectordev.c diff --git a/gpu/dvectordev.h b/cuda/dvectordev.h similarity index 100% rename from gpu/dvectordev.h rename to cuda/dvectordev.h diff --git a/gpu/elldev.c b/cuda/elldev.c similarity index 100% rename from gpu/elldev.c rename to cuda/elldev.c diff --git a/gpu/elldev.h b/cuda/elldev.h similarity index 100% rename from gpu/elldev.h rename to cuda/elldev.h diff --git a/gpu/elldev_mod.F90 b/cuda/elldev_mod.F90 similarity index 100% rename from gpu/elldev_mod.F90 rename to cuda/elldev_mod.F90 diff --git a/gpu/fcusparse.c b/cuda/fcusparse.c similarity index 100% rename from gpu/fcusparse.c rename to cuda/fcusparse.c diff --git a/gpu/fcusparse.h b/cuda/fcusparse.h similarity index 100% rename from gpu/fcusparse.h rename to cuda/fcusparse.h diff --git a/gpu/fcusparse_fct.h b/cuda/fcusparse_fct.h similarity index 100% rename from gpu/fcusparse_fct.h rename to cuda/fcusparse_fct.h diff --git a/gpu/hdiagdev.c b/cuda/hdiagdev.c similarity index 100% rename from gpu/hdiagdev.c rename to cuda/hdiagdev.c diff --git a/gpu/hdiagdev.h b/cuda/hdiagdev.h similarity index 100% rename from gpu/hdiagdev.h rename to cuda/hdiagdev.h diff --git a/gpu/hdiagdev_mod.F90 b/cuda/hdiagdev_mod.F90 similarity index 100% rename from gpu/hdiagdev_mod.F90 rename to cuda/hdiagdev_mod.F90 diff --git a/gpu/hlldev.c b/cuda/hlldev.c similarity index 100% rename from gpu/hlldev.c rename to cuda/hlldev.c diff --git a/gpu/hlldev.h b/cuda/hlldev.h similarity index 100% rename from gpu/hlldev.h rename to cuda/hlldev.h diff --git a/gpu/hlldev_mod.F90 b/cuda/hlldev_mod.F90 similarity index 100% rename from gpu/hlldev_mod.F90 rename to cuda/hlldev_mod.F90 diff --git a/gpu/impl/Makefile b/cuda/impl/Makefile similarity index 97% rename from gpu/impl/Makefile rename to cuda/impl/Makefile index 158066f2..6ddac9a7 100755 --- a/gpu/impl/Makefile +++ b/cuda/impl/Makefile @@ -12,6 +12,9 @@ LDLIBS=$(PSBLDLIBS) FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR) LIBNAME=libpsb_gpu.a +CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) OBJS= \ psb_d_cp_csrg_from_coo.o \ diff --git a/gpu/impl/psb_c_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cp_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_csrg_from_coo.F90 rename to cuda/impl/psb_c_cp_csrg_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cp_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_c_cp_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_c_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cp_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_diag_from_coo.F90 rename to cuda/impl/psb_c_cp_diag_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cp_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_elg_from_coo.F90 rename to cuda/impl/psb_c_cp_elg_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cp_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_cp_elg_from_fmt.F90 rename to cuda/impl/psb_c_cp_elg_from_fmt.F90 diff --git a/gpu/impl/psb_c_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cp_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_c_cp_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cp_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hlg_from_coo.F90 rename to cuda/impl/psb_c_cp_hlg_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cp_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_c_cp_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_c_cp_hybg_from_coo.F90 b/cuda/impl/psb_c_cp_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hybg_from_coo.F90 rename to cuda/impl/psb_c_cp_hybg_from_coo.F90 diff --git a/gpu/impl/psb_c_cp_hybg_from_fmt.F90 b/cuda/impl/psb_c_cp_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_c_cp_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_c_csrg_allocate_mnnz.F90 b/cuda/impl/psb_c_csrg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_c_csrg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_c_csrg_csmm.F90 b/cuda/impl/psb_c_csrg_csmm.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_csmm.F90 rename to cuda/impl/psb_c_csrg_csmm.F90 diff --git a/gpu/impl/psb_c_csrg_csmv.F90 b/cuda/impl/psb_c_csrg_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_csmv.F90 rename to cuda/impl/psb_c_csrg_csmv.F90 diff --git a/gpu/impl/psb_c_csrg_from_gpu.F90 b/cuda/impl/psb_c_csrg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_from_gpu.F90 rename to cuda/impl/psb_c_csrg_from_gpu.F90 diff --git a/gpu/impl/psb_c_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_csrg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_c_csrg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_c_csrg_mold.F90 b/cuda/impl/psb_c_csrg_mold.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_mold.F90 rename to cuda/impl/psb_c_csrg_mold.F90 diff --git a/gpu/impl/psb_c_csrg_reallocate_nz.F90 b/cuda/impl/psb_c_csrg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_reallocate_nz.F90 rename to cuda/impl/psb_c_csrg_reallocate_nz.F90 diff --git a/gpu/impl/psb_c_csrg_scal.F90 b/cuda/impl/psb_c_csrg_scal.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_scal.F90 rename to cuda/impl/psb_c_csrg_scal.F90 diff --git a/gpu/impl/psb_c_csrg_scals.F90 b/cuda/impl/psb_c_csrg_scals.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_scals.F90 rename to cuda/impl/psb_c_csrg_scals.F90 diff --git a/gpu/impl/psb_c_csrg_to_gpu.F90 b/cuda/impl/psb_c_csrg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_to_gpu.F90 rename to cuda/impl/psb_c_csrg_to_gpu.F90 diff --git a/gpu/impl/psb_c_csrg_vect_mv.F90 b/cuda/impl/psb_c_csrg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_csrg_vect_mv.F90 rename to cuda/impl/psb_c_csrg_vect_mv.F90 diff --git a/gpu/impl/psb_c_diag_csmv.F90 b/cuda/impl/psb_c_diag_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_diag_csmv.F90 rename to cuda/impl/psb_c_diag_csmv.F90 diff --git a/gpu/impl/psb_c_diag_mold.F90 b/cuda/impl/psb_c_diag_mold.F90 similarity index 100% rename from gpu/impl/psb_c_diag_mold.F90 rename to cuda/impl/psb_c_diag_mold.F90 diff --git a/gpu/impl/psb_c_diag_to_gpu.F90 b/cuda/impl/psb_c_diag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_diag_to_gpu.F90 rename to cuda/impl/psb_c_diag_to_gpu.F90 diff --git a/gpu/impl/psb_c_diag_vect_mv.F90 b/cuda/impl/psb_c_diag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_diag_vect_mv.F90 rename to cuda/impl/psb_c_diag_vect_mv.F90 diff --git a/gpu/impl/psb_c_dnsg_mat_impl.F90 b/cuda/impl/psb_c_dnsg_mat_impl.F90 similarity index 100% rename from gpu/impl/psb_c_dnsg_mat_impl.F90 rename to cuda/impl/psb_c_dnsg_mat_impl.F90 diff --git a/gpu/impl/psb_c_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_elg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_c_elg_allocate_mnnz.F90 rename to cuda/impl/psb_c_elg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_c_elg_asb.f90 b/cuda/impl/psb_c_elg_asb.f90 similarity index 100% rename from gpu/impl/psb_c_elg_asb.f90 rename to cuda/impl/psb_c_elg_asb.f90 diff --git a/gpu/impl/psb_c_elg_csmm.F90 b/cuda/impl/psb_c_elg_csmm.F90 similarity index 100% rename from gpu/impl/psb_c_elg_csmm.F90 rename to cuda/impl/psb_c_elg_csmm.F90 diff --git a/gpu/impl/psb_c_elg_csmv.F90 b/cuda/impl/psb_c_elg_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_elg_csmv.F90 rename to cuda/impl/psb_c_elg_csmv.F90 diff --git a/gpu/impl/psb_c_elg_csput.F90 b/cuda/impl/psb_c_elg_csput.F90 similarity index 100% rename from gpu/impl/psb_c_elg_csput.F90 rename to cuda/impl/psb_c_elg_csput.F90 diff --git a/gpu/impl/psb_c_elg_from_gpu.F90 b/cuda/impl/psb_c_elg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_elg_from_gpu.F90 rename to cuda/impl/psb_c_elg_from_gpu.F90 diff --git a/gpu/impl/psb_c_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_elg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_c_elg_inner_vect_sv.F90 rename to cuda/impl/psb_c_elg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_c_elg_mold.F90 b/cuda/impl/psb_c_elg_mold.F90 similarity index 100% rename from gpu/impl/psb_c_elg_mold.F90 rename to cuda/impl/psb_c_elg_mold.F90 diff --git a/gpu/impl/psb_c_elg_reallocate_nz.F90 b/cuda/impl/psb_c_elg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_c_elg_reallocate_nz.F90 rename to cuda/impl/psb_c_elg_reallocate_nz.F90 diff --git a/gpu/impl/psb_c_elg_scal.F90 b/cuda/impl/psb_c_elg_scal.F90 similarity index 100% rename from gpu/impl/psb_c_elg_scal.F90 rename to cuda/impl/psb_c_elg_scal.F90 diff --git a/gpu/impl/psb_c_elg_scals.F90 b/cuda/impl/psb_c_elg_scals.F90 similarity index 100% rename from gpu/impl/psb_c_elg_scals.F90 rename to cuda/impl/psb_c_elg_scals.F90 diff --git a/gpu/impl/psb_c_elg_to_gpu.F90 b/cuda/impl/psb_c_elg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_elg_to_gpu.F90 rename to cuda/impl/psb_c_elg_to_gpu.F90 diff --git a/gpu/impl/psb_c_elg_trim.f90 b/cuda/impl/psb_c_elg_trim.f90 similarity index 100% rename from gpu/impl/psb_c_elg_trim.f90 rename to cuda/impl/psb_c_elg_trim.f90 diff --git a/gpu/impl/psb_c_elg_vect_mv.F90 b/cuda/impl/psb_c_elg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_elg_vect_mv.F90 rename to cuda/impl/psb_c_elg_vect_mv.F90 diff --git a/gpu/impl/psb_c_hdiag_csmv.F90 b/cuda/impl/psb_c_hdiag_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_hdiag_csmv.F90 rename to cuda/impl/psb_c_hdiag_csmv.F90 diff --git a/gpu/impl/psb_c_hdiag_mold.F90 b/cuda/impl/psb_c_hdiag_mold.F90 similarity index 100% rename from gpu/impl/psb_c_hdiag_mold.F90 rename to cuda/impl/psb_c_hdiag_mold.F90 diff --git a/gpu/impl/psb_c_hdiag_to_gpu.F90 b/cuda/impl/psb_c_hdiag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_hdiag_to_gpu.F90 rename to cuda/impl/psb_c_hdiag_to_gpu.F90 diff --git a/gpu/impl/psb_c_hdiag_vect_mv.F90 b/cuda/impl/psb_c_hdiag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_hdiag_vect_mv.F90 rename to cuda/impl/psb_c_hdiag_vect_mv.F90 diff --git a/gpu/impl/psb_c_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_hlg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_c_hlg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_c_hlg_csmm.F90 b/cuda/impl/psb_c_hlg_csmm.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_csmm.F90 rename to cuda/impl/psb_c_hlg_csmm.F90 diff --git a/gpu/impl/psb_c_hlg_csmv.F90 b/cuda/impl/psb_c_hlg_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_csmv.F90 rename to cuda/impl/psb_c_hlg_csmv.F90 diff --git a/gpu/impl/psb_c_hlg_from_gpu.F90 b/cuda/impl/psb_c_hlg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_from_gpu.F90 rename to cuda/impl/psb_c_hlg_from_gpu.F90 diff --git a/gpu/impl/psb_c_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_hlg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_c_hlg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_c_hlg_mold.F90 b/cuda/impl/psb_c_hlg_mold.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_mold.F90 rename to cuda/impl/psb_c_hlg_mold.F90 diff --git a/gpu/impl/psb_c_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_hlg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_reallocate_nz.F90 rename to cuda/impl/psb_c_hlg_reallocate_nz.F90 diff --git a/gpu/impl/psb_c_hlg_scal.F90 b/cuda/impl/psb_c_hlg_scal.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_scal.F90 rename to cuda/impl/psb_c_hlg_scal.F90 diff --git a/gpu/impl/psb_c_hlg_scals.F90 b/cuda/impl/psb_c_hlg_scals.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_scals.F90 rename to cuda/impl/psb_c_hlg_scals.F90 diff --git a/gpu/impl/psb_c_hlg_to_gpu.F90 b/cuda/impl/psb_c_hlg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_to_gpu.F90 rename to cuda/impl/psb_c_hlg_to_gpu.F90 diff --git a/gpu/impl/psb_c_hlg_vect_mv.F90 b/cuda/impl/psb_c_hlg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_hlg_vect_mv.F90 rename to cuda/impl/psb_c_hlg_vect_mv.F90 diff --git a/gpu/impl/psb_c_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_hybg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_c_hybg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_c_hybg_csmm.F90 b/cuda/impl/psb_c_hybg_csmm.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_csmm.F90 rename to cuda/impl/psb_c_hybg_csmm.F90 diff --git a/gpu/impl/psb_c_hybg_csmv.F90 b/cuda/impl/psb_c_hybg_csmv.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_csmv.F90 rename to cuda/impl/psb_c_hybg_csmv.F90 diff --git a/gpu/impl/psb_c_hybg_inner_vect_sv.F90 b/cuda/impl/psb_c_hybg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_c_hybg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_c_hybg_mold.F90 b/cuda/impl/psb_c_hybg_mold.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_mold.F90 rename to cuda/impl/psb_c_hybg_mold.F90 diff --git a/gpu/impl/psb_c_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_hybg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_reallocate_nz.F90 rename to cuda/impl/psb_c_hybg_reallocate_nz.F90 diff --git a/gpu/impl/psb_c_hybg_scal.F90 b/cuda/impl/psb_c_hybg_scal.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_scal.F90 rename to cuda/impl/psb_c_hybg_scal.F90 diff --git a/gpu/impl/psb_c_hybg_scals.F90 b/cuda/impl/psb_c_hybg_scals.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_scals.F90 rename to cuda/impl/psb_c_hybg_scals.F90 diff --git a/gpu/impl/psb_c_hybg_to_gpu.F90 b/cuda/impl/psb_c_hybg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_to_gpu.F90 rename to cuda/impl/psb_c_hybg_to_gpu.F90 diff --git a/gpu/impl/psb_c_hybg_vect_mv.F90 b/cuda/impl/psb_c_hybg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_c_hybg_vect_mv.F90 rename to cuda/impl/psb_c_hybg_vect_mv.F90 diff --git a/gpu/impl/psb_c_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_mv_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_csrg_from_coo.F90 rename to cuda/impl/psb_c_mv_csrg_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_mv_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_c_mv_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_c_mv_diag_from_coo.F90 b/cuda/impl/psb_c_mv_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_diag_from_coo.F90 rename to cuda/impl/psb_c_mv_diag_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_elg_from_coo.F90 b/cuda/impl/psb_c_mv_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_elg_from_coo.F90 rename to cuda/impl/psb_c_mv_elg_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_mv_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_mv_elg_from_fmt.F90 rename to cuda/impl/psb_c_mv_elg_from_fmt.F90 diff --git a/gpu/impl/psb_c_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_mv_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_c_mv_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_mv_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hlg_from_coo.F90 rename to cuda/impl/psb_c_mv_hlg_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_mv_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_c_mv_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_c_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_mv_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hybg_from_coo.F90 rename to cuda/impl/psb_c_mv_hybg_from_coo.F90 diff --git a/gpu/impl/psb_c_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_mv_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_c_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_c_mv_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_d_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cp_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_csrg_from_coo.F90 rename to cuda/impl/psb_d_cp_csrg_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cp_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_d_cp_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_d_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cp_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_diag_from_coo.F90 rename to cuda/impl/psb_d_cp_diag_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cp_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_elg_from_coo.F90 rename to cuda/impl/psb_d_cp_elg_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cp_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_cp_elg_from_fmt.F90 rename to cuda/impl/psb_d_cp_elg_from_fmt.F90 diff --git a/gpu/impl/psb_d_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cp_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_d_cp_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cp_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hlg_from_coo.F90 rename to cuda/impl/psb_d_cp_hlg_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cp_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_d_cp_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_d_cp_hybg_from_coo.F90 b/cuda/impl/psb_d_cp_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hybg_from_coo.F90 rename to cuda/impl/psb_d_cp_hybg_from_coo.F90 diff --git a/gpu/impl/psb_d_cp_hybg_from_fmt.F90 b/cuda/impl/psb_d_cp_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_d_cp_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_d_csrg_allocate_mnnz.F90 b/cuda/impl/psb_d_csrg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_d_csrg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_d_csrg_csmm.F90 b/cuda/impl/psb_d_csrg_csmm.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_csmm.F90 rename to cuda/impl/psb_d_csrg_csmm.F90 diff --git a/gpu/impl/psb_d_csrg_csmv.F90 b/cuda/impl/psb_d_csrg_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_csmv.F90 rename to cuda/impl/psb_d_csrg_csmv.F90 diff --git a/gpu/impl/psb_d_csrg_from_gpu.F90 b/cuda/impl/psb_d_csrg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_from_gpu.F90 rename to cuda/impl/psb_d_csrg_from_gpu.F90 diff --git a/gpu/impl/psb_d_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_csrg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_d_csrg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_d_csrg_mold.F90 b/cuda/impl/psb_d_csrg_mold.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_mold.F90 rename to cuda/impl/psb_d_csrg_mold.F90 diff --git a/gpu/impl/psb_d_csrg_reallocate_nz.F90 b/cuda/impl/psb_d_csrg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_reallocate_nz.F90 rename to cuda/impl/psb_d_csrg_reallocate_nz.F90 diff --git a/gpu/impl/psb_d_csrg_scal.F90 b/cuda/impl/psb_d_csrg_scal.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_scal.F90 rename to cuda/impl/psb_d_csrg_scal.F90 diff --git a/gpu/impl/psb_d_csrg_scals.F90 b/cuda/impl/psb_d_csrg_scals.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_scals.F90 rename to cuda/impl/psb_d_csrg_scals.F90 diff --git a/gpu/impl/psb_d_csrg_to_gpu.F90 b/cuda/impl/psb_d_csrg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_to_gpu.F90 rename to cuda/impl/psb_d_csrg_to_gpu.F90 diff --git a/gpu/impl/psb_d_csrg_vect_mv.F90 b/cuda/impl/psb_d_csrg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_csrg_vect_mv.F90 rename to cuda/impl/psb_d_csrg_vect_mv.F90 diff --git a/gpu/impl/psb_d_diag_csmv.F90 b/cuda/impl/psb_d_diag_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_diag_csmv.F90 rename to cuda/impl/psb_d_diag_csmv.F90 diff --git a/gpu/impl/psb_d_diag_mold.F90 b/cuda/impl/psb_d_diag_mold.F90 similarity index 100% rename from gpu/impl/psb_d_diag_mold.F90 rename to cuda/impl/psb_d_diag_mold.F90 diff --git a/gpu/impl/psb_d_diag_to_gpu.F90 b/cuda/impl/psb_d_diag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_diag_to_gpu.F90 rename to cuda/impl/psb_d_diag_to_gpu.F90 diff --git a/gpu/impl/psb_d_diag_vect_mv.F90 b/cuda/impl/psb_d_diag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_diag_vect_mv.F90 rename to cuda/impl/psb_d_diag_vect_mv.F90 diff --git a/gpu/impl/psb_d_dnsg_mat_impl.F90 b/cuda/impl/psb_d_dnsg_mat_impl.F90 similarity index 100% rename from gpu/impl/psb_d_dnsg_mat_impl.F90 rename to cuda/impl/psb_d_dnsg_mat_impl.F90 diff --git a/gpu/impl/psb_d_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_elg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_d_elg_allocate_mnnz.F90 rename to cuda/impl/psb_d_elg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_d_elg_asb.f90 b/cuda/impl/psb_d_elg_asb.f90 similarity index 100% rename from gpu/impl/psb_d_elg_asb.f90 rename to cuda/impl/psb_d_elg_asb.f90 diff --git a/gpu/impl/psb_d_elg_csmm.F90 b/cuda/impl/psb_d_elg_csmm.F90 similarity index 100% rename from gpu/impl/psb_d_elg_csmm.F90 rename to cuda/impl/psb_d_elg_csmm.F90 diff --git a/gpu/impl/psb_d_elg_csmv.F90 b/cuda/impl/psb_d_elg_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_elg_csmv.F90 rename to cuda/impl/psb_d_elg_csmv.F90 diff --git a/gpu/impl/psb_d_elg_csput.F90 b/cuda/impl/psb_d_elg_csput.F90 similarity index 100% rename from gpu/impl/psb_d_elg_csput.F90 rename to cuda/impl/psb_d_elg_csput.F90 diff --git a/gpu/impl/psb_d_elg_from_gpu.F90 b/cuda/impl/psb_d_elg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_elg_from_gpu.F90 rename to cuda/impl/psb_d_elg_from_gpu.F90 diff --git a/gpu/impl/psb_d_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_elg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_d_elg_inner_vect_sv.F90 rename to cuda/impl/psb_d_elg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_d_elg_mold.F90 b/cuda/impl/psb_d_elg_mold.F90 similarity index 100% rename from gpu/impl/psb_d_elg_mold.F90 rename to cuda/impl/psb_d_elg_mold.F90 diff --git a/gpu/impl/psb_d_elg_reallocate_nz.F90 b/cuda/impl/psb_d_elg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_d_elg_reallocate_nz.F90 rename to cuda/impl/psb_d_elg_reallocate_nz.F90 diff --git a/gpu/impl/psb_d_elg_scal.F90 b/cuda/impl/psb_d_elg_scal.F90 similarity index 100% rename from gpu/impl/psb_d_elg_scal.F90 rename to cuda/impl/psb_d_elg_scal.F90 diff --git a/gpu/impl/psb_d_elg_scals.F90 b/cuda/impl/psb_d_elg_scals.F90 similarity index 100% rename from gpu/impl/psb_d_elg_scals.F90 rename to cuda/impl/psb_d_elg_scals.F90 diff --git a/gpu/impl/psb_d_elg_to_gpu.F90 b/cuda/impl/psb_d_elg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_elg_to_gpu.F90 rename to cuda/impl/psb_d_elg_to_gpu.F90 diff --git a/gpu/impl/psb_d_elg_trim.f90 b/cuda/impl/psb_d_elg_trim.f90 similarity index 100% rename from gpu/impl/psb_d_elg_trim.f90 rename to cuda/impl/psb_d_elg_trim.f90 diff --git a/gpu/impl/psb_d_elg_vect_mv.F90 b/cuda/impl/psb_d_elg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_elg_vect_mv.F90 rename to cuda/impl/psb_d_elg_vect_mv.F90 diff --git a/gpu/impl/psb_d_hdiag_csmv.F90 b/cuda/impl/psb_d_hdiag_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_hdiag_csmv.F90 rename to cuda/impl/psb_d_hdiag_csmv.F90 diff --git a/gpu/impl/psb_d_hdiag_mold.F90 b/cuda/impl/psb_d_hdiag_mold.F90 similarity index 100% rename from gpu/impl/psb_d_hdiag_mold.F90 rename to cuda/impl/psb_d_hdiag_mold.F90 diff --git a/gpu/impl/psb_d_hdiag_to_gpu.F90 b/cuda/impl/psb_d_hdiag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_hdiag_to_gpu.F90 rename to cuda/impl/psb_d_hdiag_to_gpu.F90 diff --git a/gpu/impl/psb_d_hdiag_vect_mv.F90 b/cuda/impl/psb_d_hdiag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_hdiag_vect_mv.F90 rename to cuda/impl/psb_d_hdiag_vect_mv.F90 diff --git a/gpu/impl/psb_d_hlg_allocate_mnnz.F90 b/cuda/impl/psb_d_hlg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_d_hlg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_d_hlg_csmm.F90 b/cuda/impl/psb_d_hlg_csmm.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_csmm.F90 rename to cuda/impl/psb_d_hlg_csmm.F90 diff --git a/gpu/impl/psb_d_hlg_csmv.F90 b/cuda/impl/psb_d_hlg_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_csmv.F90 rename to cuda/impl/psb_d_hlg_csmv.F90 diff --git a/gpu/impl/psb_d_hlg_from_gpu.F90 b/cuda/impl/psb_d_hlg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_from_gpu.F90 rename to cuda/impl/psb_d_hlg_from_gpu.F90 diff --git a/gpu/impl/psb_d_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_hlg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_d_hlg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_d_hlg_mold.F90 b/cuda/impl/psb_d_hlg_mold.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_mold.F90 rename to cuda/impl/psb_d_hlg_mold.F90 diff --git a/gpu/impl/psb_d_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_hlg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_reallocate_nz.F90 rename to cuda/impl/psb_d_hlg_reallocate_nz.F90 diff --git a/gpu/impl/psb_d_hlg_scal.F90 b/cuda/impl/psb_d_hlg_scal.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_scal.F90 rename to cuda/impl/psb_d_hlg_scal.F90 diff --git a/gpu/impl/psb_d_hlg_scals.F90 b/cuda/impl/psb_d_hlg_scals.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_scals.F90 rename to cuda/impl/psb_d_hlg_scals.F90 diff --git a/gpu/impl/psb_d_hlg_to_gpu.F90 b/cuda/impl/psb_d_hlg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_to_gpu.F90 rename to cuda/impl/psb_d_hlg_to_gpu.F90 diff --git a/gpu/impl/psb_d_hlg_vect_mv.F90 b/cuda/impl/psb_d_hlg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_hlg_vect_mv.F90 rename to cuda/impl/psb_d_hlg_vect_mv.F90 diff --git a/gpu/impl/psb_d_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_hybg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_d_hybg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_d_hybg_csmm.F90 b/cuda/impl/psb_d_hybg_csmm.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_csmm.F90 rename to cuda/impl/psb_d_hybg_csmm.F90 diff --git a/gpu/impl/psb_d_hybg_csmv.F90 b/cuda/impl/psb_d_hybg_csmv.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_csmv.F90 rename to cuda/impl/psb_d_hybg_csmv.F90 diff --git a/gpu/impl/psb_d_hybg_inner_vect_sv.F90 b/cuda/impl/psb_d_hybg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_d_hybg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_d_hybg_mold.F90 b/cuda/impl/psb_d_hybg_mold.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_mold.F90 rename to cuda/impl/psb_d_hybg_mold.F90 diff --git a/gpu/impl/psb_d_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_hybg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_reallocate_nz.F90 rename to cuda/impl/psb_d_hybg_reallocate_nz.F90 diff --git a/gpu/impl/psb_d_hybg_scal.F90 b/cuda/impl/psb_d_hybg_scal.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_scal.F90 rename to cuda/impl/psb_d_hybg_scal.F90 diff --git a/gpu/impl/psb_d_hybg_scals.F90 b/cuda/impl/psb_d_hybg_scals.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_scals.F90 rename to cuda/impl/psb_d_hybg_scals.F90 diff --git a/gpu/impl/psb_d_hybg_to_gpu.F90 b/cuda/impl/psb_d_hybg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_to_gpu.F90 rename to cuda/impl/psb_d_hybg_to_gpu.F90 diff --git a/gpu/impl/psb_d_hybg_vect_mv.F90 b/cuda/impl/psb_d_hybg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_d_hybg_vect_mv.F90 rename to cuda/impl/psb_d_hybg_vect_mv.F90 diff --git a/gpu/impl/psb_d_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_mv_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_csrg_from_coo.F90 rename to cuda/impl/psb_d_mv_csrg_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_mv_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_d_mv_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_d_mv_diag_from_coo.F90 b/cuda/impl/psb_d_mv_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_diag_from_coo.F90 rename to cuda/impl/psb_d_mv_diag_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_elg_from_coo.F90 b/cuda/impl/psb_d_mv_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_elg_from_coo.F90 rename to cuda/impl/psb_d_mv_elg_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_mv_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_mv_elg_from_fmt.F90 rename to cuda/impl/psb_d_mv_elg_from_fmt.F90 diff --git a/gpu/impl/psb_d_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_mv_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_d_mv_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_mv_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hlg_from_coo.F90 rename to cuda/impl/psb_d_mv_hlg_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_mv_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_d_mv_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_d_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_mv_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hybg_from_coo.F90 rename to cuda/impl/psb_d_mv_hybg_from_coo.F90 diff --git a/gpu/impl/psb_d_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_mv_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_d_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_d_mv_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_s_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cp_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_csrg_from_coo.F90 rename to cuda/impl/psb_s_cp_csrg_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cp_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_s_cp_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_s_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cp_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_diag_from_coo.F90 rename to cuda/impl/psb_s_cp_diag_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cp_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_elg_from_coo.F90 rename to cuda/impl/psb_s_cp_elg_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cp_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_cp_elg_from_fmt.F90 rename to cuda/impl/psb_s_cp_elg_from_fmt.F90 diff --git a/gpu/impl/psb_s_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cp_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_s_cp_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cp_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hlg_from_coo.F90 rename to cuda/impl/psb_s_cp_hlg_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cp_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_s_cp_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_s_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cp_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hybg_from_coo.F90 rename to cuda/impl/psb_s_cp_hybg_from_coo.F90 diff --git a/gpu/impl/psb_s_cp_hybg_from_fmt.F90 b/cuda/impl/psb_s_cp_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_s_cp_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_s_csrg_allocate_mnnz.F90 b/cuda/impl/psb_s_csrg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_s_csrg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_s_csrg_csmm.F90 b/cuda/impl/psb_s_csrg_csmm.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_csmm.F90 rename to cuda/impl/psb_s_csrg_csmm.F90 diff --git a/gpu/impl/psb_s_csrg_csmv.F90 b/cuda/impl/psb_s_csrg_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_csmv.F90 rename to cuda/impl/psb_s_csrg_csmv.F90 diff --git a/gpu/impl/psb_s_csrg_from_gpu.F90 b/cuda/impl/psb_s_csrg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_from_gpu.F90 rename to cuda/impl/psb_s_csrg_from_gpu.F90 diff --git a/gpu/impl/psb_s_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_csrg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_s_csrg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_s_csrg_mold.F90 b/cuda/impl/psb_s_csrg_mold.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_mold.F90 rename to cuda/impl/psb_s_csrg_mold.F90 diff --git a/gpu/impl/psb_s_csrg_reallocate_nz.F90 b/cuda/impl/psb_s_csrg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_reallocate_nz.F90 rename to cuda/impl/psb_s_csrg_reallocate_nz.F90 diff --git a/gpu/impl/psb_s_csrg_scal.F90 b/cuda/impl/psb_s_csrg_scal.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_scal.F90 rename to cuda/impl/psb_s_csrg_scal.F90 diff --git a/gpu/impl/psb_s_csrg_scals.F90 b/cuda/impl/psb_s_csrg_scals.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_scals.F90 rename to cuda/impl/psb_s_csrg_scals.F90 diff --git a/gpu/impl/psb_s_csrg_to_gpu.F90 b/cuda/impl/psb_s_csrg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_to_gpu.F90 rename to cuda/impl/psb_s_csrg_to_gpu.F90 diff --git a/gpu/impl/psb_s_csrg_vect_mv.F90 b/cuda/impl/psb_s_csrg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_csrg_vect_mv.F90 rename to cuda/impl/psb_s_csrg_vect_mv.F90 diff --git a/gpu/impl/psb_s_diag_csmv.F90 b/cuda/impl/psb_s_diag_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_diag_csmv.F90 rename to cuda/impl/psb_s_diag_csmv.F90 diff --git a/gpu/impl/psb_s_diag_mold.F90 b/cuda/impl/psb_s_diag_mold.F90 similarity index 100% rename from gpu/impl/psb_s_diag_mold.F90 rename to cuda/impl/psb_s_diag_mold.F90 diff --git a/gpu/impl/psb_s_diag_to_gpu.F90 b/cuda/impl/psb_s_diag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_diag_to_gpu.F90 rename to cuda/impl/psb_s_diag_to_gpu.F90 diff --git a/gpu/impl/psb_s_diag_vect_mv.F90 b/cuda/impl/psb_s_diag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_diag_vect_mv.F90 rename to cuda/impl/psb_s_diag_vect_mv.F90 diff --git a/gpu/impl/psb_s_dnsg_mat_impl.F90 b/cuda/impl/psb_s_dnsg_mat_impl.F90 similarity index 100% rename from gpu/impl/psb_s_dnsg_mat_impl.F90 rename to cuda/impl/psb_s_dnsg_mat_impl.F90 diff --git a/gpu/impl/psb_s_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_elg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_s_elg_allocate_mnnz.F90 rename to cuda/impl/psb_s_elg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_s_elg_asb.f90 b/cuda/impl/psb_s_elg_asb.f90 similarity index 100% rename from gpu/impl/psb_s_elg_asb.f90 rename to cuda/impl/psb_s_elg_asb.f90 diff --git a/gpu/impl/psb_s_elg_csmm.F90 b/cuda/impl/psb_s_elg_csmm.F90 similarity index 100% rename from gpu/impl/psb_s_elg_csmm.F90 rename to cuda/impl/psb_s_elg_csmm.F90 diff --git a/gpu/impl/psb_s_elg_csmv.F90 b/cuda/impl/psb_s_elg_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_elg_csmv.F90 rename to cuda/impl/psb_s_elg_csmv.F90 diff --git a/gpu/impl/psb_s_elg_csput.F90 b/cuda/impl/psb_s_elg_csput.F90 similarity index 100% rename from gpu/impl/psb_s_elg_csput.F90 rename to cuda/impl/psb_s_elg_csput.F90 diff --git a/gpu/impl/psb_s_elg_from_gpu.F90 b/cuda/impl/psb_s_elg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_elg_from_gpu.F90 rename to cuda/impl/psb_s_elg_from_gpu.F90 diff --git a/gpu/impl/psb_s_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_elg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_s_elg_inner_vect_sv.F90 rename to cuda/impl/psb_s_elg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_s_elg_mold.F90 b/cuda/impl/psb_s_elg_mold.F90 similarity index 100% rename from gpu/impl/psb_s_elg_mold.F90 rename to cuda/impl/psb_s_elg_mold.F90 diff --git a/gpu/impl/psb_s_elg_reallocate_nz.F90 b/cuda/impl/psb_s_elg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_s_elg_reallocate_nz.F90 rename to cuda/impl/psb_s_elg_reallocate_nz.F90 diff --git a/gpu/impl/psb_s_elg_scal.F90 b/cuda/impl/psb_s_elg_scal.F90 similarity index 100% rename from gpu/impl/psb_s_elg_scal.F90 rename to cuda/impl/psb_s_elg_scal.F90 diff --git a/gpu/impl/psb_s_elg_scals.F90 b/cuda/impl/psb_s_elg_scals.F90 similarity index 100% rename from gpu/impl/psb_s_elg_scals.F90 rename to cuda/impl/psb_s_elg_scals.F90 diff --git a/gpu/impl/psb_s_elg_to_gpu.F90 b/cuda/impl/psb_s_elg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_elg_to_gpu.F90 rename to cuda/impl/psb_s_elg_to_gpu.F90 diff --git a/gpu/impl/psb_s_elg_trim.f90 b/cuda/impl/psb_s_elg_trim.f90 similarity index 100% rename from gpu/impl/psb_s_elg_trim.f90 rename to cuda/impl/psb_s_elg_trim.f90 diff --git a/gpu/impl/psb_s_elg_vect_mv.F90 b/cuda/impl/psb_s_elg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_elg_vect_mv.F90 rename to cuda/impl/psb_s_elg_vect_mv.F90 diff --git a/gpu/impl/psb_s_hdiag_csmv.F90 b/cuda/impl/psb_s_hdiag_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_hdiag_csmv.F90 rename to cuda/impl/psb_s_hdiag_csmv.F90 diff --git a/gpu/impl/psb_s_hdiag_mold.F90 b/cuda/impl/psb_s_hdiag_mold.F90 similarity index 100% rename from gpu/impl/psb_s_hdiag_mold.F90 rename to cuda/impl/psb_s_hdiag_mold.F90 diff --git a/gpu/impl/psb_s_hdiag_to_gpu.F90 b/cuda/impl/psb_s_hdiag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_hdiag_to_gpu.F90 rename to cuda/impl/psb_s_hdiag_to_gpu.F90 diff --git a/gpu/impl/psb_s_hdiag_vect_mv.F90 b/cuda/impl/psb_s_hdiag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_hdiag_vect_mv.F90 rename to cuda/impl/psb_s_hdiag_vect_mv.F90 diff --git a/gpu/impl/psb_s_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_hlg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_s_hlg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_s_hlg_csmm.F90 b/cuda/impl/psb_s_hlg_csmm.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_csmm.F90 rename to cuda/impl/psb_s_hlg_csmm.F90 diff --git a/gpu/impl/psb_s_hlg_csmv.F90 b/cuda/impl/psb_s_hlg_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_csmv.F90 rename to cuda/impl/psb_s_hlg_csmv.F90 diff --git a/gpu/impl/psb_s_hlg_from_gpu.F90 b/cuda/impl/psb_s_hlg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_from_gpu.F90 rename to cuda/impl/psb_s_hlg_from_gpu.F90 diff --git a/gpu/impl/psb_s_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_hlg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_s_hlg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_s_hlg_mold.F90 b/cuda/impl/psb_s_hlg_mold.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_mold.F90 rename to cuda/impl/psb_s_hlg_mold.F90 diff --git a/gpu/impl/psb_s_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_hlg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_reallocate_nz.F90 rename to cuda/impl/psb_s_hlg_reallocate_nz.F90 diff --git a/gpu/impl/psb_s_hlg_scal.F90 b/cuda/impl/psb_s_hlg_scal.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_scal.F90 rename to cuda/impl/psb_s_hlg_scal.F90 diff --git a/gpu/impl/psb_s_hlg_scals.F90 b/cuda/impl/psb_s_hlg_scals.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_scals.F90 rename to cuda/impl/psb_s_hlg_scals.F90 diff --git a/gpu/impl/psb_s_hlg_to_gpu.F90 b/cuda/impl/psb_s_hlg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_to_gpu.F90 rename to cuda/impl/psb_s_hlg_to_gpu.F90 diff --git a/gpu/impl/psb_s_hlg_vect_mv.F90 b/cuda/impl/psb_s_hlg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_hlg_vect_mv.F90 rename to cuda/impl/psb_s_hlg_vect_mv.F90 diff --git a/gpu/impl/psb_s_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_hybg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_s_hybg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_s_hybg_csmm.F90 b/cuda/impl/psb_s_hybg_csmm.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_csmm.F90 rename to cuda/impl/psb_s_hybg_csmm.F90 diff --git a/gpu/impl/psb_s_hybg_csmv.F90 b/cuda/impl/psb_s_hybg_csmv.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_csmv.F90 rename to cuda/impl/psb_s_hybg_csmv.F90 diff --git a/gpu/impl/psb_s_hybg_inner_vect_sv.F90 b/cuda/impl/psb_s_hybg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_s_hybg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_s_hybg_mold.F90 b/cuda/impl/psb_s_hybg_mold.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_mold.F90 rename to cuda/impl/psb_s_hybg_mold.F90 diff --git a/gpu/impl/psb_s_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_hybg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_reallocate_nz.F90 rename to cuda/impl/psb_s_hybg_reallocate_nz.F90 diff --git a/gpu/impl/psb_s_hybg_scal.F90 b/cuda/impl/psb_s_hybg_scal.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_scal.F90 rename to cuda/impl/psb_s_hybg_scal.F90 diff --git a/gpu/impl/psb_s_hybg_scals.F90 b/cuda/impl/psb_s_hybg_scals.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_scals.F90 rename to cuda/impl/psb_s_hybg_scals.F90 diff --git a/gpu/impl/psb_s_hybg_to_gpu.F90 b/cuda/impl/psb_s_hybg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_to_gpu.F90 rename to cuda/impl/psb_s_hybg_to_gpu.F90 diff --git a/gpu/impl/psb_s_hybg_vect_mv.F90 b/cuda/impl/psb_s_hybg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_s_hybg_vect_mv.F90 rename to cuda/impl/psb_s_hybg_vect_mv.F90 diff --git a/gpu/impl/psb_s_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_mv_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_csrg_from_coo.F90 rename to cuda/impl/psb_s_mv_csrg_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_mv_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_s_mv_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_s_mv_diag_from_coo.F90 b/cuda/impl/psb_s_mv_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_diag_from_coo.F90 rename to cuda/impl/psb_s_mv_diag_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_elg_from_coo.F90 b/cuda/impl/psb_s_mv_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_elg_from_coo.F90 rename to cuda/impl/psb_s_mv_elg_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_mv_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_mv_elg_from_fmt.F90 rename to cuda/impl/psb_s_mv_elg_from_fmt.F90 diff --git a/gpu/impl/psb_s_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_mv_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_s_mv_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_mv_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hlg_from_coo.F90 rename to cuda/impl/psb_s_mv_hlg_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_mv_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_s_mv_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_s_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_mv_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hybg_from_coo.F90 rename to cuda/impl/psb_s_mv_hybg_from_coo.F90 diff --git a/gpu/impl/psb_s_mv_hybg_from_fmt.F90 b/cuda/impl/psb_s_mv_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_s_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_s_mv_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_z_cp_csrg_from_coo.F90 b/cuda/impl/psb_z_cp_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_csrg_from_coo.F90 rename to cuda/impl/psb_z_cp_csrg_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cp_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_z_cp_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_z_cp_diag_from_coo.F90 b/cuda/impl/psb_z_cp_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_diag_from_coo.F90 rename to cuda/impl/psb_z_cp_diag_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_elg_from_coo.F90 b/cuda/impl/psb_z_cp_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_elg_from_coo.F90 rename to cuda/impl/psb_z_cp_elg_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_elg_from_fmt.F90 b/cuda/impl/psb_z_cp_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_cp_elg_from_fmt.F90 rename to cuda/impl/psb_z_cp_elg_from_fmt.F90 diff --git a/gpu/impl/psb_z_cp_hdiag_from_coo.F90 b/cuda/impl/psb_z_cp_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_z_cp_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_hlg_from_coo.F90 b/cuda/impl/psb_z_cp_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hlg_from_coo.F90 rename to cuda/impl/psb_z_cp_hlg_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_hlg_from_fmt.F90 b/cuda/impl/psb_z_cp_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_z_cp_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_z_cp_hybg_from_coo.F90 b/cuda/impl/psb_z_cp_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hybg_from_coo.F90 rename to cuda/impl/psb_z_cp_hybg_from_coo.F90 diff --git a/gpu/impl/psb_z_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cp_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_z_cp_hybg_from_fmt.F90 diff --git a/gpu/impl/psb_z_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_csrg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_z_csrg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_z_csrg_csmm.F90 b/cuda/impl/psb_z_csrg_csmm.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_csmm.F90 rename to cuda/impl/psb_z_csrg_csmm.F90 diff --git a/gpu/impl/psb_z_csrg_csmv.F90 b/cuda/impl/psb_z_csrg_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_csmv.F90 rename to cuda/impl/psb_z_csrg_csmv.F90 diff --git a/gpu/impl/psb_z_csrg_from_gpu.F90 b/cuda/impl/psb_z_csrg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_from_gpu.F90 rename to cuda/impl/psb_z_csrg_from_gpu.F90 diff --git a/gpu/impl/psb_z_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_csrg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_z_csrg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_z_csrg_mold.F90 b/cuda/impl/psb_z_csrg_mold.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_mold.F90 rename to cuda/impl/psb_z_csrg_mold.F90 diff --git a/gpu/impl/psb_z_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_csrg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_reallocate_nz.F90 rename to cuda/impl/psb_z_csrg_reallocate_nz.F90 diff --git a/gpu/impl/psb_z_csrg_scal.F90 b/cuda/impl/psb_z_csrg_scal.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_scal.F90 rename to cuda/impl/psb_z_csrg_scal.F90 diff --git a/gpu/impl/psb_z_csrg_scals.F90 b/cuda/impl/psb_z_csrg_scals.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_scals.F90 rename to cuda/impl/psb_z_csrg_scals.F90 diff --git a/gpu/impl/psb_z_csrg_to_gpu.F90 b/cuda/impl/psb_z_csrg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_to_gpu.F90 rename to cuda/impl/psb_z_csrg_to_gpu.F90 diff --git a/gpu/impl/psb_z_csrg_vect_mv.F90 b/cuda/impl/psb_z_csrg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_csrg_vect_mv.F90 rename to cuda/impl/psb_z_csrg_vect_mv.F90 diff --git a/gpu/impl/psb_z_diag_csmv.F90 b/cuda/impl/psb_z_diag_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_diag_csmv.F90 rename to cuda/impl/psb_z_diag_csmv.F90 diff --git a/gpu/impl/psb_z_diag_mold.F90 b/cuda/impl/psb_z_diag_mold.F90 similarity index 100% rename from gpu/impl/psb_z_diag_mold.F90 rename to cuda/impl/psb_z_diag_mold.F90 diff --git a/gpu/impl/psb_z_diag_to_gpu.F90 b/cuda/impl/psb_z_diag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_diag_to_gpu.F90 rename to cuda/impl/psb_z_diag_to_gpu.F90 diff --git a/gpu/impl/psb_z_diag_vect_mv.F90 b/cuda/impl/psb_z_diag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_diag_vect_mv.F90 rename to cuda/impl/psb_z_diag_vect_mv.F90 diff --git a/gpu/impl/psb_z_dnsg_mat_impl.F90 b/cuda/impl/psb_z_dnsg_mat_impl.F90 similarity index 100% rename from gpu/impl/psb_z_dnsg_mat_impl.F90 rename to cuda/impl/psb_z_dnsg_mat_impl.F90 diff --git a/gpu/impl/psb_z_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_elg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_z_elg_allocate_mnnz.F90 rename to cuda/impl/psb_z_elg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_z_elg_asb.f90 b/cuda/impl/psb_z_elg_asb.f90 similarity index 100% rename from gpu/impl/psb_z_elg_asb.f90 rename to cuda/impl/psb_z_elg_asb.f90 diff --git a/gpu/impl/psb_z_elg_csmm.F90 b/cuda/impl/psb_z_elg_csmm.F90 similarity index 100% rename from gpu/impl/psb_z_elg_csmm.F90 rename to cuda/impl/psb_z_elg_csmm.F90 diff --git a/gpu/impl/psb_z_elg_csmv.F90 b/cuda/impl/psb_z_elg_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_elg_csmv.F90 rename to cuda/impl/psb_z_elg_csmv.F90 diff --git a/gpu/impl/psb_z_elg_csput.F90 b/cuda/impl/psb_z_elg_csput.F90 similarity index 100% rename from gpu/impl/psb_z_elg_csput.F90 rename to cuda/impl/psb_z_elg_csput.F90 diff --git a/gpu/impl/psb_z_elg_from_gpu.F90 b/cuda/impl/psb_z_elg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_elg_from_gpu.F90 rename to cuda/impl/psb_z_elg_from_gpu.F90 diff --git a/gpu/impl/psb_z_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_elg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_z_elg_inner_vect_sv.F90 rename to cuda/impl/psb_z_elg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_z_elg_mold.F90 b/cuda/impl/psb_z_elg_mold.F90 similarity index 100% rename from gpu/impl/psb_z_elg_mold.F90 rename to cuda/impl/psb_z_elg_mold.F90 diff --git a/gpu/impl/psb_z_elg_reallocate_nz.F90 b/cuda/impl/psb_z_elg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_z_elg_reallocate_nz.F90 rename to cuda/impl/psb_z_elg_reallocate_nz.F90 diff --git a/gpu/impl/psb_z_elg_scal.F90 b/cuda/impl/psb_z_elg_scal.F90 similarity index 100% rename from gpu/impl/psb_z_elg_scal.F90 rename to cuda/impl/psb_z_elg_scal.F90 diff --git a/gpu/impl/psb_z_elg_scals.F90 b/cuda/impl/psb_z_elg_scals.F90 similarity index 100% rename from gpu/impl/psb_z_elg_scals.F90 rename to cuda/impl/psb_z_elg_scals.F90 diff --git a/gpu/impl/psb_z_elg_to_gpu.F90 b/cuda/impl/psb_z_elg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_elg_to_gpu.F90 rename to cuda/impl/psb_z_elg_to_gpu.F90 diff --git a/gpu/impl/psb_z_elg_trim.f90 b/cuda/impl/psb_z_elg_trim.f90 similarity index 100% rename from gpu/impl/psb_z_elg_trim.f90 rename to cuda/impl/psb_z_elg_trim.f90 diff --git a/gpu/impl/psb_z_elg_vect_mv.F90 b/cuda/impl/psb_z_elg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_elg_vect_mv.F90 rename to cuda/impl/psb_z_elg_vect_mv.F90 diff --git a/gpu/impl/psb_z_hdiag_csmv.F90 b/cuda/impl/psb_z_hdiag_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_hdiag_csmv.F90 rename to cuda/impl/psb_z_hdiag_csmv.F90 diff --git a/gpu/impl/psb_z_hdiag_mold.F90 b/cuda/impl/psb_z_hdiag_mold.F90 similarity index 100% rename from gpu/impl/psb_z_hdiag_mold.F90 rename to cuda/impl/psb_z_hdiag_mold.F90 diff --git a/gpu/impl/psb_z_hdiag_to_gpu.F90 b/cuda/impl/psb_z_hdiag_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_hdiag_to_gpu.F90 rename to cuda/impl/psb_z_hdiag_to_gpu.F90 diff --git a/gpu/impl/psb_z_hdiag_vect_mv.F90 b/cuda/impl/psb_z_hdiag_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_hdiag_vect_mv.F90 rename to cuda/impl/psb_z_hdiag_vect_mv.F90 diff --git a/gpu/impl/psb_z_hlg_allocate_mnnz.F90 b/cuda/impl/psb_z_hlg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_z_hlg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_z_hlg_csmm.F90 b/cuda/impl/psb_z_hlg_csmm.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_csmm.F90 rename to cuda/impl/psb_z_hlg_csmm.F90 diff --git a/gpu/impl/psb_z_hlg_csmv.F90 b/cuda/impl/psb_z_hlg_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_csmv.F90 rename to cuda/impl/psb_z_hlg_csmv.F90 diff --git a/gpu/impl/psb_z_hlg_from_gpu.F90 b/cuda/impl/psb_z_hlg_from_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_from_gpu.F90 rename to cuda/impl/psb_z_hlg_from_gpu.F90 diff --git a/gpu/impl/psb_z_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_hlg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_z_hlg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_z_hlg_mold.F90 b/cuda/impl/psb_z_hlg_mold.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_mold.F90 rename to cuda/impl/psb_z_hlg_mold.F90 diff --git a/gpu/impl/psb_z_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_hlg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_reallocate_nz.F90 rename to cuda/impl/psb_z_hlg_reallocate_nz.F90 diff --git a/gpu/impl/psb_z_hlg_scal.F90 b/cuda/impl/psb_z_hlg_scal.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_scal.F90 rename to cuda/impl/psb_z_hlg_scal.F90 diff --git a/gpu/impl/psb_z_hlg_scals.F90 b/cuda/impl/psb_z_hlg_scals.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_scals.F90 rename to cuda/impl/psb_z_hlg_scals.F90 diff --git a/gpu/impl/psb_z_hlg_to_gpu.F90 b/cuda/impl/psb_z_hlg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_to_gpu.F90 rename to cuda/impl/psb_z_hlg_to_gpu.F90 diff --git a/gpu/impl/psb_z_hlg_vect_mv.F90 b/cuda/impl/psb_z_hlg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_hlg_vect_mv.F90 rename to cuda/impl/psb_z_hlg_vect_mv.F90 diff --git a/gpu/impl/psb_z_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_hybg_allocate_mnnz.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_z_hybg_allocate_mnnz.F90 diff --git a/gpu/impl/psb_z_hybg_csmm.F90 b/cuda/impl/psb_z_hybg_csmm.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_csmm.F90 rename to cuda/impl/psb_z_hybg_csmm.F90 diff --git a/gpu/impl/psb_z_hybg_csmv.F90 b/cuda/impl/psb_z_hybg_csmv.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_csmv.F90 rename to cuda/impl/psb_z_hybg_csmv.F90 diff --git a/gpu/impl/psb_z_hybg_inner_vect_sv.F90 b/cuda/impl/psb_z_hybg_inner_vect_sv.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_z_hybg_inner_vect_sv.F90 diff --git a/gpu/impl/psb_z_hybg_mold.F90 b/cuda/impl/psb_z_hybg_mold.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_mold.F90 rename to cuda/impl/psb_z_hybg_mold.F90 diff --git a/gpu/impl/psb_z_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_hybg_reallocate_nz.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_reallocate_nz.F90 rename to cuda/impl/psb_z_hybg_reallocate_nz.F90 diff --git a/gpu/impl/psb_z_hybg_scal.F90 b/cuda/impl/psb_z_hybg_scal.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_scal.F90 rename to cuda/impl/psb_z_hybg_scal.F90 diff --git a/gpu/impl/psb_z_hybg_scals.F90 b/cuda/impl/psb_z_hybg_scals.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_scals.F90 rename to cuda/impl/psb_z_hybg_scals.F90 diff --git a/gpu/impl/psb_z_hybg_to_gpu.F90 b/cuda/impl/psb_z_hybg_to_gpu.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_to_gpu.F90 rename to cuda/impl/psb_z_hybg_to_gpu.F90 diff --git a/gpu/impl/psb_z_hybg_vect_mv.F90 b/cuda/impl/psb_z_hybg_vect_mv.F90 similarity index 100% rename from gpu/impl/psb_z_hybg_vect_mv.F90 rename to cuda/impl/psb_z_hybg_vect_mv.F90 diff --git a/gpu/impl/psb_z_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_mv_csrg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_csrg_from_coo.F90 rename to cuda/impl/psb_z_mv_csrg_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_mv_csrg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_z_mv_csrg_from_fmt.F90 diff --git a/gpu/impl/psb_z_mv_diag_from_coo.F90 b/cuda/impl/psb_z_mv_diag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_diag_from_coo.F90 rename to cuda/impl/psb_z_mv_diag_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_elg_from_coo.F90 b/cuda/impl/psb_z_mv_elg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_elg_from_coo.F90 rename to cuda/impl/psb_z_mv_elg_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_mv_elg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_mv_elg_from_fmt.F90 rename to cuda/impl/psb_z_mv_elg_from_fmt.F90 diff --git a/gpu/impl/psb_z_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_mv_hdiag_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_z_mv_hdiag_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_mv_hlg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hlg_from_coo.F90 rename to cuda/impl/psb_z_mv_hlg_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_mv_hlg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_z_mv_hlg_from_fmt.F90 diff --git a/gpu/impl/psb_z_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_mv_hybg_from_coo.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hybg_from_coo.F90 rename to cuda/impl/psb_z_mv_hybg_from_coo.F90 diff --git a/gpu/impl/psb_z_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_mv_hybg_from_fmt.F90 similarity index 100% rename from gpu/impl/psb_z_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_z_mv_hybg_from_fmt.F90 diff --git a/gpu/ivectordev.c b/cuda/ivectordev.c similarity index 100% rename from gpu/ivectordev.c rename to cuda/ivectordev.c diff --git a/gpu/ivectordev.h b/cuda/ivectordev.h similarity index 100% rename from gpu/ivectordev.h rename to cuda/ivectordev.h diff --git a/gpu/psb_base_vectordev_mod.F90 b/cuda/psb_base_vectordev_mod.F90 similarity index 100% rename from gpu/psb_base_vectordev_mod.F90 rename to cuda/psb_base_vectordev_mod.F90 diff --git a/gpu/psb_c_csrg_mat_mod.F90 b/cuda/psb_c_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_csrg_mat_mod.F90 rename to cuda/psb_c_csrg_mat_mod.F90 diff --git a/gpu/psb_c_diag_mat_mod.F90 b/cuda/psb_c_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_c_diag_mat_mod.F90 rename to cuda/psb_c_diag_mat_mod.F90 diff --git a/gpu/psb_c_dnsg_mat_mod.F90 b/cuda/psb_c_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_dnsg_mat_mod.F90 rename to cuda/psb_c_dnsg_mat_mod.F90 diff --git a/gpu/psb_c_elg_mat_mod.F90 b/cuda/psb_c_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_elg_mat_mod.F90 rename to cuda/psb_c_elg_mat_mod.F90 diff --git a/gpu/psb_c_gpu_vect_mod.F90 b/cuda/psb_c_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_c_gpu_vect_mod.F90 rename to cuda/psb_c_gpu_vect_mod.F90 diff --git a/gpu/psb_c_hdiag_mat_mod.F90 b/cuda/psb_c_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_c_hdiag_mat_mod.F90 rename to cuda/psb_c_hdiag_mat_mod.F90 diff --git a/gpu/psb_c_hlg_mat_mod.F90 b/cuda/psb_c_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_hlg_mat_mod.F90 rename to cuda/psb_c_hlg_mat_mod.F90 diff --git a/gpu/psb_c_hybg_mat_mod.F90 b/cuda/psb_c_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_c_hybg_mat_mod.F90 rename to cuda/psb_c_hybg_mat_mod.F90 diff --git a/gpu/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90 similarity index 100% rename from gpu/psb_c_vectordev_mod.F90 rename to cuda/psb_c_vectordev_mod.F90 diff --git a/gpu/psb_d_csrg_mat_mod.F90 b/cuda/psb_d_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_csrg_mat_mod.F90 rename to cuda/psb_d_csrg_mat_mod.F90 diff --git a/gpu/psb_d_diag_mat_mod.F90 b/cuda/psb_d_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_d_diag_mat_mod.F90 rename to cuda/psb_d_diag_mat_mod.F90 diff --git a/gpu/psb_d_dnsg_mat_mod.F90 b/cuda/psb_d_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_dnsg_mat_mod.F90 rename to cuda/psb_d_dnsg_mat_mod.F90 diff --git a/gpu/psb_d_elg_mat_mod.F90 b/cuda/psb_d_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_elg_mat_mod.F90 rename to cuda/psb_d_elg_mat_mod.F90 diff --git a/gpu/psb_d_gpu_vect_mod.F90 b/cuda/psb_d_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_d_gpu_vect_mod.F90 rename to cuda/psb_d_gpu_vect_mod.F90 diff --git a/gpu/psb_d_hdiag_mat_mod.F90 b/cuda/psb_d_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_d_hdiag_mat_mod.F90 rename to cuda/psb_d_hdiag_mat_mod.F90 diff --git a/gpu/psb_d_hlg_mat_mod.F90 b/cuda/psb_d_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_hlg_mat_mod.F90 rename to cuda/psb_d_hlg_mat_mod.F90 diff --git a/gpu/psb_d_hybg_mat_mod.F90 b/cuda/psb_d_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_d_hybg_mat_mod.F90 rename to cuda/psb_d_hybg_mat_mod.F90 diff --git a/gpu/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90 similarity index 100% rename from gpu/psb_d_vectordev_mod.F90 rename to cuda/psb_d_vectordev_mod.F90 diff --git a/gpu/psb_gpu_env_mod.F90 b/cuda/psb_gpu_env_mod.F90 similarity index 100% rename from gpu/psb_gpu_env_mod.F90 rename to cuda/psb_gpu_env_mod.F90 diff --git a/gpu/psb_gpu_mod.F90 b/cuda/psb_gpu_mod.F90 similarity index 100% rename from gpu/psb_gpu_mod.F90 rename to cuda/psb_gpu_mod.F90 diff --git a/gpu/psb_i_csrg_mat_mod.F90 b/cuda/psb_i_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_csrg_mat_mod.F90 rename to cuda/psb_i_csrg_mat_mod.F90 diff --git a/gpu/psb_i_diag_mat_mod.F90 b/cuda/psb_i_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_i_diag_mat_mod.F90 rename to cuda/psb_i_diag_mat_mod.F90 diff --git a/gpu/psb_i_dnsg_mat_mod.F90 b/cuda/psb_i_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_dnsg_mat_mod.F90 rename to cuda/psb_i_dnsg_mat_mod.F90 diff --git a/gpu/psb_i_elg_mat_mod.F90 b/cuda/psb_i_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_elg_mat_mod.F90 rename to cuda/psb_i_elg_mat_mod.F90 diff --git a/gpu/psb_i_gpu_vect_mod.F90 b/cuda/psb_i_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_i_gpu_vect_mod.F90 rename to cuda/psb_i_gpu_vect_mod.F90 diff --git a/gpu/psb_i_hdiag_mat_mod.F90 b/cuda/psb_i_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_i_hdiag_mat_mod.F90 rename to cuda/psb_i_hdiag_mat_mod.F90 diff --git a/gpu/psb_i_hlg_mat_mod.F90 b/cuda/psb_i_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_hlg_mat_mod.F90 rename to cuda/psb_i_hlg_mat_mod.F90 diff --git a/gpu/psb_i_hybg_mat_mod.F90 b/cuda/psb_i_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_i_hybg_mat_mod.F90 rename to cuda/psb_i_hybg_mat_mod.F90 diff --git a/gpu/psb_i_vectordev_mod.F90 b/cuda/psb_i_vectordev_mod.F90 similarity index 100% rename from gpu/psb_i_vectordev_mod.F90 rename to cuda/psb_i_vectordev_mod.F90 diff --git a/gpu/psb_s_csrg_mat_mod.F90 b/cuda/psb_s_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_csrg_mat_mod.F90 rename to cuda/psb_s_csrg_mat_mod.F90 diff --git a/gpu/psb_s_diag_mat_mod.F90 b/cuda/psb_s_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_s_diag_mat_mod.F90 rename to cuda/psb_s_diag_mat_mod.F90 diff --git a/gpu/psb_s_dnsg_mat_mod.F90 b/cuda/psb_s_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_dnsg_mat_mod.F90 rename to cuda/psb_s_dnsg_mat_mod.F90 diff --git a/gpu/psb_s_elg_mat_mod.F90 b/cuda/psb_s_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_elg_mat_mod.F90 rename to cuda/psb_s_elg_mat_mod.F90 diff --git a/gpu/psb_s_gpu_vect_mod.F90 b/cuda/psb_s_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_s_gpu_vect_mod.F90 rename to cuda/psb_s_gpu_vect_mod.F90 diff --git a/gpu/psb_s_hdiag_mat_mod.F90 b/cuda/psb_s_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_s_hdiag_mat_mod.F90 rename to cuda/psb_s_hdiag_mat_mod.F90 diff --git a/gpu/psb_s_hlg_mat_mod.F90 b/cuda/psb_s_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_hlg_mat_mod.F90 rename to cuda/psb_s_hlg_mat_mod.F90 diff --git a/gpu/psb_s_hybg_mat_mod.F90 b/cuda/psb_s_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_s_hybg_mat_mod.F90 rename to cuda/psb_s_hybg_mat_mod.F90 diff --git a/gpu/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90 similarity index 100% rename from gpu/psb_s_vectordev_mod.F90 rename to cuda/psb_s_vectordev_mod.F90 diff --git a/gpu/psb_vectordev_mod.f90 b/cuda/psb_vectordev_mod.f90 similarity index 100% rename from gpu/psb_vectordev_mod.f90 rename to cuda/psb_vectordev_mod.f90 diff --git a/gpu/psb_z_csrg_mat_mod.F90 b/cuda/psb_z_csrg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_csrg_mat_mod.F90 rename to cuda/psb_z_csrg_mat_mod.F90 diff --git a/gpu/psb_z_diag_mat_mod.F90 b/cuda/psb_z_diag_mat_mod.F90 similarity index 100% rename from gpu/psb_z_diag_mat_mod.F90 rename to cuda/psb_z_diag_mat_mod.F90 diff --git a/gpu/psb_z_dnsg_mat_mod.F90 b/cuda/psb_z_dnsg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_dnsg_mat_mod.F90 rename to cuda/psb_z_dnsg_mat_mod.F90 diff --git a/gpu/psb_z_elg_mat_mod.F90 b/cuda/psb_z_elg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_elg_mat_mod.F90 rename to cuda/psb_z_elg_mat_mod.F90 diff --git a/gpu/psb_z_gpu_vect_mod.F90 b/cuda/psb_z_gpu_vect_mod.F90 similarity index 100% rename from gpu/psb_z_gpu_vect_mod.F90 rename to cuda/psb_z_gpu_vect_mod.F90 diff --git a/gpu/psb_z_hdiag_mat_mod.F90 b/cuda/psb_z_hdiag_mat_mod.F90 similarity index 100% rename from gpu/psb_z_hdiag_mat_mod.F90 rename to cuda/psb_z_hdiag_mat_mod.F90 diff --git a/gpu/psb_z_hlg_mat_mod.F90 b/cuda/psb_z_hlg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_hlg_mat_mod.F90 rename to cuda/psb_z_hlg_mat_mod.F90 diff --git a/gpu/psb_z_hybg_mat_mod.F90 b/cuda/psb_z_hybg_mat_mod.F90 similarity index 100% rename from gpu/psb_z_hybg_mat_mod.F90 rename to cuda/psb_z_hybg_mat_mod.F90 diff --git a/gpu/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90 similarity index 100% rename from gpu/psb_z_vectordev_mod.F90 rename to cuda/psb_z_vectordev_mod.F90 diff --git a/gpu/s_cusparse_mod.F90 b/cuda/s_cusparse_mod.F90 similarity index 100% rename from gpu/s_cusparse_mod.F90 rename to cuda/s_cusparse_mod.F90 diff --git a/gpu/scusparse.c b/cuda/scusparse.c similarity index 100% rename from gpu/scusparse.c rename to cuda/scusparse.c diff --git a/cuda/spgpu/Makefile b/cuda/spgpu/Makefile new file mode 100644 index 00000000..85801942 --- /dev/null +++ b/cuda/spgpu/Makefile @@ -0,0 +1,39 @@ +TOP=../.. +include $(TOP)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOP)/lib +INCDIR=$(TOP)/include +MODDIR=$(TOP)/modules +LIBNAME=libspgpu.a + +OBJS=coo.o core.o dia.o ell.o hdia.o hell.o + +all: includes objs + +objs: $(OBJS) iobjs + +lib: objs iobjs ilib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +iobjs: + $(MAKE) -C kernels objs +ilib: + $(MAKE) -C kernels lib LIBNAME=$(LIBNAME) + +includes: + /bin/cp -p *.h $(INCDIR) + +clean: iclean + /bin/rm -fr $(OBJS) $(LIBNAME) +iclean: + $(MAKE) -C kernels clean +.c.o: + $(CC) $(CCOPT) $(CINCLUDES) $(CDEFINES) $(CUDA_INCLUDES) -c $< +.cpp.o: + $(CXX) $(CXXOPT) $(CXXINCLUDES) $(CXXDEFINES) $(CUDA_INCLUDES) -c $< -o $@ + +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) $(CUDA_INCLUDES) -c $< diff --git a/cuda/spgpu/coo.cpp b/cuda/spgpu/coo.cpp new file mode 100644 index 00000000..eaeba95e --- /dev/null +++ b/cuda/spgpu/coo.cpp @@ -0,0 +1,98 @@ +#include "coo_conv.h" +#include "core.h" + +#include + +// returns the number of non-zero blocks +int computeBcooSize(int blockRows, int blockCols, const int* rows, const int* cols, int nonZeros) +{ + // use a map to count al non zero blocks + std::map blocksPositions; + + int blockCount = 0; + + int i; + for (i=0; i::iterator it = blocksPositions.find(blockId); + + // not found + if(it == blocksPositions.end()) + { + blocksPositions[blockId] = blockCount; + ++blockCount; + } + } + + return blockCount; +} + + +void cooToBcoo(int* bRows, int* bCols, void* blockValues, /*int isBlockColumnMajor,*/ int blockRows, int blockCols, + const int* rows, const int* cols, const void* values, int nonZeros, spgpuType_t valuesType) +{ + // use a map to count al non zero blocks + std::map blocksPositions; + + int blockCount = 0; + + size_t elementSize = spgpuSizeOf(valuesType); + size_t blockElementSize = elementSize*blockRows*blockCols; + int i; + for (i=0; i::iterator it = blocksPositions.find(blockId); + + int blockPos; + + // not found + if(it == blocksPositions.end()) + { + blocksPositions[blockId] = blockCount; + blockPos = blockCount; + + bRows[blockCount] = blockRowId; + bCols[blockCount] = blockColId; + + memset((char*)blockValues + blockCount*blockElementSize, 0, blockElementSize); + + ++blockCount; + } + else + blockPos = it->second; + + int blockRowOffset = rowId % blockRows; + int blockColOffset = colId % blockCols; + + int blockOffset; + + //if (isBlockColumnMajor) + blockOffset = blockRowOffset + blockColOffset*blockRows; + /*else + blockOffset = blockRowOffset*blockCols + blockColOffset; + */ + + char* dest = (char*)blockValues + blockPos*blockElementSize + blockOffset*elementSize; + const char* src = (const char*)values + elementSize*i; + + memcpy(dest, src, elementSize); + } +} + + diff --git a/cuda/spgpu/coo_conv.h b/cuda/spgpu/coo_conv.h new file mode 100644 index 00000000..feccb63a --- /dev/null +++ b/cuda/spgpu/coo_conv.h @@ -0,0 +1,40 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include "core.h" + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int computeBcooSize(int blockRows, int blockCols, const int* rows, const int* cols, int nonZeros); + +// column-major format for blocks +void cooToBcoo(int* bRows, int* bCols, void* blockValues, /*int isBlockColumnMajor,*/ int blockRows, int blockCols, + const int* rows, const int* cols, const void* values, int nonZeros, spgpuType_t valuesType); + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/core.c b/cuda/spgpu/core.c new file mode 100644 index 00000000..e89aa2cf --- /dev/null +++ b/cuda/spgpu/core.c @@ -0,0 +1,109 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" +#include "stdlib.h" +#include "cuda_runtime.h" + +spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device) +{ + struct cudaDeviceProp deviceProperties; + cudaError_t err = cudaGetDeviceProperties(&deviceProperties, device); + + SpgpuHandleStruct* handle = (SpgpuHandleStruct*) malloc(sizeof(SpgpuHandleStruct)); + + int currentDevice; + cudaGetDevice(¤tDevice); + cudaSetDevice(device); + cudaStreamCreate(&handle->defaultStream); + handle->currentStream = handle->defaultStream; + cudaSetDevice(currentDevice); + + handle->device = device; + handle->warpSize = deviceProperties.warpSize; + handle->maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; + handle->multiProcessorCount = deviceProperties.multiProcessorCount; + handle->maxGridSizeX = deviceProperties.maxGridSize[0]; + handle->maxGridSizeY = deviceProperties.maxGridSize[1]; + handle->maxGridSizeZ = deviceProperties.maxGridSize[2]; + handle->capabilityMajor = deviceProperties.major; + handle->capabilityMinor = deviceProperties.minor; + + *pHandle = handle; + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else + return SPGPU_UNSPECIFIED; +} + +void spgpuDestroy(spgpuHandle_t pHandle) +{ + cudaStreamDestroy(pHandle->defaultStream); + + free((void*)pHandle); +} + +void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream) +{ + int currentDevice; + cudaGetDevice(¤tDevice); + cudaSetDevice(pHandle->device); + cudaStreamCreate(stream); + cudaSetDevice(currentDevice); +} + +void spgpuStreamDestroy(cudaStream_t stream) +{ + cudaStreamDestroy(stream); +} + +void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream) +{ + SpgpuHandleStruct* handle = (SpgpuHandleStruct*)pHandle; + + if (stream) + { + handle->currentStream = stream; + } + else + handle->currentStream = pHandle->defaultStream; +} + +cudaStream_t spgpuGetStream(spgpuHandle_t pHandle) +{ + SpgpuHandleStruct* handle = (SpgpuHandleStruct*)pHandle; + return handle->currentStream; +} + +size_t spgpuSizeOf(spgpuType_t typeCode) +{ + switch (typeCode) + { + case SPGPU_TYPE_INT: + return sizeof(int); + case SPGPU_TYPE_FLOAT: + return sizeof(float); + case SPGPU_TYPE_DOUBLE: + return sizeof(double); + case SPGPU_TYPE_COMPLEX_FLOAT: + return sizeof(cuFloatComplex); + case SPGPU_TYPE_COMPLEX_DOUBLE: + return sizeof(cuDoubleComplex); + default: + return 0; // error + } +} diff --git a/cuda/spgpu/core.h b/cuda/spgpu/core.h new file mode 100644 index 00000000..46d98f64 --- /dev/null +++ b/cuda/spgpu/core.h @@ -0,0 +1,185 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +/*! \mainpage The spGPU library documentation + * + * \section intro_sec Introduction + * + * spGPU is a set of custom matrix storages and CUDA kernels for sparse linear algebra computing on GPU. It isn't a replacement for cuBLAS/cuSPARSE that should be used for a full featured linear algebra environment on GPU.\n + * The main matrix storage used by spGPU is a GPU-friendly ELLpack format, as well as our HELL (Hacked ELLpack) format, an enhanced version of ELLpack with some interesting memory saving properties.\n + * HELL format provides a better memory storage compared to ELL (it avoids allocation inefficency provided by spikes in row sizes), while providing quite the same performances for sparse matrix-vector multiply routine.. + * + * \section install_sec How to build spgpu + * \subsection linuxbuild Linux (and other unix systems) + * cd spgpu/build/cmake\n + * sh configure.sh\n + * make + * \section cr_sec Copyright + * Copyright (C) 2010 - 2015\n + * Davide Barbieri - University of Rome Tor Vergata\n + * Valeria Cardellini - University of Rome Tor Vergata\n + * Salvatore Filippone - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or\n + * modify it under the terms of the GNU General Public License\n + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful,\n + * but WITHOUT ANY WARRANTY; without even the implied warranty of\n + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n + * GNU General Public License for more details. + */ + + +#include "driver_types.h" +#include "cuComplex.h" + +/** \addtogroup coreFun Core Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/// __host pointers reference host allocations (it's just a placeholder) +#define __host +/// __device pointers reference device allocations (it's just a placeholder) +#define __device + +/// The return code for synchronous functions +typedef int spgpuStatus_t; + +#define SPGPU_SUCCESS 0 +#define SPGPU_UNSUPPORTED 1 +#define SPGPU_UNSPECIFIED 2 +#define SPGPU_OUTOFMEMORY 3 + +/// Code to identify a primitive type +typedef int spgpuType_t; + +#define SPGPU_TYPE_INT 0 +#define SPGPU_TYPE_FLOAT 1 +#define SPGPU_TYPE_DOUBLE 2 +#define SPGPU_TYPE_COMPLEX_FLOAT 3 +#define SPGPU_TYPE_COMPLEX_DOUBLE 4 + +/// this struct should be modified only internally by spgpu +typedef struct spgpuHandleStruct { + /// the current stream used by every calls on spgpu routines (passing this handle) + cudaStream_t currentStream; + /// the default stream, created during the handle creation. + cudaStream_t defaultStream; + /// the device associated to this handle + int device; + /// the warp size for this device + int warpSize; + /// the max threads per block count for this device + int maxThreadsPerBlock; + /// the max size for the X coordinate of the grid dimensions + int maxGridSizeX; + /// the max size for the Y coordinate of the grid dimensions + int maxGridSizeY; + /// the max size for the Z coordinate of the grid dimensions + int maxGridSizeZ; + /// Number of SM + int multiProcessorCount; + // compute capability + int capabilityMajor; + int capabilityMinor; +} SpgpuHandleStruct; + +/// A spGPU handle represents a single CUDA device on your platform. +typedef const SpgpuHandleStruct* spgpuHandle_t; + +/** +* \fn spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device) +* Create a spgpu context for a GPU device. Every call to spgpu routines using this +* handle will execute on the same GPU. This is re-entrant, so it will work if used by multiple host threads. +* \param pHandle outputs the handle +* \param device id of the device to be used by this context +*/ +spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device); + +/** +* \fn void spgpuDestroy(spgpuHandle_t pHandle) +* Destroy the spgpu context for pHandle. +* \param pHandle the handle previously created with spgpuCreate(). +*/ +void spgpuDestroy(spgpuHandle_t pHandle); + +/** +* \fn void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream) +* Create a cuda stream according to the device of the spgpu handle. +* \param stream outputs the new stream +* \param pHandle the handle used to obtain the device id for the stream +*/ +void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream); + +/** +* \fn void spgpuStreamDestroy(cudaStream_t stream) +* Destroy a stream, previously created with spgpuStreamCreate(). +* \param stream the stream to destroy +*/ +void spgpuStreamDestroy(cudaStream_t stream); + +/** +* \fn void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream) +* Change the current stream for the handle pHandle. +* \param pHandle the handle to configure. +* \param stream the stream to use for next spgpu routines call. Use 0 to reset to the default stream. +*/ +void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream); + +/** +* \fn cudaStram_t spgpuGetStream(spgpuHandle_t pHandle) +* Get the current stream from the handle pHandle. +* \param pHandle the handle from which get the stream. +*/ +cudaStream_t spgpuGetStream(spgpuHandle_t pHandle); + +/** +* \fn size_t spgpuSizeOf(spgpuType_t typeCode) +* Returns the size, in bytes, of the type denoted by typeCode (e.g. 4 for SPGPU_TYPE_FLOAT, 8 for SPGPU_TYPE_DOUBLE). +* \param typeCode outputs the handle +*/ +size_t spgpuSizeOf(spgpuType_t typeCode); + +/* +typedef struct { +spgpuMatrix + +spgpuMatrixType_t MatrixType; +spgpuFillMode_t FillMode; +spgpuDiagType_t DiagType; +int baseIndex; +} spgpuMatrixDesc_t +*/ + +#define cuFloatComplex_isZero(a) (a.x == 0.0f && a.y == 0.0f) +#define cuDoubleComplex_isZero(a) (a.x == 0.0 && a.y == 0.0) +#define cuFloatComplex_isNotZero(a) (a.x != 0.0f || a.y != 0.0f) +#define cuDoubleComplex_isNotZero(a) (a.x != 0.0 || a.y != 0.0) + +#ifdef __cplusplus +} +#endif + +/** @}*/ + diff --git a/cuda/spgpu/debug.h b/cuda/spgpu/debug.h new file mode 100644 index 00000000..d5bc6de2 --- /dev/null +++ b/cuda/spgpu/debug.h @@ -0,0 +1,58 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "stdlib.h" + +#ifndef _WIN32 +#include + +inline +void printTrace (void) +{ + void *array[32]; + size_t size; + char **strings; + size_t i; + + size = backtrace (array, 32); + strings = backtrace_symbols (array, size); + + printf ("---- Obtained %zd stack frames.\n", size); + + for (i = 0; i < size; i++) + printf ("%s\n", strings[i]); + + free (strings); +} +#endif + +inline void __assert(int e, const char* w) +{ + if (!e) + { + printf("%s\n",w); + +#ifndef _WIN32 + printTrace(); +#endif + + exit(0); + } +} + diff --git a/cuda/spgpu/dia.c b/cuda/spgpu/dia.c new file mode 100644 index 00000000..cf761c5c --- /dev/null +++ b/cuda/spgpu/dia.c @@ -0,0 +1,105 @@ +#include "dia.h" +#include "dia_conv.h" +#include "stdlib.h" + +int computeDiaAllocPitch(int rowsCount) +{ + // returns a pitch good for indices and values + return ((rowsCount + 31)/32)*32; +} + +int computeDiaDiagonalsCount( + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices) +{ + int* diagIds = (int*)malloc((rowsCount + columnsCount - 1)*sizeof(int)); + int diagonalsCount = 0; + int i; + + for (i=0; i<(rowsCount + columnsCount - 1); ++i) + diagIds[i] = -1; + + for (i=0; i + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int computeDiaDiagonalsCount( + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices); + + +void coo2dia( + void* values, + int* offsets, + int valuesPitch, + int diagonals, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType); + + +/** +* \fn int computeDiaAllocPitch(int rowsCount) + * This function returns a pitch (in number of elements) that can be used to allocate the values array for DIA matrix format. + * \param rowsCount the rows count + * \return the pitch for an DIA matrix of rowsCount rows. +*/ +int computeDiaAllocPitch(int rowsCount); + + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/ell.c b/cuda/spgpu/ell.c new file mode 100644 index 00000000..33381c28 --- /dev/null +++ b/cuda/spgpu/ell.c @@ -0,0 +1,202 @@ +#include "ell.h" +#include "ell_conv.h" +#include "stdlib.h" + +void computeEllRowLenghts( + int *ellRowLengths, + int *ellMaxRowSize, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + int cooBaseIndex + ) +{ + // find the max number of non zero per row + int maxRowSize = 0; + int i; + for (i=0; i maxRowSize) + maxRowSize = currCount; + } + + *ellMaxRowSize = maxRowSize; +} + +int computeEllAllocPitch(int rowsCount) +{ + // returns a pitch good for indices and values + return ((rowsCount + 31)/32)*32; +} + +void cooToEll( + void *ellValues, + int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int ellMaxRowSize, + int ellBaseIndex, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ) +{ + + size_t elementSize = spgpuSizeOf(valuesType); + + // fill values and indices + int* currentPos = (int*)malloc(rowsCount*sizeof(int)); + int i; + + for (i=0; i dstRs[j]) { + app_dstRs[k] = dstRs[i]; + app_rIdx[k] = rIdx[i]; + + ++k; ++i; + } else { + app_dstRs[k] = dstRs[j]; + app_rIdx[k] = rIdx[j]; + + ++k; ++j; + } + } + + while (i<=center) + { + app_dstRs[k] = dstRs[i]; + app_rIdx[k] = rIdx[i]; + + ++k; ++i; + } + + while (j<=end) + { + app_dstRs[k] = dstRs[j]; + app_rIdx[k] = rIdx[j]; + + ++k; ++j; + } + + for (k=start; k<=end; k++) + { + dstRs[k] = app_dstRs[k-start]; + rIdx[k] = app_rIdx[k-start]; + } +} + +void mergesort(int *dstRs, int *rIdx, int size) { + int* app_dstRs = (int*)malloc(size*sizeof(int)); + int* app_rIdx = (int*)malloc(size*sizeof(int)); + + int sizetomerge=size-1; + size--; + int i; + int n=2; + + while (nsizetomerge) + merge (app_dstRs, app_rIdx, dstRs, rIdx, sizetomerge -((sizetomerge)%n),sizetomerge,size,size); + sizetomerge=sizetomerge-((sizetomerge+1)%n);} + n=n*2; + } + + if (size>sizetomerge) + merge (app_dstRs, app_rIdx, dstRs,rIdx,0,size-(size-sizetomerge),size,size); + + free(app_dstRs); + free(app_rIdx); +} + + + +void ellToOell( + int *rIdx, + void *dstEllValues, + int *dstEllIndices, + int *dstRs, + const void *srcEllValues, + const int *srcEllIndices, + const int *srcRs, + int ellValuesPitch, + int ellIndicesPitch, + int rowsCount, + spgpuType_t valuesType + ) +{ + size_t elementSize = spgpuSizeOf(valuesType); + + int i,j; + for (i=0; i + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void computeEllRowLenghts(int *ellRowLengths, int *ellMaxRowSize, int rowsCount, int nonZerosCount, const int* cooRowIndices, int cooBaseIndex) + * Compute the Ell row lengths array (and the greatest row size) from the COO matrix format. + * \param ellRowLengths Array of length rowsCount to be filled by the non zeros count for every matrix row + * \param ellMaxRowSize outputs the greatest row size (in non zeros) + * \param rowsCount the number of rows of the coo matrix to convert + * \param nonZerosCount the non zeros count of the coo matrix to convert + * \param cooRowIndices the row indices array for the coo matrix to convert + * \param cooBaseIndex the input base index (e.g. 0 for C, 1 for Fortran) + */ +void computeEllRowLenghts( + int *ellRowLengths, + int *ellMaxRowSize, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + int cooBaseIndex + ); + +/** +* \fn int computeEllAllocPitch(int rowsCount) + * This function returns a pitch (in number of elements) that can be used to allocate both indices and values arrays for ELL matrix format. + * \param rowsCount the rows count + * \return the pitch for an ELL matrix of rowsCount rows. +*/ +int computeEllAllocPitch(int rowsCount); + + +/** +* \fn void cooToEll(void *ellValues,int *ellIndices,int ellValuesPitch,int ellIndicesPitch,int ellMaxRowSize,int ellBaseIndex,int rowsCount,int nonZerosCount,const int* cooRowIndices,const int* cooColsIndices,const void* cooValues,int cooBaseIndex, spgpuType_t valuesType) + * Convert a matrix in COO format to a matrix in ELL format. + * The matrix is stored in column-major format. The ellValues and ellIndices sizes are ellMaxRowSize * pitch (pitch is in bytes). + * \param ellValues pointer to the area that will be filled by the non zero coefficients + * \param ellIndices pointer to the area that will be filled by the non zero indices + * \param ellValuesPitch the column-major allocation's pitch of ellValues (in number of elements) + * \param ellIndicesPitch the column-major allocation's pitch of ellIndices (in number of elements) + * \param ellMaxRowSize the greatest row size + * \param ellBaseIndex the desired base index for the ELL matrix (e.g. 0 for C, 1 for Fortran) + * \param rowsCount input matrix rows count + * \param nonZerosCount input matrix non zeros count + * \param cooRowIndices input matrix row indices pointer + * \param cooColsIndices input matrix column indices pointer + * \param cooValues input matrix non zeros values pointer + * \param cooBaseIndex input matrix base index + * \param valuesType the type for elements in ellValues and cooValues (i.e. SPGPU_TYPE_FLOAT or SPGPU_TYPE_DOUBLE) + */ +void cooToEll( + void *ellValues, + int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int ellMaxRowSize, + int ellBaseIndex, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ); + +void ellToOell( + int *rIdx, + void *dstEllValues, + int *dstEllIndices, + int *dstRs, + const void *srcEllValues, + const int *srcEllIndices, + const int *srcRs, + int ellValuesPitch, + int ellIndicesPitch, + int rowsCount, + spgpuType_t valuesType + ); + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/hdia.cpp b/cuda/spgpu/hdia.cpp new file mode 100644 index 00000000..daf74173 --- /dev/null +++ b/cuda/spgpu/hdia.cpp @@ -0,0 +1,374 @@ +#include "hdia_conv.h" +#include "stdlib.h" +#include "string.h" + +#include +#include + +int getHdiaHacksCount(int hackSize, int rowsCount) +{ + return (rowsCount + hackSize - 1)/hackSize; +} + +void computeHdiaHackOffsets( + int *allocationHeight, + int *hackOffsets, + int hackSize, + const void* diaValues, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType + ) +{ + int i,r,s, hack; + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + size_t elementSize = spgpuSizeOf(valuesType); + + int hackHeight = 0; + + hackOffsets[0] = 0; + for (hack=0; hack= rowsCount) + break; + + const char* val = (char*)diaValues + elementSize*(row + i*diaValuesPitch); + + for (s=0; s= rowsCount) + break; + + const char* val = (const char*)diaValues + elementSize*(row + i*diaValuesPitch); + + for (s=0; s= rowsCount) + break; + + char* dest = (char*)hdiaValues + elementSize*((posOffset + i)*hackSize + r); + const char* src = (const char*)diaValues + elementSize*(row + diagPosInsideDia*diaValuesPitch); + + memcpy(dest, src, elementSize); + } + } + } +} + + + + + + + +void computeHdiaHackOffsetsFromCoo( + int *allocationHeight, + int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + int cooBaseIndex + ) +{ + + int i,j,h; + + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + + // Find rows per hack + std::vector *rowsPerHack = new std::vector [hackCount]; + + for (i=0; i diagIdsToPos; + + hackOffsets[0] = 0; + for (h=0; h *hackRows = &rowsPerHack[h]; + int hackRowsSize = hackRows->size(); + + for (j=0; jat(j); + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int diagId = (colIdx-cooBaseIndex) - ((rowIdx-cooBaseIndex) % hackSize); + int diagPos = hackSize - 1 + diagId; + + std::map::iterator it = diagIdsToPos.find(diagPos); + + if(it == diagIdsToPos.end()) + { + diagIdsToPos[diagPos] = 1; + ++diagonalsCount; + } + } + + hackOffsets[h+1] = hackOffsets[h] + diagonalsCount; + } + + *allocationHeight = hackOffsets[hackCount]; + + delete[] rowsPerHack; +} + +void cooToHdia_size( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + size_t elementSize + ) +{ + int i,j,h; + + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + // Find rows per hack + std::vector *rowsPerHack = new std::vector [hackCount]; + + for (i=0; i hackDiagIdsToPos; + + for (h=0; h *hackRows = &rowsPerHack[h]; + int hackRowsSize = hackRows->size(); + + for (j=0; jat(j); + + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int globalDiagId = colIdx - rowIdx; + int diagId = (colIdx - cooBaseIndex) - ((rowIdx - cooBaseIndex) % hackSize); + int diagPos = hackSize - 1 + diagId; + + std::map::iterator it = hackDiagIdsToPos.find(diagPos); + + if(it == hackDiagIdsToPos.end()) + { + hackDiagIdsToPos[diagPos] = globalDiagId; + } + } + + // Reorder diags + for (std::map::iterator it = hackDiagIdsToPos.begin(); it != hackDiagIdsToPos.end(); ++it) + { + int i = it->first; + + int globalDiagId = it->second; + int diagPosInsideOffsets; + int diagId = i - hackSize + 1; + hackDiagIdsToPos[i] = diagPosInsideOffsets = diagonalsCount++; + hdiaOffsets[diagPosInsideOffsets] = globalDiagId; + } + + + hdiaOffsets += diagonalsCount; + + for (j=0; jat(j); + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int diagId = (colIdx - cooBaseIndex) - ((rowIdx - cooBaseIndex) % hackSize); + + int diagPosInsideOffsets = hackDiagIdsToPos[hackSize - 1 + diagId]; + + char* valAddr = (char*)hdiaValues + + elementSize*(((rowIdx - cooBaseIndex) % hackSize) + + hackSize* (hackOffsets[h] + diagPosInsideOffsets)); + + memcpy(valAddr, (const char*)cooValues + i*elementSize, elementSize); + } + } + + delete[] rowsPerHack; +} + + + +void cooToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ) +{ + size_t elementSize = spgpuSizeOf(valuesType); + + cooToHdia_size(hdiaValues, hdiaOffsets, + hackOffsets, hackSize, rowsCount, + columnsCount, nonZerosCount, + cooRowIndices, cooColsIndices, cooValues, cooBaseIndex, elementSize); +} + +void bcooToBhdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType, + int blockSize + ) +{ + size_t elementSize = blockSize*spgpuSizeOf(valuesType); + + cooToHdia_size(hdiaValues, hdiaOffsets, + hackOffsets, hackSize, rowsCount, + columnsCount, nonZerosCount, + cooRowIndices, cooColsIndices, cooValues, cooBaseIndex, elementSize); +} + diff --git a/cuda/spgpu/hdia.h b/cuda/spgpu/hdia.h new file mode 100644 index 00000000..e8808fb7 --- /dev/null +++ b/cuda/spgpu/hdia.h @@ -0,0 +1,159 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2013 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + + +/** \addtogroup diaFun DIA/HDIA Format + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +/** +* \fn spgpuShdiaspmv (spgpuHandle_t handle, float* z, const float *y, float alpha, const float* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const float *x, float beta) + * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuShdiaspmv (spgpuHandle_t handle, + float* z, + const float *y, + float alpha, + const float* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const float *x, + float beta); + + +/** +* \fn spgpuDhdiaspmv (spgpuHandle_t handle, double* z, const double *y, double alpha, const double* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const double *x, double beta) + * Computes double precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuDhdiaspmv (spgpuHandle_t handle, + double* z, + const double *y, + double alpha, + const double* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const double *x, + double beta); + + +/** +* \fn spgpuChdiaspmv (spgpuHandle_t handle, cuFloatComplex* z, const cuFloatComplex *y, cuFloatComplex alpha, const cuFloatComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuFloatComplex *x, cuFloatComplex beta) + * Computes single precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuChdiaspmv (spgpuHandle_t handle, + cuFloatComplex* z, + const cuFloatComplex *y, + cuFloatComplex alpha, + const cuFloatComplex* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const cuFloatComplex *x, + cuFloatComplex beta); + + +/** +* \fn spgpuZhdiaspmv (spgpuHandle_t handle, cuDoubleComplex* z, const cuDoubleComplex *y, cuDoubleComplex alpha, const cuDoubleComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuDoubleComplex *x, cuDoubleComplex beta) + * Computes double precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuZhdiaspmv (spgpuHandle_t handle, + cuDoubleComplex* z, + const cuDoubleComplex *y, + cuDoubleComplex alpha, + const cuDoubleComplex* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const cuDoubleComplex *x, + cuDoubleComplex beta); + +/** @}*/ + +#ifdef __cplusplus +} +#endif + diff --git a/cuda/spgpu/hdia_conv.h b/cuda/spgpu/hdia_conv.h new file mode 100644 index 00000000..c22bf990 --- /dev/null +++ b/cuda/spgpu/hdia_conv.h @@ -0,0 +1,102 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2013 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int getHdiaHacksCount(int hackSize, int rowsCount); + +void computeHdiaHackOffsets( + int *allocationHeight, + int *hackOffsets, + int hackSize, + const void* diaValues, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType); + +void diaToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + const void* diaValues, + const int* diaOffsets, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType + ); + +void computeHdiaHackOffsetsFromCoo( + int *allocationHeight, + int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + int cooBaseIndex + ); + +void cooToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ); + +void bcooToBhdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType, + int blockSize); + +#ifdef __cplusplus +} +#endif + + +/** @}*/ diff --git a/cuda/spgpu/hell.c b/cuda/spgpu/hell.c new file mode 100644 index 00000000..07764ee3 --- /dev/null +++ b/cuda/spgpu/hell.c @@ -0,0 +1,104 @@ +#include "hell.h" +#include "hell_conv.h" + +void computeHellAllocSize( + int* allocationHeight, + int hackSize, + int rowsCount, + const int *ellRowLengths + ) +{ + int totalLen = 0; + int i; + int remainings; + int done; + int maxLen; + + for (i=0; i maxLen) + maxLen = curLen; + } + totalLen += maxLen; + } + + remainings = rowsCount % hackSize; + done = (rowsCount/hackSize)*hackSize; + maxLen = 0; + + for (i=0; i maxLen) + maxLen = curLen; + } + + *allocationHeight = totalLen + maxLen; +} + +void ellToHell( + void *hellValues, + int *hellIndices, + int* hackOffsets, + int hackSize, + + const void *ellValues, + const int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int *ellRowLengths, + int rowsCount, + spgpuType_t valuesType + ) +{ + + size_t elementSize = spgpuSizeOf(valuesType); + + int hacks = (rowsCount + hackSize - 1)/hackSize; + + char* currValPos = (char*)hellValues; + int* currIndPos = hellIndices; + + int hackOffset = 0; + int i; + for (i=0; i= rowsCount) + break; + + rowLen = ellRowLengths[row]; + + if (rowLen > maxLen) + maxLen = rowLen; + + for (k=0; k + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void computeHellAllocSize(int* allocationHeight, int hackSize, int rowsCount, const int *ellRowLengths) + * Compute the HELL format allocation's height for the Hell allocation + * (the resulting size should be allocationHeight*hackSize*sizeof(elementsType)). + * \param allocationHeight outputs the total allocation's height + * \param hackSize the hack size for this matrix (32 or 64 are good choices) + * \param rowsCount the rows count + * \param ellRowLengths the row lengths array from the ell matrix to convert +*/ +void computeHellAllocSize( + int* allocationHeight, + int hackSize, + int rowsCount, + const int *ellRowLengths + ); + +/** +* \fn void ellToHell(void *hellValues, int *hellIndices, int* hackOffsets, int hackSize, const void *ellValues, const int *ellIndices, int ellValuesPitch, int ellIndicesPitch, int *ellRowLengths, int rowsCount, spgpuType_t valuesType) + * Convert a matrix from the ELL format to the HELL format. + * \param hellValues pointer to the area that will be filled by the non zero coefficients + * \param hellIndices pointer to the area that will be filled by the non zero indices + * \param hackOffsets + * \param hackSize the hack size used to allocate hellValues and hellIndices (32 or 64 are good choices) + * \param ellValues the input matrix coefficients + * \param ellIndices the input matrix indices + * \param ellValuesPitch the input values allocation pitch (in number of elements) + * \param ellIndicesPitch the input indices allocation pitch (in number of elements) + * \param ellRowLengths the row lengths array of the input matrix + * \param rowsCount the rows count + * \param valuesType the type of hellValues and ellValues elements (i.e. SPGPU_TYPE_FLOAT or SPGPU_TYPE_DOUBLE) +*/ +void ellToHell( + void *hellValues, + int *hellIndices, + int* hackOffsets, + int hackSize, + const void *ellValues, + const int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int *ellRowLengths, + int rowsCount, + spgpuType_t valuesType + ); + +#ifdef __cplusplus +} +#endif + + +/** @}*/ diff --git a/cuda/spgpu/kernels/Makefile b/cuda/spgpu/kernels/Makefile new file mode 100644 index 00000000..85b97aa8 --- /dev/null +++ b/cuda/spgpu/kernels/Makefile @@ -0,0 +1,33 @@ +TOP=../../.. +include $(TOP)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOP)/lib +INCDIR=$(TOP)/include +MODDIR=$(TOP)/modules +UP=.. +LIBNAME=$(UP)/libspgpu.a +CINCLUDES=-I$(INCDIR) + +OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \ + cnrm2.o cscal.o cscat.o csetscal.o \ + dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o \ + dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \ + dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \ + ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \ + hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \ + hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \ + saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o \ + zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o + +objs: $(OBJS) +lib: objs + ar cur $(UP)/$(LIBNAME) $(OBJS) + + +clean: + /bin/rm -fr $(OBJS) +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) $(CUDA_INCLUDES) -c $< + diff --git a/cuda/spgpu/kernels/abs_base.cuh b/cuda/spgpu/kernels/abs_base.cuh new file mode 100644 index 00000000..d48f815d --- /dev/null +++ b/cuda/spgpu/kernels/abs_base.cuh @@ -0,0 +1,110 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),abs) + +#define BLOCK_SIZE 256 + +// Define: +//#define RES_VALUE_TYPE +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__device__ __host__ static inline bool is_one_float(float x) { return (x==1.0f); } +__device__ __host__ static inline bool is_one_cuFloatComplex(cuFloatComplex x) { return ((x.x==1.0f)&&(x.y==0.0f));} + + +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static inline bool is_one_double(double x) { return (x==1.0); } +__device__ __host__ static inline bool is_one_cuDoubleComplex(cuDoubleComplex x) { return ((x.x==1.0)&&(x.y==0.0));} +#endif + + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern_alpha) + (RES_VALUE_TYPE *y, int n, RES_VALUE_TYPE alpha, VALUE_TYPE* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since y, and x are accessed with the same offset by the same thread, + // and the write to y follows the read of x, then x could be y. + + y[id] = CONCAT(RES_VALUE_TYPE, _mul)(alpha, CONCAT(VALUE_TYPE, _abs)(x[id])); + } +} + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (RES_VALUE_TYPE *y, int n, VALUE_TYPE* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since y, and x are accessed with the same offset by the same thread, + // and the write to y follows the read of x, then x could be y. + + y[id] = CONCAT(VALUE_TYPE, _abs)(x[id]); + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, RES_VALUE_TYPE *y, int n, RES_VALUE_TYPE alpha, VALUE_TYPE* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + + if (CONCAT(is_one_,RES_VALUE_TYPE)(alpha)) + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(y, n, x); + else + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern_alpha)<<currentStream>>>(y, n, alpha, x); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device RES_VALUE_TYPE *y, + int n, + RES_VALUE_TYPE alpha, + __device VALUE_TYPE *x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, maxNForACall, alpha, x); + x = x + maxNForACall; + y = y + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) (handle, y, n, alpha, x); + cudaCheckError("CUDA error on abs"); +} diff --git a/cuda/spgpu/kernels/amax_base.cuh b/cuda/spgpu/kernels/amax_base.cuh new file mode 100644 index 00000000..4d7e7d4a --- /dev/null +++ b/cuda/spgpu/kernels/amax_base.cuh @@ -0,0 +1,233 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_AMAX_NAME +#define GEN_SPGPU_AMAX_NAME(x) CONCAT(CONCAT(spgpu,x),amax) + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#define BLOCK_SIZE 512 + +typedef float absType_float; +typedef float absType_cuFloatComplex; +typedef double absType_double; +typedef double absType_cuDoubleComplex; + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ __host__ static float abs_float(float a) { return fabsf(a); } +__device__ __host__ static float abs_cuFloatComplex(cuFloatComplex a) { return cuCabsf(a); } + + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ __host__ static double abs_double(float a) { return fabs(a); } +__device__ __host__ static double abs_cuDoubleComplex(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +static __device__ CONCAT(absType_,VALUE_TYPE) CONCAT(TYPE_SYMBOL,amaxReductionResult)[128]; + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxvv(VALUE_TYPE a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absa = CONCAT(abs_,VALUE_TYPE)(a); + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return MAX(absa,absb); +} + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxaa(CONCAT(absType_,VALUE_TYPE) a, CONCAT(absType_,VALUE_TYPE) b) +{ + return MAX(a,b); +} + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxav(CONCAT(absType_,VALUE_TYPE) a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return MAX(a,absb); +} + +__global__ void +CONCAT(GEN_SPGPU_AMAX_NAME(TYPE_SYMBOL),_kern) +(int n, VALUE_TYPE* x) +{ + __shared__ CONCAT(absType_,VALUE_TYPE) sSum[BLOCK_SIZE]; + + CONCAT(absType_,VALUE_TYPE) res = 0; + + VALUE_TYPE* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + VALUE_TYPE x1 = x[0]; + res = amaxav(res, x1); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + CONCAT(absType_,VALUE_TYPE) tRes[128]; + + CONCAT(GEN_SPGPU_AMAX_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, CONCAT(TYPE_SYMBOL,amaxReductionResult), blocks*sizeof(CONCAT(absType_,VALUE_TYPE))); + + for (int i=0; i= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ __host__ static double abs_double(float a) { return fabs(a); } +__device__ __host__ static double abs_cuDoubleComplex(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ __host__ static double double_add(double a, double b) { +#ifndef __CUDA__ARCH__ + return a + b; +#else + return PREC_DADD (a, b); +#endif +} +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +static __device__ CONCAT(absType_,VALUE_TYPE) CONCAT(TYPE_SYMBOL,asumReductionResult)[128]; + + +static __device__ CONCAT(absType_,VALUE_TYPE) asumvv(VALUE_TYPE a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absa = CONCAT(abs_,VALUE_TYPE)(a); + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return absa + absb; +} + +static __device__ __host__ CONCAT(absType_,VALUE_TYPE) asumaa(CONCAT(absType_,VALUE_TYPE) a, CONCAT(absType_,VALUE_TYPE) b) +{ + return a + b; +} + +static __device__ CONCAT(absType_,VALUE_TYPE) asumav(CONCAT(absType_,VALUE_TYPE) a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return a + absb; +} + +__global__ void +CONCAT(GEN_SPGPU_ASUM_NAME(TYPE_SYMBOL),_kern) +(int n, VALUE_TYPE* x) +{ + __shared__ CONCAT(absType_,VALUE_TYPE) sSum[BLOCK_SIZE]; + + CONCAT(absType_,VALUE_TYPE) res = 0; + + VALUE_TYPE* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + VALUE_TYPE x1 = x[0]; + res = asumav(res, x1); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + CONCAT(absType_,VALUE_TYPE) tRes[128]; + + CONCAT(GEN_SPGPU_ASUM_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, CONCAT(TYPE_SYMBOL,asumReductionResult), blocks*sizeof(CONCAT(absType_,VALUE_TYPE))); + + for (int i=0; icurrentStream>>>(z, n, alpha, x, y); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *z, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE *x, + __device VALUE_TYPE *y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, z, maxNForACall, alpha, x, y); + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) (handle, z, n, alpha, x, y); + cudaCheckError("CUDA error on axy"); +} + +void +GEN_SPGPU_MFUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *z, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y, + int count, + int pitch) +{ + for (int i=0; icurrentStream>>>(w, n, beta, z, alpha, x, y); + +} + +void +GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *w, + int n, + VALUE_TYPE beta, + __device VALUE_TYPE *z, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y + ) +{ + + if (CONCAT(VALUE_TYPE, _isZero(alpha))) + { + GEN_SPGPU_SCAL_NAME(TYPE_SYMBOL) + (handle, w, n, beta, z); + } + else if (CONCAT(VALUE_TYPE, _isZero(beta))) { + GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (handle, w, n, alpha, x, y); + } + else { + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + + CONCAT(GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL),_) + (handle, w, maxNForACall, beta, z, alpha, x, y); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + w = w + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL),_) + (handle, w, n, beta, z, alpha, x, y); + } + + cudaCheckError("CUDA error on axypbz"); +} + +void +GEN_SPGPU_MFUNC_NAME_2(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *w, + int n, + VALUE_TYPE beta, + __device VALUE_TYPE *z, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y, + int count, + int pitch) +{ + for (int i=0; icurrentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + spgpuCaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuCaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on saxpby"); +} + +void spgpuCmaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + + cuFloatComplex tRes[128]; + + spgpuCdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, sdotReductionResult, blocks*sizeof(cuFloatComplex)); + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + + spgpuCnrm2_kern<<currentStream>>>(n, x); + cudaMemcpyFromSymbol(tRes, snrm2ReductionResult,blocks*sizeof(float)); + + for (int i=0; i= 200 +#define PREC_FADD(a,b) ((a) + (b)) +#define PREC_FMUL(a,b) ((a) * (b)) +#else +#define PREC_FADD(a,b) __fadd_rn((a),(b)) +#define PREC_FMUL(a,b) __fmul_rn((a),(b)) +#endif + +#define PREC_DADD(a,b) ((a) + (b)) +#define PREC_DMUL(a,b) ((a) * (b)) + + +inline __host__ __device__ double2 make_double2(double s) +{ + return make_double2(s, s); +} + +inline __host__ __device__ double2 operator+(double2 a, double2 b) +{ + return make_double2(a.x + b.x, a.y + b.y); +} + +inline __host__ __device__ void operator+=(double2 &a, double2 b) +{ + a.x += b.x; a.y += b.y; +} + +inline __host__ __device__ double2 operator-(double2 a, double2 b) +{ + return make_double2(a.x - b.x, a.y - b.y); +} + +inline __host__ __device__ void operator-=(double2 &a, double2 b) +{ + a.x -= b.x; a.y -= b.y; +} + +inline __host__ __device__ double2 operator*(double2 a, double s) +{ + return make_double2(a.x * s, a.y * s); +} + +inline __host__ __device__ double2 operator*(double s, double2 a) +{ + return make_double2(a.x * s, a.y * s); +} + +inline __host__ __device__ void operator*=(double2 &a, double s) +{ + a.x *= s; a.y *= s; +} diff --git a/cuda/spgpu/kernels/dabs.cu b/cuda/spgpu/kernels/dabs.cu new file mode 100644 index 00000000..7fb9c074 --- /dev/null +++ b/cuda/spgpu/kernels/dabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define RES_VALUE_TYPE double +#define TYPE_SYMBOL D +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/damax.cu b/cuda/spgpu/kernels/damax.cu new file mode 100644 index 00000000..1e78cc1f --- /dev/null +++ b/cuda/spgpu/kernels/damax.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "amax_base.cuh" + diff --git a/cuda/spgpu/kernels/dasum.cu b/cuda/spgpu/kernels/dasum.cu new file mode 100644 index 00000000..71a37e80 --- /dev/null +++ b/cuda/spgpu/kernels/dasum.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "asum_base.cuh" + diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu new file mode 100644 index 00000000..83724ce2 --- /dev/null +++ b/cuda/spgpu/kernels/daxpby.cu @@ -0,0 +1,101 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0) + z[id] = PREC_DMUL(alpha,x[id]); + else + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } +} + + +void spgpuDaxpby_(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuDaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuDaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuDaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on daxpby"); +} + +void spgpuDmaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + double tRes[128]; + + spgpuDdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, ddotReductionResult,blocks*sizeof(double)); + + for (int i=0; i= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif + +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_vanilla) +#define GEN_SPGPU_DIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),diaspmv_vanilla) +#include "dia_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_prefetch) +#define GEN_SPGPU_DIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),diaspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "dia_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef ENABLE_CACHE +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache_prefetch) +#define GEN_SPGPU_DIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache_prefetch) +#include "dia_spmv_base_template.cuh" +#undef GEN_SPGPU_DIA_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache) +#define GEN_SPGPU_DIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache) +#include "dia_spmv_base_template.cuh" +#endif + +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv) +void +GEN_SPGPU_DIA_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* dM, + const int* offsets, + int dMPitch, + int rows, + int cols, + int diags, + const VALUE_TYPE *x, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (rows > maxNForACall) //managing large vectors + { + //if (diags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + //else + CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + #if 0 + if (diags < 20) + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + else + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); +#endif + y = y + maxNForACall; + z = z + maxNForACall; + dM = dM + maxNForACall; + + rows -= maxNForACall; + } + CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +#if 0 + //if (diags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + //else + if (diags < 20) + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + else + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +#endif + cudaCheckError("CUDA error on dia_spmv"); +} + diff --git a/cuda/spgpu/kernels/dia_spmv_base_template.cuh b/cuda/spgpu/kernels/dia_spmv_base_template.cuh new file mode 100644 index 00000000..792fa7ed --- /dev/null +++ b/cuda/spgpu/kernels/dia_spmv_base_template.cuh @@ -0,0 +1,217 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x, VALUE_TYPE beta) +{ + int i = threadIdx.x + blockIdx.x * (blockDim.x); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows && CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + dM += i; + + extern __shared__ int offsetsChunk[]; + + int rounds = (diags + blockDim.x - 1)/blockDim.x; + + for (int r = 0; r < rounds; r++) + { + // in the last round diags will be <= blockDim.x + if (threadIdx.x < diags) + offsetsChunk[threadIdx.x] = offsets[threadIdx.x]; + + __syncthreads(); + + if (i < rows) + { + int count = min(diags, blockDim.x ); + + +#ifdef USE_PREFETCHING + int j; + for (j=0; j<=count-3; j += 3) + { + // Prefetch 3 values + int column1 = offsetsChunk[j] + i; + int column2 = offsetsChunk[j+1] + i; + int column3 = offsetsChunk[j+2] + i; + + bool inside1 = column1 >= 0 && column1 < cols; + bool inside2 = column2 >= 0 && column2 < cols; + bool inside3 = column3 >= 0 && column3 < cols; + + // Anticipate global memory read + + VALUE_TYPE xValue1, xValue2, xValue3; + VALUE_TYPE mValue1, mValue2, mValue3; + + if(inside1) + { + mValue1 = dM[0]; +#ifdef ENABLE_CACHE + xValue1 = fetchTex (column1); +#else + xValue1 = x[column1]; +#endif + } + dM += dMPitch; + + if(inside2) + { + mValue2 = dM[0]; +#ifdef ENABLE_CACHE + xValue2 = fetchTex (column2); +#else + xValue2 = x[column2]; +#endif + } + dM += dMPitch; + + if(inside3) + { + mValue3 = dM[0]; +#ifdef ENABLE_CACHE + xValue3 = fetchTex (column3); +#else + xValue3 = x[column3]; +#endif + } + dM += dMPitch; + + if(inside1) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue1, xValue1, zProd); + if(inside2) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue2, xValue2, zProd); + if(inside3) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue3, xValue3, zProd); + } + + for (;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + + } + + dM += dMPitch; + } +#else + for (int j=0; j= 0 && column < cols) + { + + VALUE_TYPE xValue; + +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += dMPitch; + } +#endif + } + + diags -= blockDim.x; + offsets += blockDim.x; + __syncthreads(); + } + + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (i >= rows) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[i] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[i] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x) +{ + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, CONCAT(zero_,VALUE_TYPE)()); +} + +__global__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x, VALUE_TYPE beta) +{ + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +} + +void +CONCAT(_,GEN_SPGPU_DIA_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, + const VALUE_TYPE *x, VALUE_TYPE beta) +{ + dim3 block (THREAD_BLOCK ); + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK ); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + else + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x); + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/dia_sspmv.cu b/cuda/spgpu/kernels/dia_sspmv.cu new file mode 100644 index 00000000..2870d780 --- /dev/null +++ b/cuda/spgpu/kernels/dia_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "dia.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "dia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/dia_zspmv.cu b/cuda/spgpu/kernels/dia_zspmv.cu new file mode 100644 index 00000000..7e2fe68f --- /dev/null +++ b/cuda/spgpu/kernels/dia_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "dia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "dia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/dnrm2.cu b/cuda/spgpu/kernels/dnrm2.cu new file mode 100644 index 00000000..450882c4 --- /dev/null +++ b/cuda/spgpu/kernels/dnrm2.cu @@ -0,0 +1,157 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +//#define USE_CUBLAS + +//#define ASSUME_LOCK_SYNC_PARALLELISM + + +#define BLOCK_SIZE 512 + +static __device__ double dnrm2ReductionResult[128]; + +__global__ void spgpuDnrm2_kern(int n, double* x) +{ + __shared__ double sSum[BLOCK_SIZE]; + + double res = 0; + + double* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + double x1 = x[0]; + res = PREC_DADD(res, PREC_DMUL(x1, x1)); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + double tRes[128]; + + spgpuDnrm2_kern<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, dnrm2ReductionResult,blocks*sizeof(double)); + + for (int i=0; icurrentStream >>> (alpha, cM, rP, cMPitch, rPPitch, rS, nnz, aI, aJ, aVal, baseIndex); +} + +void +GEN_SPGPU_ELL_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + VALUE_TYPE alpha, + VALUE_TYPE* cM, + const int* rP, + int cMPitch, + int rPPitch, + const int* rS, + int nnz, + int *aI, + int *aJ, + VALUE_TYPE *aVal, + int baseIndex) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (nnz > maxNForACall) //managing large vectors + { + CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) + (handle, alpha, cM, rP, cMPitch, rPPitch, rS, maxNForACall, aI, aJ, aVal, baseIndex); + + aI = aI + maxNForACall; + aJ = aJ + maxNForACall; + aVal = aVal + maxNForACall; + + nnz -= maxNForACall; + } + + CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) + (handle, alpha, cM, rP, cMPitch, rPPitch, rS, nnz, aI, aJ, aVal, baseIndex); + + cudaCheckError("CUDA error on ell_csput"); +} + diff --git a/cuda/spgpu/kernels/ell_dcsput.cu b/cuda/spgpu/kernels/ell_dcsput.cu new file mode 100644 index 00000000..00e60565 --- /dev/null +++ b/cuda/spgpu/kernels/ell_dcsput.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_dspmv.cu b/cuda/spgpu/kernels/ell_dspmv.cu new file mode 100644 index 00000000..4e682772 --- /dev/null +++ b/cuda/spgpu/kernels/ell_dspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#define TEX_FETCH_TYPE int2 +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_scsput.cu b/cuda/spgpu/kernels/ell_scsput.cu new file mode 100644 index 00000000..eeb5c6bb --- /dev/null +++ b/cuda/spgpu/kernels/ell_scsput.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_spmv_base.cuh b/cuda/spgpu/kernels/ell_spmv_base.cuh new file mode 100644 index 00000000..e2af5896 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base.cuh @@ -0,0 +1,154 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_ELL_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_vanilla) +#define GEN_SPGPU_ELL_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),ellspmv_vanilla) +#include "ell_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_prefetch) +#define GEN_SPGPU_ELL_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),ellspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "ell_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache_prefetch) +#define GEN_SPGPU_ELL_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache_prefetch) +#include "ell_spmv_base_template.cuh" +#undef GEN_SPGPU_ELL_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache) +#define GEN_SPGPU_ELL_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache) +#include "ell_spmv_base_template.cuh" +#endif +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv) +void +GEN_SPGPU_ELL_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* cM, + const int* rP, + int cMPitch, + int rPPitch, + const int* rS, + const __device int* rIdx, + int avgNnzPerRow, + int maxNnzPerRow, + int rows, + const VALUE_TYPE *x, + VALUE_TYPE beta, + int baseIndex) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (rows > maxNForACall) //managing large vectors + { +#if 0 + if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) +#endif + CONCAT(_,GEN_SPGPU_ELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); +#if 0 + else if (avgNnzPerRow < 20) + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); + else + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); +#endif + y = y + maxNForACall; + z = z + maxNForACall; + cM = cM + maxNForACall; + rP = rP + maxNForACall; + rS = rS + maxNForACall; + + rows -= maxNForACall; + } +#if 0 + if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) +#endif + CONCAT(_,GEN_SPGPU_ELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); + #if 0 + else if (avgNnzPerRow < 20) + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); + else + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); +#endif + cudaCheckError("CUDA error on ell_spmv"); +} + diff --git a/cuda/spgpu/kernels/ell_spmv_base_nors.cuh b/cuda/spgpu/kernels/ell_spmv_base_nors.cuh new file mode 100644 index 00000000..8cd89704 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base_nors.cuh @@ -0,0 +1,340 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int rows, int maxNnzPerRow, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[2][THREAD_BLOCK+1]; + + if (i < rows) + { + rP += i; cM += i; + + int rowSizeM = maxNnzPerRow / 4; + + + if ((maxNnzPerRow % 4) > threadIdx.y) + ++rowSizeM; + + rP += threadIdx.y*rPPitch; + cM += threadIdx.y*cMPitch; + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += 4*rPPitch; + + value = cM[0]; + cM += 4*cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y > 1) + temp[threadIdx.y - 2][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y <= 1) + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.y][threadIdx.x]); + + if (threadIdx.y == 1) + temp[1][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[1][threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, const int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) + { + rP += i; cM += i; + + int rowSizeM = maxNnzPerRow / 2; + + if (threadIdx.y == 0) + { + if (maxNnzPerRow % 2) + ++rowSizeM; + } + else + { + rP += rPPitch; + cM += cMPitch; + } + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + rP += i; cM += i; + +#ifdef USE_PREFETCHING + for (int j = 0; j < maxNnzPerRow / 2; j++) + { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += rPPitch; + pointers2 = rP[0] - baseIndex; + rP += rPPitch; + + values1 = cM[0]; + cM += cMPitch; + + values2 = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (maxNnzPerRow % 2) + { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < maxNnzPerRow; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rIdx, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) + { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + */ +} + + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + */ + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) + (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) + (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); +} diff --git a/cuda/spgpu/kernels/ell_spmv_base_template.cuh b/cuda/spgpu/kernels/ell_spmv_base_template.cuh new file mode 100644 index 00000000..fa39d8a6 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base_template.cuh @@ -0,0 +1,426 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define THREAD_BLOCK 128 + +#include "ell_spmv_base_nors.cuh" + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[2][THREAD_BLOCK+1]; + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + int rowSizeM = rowSize / 4; + + + if ((rowSize % 4) > threadIdx.y) + ++rowSizeM; + + rP += threadIdx.y*rPPitch; + cM += threadIdx.y*cMPitch; + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += 4*rPPitch; + + value = cM[0]; + cM += 4*cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y > 1) + temp[threadIdx.y - 2][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y <= 1) + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.y][threadIdx.x]); + + if (threadIdx.y == 1) + temp[1][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[1][threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + int rowSizeM = rowSize / 2; + + if (threadIdx.y == 0) + { + if (rowSize % 2) + ++rowSizeM; + } + else + { + rP += rPPitch; + cM += cMPitch; + } + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + +#ifdef USE_PREFETCHING + for (int j = 0; j < rowSize / 2; j++) + { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += rPPitch; + pointers2 = rP[0] - baseIndex; + rP += rPPitch; + + values1 = cM[0]; + cM += cMPitch; + + values2 = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (rowSize % 2) + { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < rowSize; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, const int* rIdx, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) + { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + */ +} + + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + */ + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); +} + +void +CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, + const __device int* rIdx, int avgNnzPerRow, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int avgThreshold; + + if (handle->capabilityMajor < 2) + avgThreshold = 8; + else if (handle->capabilityMajor < 3) + avgThreshold = 16; + else + avgThreshold = 32; + +#if defined(ELL_FORCE_THREADS_1) + dim3 block (THREAD_BLOCK, 1); +#elif defined(ELL_FORCE_THREADS_2) + dim3 block (THREAD_BLOCK, 2); +#else + dim3 block (THREAD_BLOCK, avgNnzPerRow >= avgThreshold ? 2 : 1); +#endif + + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + if (rIdx) + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs), cudaFuncCachePreferL1); + + if (rS) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rIdx, maxNnzPerRow, rows, x, beta, baseIndex); + } + else + { + + + if (rS) + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, baseIndex); + } + else + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, baseIndex); + } + } + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/ell_sspmv.cu b/cuda/spgpu/kernels/ell_sspmv.cu new file mode 100644 index 00000000..e64e03e8 --- /dev/null +++ b/cuda/spgpu/kernels/ell_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_zcsput.cu b/cuda/spgpu/kernels/ell_zcsput.cu new file mode 100644 index 00000000..3d0f3293 --- /dev/null +++ b/cuda/spgpu/kernels/ell_zcsput.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_zspmv.cu b/cuda/spgpu/kernels/ell_zspmv.cu new file mode 100644 index 00000000..a9c0fe64 --- /dev/null +++ b/cuda/spgpu/kernels/ell_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/gath_base.cuh b/cuda/spgpu/kernels/gath_base.cuh new file mode 100644 index 00000000..a0e77b44 --- /dev/null +++ b/cuda/spgpu/kernels/gath_base.cuh @@ -0,0 +1,86 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),gath) + +#define BLOCK_SIZE 256 + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (VALUE_TYPE* values, int count, const int* indices, int firstIndex, const VALUE_TYPE* vector) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < count) + { + int pos = indices[id]-firstIndex; + + if (pos < 0) + return; + + values[id] = vector[pos]; + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, VALUE_TYPE *xValues, int xNnz, + const __device int *xIndices, int xBaseIndex, const VALUE_TYPE* y) +{ + int msize = (xNnz+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(xValues, xNnz, xIndices, xBaseIndex, y); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device VALUE_TYPE* y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (xNnz > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, xValues, maxNForACall, xIndices, xBaseIndex, y); + xIndices += maxNForACall; + xValues += maxNForACall; + xNnz -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, xValues, xNnz, xIndices, xBaseIndex, y); + cudaCheckError("CUDA error on gath"); +} + diff --git a/cuda/spgpu/kernels/hdia_cspmv.cu b/cuda/spgpu/kernels/hdia_cspmv.cu new file mode 100644 index 00000000..52360b40 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_cspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "hdia_spmv_base.cuh" diff --git a/cuda/spgpu/kernels/hdia_dspmv.cu b/cuda/spgpu/kernels/hdia_dspmv.cu new file mode 100644 index 00000000..5fdb08e9 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_dspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +//#define ENABLE_CACHE +#define VALUE_TYPE double +#define TYPE_SYMBOL D +//#define TEX_FETCH_TYPE int2 +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hdia_spmv_base.cuh b/cuda/spgpu/kernels/hdia_spmv_base.cuh new file mode 100644 index 00000000..2c8cfbaf --- /dev/null +++ b/cuda/spgpu/kernels/hdia_spmv_base.cuh @@ -0,0 +1,149 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_HDIA_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla) +#define GEN_SPGPU_HDIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla) +#include "hdia_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch) +#define GEN_SPGPU_HDIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "hdia_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch) +#define GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch) +#include "hdia_spmv_base_template.cuh" +#undef GEN_SPGPU_HDIA_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache) +#define GEN_SPGPU_HDIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache) +#include "hdia_spmv_base_template.cuh" +#endif +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv) +void +GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const VALUE_TYPE *x, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + // maxNForACall should be a multiple of hackSize + maxNForACall = (maxNForACall/hackSize)*hackSize; + + while (rows > maxNForACall) //managing large vectors + { + + CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //if (avgDiags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //else + //if (avgDiags < 20) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //else + //CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + + y = y + maxNForACall; + z = z + maxNForACall; + hackOffsets += maxNForACall/hackSize; + + rows -= maxNForACall; + } + CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + //if (avgDiags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + //else + //if (avgDiags < 20) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + //else + //CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + + cudaCheckError("CUDA error on hdia_spmv"); +} + diff --git a/cuda/spgpu/kernels/hdia_spmv_base_template.cuh b/cuda/spgpu/kernels/hdia_spmv_base_template.cuh new file mode 100644 index 00000000..155179fd --- /dev/null +++ b/cuda/spgpu/kernels/hdia_spmv_base_template.cuh @@ -0,0 +1,253 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, + int rows, int cols, const VALUE_TYPE *x, VALUE_TYPE beta, int hackCount) +{ + int i = threadIdx.x + blockIdx.x * (blockDim.x); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows && CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + + // shared between offsetsChunks and warpHackOffsetTemp + extern __shared__ int dynShrMem[]; + + int hackOffset = 0; + int nextOffset = 0; + + unsigned int laneId = threadIdx.x % warpSize; + unsigned int warpId = threadIdx.x / warpSize; + +#if __CUDA_ARCH__ < 300 + int* warpHackOffset = dynShrMem; + + + if (laneId == 0 && i < rows) + { + warpHackOffset[warpId] = hackOffsets[hackId]; + warpHackOffset[warpId + (blockDim.x / warpSize)] = hackOffsets[hackId+1]; + } + + __syncthreads(); + hackOffset = warpHackOffset[warpId]; + nextOffset = warpHackOffset[warpId + blockDim.x / warpSize]; + __syncthreads(); +#elif __CUDA_ARCH__ < 700 + if (laneId == 0 && i < rows) + { + hackOffset = hackOffsets[hackId]; + nextOffset = hackOffsets[hackId+1]; + } + + hackOffset = __shfl(hackOffset, 0); + nextOffset = __shfl(nextOffset, 0); +#else + if (laneId == 0 && i < rows) + { + hackOffset = hackOffsets[hackId]; + nextOffset = hackOffsets[hackId+1]; + } + + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0); + nextOffset = __shfl_sync(0xFFFFFFFF,nextOffset, 0); + +#endif + + if (hackId >= hackCount) + return; + + dM += hackOffset*hackSize + hackLaneId; + offsets += hackOffset; + + // diags for this hack is next hackOffset minus current hackOffset + int diags = nextOffset - hackOffset; + + + // Warp oriented + int rounds = (diags + warpSize - 1)/warpSize; + + volatile int *offsetsChunk = dynShrMem + warpId*warpSize; + + for (int r = 0; r < rounds; r++) + { + // in the last round diags will be <= warpSize + if (laneId < diags) + offsetsChunk[laneId] = offsets[laneId]; + + if (i < rows) + { + int count = min(diags, warpSize); + +#ifdef USE_PREFETCHING + int j; + for (j=0; j<=count-2; j += 2) + { + // prefetch 2 values + int column1 = offsetsChunk[j] + i; + int column2 = offsetsChunk[j+1] + i; + + VALUE_TYPE xValue1, xValue2; + VALUE_TYPE mValue1, mValue2; + + bool inside1 = column1 >= 0 && column1 < cols; + bool inside2 = column2 >= 0 && column2 < cols; + + if(inside1) + { + mValue1 = dM[0]; +#ifdef ENABLE_CACHE + xValue1 = fetchTex (column1); +#else + xValue1 = x[column1]; +#endif + } + + dM += hackSize; + + if(inside2) + { + mValue2 = dM[0]; +#ifdef ENABLE_CACHE + xValue2 = fetchTex (column2); +#else + xValue2 = x[column2]; +#endif + } + + dM += hackSize; + + if(inside1) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue1, xValue1, zProd); + if(inside2) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue2, xValue2, zProd); + } + + for (;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += hackSize; + } +#else + for (int j=0;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += hackSize; + } +#endif + + } + + diags -= warpSize; + offsets += warpSize; + } + + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (i >= rows) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[i] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[i] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const VALUE_TYPE *x, int hackCount) +{ + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, CONCAT(zero_,VALUE_TYPE)(), hackCount); +} + +__global__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const VALUE_TYPE *x, VALUE_TYPE beta, int hackCount) +{ + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta, hackCount); +} + +void +CONCAT(_,GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, + const VALUE_TYPE *x, VALUE_TYPE beta) +{ + dim3 block (THREAD_BLOCK); + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + + int hackCount = (rows + hackSize - 1)/hackSize; + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta, hackCount); + else + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, hackCount); + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/hdia_sspmv.cu b/cuda/spgpu/kernels/hdia_sspmv.cu new file mode 100644 index 00000000..7370f368 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hdia_zspmv.cu b/cuda/spgpu/kernels/hdia_zspmv.cu new file mode 100644 index 00000000..4f955ed9 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_cspmv.cu b/cuda/spgpu/kernels/hell_cspmv.cu new file mode 100644 index 00000000..86fbfc2b --- /dev/null +++ b/cuda/spgpu/kernels/hell_cspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "hell_spmv_base.cuh" diff --git a/cuda/spgpu/kernels/hell_dspmv.cu b/cuda/spgpu/kernels/hell_dspmv.cu new file mode 100644 index 00000000..b06f8cfb --- /dev/null +++ b/cuda/spgpu/kernels/hell_dspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#define TEX_FETCH_TYPE int2 +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_spmv_base.cuh b/cuda/spgpu/kernels/hell_spmv_base.cuh new file mode 100644 index 00000000..ca074d33 --- /dev/null +++ b/cuda/spgpu/kernels/hell_spmv_base.cuh @@ -0,0 +1,159 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_HELL_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#if __CUDA_ARCH__ < 300 +extern __shared__ int dynShrMem[]; +#endif + +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_vanilla) +#define GEN_SPGPU_HELL_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hellspmv_vanilla) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#if 0 +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_prefetch) +#define GEN_SPGPU_HELL_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hellspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "hell_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_HELL_NAME +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache_prefetch) +#define GEN_SPGPU_HELL_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache_prefetch) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#undef USE_PREFETCHING +#endif +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache) +#define GEN_SPGPU_HELL_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv) +void +GEN_SPGPU_HELL_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* cM, + const int* rP, + int hackSize, + const __device int* hackOffsets, + const __device int* rS, + const __device int* rIdx, + int avgNnzPerRow, + int rows, + const VALUE_TYPE *x, + VALUE_TYPE beta, + int baseIndex) +{ + + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + // maxNForACall should be a multiple of hackSize + maxNForACall = (maxNForACall/hackSize)*hackSize; + //fprintf(stderr,"Entering kernel %d maxNForACall\n",maxNForACall); + + while (rows > maxNForACall) //managing large vectors + { + + CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); + /* if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + /* else if (avgNnzPerRow < 20) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + /* else */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + + y = y + maxNForACall; + z = z + maxNForACall; + hackOffsets = hackOffsets + maxNForACall/hackSize; + rS = rS + maxNForACall; + + rows -= maxNForACall; + } + //fprintf(stderr,"Calling kernel on %d rows\n",rows); + CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); + //fprintf(stderr,"Done kernel on %d rows\n",rows); + /* if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + /* else if (avgNnzPerRow < 20) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + /* else */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + + cudaCheckError("CUDA error on hell_spmv"); +} + diff --git a/cuda/spgpu/kernels/hell_spmv_base_template.cuh b/cuda/spgpu/kernels/hell_spmv_base_template.cuh new file mode 100644 index 00000000..9ecd8f74 --- /dev/null +++ b/cuda/spgpu/kernels/hell_spmv_base_template.cuh @@ -0,0 +1,357 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#define IDX2 +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) { + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + int hackOffset; + unsigned int laneId = threadIdx.x % 32; +#if __CUDA_ARCH__ < 300 + // "volatile" used to avoid __syncthreads() + volatile int* warpHackOffset = dynShrMem; + + unsigned int warpId = threadIdx.x / 32; + + if (laneId == 0) + warpHackOffset[warpId] = hackOffsets[hackId]; + + hackOffset = warpHackOffset[warpId] + hackLaneId; +#elif __CUDA_ARCH__ < 700 + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl(hackOffset, 0) + hackLaneId; +#else + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId; +#endif + + rP += hackOffset; + cM += hackOffset; + + int rowSize = rS[i]; + int rowSizeM = rowSize / 2; + + if (threadIdx.y == 0) { + if (rowSize % 2) + ++rowSizeM; + } else { + rP += hackSize; + cM += hackSize; + } + + + for (int j = 0; j < rowSizeM; j++) { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += hackSize; + rP += hackSize; + + value = cM[0]; + cM += hackSize; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + + __syncthreads(); + + if (threadIdx.y == 0) { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) { + + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + int hackOffset; + unsigned int laneId = threadIdx.x % 32; +#if __CUDA_ARCH__ < 300 + // "volatile" used to avoid __syncthreads() + volatile int* warpHackOffset = dynShrMem; + + unsigned int warpId = threadIdx.x / 32; + + if (laneId == 0) + warpHackOffset[warpId] = hackOffsets[hackId]; + + hackOffset = warpHackOffset[warpId] + hackLaneId; +#elif __CUDA_ARCH__ < 700 + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl(hackOffset, 0) + hackLaneId; +#else + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId; +#endif + + rP += hackOffset; + cM += hackOffset; + + int rowSize = rS[i]; + +#ifdef USE_PREFETCHING + for (int j = 0; j < rowSize / 2; j++) { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += hackSize; + pointers2 = rP[0] - baseIndex; + rP += hackSize; + + values1 = cM[0]; + cM += hackSize; + + values2 = cM[0]; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (rowSize % 2) { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < rowSize; j++) { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += hackSize; + + value = cM[0]; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + +} + +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, const int* rIdx, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } +#if 1 + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#endif +} + + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + +#ifdef IDX2 + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#endif + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn) + (VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +} + +void +CONCAT(_,GEN_SPGPU_HELL_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, + const __device int* rIdx, int avgNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int avgThreshold; + + if (handle->capabilityMajor < 2) + avgThreshold = 8; + else if (handle->capabilityMajor < 3) + avgThreshold = 16; + else + avgThreshold = 32; +#ifdef IDX2 +#if defined(HELL_FORCE_THREADS_1) + dim3 block (THREAD_BLOCK, 1); +#elif defined(HELL_FORCE_THREADS_2) + dim3 block (THREAD_BLOCK, 2); +#else + dim3 block (THREAD_BLOCK, avgNnzPerRow >= avgThreshold ? 2 : 1); +#endif +#else + dim3 block (THREAD_BLOCK, 1); +#endif + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + + // Should we generalize the code to 1/2/4/8 threads per row? + // And maybe adjust THREAD_BLOCK size? + int shrMemSize; + shrMemSize=THREAD_BLOCK*sizeof(VALUE_TYPE); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + if (rIdx) { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx), cudaFuncCachePreferL1); + + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, rows, x, beta, baseIndex); + } else { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, baseIndex); + } + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} diff --git a/cuda/spgpu/kernels/hell_sspmv.cu b/cuda/spgpu/kernels/hell_sspmv.cu new file mode 100644 index 00000000..8d296b81 --- /dev/null +++ b/cuda/spgpu/kernels/hell_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_zspmv.cu b/cuda/spgpu/kernels/hell_zspmv.cu new file mode 100644 index 00000000..784ab719 --- /dev/null +++ b/cuda/spgpu/kernels/hell_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/igath.cu b/cuda/spgpu/kernels/igath.cu new file mode 100644 index 00000000..8c0fe1b6 --- /dev/null +++ b/cuda/spgpu/kernels/igath.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "gath_base.cuh" diff --git a/cuda/spgpu/kernels/iscat.cu b/cuda/spgpu/kernels/iscat.cu new file mode 100644 index 00000000..2001a740 --- /dev/null +++ b/cuda/spgpu/kernels/iscat.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "scat_base.cuh" diff --git a/cuda/spgpu/kernels/isetscal.cu b/cuda/spgpu/kernels/isetscal.cu new file mode 100644 index 00000000..d54eae95 --- /dev/null +++ b/cuda/spgpu/kernels/isetscal.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "setscal_base.cuh" diff --git a/cuda/spgpu/kernels/mathbase.cuh b/cuda/spgpu/kernels/mathbase.cuh new file mode 100644 index 00000000..e7d77778 --- /dev/null +++ b/cuda/spgpu/kernels/mathbase.cuh @@ -0,0 +1,53 @@ +#pragma once + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } +__device__ __host__ static bool float_isZero(float x) { return x == 0.0f; } +__device__ __host__ static bool int_isZero(int x) { return x == 0; } +__device__ __host__ static bool int_isNotZero(int x) { return x != 0; } + +__device__ static int int_fma(int a, int b, int c) { return ((a*b)+c); } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } +__device__ static float float_abs(float a) { return fabsf(a); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_abs(cuFloatComplex a) { return make_cuFloatComplex(cuCabsf(a),0); } + +//__device__ static float cuFloatComplex_abs(cuFloatComplex a) { return cuCabsf(a); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } +__device__ __host__ static bool double_isZero(double x) { return x == 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } +__device__ static double double_abs(double a) { return fabs (a); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_abs(cuDoubleComplex a) { return make_cuDoubleComplex(cuCabs(a),0); } +//__device__ static double cuDoubleComplex_abs(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif diff --git a/cuda/spgpu/kernels/sabs.cu b/cuda/spgpu/kernels/sabs.cu new file mode 100644 index 00000000..e335e773 --- /dev/null +++ b/cuda/spgpu/kernels/sabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define RES_VALUE_TYPE float +#define TYPE_SYMBOL S +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/samax.cu b/cuda/spgpu/kernels/samax.cu new file mode 100644 index 00000000..aaef17e2 --- /dev/null +++ b/cuda/spgpu/kernels/samax.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "amax_base.cuh" + diff --git a/cuda/spgpu/kernels/sasum.cu b/cuda/spgpu/kernels/sasum.cu new file mode 100644 index 00000000..33392203 --- /dev/null +++ b/cuda/spgpu/kernels/sasum.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "asum_base.cuh" + diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu new file mode 100644 index 00000000..2c46f19e --- /dev/null +++ b/cuda/spgpu/kernels/saxpby.cu @@ -0,0 +1,100 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0f) + z[id] = PREC_FMUL(alpha,x[id]); + else + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } +} + + +void spgpuSaxpby_(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuSaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuSaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on saxpby"); +} + +void spgpuSmaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x, + int count, int pitch) +{ + + for (int i=0; icurrentStream>>>(y, n, alpha, x); +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *y, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE *x) +{ + + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + CONCAT(_,GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL))(handle, y, maxNForACall, alpha, x); + x = x + maxNForACall; + y = y + maxNForACall; + n -= maxNForACall; + } + + CONCAT(_,GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL))(handle, y, n, alpha, x); + + cudaCheckError("CUDA error on scal"); +} diff --git a/cuda/spgpu/kernels/scat_base.cuh b/cuda/spgpu/kernels/scat_base.cuh new file mode 100644 index 00000000..f6f619b1 --- /dev/null +++ b/cuda/spgpu/kernels/scat_base.cuh @@ -0,0 +1,89 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),scat) + +#define BLOCK_SIZE 256 + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (VALUE_TYPE* vector, int count, const int* indices, const VALUE_TYPE* values, int firstIndex, VALUE_TYPE beta) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < count) + { + int pos = indices[id]-firstIndex; + if (pos < 0) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + vector[pos] = CONCAT(VALUE_TYPE, _fma)(beta, vector[pos], values[id]); + else + vector[pos] = values[id]; + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, VALUE_TYPE* y, int xNnz, const VALUE_TYPE *xValues, + const __device int *xIndices, int xBaseIndex, VALUE_TYPE beta) +{ + int msize = (xNnz+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(y, xNnz, xIndices, xValues, xBaseIndex, beta); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE* y, + int xNnz, + const __device VALUE_TYPE *xValues, + const __device int *xIndices, + int xBaseIndex, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (xNnz > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, maxNForACall, xValues, xIndices, xBaseIndex, beta); + xIndices += maxNForACall; + xValues += maxNForACall; + xNnz -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, xNnz, xValues, xIndices, xBaseIndex, beta); + cudaCheckError("CUDA error on scat"); +} + diff --git a/cuda/spgpu/kernels/sdot.cu b/cuda/spgpu/kernels/sdot.cu new file mode 100644 index 00000000..c19c7710 --- /dev/null +++ b/cuda/spgpu/kernels/sdot.cu @@ -0,0 +1,175 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +//#define USE_CUBLAS + +#ifdef USE_CUBLAS +#include "cublas.h" +#endif + +#define BLOCK_SIZE 320 +//#define BLOCK_SIZE 512 + +//#define ASSUME_LOCK_SYNC_PARALLELISM + +#ifndef USE_CUBLAS +static __device__ float sdotReductionResult[128]; +#endif + +__global__ void spgpuSdot_kern(int n, float* x, float* y) +{ + __shared__ float sSum[BLOCK_SIZE]; + + float res = 0; + + float* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + y += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + int numSteps = (lastX - x + blockOffset - 1)/blockOffset; + + // prefetching + for (int j = 0; j < numSteps / 2; j++) + { + float x1 = x[0]; x += blockOffset; + float y1 = y[0]; y += blockOffset; + float x2 = x[0]; x += blockOffset; + float y2 = y[0]; y += blockOffset; + + res = PREC_FADD(res, PREC_FMUL(x1,y1)); + res = PREC_FADD(res, PREC_FMUL(x2,y2)); + + } + + if (numSteps % 2) + { + res = PREC_FADD(res, PREC_FMUL(*x,*y)); + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + spgpuSdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, sdotReductionResult, blocks*sizeof(float)); + + for (int i=0; icurrentStream>>>(y, n, val); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + int first, + int last, + int baseIndex, + VALUE_TYPE val, + __device VALUE_TYPE* y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + int n=last-first+1; + y += (first-baseIndex); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, maxNForACall, val,y); + y += maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, n, val,y); + cudaCheckError("CUDA error on scat"); +} + diff --git a/cuda/spgpu/kernels/sgath.cu b/cuda/spgpu/kernels/sgath.cu new file mode 100644 index 00000000..d1980bc6 --- /dev/null +++ b/cuda/spgpu/kernels/sgath.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "gath_base.cuh" diff --git a/cuda/spgpu/kernels/snrm2.cu b/cuda/spgpu/kernels/snrm2.cu new file mode 100644 index 00000000..27b8a6e9 --- /dev/null +++ b/cuda/spgpu/kernels/snrm2.cu @@ -0,0 +1,166 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +//#define USE_CUBLAS + +#define BLOCK_SIZE 320 +//#define BLOCK_SIZE 512 + +//#define ASSUME_LOCK_SYNC_PARALLELISM + +static __device__ float snrm2ReductionResult[128]; + +__global__ void spgpuSnrm2_kern(int n, float* x) +{ + __shared__ float sSum[BLOCK_SIZE]; + + float res = 0; + + float* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + int numSteps = (lastX - x + blockOffset - 1)/blockOffset; + + // prefetching + for (int j = 0; j < numSteps / 2; j++) + { + float x1 = x[0]; x += blockOffset; + float x2 = x[0]; x += blockOffset; + + res = PREC_FADD(res, PREC_FMUL(x1,x1)); + res = PREC_FADD(res, PREC_FMUL(x2,x2)); + + } + + if (numSteps % 2) + { + float x1 = x[0]; + res = PREC_FADD(res, PREC_FMUL(x1,x1)); + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + + spgpuSnrm2_kern<<currentStream>>>(n, x); + cudaMemcpyFromSymbol(tRes, snrm2ReductionResult,blocks*sizeof(float)); + + for (int i=0; icurrentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuZaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuZaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on daxpby"); +} + +void spgpuZmaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + cuDoubleComplex tRes[128]; + + spgpuZdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, ddotReductionResult,blocks*sizeof(cuDoubleComplex)); + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + double tRes[128]; + + spgpuZnrm2_kern<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, dnrm2ReductionResult,blocks*sizeof(double)); + + for (int i=0; i aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(cone + (cone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(cone,a,xv,czero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(cone,agpu,xv,czero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(cone,agpu,xg,czero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-cone,bg,+cone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(2*psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program c_file_spmv + + + + + diff --git a/test/gpukern/d_file_spmv.F90 b/test/gpukern/d_file_spmv.F90 new file mode 100644 index 00000000..2bbc0bc4 --- /dev/null +++ b/test/gpukern/d_file_spmv.F90 @@ -0,0 +1,496 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program d_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_dspmat_type) :: a, aux_a, agpu + + ! dense matrices + real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) + real(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + real(psb_dpk_), pointer :: b_col_glob(:) + type(psb_d_vect_type) :: b_col, x_col, r_col + type(psb_d_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_d_vect_gpu) :: vmold +#endif + real(psb_dpk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: err, eps, damatsize, dgmatsize + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_d_coo_sparse_mat), target :: acoo + type(psb_d_csr_sparse_mat), target :: acsr + type(psb_d_ell_sparse_mat), target :: aell + type(psb_d_hll_sparse_mat), target :: ahll + type(psb_d_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_GPU + type(psb_d_elg_sparse_mat), target :: aelg + type(psb_d_csrg_sparse_mat), target :: acsrg + type(psb_d_hybg_sparse_mat), target :: ahybg + type(psb_d_hlg_sparse_mat), target :: ahlg + type(psb_d_hdiag_sparse_mat), target :: ahdiag +#endif + class(psb_d_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer(psb_lpk_) :: i,j,nrt, ns, nr, ig, nrg + integer(psb_ipk_) :: internal, m,ii,nnzero,info, ipart + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('HDIA') + acmold => ahdia + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case('HDIAG') + agmold => ahdiag + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(done + (done*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,a,xv,dzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-done,bg,+done,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program d_file_spmv + + + + + diff --git a/test/gpukern/data_input.f90 b/test/gpukern/data_input.f90 new file mode 100644 index 00000000..274cb7ad --- /dev/null +++ b/test/gpukern/data_input.f90 @@ -0,0 +1,221 @@ +! +! +! MLD2P4 version 2.0 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.0) +! +! (C) Copyright 2008,2009,2010 +! +! Salvatore Filippone +! Alfredo Buttari +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the MLD2P4 group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module data_input + + interface read_data + module procedure read_char, read_int,& + & read_double, read_single, read_logical,& + & string_read_char, string_read_int,& + & string_read_double, string_read_single, & + & string_read_logical + end interface read_data + interface trim_string + module procedure trim_string + end interface + + character(len=4096), private :: charbuf + character, private, parameter :: def_marker="!" + +contains + + subroutine read_logical(val,file,marker) + logical, intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_logical + + subroutine read_char(val,file,marker) + character(len=*), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_char + + + subroutine read_int(val,file,marker) + integer, intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_int + subroutine read_single(val,file,marker) + use psb_base_mod + real(psb_spk_), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_single + subroutine read_double(val,file,marker) + use psb_base_mod + real(psb_dpk_), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_double + + subroutine string_read_char(val,file,marker) + character(len=*), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),'(a)') val + end subroutine string_read_char + + subroutine string_read_int(val,file,marker) + integer, intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_int + + subroutine string_read_single(val,file,marker) + use psb_base_mod + real(psb_spk_), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_single + + subroutine string_read_double(val,file,marker) + use psb_base_mod + real(psb_dpk_), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_double + + subroutine string_read_logical(val,file,marker) + use psb_base_mod + logical, intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_logical + + function trim_string(string,marker) + character(len=*), intent(in) :: string + character(len=1), optional, intent(in) :: marker + character(len=len(string)) :: trim_string + character(len=1) :: marker_ + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + idx=index(string,marker_) + trim_string = adjustl(string(idx:)) + end function trim_string +end module data_input + diff --git a/test/gpukern/dpdegenmv.F90 b/test/gpukern/dpdegenmv.F90 new file mode 100644 index 00000000..ab616471 --- /dev/null +++ b/test/gpukern/dpdegenmv.F90 @@ -0,0 +1,997 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: dpdegenmv.f90 +! +! Program: pdegenmv +! This sample program measures the performance of the matrix-vector product. +! The matrix is generated in the same way as for the pdegen test case of +! the main PSBLAS library. +! +! +module psb_d_pde3d_mod + + + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_dspmat_type, psb_d_vect_type, dzero,& + & psb_d_base_sparse_mat, psb_d_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function d_func_3d(x,y,z) result(val) + import :: psb_dpk_ + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + end function d_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_d_gen_pde3d + end interface psb_gen_pde3d + +contains + + function d_null_func_3d(x,y,z) result(val) + + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + + val = dzero + + end function d_null_func_3d + ! + ! functions parametrizing the differential equation + ! + function b1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b1 + real(psb_dpk_), intent(in) :: x,y,z + b1=done/sqrt((3*done)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b2 + real(psb_dpk_), intent(in) :: x,y,z + b2=done/sqrt((3*done)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b3 + real(psb_dpk_), intent(in) :: x,y,z + b3=done/sqrt((3*done)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: c + real(psb_dpk_), intent(in) :: x,y,z + c=dzero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a1 + real(psb_dpk_), intent(in) :: x,y,z + a1=done/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a2 + real(psb_dpk_), intent(in) :: x,y,z + a2=done/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a3 + real(psb_dpk_), intent(in) :: x,y,z + a3=done/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: g + real(psb_dpk_), intent(in) :: x,y,z + g = dzero + if (x == done) then + g = done + else if (x == dzero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_dspmat_type) :: a + type(psb_d_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(d_func_3d), optional :: f + class(psb_d_base_sparse_mat), optional :: amold + class(psb_d_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_d_csc_sparse_mat) :: acsc + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat) :: acsr + real(psb_dpk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_dpk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_dpk_) :: deltah, sqdeltah, deltah2 + real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(d_func_3d), pointer :: f_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => d_null_func_3d + end if + + deltah = done/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*done)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=dzero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold) + else + call psb_spasb(a,desc_a,info,afmt=afmt) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_d_gen_pde3d + + +end module psb_d_pde3d_mod + + +program pdgenmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif +#ifdef HAVE_RSB + use psb_rsb_mod +#endif + use psb_d_pde3d_mod + implicit none + + ! input parameters + character(len=5) :: acfmt, agfmt + integer :: idim + + ! miscellaneous + real(psb_dpk_), parameter :: one = 1.d0 + real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& + & tt1, tt2, gt1, gt2, gflops, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + + ! sparse matrix and preconditioner + type(psb_dspmat_type) :: a, agpu, aux_a + ! descriptor + type(psb_desc_type) :: desc_a + ! dense matrices + type(psb_d_vect_type), target :: xv, bv, xg, bg +#ifdef HAVE_GPU + type(psb_d_vect_gpu) :: vmold + type(psb_i_vect_gpu) :: imold +#endif + real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:) + ! blacs parameters + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + ! solver parameters + integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: err, eps + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + type(psb_d_coo_sparse_mat), target :: acoo + type(psb_d_csr_sparse_mat), target :: acsr + type(psb_d_ell_sparse_mat), target :: aell + type(psb_d_hll_sparse_mat), target :: ahll + type(psb_d_dia_sparse_mat), target :: adia + type(psb_d_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_RSB + type(psb_d_rsb_sparse_mat), target :: arsb +#endif +#ifdef HAVE_GPU + type(psb_d_elg_sparse_mat), target :: aelg + type(psb_d_csrg_sparse_mat), target :: acsrg +#if CUDA_SHORT_VERSION <= 10 + type(psb_d_hybg_sparse_mat), target :: ahybg +#endif + type(psb_d_hlg_sparse_mat), target :: ahlg + type(psb_d_hdiag_sparse_mat), target :: ahdiag + type(psb_d_dnsg_sparse_mat), target :: adnsg +#endif + class(psb_d_base_sparse_mat), pointer :: agmold, acmold + ! other variables + logical, parameter :: dump=.false. + integer(psb_ipk_) :: info, i, j, nr, nrg + integer(psb_lpk_) :: ig + character(len=20) :: name,ch_err + character(len=40) :: fname + + info=psb_success_ + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif +#ifdef HAVE_RSB + call psb_rsb_init() +#endif + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if(psb_get_errstatus() /= 0) goto 9999 + name='pdegenmv-gpu' + ! + ! Hello world + ! + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + ! + ! get parameters + ! + call get_parms(ctxt,acfmt,agfmt,idim) + + ! + ! allocate and fill in the coefficient matrix and initial vectors + ! + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='create_matrix' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx' + call a%print(fname,head='PDEGEN test matrix') + end if + + select case(psb_toupper(acfmt)) + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('DIA') + acmold => adia + case('HDIA') + acmold => ahdia + case('CSR') + acmold => acsr + case('COO') + acmold => acoo +#ifdef HAVE_RSB + case('RSB') + acmold => arsb +#endif + case default + write(*,*) 'Unknown format defaulting to HLL' + acmold => ahll + end select + call a%cscnv(info,mold=acmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('HDIAG') + agmold => ahdiag + case('CSRG') + agmold => acsrg + case('DNSG') + agmold => adnsg +#if CUDA_SHORT_VERSION <= 10 + case('HYBG') + agmold => ahybg +#endif + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select + call a%cscnv(agpu,info,mold=agmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + call desc_a%cnv(mold=imold) + + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold) +#endif + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x0,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + x0(i) = 1.0 + (1.0*ig)/nrg + end do + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + call psb_geall(x1,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + call psb_geasb(x1,desc_a,info) + call xv%bld(x0) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(x0,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + + call xv%set(x0) + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,a,xv,dzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + call xg%set(x0) + + ! FIXME: cache flush needed here + x1 = bv%get_vect() + x2 = bg%get_vect() + + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + x1 = bv%get_vect() + x2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + ! FIXME: cache flush needed here + call xg%set(x0) + call xg%sync() + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + ! For timing purposes we need to make sure all threads + ! in the device are done. + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + x1 = bv%get_vect() + x2 = bg%get_vect() + call psb_geaxpby(-done,bg,+done,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname) + write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname) + end if + +#endif + annz = a%get_nzeros() + amatsize = a%sizeof() + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,nr) + call psb_sum(ctxt,annz) + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,& + & '("Matrix: ell1 ",i0)') idim + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nr + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation : ",i20," ")') amatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU.) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth + bdwdth = psb_gpu_MemoryPeakBandwidth() + write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + ! + ! cleanup storage and exit + ! + call psb_gefree(bv,desc_a,info) + call psb_gefree(xv,desc_a,info) + call psb_spfree(a,desc_a,info) + call psb_cdfree(desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='free routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +#ifdef HAVE_GPU + call psb_gpu_exit() +#endif + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,acfmt,agfmt,idim) + type(psb_ctxt_type) :: ctxt + character(len=*) :: agfmt, acfmt + integer :: idim + integer :: np, iam + integer :: intbuf(10), ip + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + write(*,*) 'CPU side format?' + read(psb_inp_unit,*) acfmt + write(*,*) 'GPU side format?' + read(psb_inp_unit,*) agfmt + write(*,*) 'Size of discretization cube?' + read(psb_inp_unit,*) idim + endif + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,idim) + + if (iam == 0) then + write(psb_out_unit,'("Testing matrix : ell1")') + write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim + write(psb_out_unit,'("Number of processors : ",i0)')np + write(psb_out_unit,'("Data distribution : BLOCK")') + write(psb_out_unit,'(" ")') + end if + return + + end subroutine get_parms + +end program pdgenmv diff --git a/test/gpukern/s_file_spmv.F90 b/test/gpukern/s_file_spmv.F90 new file mode 100644 index 00000000..37a52717 --- /dev/null +++ b/test/gpukern/s_file_spmv.F90 @@ -0,0 +1,496 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program s_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_sspmat_type) :: a, aux_a, agpu + + ! dense matrices + real(psb_spk_), allocatable, target :: aux_b(:,:), d(:) + real(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + real(psb_spk_), pointer :: b_col_glob(:) + type(psb_s_vect_type) :: b_col, x_col, r_col + type(psb_s_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_s_vect_gpu) :: vmold +#endif + real(psb_spk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_spk_) :: err, eps, samatsize, sgmatsize + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_s_coo_sparse_mat), target :: acoo + type(psb_s_csr_sparse_mat), target :: acsr + type(psb_s_ell_sparse_mat), target :: aell + type(psb_s_hll_sparse_mat), target :: ahll + type(psb_s_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_GPU + type(psb_s_elg_sparse_mat), target :: aelg + type(psb_s_csrg_sparse_mat), target :: acsrg + type(psb_s_hybg_sparse_mat), target :: ahybg + type(psb_s_hlg_sparse_mat), target :: ahlg + type(psb_s_hdiag_sparse_mat), target :: ahdiag +#endif + class(psb_s_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg + integer :: internal, m,ii,nnzero + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('HDIA') + acmold => ahdia + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case('HDIAG') + agmold => ahdiag + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(sone + (sone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,a,xv,szero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + samatsize = amatsize + samatsize = samatsize/(1024*1024) + sgmatsize = agmatsize + sgmatsize = sgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,samatsize) + call psb_sum(ctxt,sgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') samatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') sgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+& + & annz*(psb_sizeof_sp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program s_file_spmv + + + + + diff --git a/test/gpukern/spdegenmv.F90 b/test/gpukern/spdegenmv.F90 new file mode 100644 index 00000000..1c7d646f --- /dev/null +++ b/test/gpukern/spdegenmv.F90 @@ -0,0 +1,989 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: spdegenmv.f90 +! +! Program: pdegenmv +! This sample program measures the performance of the matrix-vector product. +! The matrix is generated in the same way as for the pdegen test case of +! the main PSBLAS library. +! +! +module psb_s_pde3d_mod + + + use psb_base_mod, only : psb_spk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_sspmat_type, psb_s_vect_type, szero,& + & psb_s_base_sparse_mat, psb_s_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function s_func_3d(x,y,z) result(val) + import :: psb_spk_ + real(psb_spk_), intent(in) :: x,y,z + real(psb_spk_) :: val + end function s_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_s_gen_pde3d + end interface psb_gen_pde3d + +contains + + function s_null_func_3d(x,y,z) result(val) + + real(psb_spk_), intent(in) :: x,y,z + real(psb_spk_) :: val + + val = szero + + end function s_null_func_3d + ! + ! functions parametrizing the differential equation + ! + function b1(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b1 + real(psb_spk_), intent(in) :: x,y,z + b1=sone/sqrt((3*sone)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b2 + real(psb_spk_), intent(in) :: x,y,z + b2=sone/sqrt((3*sone)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b3 + real(psb_spk_), intent(in) :: x,y,z + b3=sone/sqrt((3*sone)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: c + real(psb_spk_), intent(in) :: x,y,z + c=szero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a1 + real(psb_spk_), intent(in) :: x,y,z + a1=sone/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a2 + real(psb_spk_), intent(in) :: x,y,z + a2=sone/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a3 + real(psb_spk_), intent(in) :: x,y,z + a3=sone/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: g + real(psb_spk_), intent(in) :: x,y,z + g = szero + if (x == sone) then + g = sone + else if (x == szero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_s_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_sspmat_type) :: a + type(psb_s_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(s_func_3d), optional :: f + class(psb_s_base_sparse_mat), optional :: amold + class(psb_s_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_s_csc_sparse_mat) :: acsc + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat) :: acsr + real(psb_spk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_spk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_spk_) :: deltah, sqdeltah, deltah2 + real(psb_spk_), parameter :: rhs=szero,one=sone,zero=szero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(s_func_3d), pointer :: f_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => s_null_func_3d + end if + + deltah = sone/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*sone)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=szero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold) + else + call psb_spasb(a,desc_a,info,afmt=afmt) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_s_gen_pde3d + + +end module psb_s_pde3d_mod + + +program pdgenmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use psb_s_pde3d_mod + implicit none + + ! input parameters + character(len=5) :: acfmt, agfmt + integer :: idim + + ! miscellaneous + real(psb_spk_), parameter :: one = 1.e0 + real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& + & tt1, tt2, gt1, gt2, gflops, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + + ! sparse matrix and preconditioner + type(psb_sspmat_type) :: a, agpu, aux_a + ! descriptor + type(psb_desc_type) :: desc_a + ! dense matrices + type(psb_s_vect_type), target :: xv,bv, xg, bg +#ifdef HAVE_GPU + type(psb_s_vect_gpu) :: vmold + type(psb_i_vect_gpu) :: imold +#endif + real(psb_spk_), allocatable :: x1(:), x2(:), x0(:) + ! blacs parameters + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + ! solver parameters + integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes + real(psb_spk_) :: err, eps + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + type(psb_s_coo_sparse_mat), target :: acoo + type(psb_s_csr_sparse_mat), target :: acsr + type(psb_s_ell_sparse_mat), target :: aell + type(psb_s_hll_sparse_mat), target :: ahll + type(psb_s_dia_sparse_mat), target :: adia + type(psb_s_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_GPU + type(psb_s_elg_sparse_mat), target :: aelg + type(psb_s_csrg_sparse_mat), target :: acsrg +#if CUDA_SHORT_VERSION <= 10 + type(psb_s_hybg_sparse_mat), target :: ahybg +#endif + type(psb_s_hlg_sparse_mat), target :: ahlg + type(psb_s_dnsg_sparse_mat), target :: adnsg + type(psb_s_hdiag_sparse_mat), target :: ahdiag +#endif + class(psb_s_base_sparse_mat), pointer :: agmold, acmold + ! other variables + logical, parameter :: dump=.false. + integer(psb_ipk_) :: info, i, j, nr, nrg + integer(psb_lpk_) :: ig + character(len=20) :: name,ch_err + character(len=40) :: fname + + info=psb_success_ + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if(psb_get_errstatus() /= 0) goto 9999 + name='pdegenmv-gpu' + ! + ! Hello world + ! + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + ! + ! get parameters + ! + call get_parms(ctxt,acfmt,agfmt,idim) + + ! + ! allocate and fill in the coefficient matrix and initial vectors + ! + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='create_matrix' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx' + call a%print(fname,head='PDEGEN test matrix') + end if + + select case(psb_toupper(acfmt)) + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('DIA') + acmold => adia + case('HDIA') + acmold => ahdia + case('CSR') + acmold => acsr + case('COO') + acmold => acoo +#ifdef HAVE_RSB + case('RSB') + acmold => arsb +#endif + case default + write(*,*) 'Unknown format defaulting to HLL' + acmold => ahll + end select + call a%cscnv(info,mold=acmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('HDIAG') + agmold => ahdiag + case('CSRG') + agmold => acsrg + case('DNSG') + agmold => adnsg +#if CUDA_SHORT_VERSION <= 10 + case('HYBG') + agmold => ahybg +#endif + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select + call a%cscnv(agpu,info,mold=agmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + call desc_a%cnv(mold=imold) + + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold) +#endif + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x0,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + x0(i) = 1.0 + (1.0*ig)/nrg + end do + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + call psb_geall(x1,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + call psb_geasb(x1,desc_a,info) + call xv%bld(x0) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(x0,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + + call xv%set(x0) + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,a,xv,szero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + call xg%set(x0) + + ! FIXME: cache flush needed here + x1 = bv%get_vect() + x2 = bg%get_vect() + + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + x1 = bv%get_vect() + x2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + + ! FIXME: cache flush needed here + call xg%set(x0) + call xg%sync() + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info) + ! For timing purposes we need to make sure all threads + ! in the device are done. + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + x1 = bv%get_vect() + x2 = bg%get_vect() + call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname) + write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname) + end if +#endif + annz = a%get_nzeros() + amatsize = a%sizeof() + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,nr) + call psb_sum(ctxt,annz) + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,& + & '("Matrix: ell1 ",i0)') idim + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nr + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation : ",i20," ")') amatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU.) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+& + & annz*(psb_sizeof_sp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth + bdwdth = psb_gpu_MemoryPeakBandwidth() + write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + ! + ! cleanup storage and exit + ! + call psb_gefree(bv,desc_a,info) + call psb_gefree(xv,desc_a,info) + call psb_spfree(a,desc_a,info) + call psb_cdfree(desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='free routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +#ifdef HAVE_GPU + call psb_gpu_exit() +#endif + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,acfmt,agfmt,idim) + type(psb_ctxt_type) :: ctxt + character(len=*) :: agfmt, acfmt + integer :: idim + integer :: np, iam + integer :: intbuf(10), ip + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + write(*,*) 'CPU side format?' + read(psb_inp_unit,*) acfmt + write(*,*) 'GPU side format?' + read(psb_inp_unit,*) agfmt + write(*,*) 'Size of discretization cube?' + read(psb_inp_unit,*) idim + endif + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,idim) + + if (iam == 0) then + write(psb_out_unit,'("Testing matrix : ell1")') + write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim + write(psb_out_unit,'("Number of processors : ",i0)')np + write(psb_out_unit,'("Data distribution : BLOCK")') + write(psb_out_unit,'(" ")') + end if + return + + end subroutine get_parms + + +end program pdgenmv diff --git a/test/gpukern/z_file_spmv.F90 b/test/gpukern/z_file_spmv.F90 new file mode 100644 index 00000000..153dd5e1 --- /dev/null +++ b/test/gpukern/z_file_spmv.F90 @@ -0,0 +1,491 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program z_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_zspmat_type) :: a, aux_a, agpu + + ! dense matrices + complex(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) + complex(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + complex(psb_dpk_), pointer :: b_col_glob(:) + type(psb_z_vect_type) :: b_col, x_col, r_col + type(psb_z_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_z_vect_gpu) :: vmold +#endif + complex(psb_dpk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: damatsize, dgmatsize + complex(psb_dpk_) :: err, eps + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_z_coo_sparse_mat), target :: acoo + type(psb_z_csr_sparse_mat), target :: acsr + type(psb_z_ell_sparse_mat), target :: aell + type(psb_z_hll_sparse_mat), target :: ahll +#ifdef HAVE_GPU + type(psb_z_elg_sparse_mat), target :: aelg + type(psb_z_csrg_sparse_mat), target :: acsrg + type(psb_z_hybg_sparse_mat), target :: ahybg + type(psb_z_hlg_sparse_mat), target :: ahlg +#endif + class(psb_z_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg + integer :: internal, m,ii,nnzero + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(zone + (zone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(zone,a,xv,zzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(zone,agpu,xv,zzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(zone,agpu,xg,zzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-zone,bg,+zone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(2*psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program z_file_spmv + + + + + From a2788bdf0ba1a4b6e4bf3bb447b49da76d4db931 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 7 Nov 2023 13:39:44 +0100 Subject: [PATCH 010/116] New version with ND product --- base/psblas/psb_cspmm.f90 | 23 ++++++++++++++----- base/psblas/psb_dspmm.f90 | 44 ++++++++++++++++++++++++++----------- base/psblas/psb_sspmm.f90 | 23 ++++++++++++++----- base/psblas/psb_zspmm.f90 | 23 ++++++++++++++----- base/tools/psb_cspasb.f90 | 2 +- base/tools/psb_sspasb.f90 | 2 +- base/tools/psb_zspasb.f90 | 2 +- test/pargen/psb_d_pde3d.F90 | 4 ++-- test/pargen/runs/ppde.inp | 2 +- 9 files changed, 88 insertions(+), 37 deletions(-) diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index 84d8a7d8..25a6bc56 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,cone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,cone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index d5897f82..7888188a 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -180,22 +180,40 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,done,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,done,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) t2= psb_wtime() + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) t3= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2 + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - - end if if(info /= psb_success_) then info = psb_err_from_subroutine_non_ diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 7c1e0ab3..cf8919f0 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,sone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,sone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index 4dc73f83..629fcf2b 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,zone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,zone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 46258139..8263e309 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_c_coo_sparse_mat) :: acoo type(psb_c_csr_sparse_mat), allocatable :: aclip type(psb_c_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index 0edae30e..f273c7f4 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_s_coo_sparse_mat) :: acoo type(psb_s_csr_sparse_mat), allocatable :: aclip type(psb_s_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index cd77de15..1a381303 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_z_coo_sparse_mat) :: acoo type(psb_z_csr_sparse_mat), allocatable :: aclip type(psb_z_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index cd503d29..e802736e 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -680,9 +680,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold,bld_and=.true.) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=.false.) else - call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.true.) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.false.) end if end if call psb_barrier(ctxt) diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index c70a973f..44dac085 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -5,7 +5,7 @@ CSR Storage format for matrix A: CSR COO 200 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 -0300 MAXIT +0008 MAXIT 10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH From a6ec655a97d14dbfdf3003e3aecfc32955b45080 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 7 Nov 2023 17:57:32 +0100 Subject: [PATCH 011/116] Prepare merge --- .../impl/{psb_c_csc_impl.f90 => psb_c_csc_impl.F90} | 0 .../impl/{psb_c_csr_impl.f90 => psb_c_csr_impl.F90} | 0 .../impl/{psb_d_csc_impl.f90 => psb_d_csc_impl.F90} | 0 .../impl/{psb_d_csr_impl.f90 => psb_d_csr_impl.F90} | 0 .../impl/{psb_s_csc_impl.f90 => psb_s_csc_impl.F90} | 0 .../impl/{psb_s_csr_impl.f90 => psb_s_csr_impl.F90} | 0 .../impl/{psb_z_csc_impl.f90 => psb_z_csc_impl.F90} | 0 .../impl/{psb_z_csr_impl.f90 => psb_z_csr_impl.F90} | 0 test/hello/Makefile | 10 ++++++++-- 9 files changed, 8 insertions(+), 2 deletions(-) rename base/serial/impl/{psb_c_csc_impl.f90 => psb_c_csc_impl.F90} (100%) rename base/serial/impl/{psb_c_csr_impl.f90 => psb_c_csr_impl.F90} (100%) rename base/serial/impl/{psb_d_csc_impl.f90 => psb_d_csc_impl.F90} (100%) rename base/serial/impl/{psb_d_csr_impl.f90 => psb_d_csr_impl.F90} (100%) rename base/serial/impl/{psb_s_csc_impl.f90 => psb_s_csc_impl.F90} (100%) rename base/serial/impl/{psb_s_csr_impl.f90 => psb_s_csr_impl.F90} (100%) rename base/serial/impl/{psb_z_csc_impl.f90 => psb_z_csc_impl.F90} (100%) rename base/serial/impl/{psb_z_csr_impl.f90 => psb_z_csr_impl.F90} (100%) diff --git a/base/serial/impl/psb_c_csc_impl.f90 b/base/serial/impl/psb_c_csc_impl.F90 similarity index 100% rename from base/serial/impl/psb_c_csc_impl.f90 rename to base/serial/impl/psb_c_csc_impl.F90 diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.F90 similarity index 100% rename from base/serial/impl/psb_c_csr_impl.f90 rename to base/serial/impl/psb_c_csr_impl.F90 diff --git a/base/serial/impl/psb_d_csc_impl.f90 b/base/serial/impl/psb_d_csc_impl.F90 similarity index 100% rename from base/serial/impl/psb_d_csc_impl.f90 rename to base/serial/impl/psb_d_csc_impl.F90 diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.F90 similarity index 100% rename from base/serial/impl/psb_d_csr_impl.f90 rename to base/serial/impl/psb_d_csr_impl.F90 diff --git a/base/serial/impl/psb_s_csc_impl.f90 b/base/serial/impl/psb_s_csc_impl.F90 similarity index 100% rename from base/serial/impl/psb_s_csc_impl.f90 rename to base/serial/impl/psb_s_csc_impl.F90 diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.F90 similarity index 100% rename from base/serial/impl/psb_s_csr_impl.f90 rename to base/serial/impl/psb_s_csr_impl.F90 diff --git a/base/serial/impl/psb_z_csc_impl.f90 b/base/serial/impl/psb_z_csc_impl.F90 similarity index 100% rename from base/serial/impl/psb_z_csc_impl.f90 rename to base/serial/impl/psb_z_csc_impl.F90 diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.F90 similarity index 100% rename from base/serial/impl/psb_z_csr_impl.f90 rename to base/serial/impl/psb_z_csr_impl.F90 diff --git a/test/hello/Makefile b/test/hello/Makefile index a6811ea7..f16ff75e 100644 --- a/test/hello/Makefile +++ b/test/hello/Makefile @@ -16,7 +16,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). EXEDIR=./runs -all: runsd hello pingpong +all: runsd hello pingpong tsum tsum1 runsd: (if test ! -d runs ; then mkdir runs; fi) @@ -28,11 +28,17 @@ hello: hello.o pingpong: pingpong.o $(FLINK) pingpong.o -o pingpong $(PSBLAS_LIB) $(LDLIBS) /bin/mv pingpong $(EXEDIR) +tsum: tsum.o + $(FLINK) tsum.o -o tsum $(PSBLAS_LIB) $(LDLIBS) + /bin/mv tsum $(EXEDIR) +tsum1: tsum1.o + $(FLINK) tsum1.o -o tsum1 $(PSBLAS_LIB) $(LDLIBS) + /bin/mv tsum1 $(EXEDIR) clean: - /bin/rm -f hello.o pingpong.o + /bin/rm -f hello.o pingpong.o tsum.o tsum1.o $(EXEDIR)/hello verycleanlib: (cd ../..; make veryclean) From 6fa0bf7fe7f5f9746bbbaf6a6d335ebe88aa97eb Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 28 Nov 2023 19:14:28 +0100 Subject: [PATCH 012/116] Complete cuda renaming --- cuda/Makefile | 108 +-- cuda/impl/Makefile | 540 ++++++------ ...oo.F90 => psb_c_cuda_cp_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_c_cuda_cp_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_c_cuda_cp_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_c_cuda_cp_elg_from_coo.F90} | 14 +- ...fmt.F90 => psb_c_cuda_cp_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_c_cuda_cp_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_c_cuda_cp_hlg_from_coo.F90} | 14 +- ...fmt.F90 => psb_c_cuda_cp_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_c_cuda_cp_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_c_cuda_cp_hybg_from_fmt.F90} | 10 +- ....F90 => psb_c_cuda_csrg_allocate_mnnz.F90} | 10 +- ...csrg_csmm.F90 => psb_c_cuda_csrg_csmm.F90} | 10 +- ...csrg_csmv.F90 => psb_c_cuda_csrg_csmv.F90} | 12 +- ...m_gpu.F90 => psb_c_cuda_csrg_from_gpu.F90} | 10 +- ....F90 => psb_c_cuda_csrg_inner_vect_sv.F90} | 18 +- ...csrg_mold.F90 => psb_c_cuda_csrg_mold.F90} | 10 +- ....F90 => psb_c_cuda_csrg_reallocate_nz.F90} | 12 +- ...csrg_scal.F90 => psb_c_cuda_csrg_scal.F90} | 10 +- ...rg_scals.F90 => psb_c_cuda_csrg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_c_cuda_csrg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_c_cuda_csrg_vect_mv.F90} | 18 +- ...diag_csmv.F90 => psb_c_cuda_diag_csmv.F90} | 12 +- ...diag_mold.F90 => psb_c_cuda_diag_mold.F90} | 10 +- ..._to_gpu.F90 => psb_c_cuda_diag_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_c_cuda_diag_vect_mv.F90} | 18 +- ..._impl.F90 => psb_c_cuda_dnsg_mat_impl.F90} | 126 +-- ...z.F90 => psb_c_cuda_elg_allocate_mnnz.F90} | 10 +- ...b_d_elg_asb.f90 => psb_c_cuda_elg_asb.f90} | 8 +- ...c_elg_csmm.F90 => psb_c_cuda_elg_csmm.F90} | 12 +- ...c_elg_csmv.F90 => psb_c_cuda_elg_csmv.F90} | 10 +- ...elg_csput.F90 => psb_c_cuda_elg_csput.F90} | 32 +- ...om_gpu.F90 => psb_c_cuda_elg_from_gpu.F90} | 10 +- ...v.F90 => psb_c_cuda_elg_inner_vect_sv.F90} | 14 +- ...c_elg_mold.F90 => psb_c_cuda_elg_mold.F90} | 10 +- ...z.F90 => psb_c_cuda_elg_reallocate_nz.F90} | 12 +- ...c_elg_scal.F90 => psb_c_cuda_elg_scal.F90} | 10 +- ...elg_scals.F90 => psb_c_cuda_elg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_c_cuda_elg_to_gpu.F90} | 10 +- ...d_elg_trim.f90 => psb_c_cuda_elg_trim.f90} | 8 +- ...vect_mv.F90 => psb_c_cuda_elg_vect_mv.F90} | 18 +- ...iag_csmv.F90 => psb_c_cuda_hdiag_csmv.F90} | 12 +- ...iag_mold.F90 => psb_c_cuda_hdiag_mold.F90} | 10 +- ...to_gpu.F90 => psb_c_cuda_hdiag_to_gpu.F90} | 10 +- ...ct_mv.F90 => psb_c_cuda_hdiag_vect_mv.F90} | 18 +- ...z.F90 => psb_c_cuda_hlg_allocate_mnnz.F90} | 10 +- ...c_hlg_csmm.F90 => psb_c_cuda_hlg_csmm.F90} | 12 +- ...c_hlg_csmv.F90 => psb_c_cuda_hlg_csmv.F90} | 12 +- ...om_gpu.F90 => psb_c_cuda_hlg_from_gpu.F90} | 10 +- ...v.F90 => psb_c_cuda_hlg_inner_vect_sv.F90} | 12 +- ...c_hlg_mold.F90 => psb_c_cuda_hlg_mold.F90} | 10 +- ...z.F90 => psb_c_cuda_hlg_reallocate_nz.F90} | 12 +- ...c_hlg_scal.F90 => psb_c_cuda_hlg_scal.F90} | 10 +- ...hlg_scals.F90 => psb_c_cuda_hlg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_c_cuda_hlg_to_gpu.F90} | 10 +- ...vect_mv.F90 => psb_c_cuda_hlg_vect_mv.F90} | 18 +- ....F90 => psb_c_cuda_hybg_allocate_mnnz.F90} | 10 +- ...hybg_csmm.F90 => psb_c_cuda_hybg_csmm.F90} | 12 +- ...hybg_csmv.F90 => psb_c_cuda_hybg_csmv.F90} | 12 +- ....F90 => psb_c_cuda_hybg_inner_vect_sv.F90} | 18 +- ...hybg_mold.F90 => psb_c_cuda_hybg_mold.F90} | 10 +- ....F90 => psb_c_cuda_hybg_reallocate_nz.F90} | 12 +- ...hybg_scal.F90 => psb_c_cuda_hybg_scal.F90} | 10 +- ...bg_scals.F90 => psb_c_cuda_hybg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_c_cuda_hybg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_c_cuda_hybg_vect_mv.F90} | 18 +- ...oo.F90 => psb_c_cuda_mv_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_c_cuda_mv_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_c_cuda_mv_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_c_cuda_mv_elg_from_coo.F90} | 10 +- ...fmt.F90 => psb_c_cuda_mv_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_c_cuda_mv_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_c_cuda_mv_hlg_from_coo.F90} | 12 +- ...fmt.F90 => psb_c_cuda_mv_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_c_cuda_mv_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_c_cuda_mv_hybg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_cp_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_d_cuda_cp_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_cp_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_d_cuda_cp_elg_from_coo.F90} | 14 +- ...fmt.F90 => psb_d_cuda_cp_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_d_cuda_cp_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_d_cuda_cp_hlg_from_coo.F90} | 14 +- ...fmt.F90 => psb_d_cuda_cp_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_cp_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_d_cuda_cp_hybg_from_fmt.F90} | 10 +- ....F90 => psb_d_cuda_csrg_allocate_mnnz.F90} | 10 +- ...csrg_csmm.F90 => psb_d_cuda_csrg_csmm.F90} | 10 +- ...csrg_csmv.F90 => psb_d_cuda_csrg_csmv.F90} | 12 +- ...m_gpu.F90 => psb_d_cuda_csrg_from_gpu.F90} | 10 +- ....F90 => psb_d_cuda_csrg_inner_vect_sv.F90} | 18 +- ...csrg_mold.F90 => psb_d_cuda_csrg_mold.F90} | 10 +- ....F90 => psb_d_cuda_csrg_reallocate_nz.F90} | 12 +- ...csrg_scal.F90 => psb_d_cuda_csrg_scal.F90} | 10 +- ...rg_scals.F90 => psb_d_cuda_csrg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_d_cuda_csrg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_d_cuda_csrg_vect_mv.F90} | 18 +- ...diag_csmv.F90 => psb_d_cuda_diag_csmv.F90} | 12 +- ...diag_mold.F90 => psb_d_cuda_diag_mold.F90} | 10 +- ..._to_gpu.F90 => psb_d_cuda_diag_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_d_cuda_diag_vect_mv.F90} | 18 +- ..._impl.F90 => psb_d_cuda_dnsg_mat_impl.F90} | 126 +-- ...z.F90 => psb_d_cuda_elg_allocate_mnnz.F90} | 10 +- ...b_s_elg_asb.f90 => psb_d_cuda_elg_asb.f90} | 8 +- ...d_elg_csmm.F90 => psb_d_cuda_elg_csmm.F90} | 12 +- ...d_elg_csmv.F90 => psb_d_cuda_elg_csmv.F90} | 10 +- ...elg_csput.F90 => psb_d_cuda_elg_csput.F90} | 32 +- ...om_gpu.F90 => psb_d_cuda_elg_from_gpu.F90} | 10 +- ...v.F90 => psb_d_cuda_elg_inner_vect_sv.F90} | 14 +- ...d_elg_mold.F90 => psb_d_cuda_elg_mold.F90} | 10 +- ...z.F90 => psb_d_cuda_elg_reallocate_nz.F90} | 12 +- ...d_elg_scal.F90 => psb_d_cuda_elg_scal.F90} | 10 +- ...elg_scals.F90 => psb_d_cuda_elg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_d_cuda_elg_to_gpu.F90} | 10 +- ...s_elg_trim.f90 => psb_d_cuda_elg_trim.f90} | 8 +- ...vect_mv.F90 => psb_d_cuda_elg_vect_mv.F90} | 18 +- ...iag_csmv.F90 => psb_d_cuda_hdiag_csmv.F90} | 12 +- ...iag_mold.F90 => psb_d_cuda_hdiag_mold.F90} | 10 +- ...to_gpu.F90 => psb_d_cuda_hdiag_to_gpu.F90} | 10 +- ...ct_mv.F90 => psb_d_cuda_hdiag_vect_mv.F90} | 18 +- ...z.F90 => psb_d_cuda_hlg_allocate_mnnz.F90} | 10 +- ...d_hlg_csmm.F90 => psb_d_cuda_hlg_csmm.F90} | 12 +- ...d_hlg_csmv.F90 => psb_d_cuda_hlg_csmv.F90} | 12 +- ...om_gpu.F90 => psb_d_cuda_hlg_from_gpu.F90} | 10 +- ...v.F90 => psb_d_cuda_hlg_inner_vect_sv.F90} | 12 +- ...d_hlg_mold.F90 => psb_d_cuda_hlg_mold.F90} | 10 +- ...z.F90 => psb_d_cuda_hlg_reallocate_nz.F90} | 12 +- ...d_hlg_scal.F90 => psb_d_cuda_hlg_scal.F90} | 10 +- ...hlg_scals.F90 => psb_d_cuda_hlg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_d_cuda_hlg_to_gpu.F90} | 10 +- ...vect_mv.F90 => psb_d_cuda_hlg_vect_mv.F90} | 18 +- ....F90 => psb_d_cuda_hybg_allocate_mnnz.F90} | 10 +- ...hybg_csmm.F90 => psb_d_cuda_hybg_csmm.F90} | 12 +- ...hybg_csmv.F90 => psb_d_cuda_hybg_csmv.F90} | 12 +- ....F90 => psb_d_cuda_hybg_inner_vect_sv.F90} | 18 +- ...hybg_mold.F90 => psb_d_cuda_hybg_mold.F90} | 10 +- ....F90 => psb_d_cuda_hybg_reallocate_nz.F90} | 12 +- ...hybg_scal.F90 => psb_d_cuda_hybg_scal.F90} | 10 +- ...bg_scals.F90 => psb_d_cuda_hybg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_d_cuda_hybg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_d_cuda_hybg_vect_mv.F90} | 18 +- ...oo.F90 => psb_d_cuda_mv_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_d_cuda_mv_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_mv_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_d_cuda_mv_elg_from_coo.F90} | 10 +- ...fmt.F90 => psb_d_cuda_mv_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_d_cuda_mv_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_d_cuda_mv_hlg_from_coo.F90} | 12 +- ...fmt.F90 => psb_d_cuda_mv_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_d_cuda_mv_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_d_cuda_mv_hybg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_cp_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_s_cuda_cp_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_cp_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_s_cuda_cp_elg_from_coo.F90} | 14 +- ...fmt.F90 => psb_s_cuda_cp_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_s_cuda_cp_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_s_cuda_cp_hlg_from_coo.F90} | 14 +- ...fmt.F90 => psb_s_cuda_cp_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_cp_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_s_cuda_cp_hybg_from_fmt.F90} | 10 +- ....F90 => psb_s_cuda_csrg_allocate_mnnz.F90} | 10 +- ...csrg_csmm.F90 => psb_s_cuda_csrg_csmm.F90} | 10 +- ...csrg_csmv.F90 => psb_s_cuda_csrg_csmv.F90} | 12 +- ...m_gpu.F90 => psb_s_cuda_csrg_from_gpu.F90} | 10 +- ....F90 => psb_s_cuda_csrg_inner_vect_sv.F90} | 18 +- ...csrg_mold.F90 => psb_s_cuda_csrg_mold.F90} | 10 +- ....F90 => psb_s_cuda_csrg_reallocate_nz.F90} | 12 +- ...csrg_scal.F90 => psb_s_cuda_csrg_scal.F90} | 10 +- ...rg_scals.F90 => psb_s_cuda_csrg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_s_cuda_csrg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_s_cuda_csrg_vect_mv.F90} | 18 +- ...diag_csmv.F90 => psb_s_cuda_diag_csmv.F90} | 12 +- ...diag_mold.F90 => psb_s_cuda_diag_mold.F90} | 10 +- ..._to_gpu.F90 => psb_s_cuda_diag_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_s_cuda_diag_vect_mv.F90} | 18 +- ..._impl.F90 => psb_s_cuda_dnsg_mat_impl.F90} | 126 +-- ...z.F90 => psb_s_cuda_elg_allocate_mnnz.F90} | 10 +- ...b_c_elg_asb.f90 => psb_s_cuda_elg_asb.f90} | 8 +- ...s_elg_csmm.F90 => psb_s_cuda_elg_csmm.F90} | 12 +- ...s_elg_csmv.F90 => psb_s_cuda_elg_csmv.F90} | 10 +- ...elg_csput.F90 => psb_s_cuda_elg_csput.F90} | 32 +- ...om_gpu.F90 => psb_s_cuda_elg_from_gpu.F90} | 10 +- ...v.F90 => psb_s_cuda_elg_inner_vect_sv.F90} | 14 +- ...s_elg_mold.F90 => psb_s_cuda_elg_mold.F90} | 10 +- ...z.F90 => psb_s_cuda_elg_reallocate_nz.F90} | 12 +- ...s_elg_scal.F90 => psb_s_cuda_elg_scal.F90} | 10 +- ...elg_scals.F90 => psb_s_cuda_elg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_s_cuda_elg_to_gpu.F90} | 10 +- ...z_elg_trim.f90 => psb_s_cuda_elg_trim.f90} | 8 +- ...vect_mv.F90 => psb_s_cuda_elg_vect_mv.F90} | 18 +- ...iag_csmv.F90 => psb_s_cuda_hdiag_csmv.F90} | 12 +- ...iag_mold.F90 => psb_s_cuda_hdiag_mold.F90} | 10 +- ...to_gpu.F90 => psb_s_cuda_hdiag_to_gpu.F90} | 10 +- ...ct_mv.F90 => psb_s_cuda_hdiag_vect_mv.F90} | 18 +- ...z.F90 => psb_s_cuda_hlg_allocate_mnnz.F90} | 10 +- ...s_hlg_csmm.F90 => psb_s_cuda_hlg_csmm.F90} | 12 +- ...s_hlg_csmv.F90 => psb_s_cuda_hlg_csmv.F90} | 12 +- ...om_gpu.F90 => psb_s_cuda_hlg_from_gpu.F90} | 10 +- ...v.F90 => psb_s_cuda_hlg_inner_vect_sv.F90} | 12 +- ...s_hlg_mold.F90 => psb_s_cuda_hlg_mold.F90} | 10 +- ...z.F90 => psb_s_cuda_hlg_reallocate_nz.F90} | 12 +- ...s_hlg_scal.F90 => psb_s_cuda_hlg_scal.F90} | 10 +- ...hlg_scals.F90 => psb_s_cuda_hlg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_s_cuda_hlg_to_gpu.F90} | 10 +- ...vect_mv.F90 => psb_s_cuda_hlg_vect_mv.F90} | 18 +- ....F90 => psb_s_cuda_hybg_allocate_mnnz.F90} | 10 +- ...hybg_csmm.F90 => psb_s_cuda_hybg_csmm.F90} | 12 +- ...hybg_csmv.F90 => psb_s_cuda_hybg_csmv.F90} | 12 +- ....F90 => psb_s_cuda_hybg_inner_vect_sv.F90} | 18 +- ...hybg_mold.F90 => psb_s_cuda_hybg_mold.F90} | 10 +- ....F90 => psb_s_cuda_hybg_reallocate_nz.F90} | 12 +- ...hybg_scal.F90 => psb_s_cuda_hybg_scal.F90} | 10 +- ...bg_scals.F90 => psb_s_cuda_hybg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_s_cuda_hybg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_s_cuda_hybg_vect_mv.F90} | 18 +- ...oo.F90 => psb_s_cuda_mv_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_s_cuda_mv_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_mv_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_s_cuda_mv_elg_from_coo.F90} | 10 +- ...fmt.F90 => psb_s_cuda_mv_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_s_cuda_mv_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_s_cuda_mv_hlg_from_coo.F90} | 12 +- ...fmt.F90 => psb_s_cuda_mv_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_s_cuda_mv_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_s_cuda_mv_hybg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_cp_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_z_cuda_cp_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_cp_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_z_cuda_cp_elg_from_coo.F90} | 14 +- ...fmt.F90 => psb_z_cuda_cp_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_z_cuda_cp_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_z_cuda_cp_hlg_from_coo.F90} | 14 +- ...fmt.F90 => psb_z_cuda_cp_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_cp_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_z_cuda_cp_hybg_from_fmt.F90} | 10 +- ....F90 => psb_z_cuda_csrg_allocate_mnnz.F90} | 10 +- ...csrg_csmm.F90 => psb_z_cuda_csrg_csmm.F90} | 10 +- ...csrg_csmv.F90 => psb_z_cuda_csrg_csmv.F90} | 12 +- ...m_gpu.F90 => psb_z_cuda_csrg_from_gpu.F90} | 10 +- ....F90 => psb_z_cuda_csrg_inner_vect_sv.F90} | 18 +- ...csrg_mold.F90 => psb_z_cuda_csrg_mold.F90} | 10 +- ....F90 => psb_z_cuda_csrg_reallocate_nz.F90} | 12 +- ...csrg_scal.F90 => psb_z_cuda_csrg_scal.F90} | 10 +- ...rg_scals.F90 => psb_z_cuda_csrg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_z_cuda_csrg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_z_cuda_csrg_vect_mv.F90} | 18 +- ...diag_csmv.F90 => psb_z_cuda_diag_csmv.F90} | 12 +- ...diag_mold.F90 => psb_z_cuda_diag_mold.F90} | 10 +- ..._to_gpu.F90 => psb_z_cuda_diag_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_z_cuda_diag_vect_mv.F90} | 18 +- ..._impl.F90 => psb_z_cuda_dnsg_mat_impl.F90} | 126 +-- ...z.F90 => psb_z_cuda_elg_allocate_mnnz.F90} | 10 +- ...b_z_elg_asb.f90 => psb_z_cuda_elg_asb.f90} | 8 +- ...z_elg_csmm.F90 => psb_z_cuda_elg_csmm.F90} | 12 +- ...z_elg_csmv.F90 => psb_z_cuda_elg_csmv.F90} | 10 +- ...elg_csput.F90 => psb_z_cuda_elg_csput.F90} | 32 +- ...om_gpu.F90 => psb_z_cuda_elg_from_gpu.F90} | 10 +- ...v.F90 => psb_z_cuda_elg_inner_vect_sv.F90} | 14 +- ...z_elg_mold.F90 => psb_z_cuda_elg_mold.F90} | 10 +- ...z.F90 => psb_z_cuda_elg_reallocate_nz.F90} | 12 +- ...z_elg_scal.F90 => psb_z_cuda_elg_scal.F90} | 10 +- ...elg_scals.F90 => psb_z_cuda_elg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_z_cuda_elg_to_gpu.F90} | 10 +- ...c_elg_trim.f90 => psb_z_cuda_elg_trim.f90} | 8 +- ...vect_mv.F90 => psb_z_cuda_elg_vect_mv.F90} | 18 +- ...iag_csmv.F90 => psb_z_cuda_hdiag_csmv.F90} | 12 +- ...iag_mold.F90 => psb_z_cuda_hdiag_mold.F90} | 10 +- ...to_gpu.F90 => psb_z_cuda_hdiag_to_gpu.F90} | 10 +- ...ct_mv.F90 => psb_z_cuda_hdiag_vect_mv.F90} | 18 +- ...z.F90 => psb_z_cuda_hlg_allocate_mnnz.F90} | 10 +- ...z_hlg_csmm.F90 => psb_z_cuda_hlg_csmm.F90} | 12 +- ...z_hlg_csmv.F90 => psb_z_cuda_hlg_csmv.F90} | 12 +- ...om_gpu.F90 => psb_z_cuda_hlg_from_gpu.F90} | 10 +- ...v.F90 => psb_z_cuda_hlg_inner_vect_sv.F90} | 12 +- ...z_hlg_mold.F90 => psb_z_cuda_hlg_mold.F90} | 10 +- ...z.F90 => psb_z_cuda_hlg_reallocate_nz.F90} | 12 +- ...z_hlg_scal.F90 => psb_z_cuda_hlg_scal.F90} | 10 +- ...hlg_scals.F90 => psb_z_cuda_hlg_scals.F90} | 10 +- ...g_to_gpu.F90 => psb_z_cuda_hlg_to_gpu.F90} | 10 +- ...vect_mv.F90 => psb_z_cuda_hlg_vect_mv.F90} | 18 +- ....F90 => psb_z_cuda_hybg_allocate_mnnz.F90} | 10 +- ...hybg_csmm.F90 => psb_z_cuda_hybg_csmm.F90} | 12 +- ...hybg_csmv.F90 => psb_z_cuda_hybg_csmv.F90} | 12 +- ....F90 => psb_z_cuda_hybg_inner_vect_sv.F90} | 18 +- ...hybg_mold.F90 => psb_z_cuda_hybg_mold.F90} | 10 +- ....F90 => psb_z_cuda_hybg_reallocate_nz.F90} | 12 +- ...hybg_scal.F90 => psb_z_cuda_hybg_scal.F90} | 10 +- ...bg_scals.F90 => psb_z_cuda_hybg_scals.F90} | 10 +- ..._to_gpu.F90 => psb_z_cuda_hybg_to_gpu.F90} | 10 +- ...ect_mv.F90 => psb_z_cuda_hybg_vect_mv.F90} | 18 +- ...oo.F90 => psb_z_cuda_mv_csrg_from_coo.F90} | 10 +- ...mt.F90 => psb_z_cuda_mv_csrg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_mv_diag_from_coo.F90} | 10 +- ...coo.F90 => psb_z_cuda_mv_elg_from_coo.F90} | 10 +- ...fmt.F90 => psb_z_cuda_mv_elg_from_fmt.F90} | 10 +- ...o.F90 => psb_z_cuda_mv_hdiag_from_coo.F90} | 14 +- ...coo.F90 => psb_z_cuda_mv_hlg_from_coo.F90} | 12 +- ...fmt.F90 => psb_z_cuda_mv_hlg_from_fmt.F90} | 10 +- ...oo.F90 => psb_z_cuda_mv_hybg_from_coo.F90} | 10 +- ...mt.F90 => psb_z_cuda_mv_hybg_from_fmt.F90} | 10 +- cuda/psb_c_csrg_mat_mod.F90 | 393 --------- cuda/psb_c_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_c_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_c_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_c_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_c_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_c_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_c_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_c_cuda_vect_mod.F90} | 772 +++++++++--------- ...b_gpu_env_mod.F90 => psb_cuda_env_mod.F90} | 120 +-- cuda/{psb_gpu_mod.F90 => psb_cuda_mod.F90} | 74 +- cuda/psb_d_csrg_mat_mod.F90 | 393 --------- cuda/psb_d_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_d_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_d_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_d_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_d_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_d_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_d_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_d_cuda_vect_mod.F90} | 772 +++++++++--------- cuda/psb_i_csrg_mat_mod.F90 | 393 --------- cuda/psb_i_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_i_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_i_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_i_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_i_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_i_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_i_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_i_cuda_vect_mod.F90} | 642 +++++++-------- cuda/psb_s_csrg_mat_mod.F90 | 393 --------- cuda/psb_s_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_s_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_s_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_s_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_s_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_s_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_s_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_s_cuda_vect_mod.F90} | 772 +++++++++--------- cuda/psb_z_csrg_mat_mod.F90 | 393 --------- cuda/psb_z_cuda_csrg_mat_mod.F90 | 393 +++++++++ ...at_mod.F90 => psb_z_cuda_diag_mat_mod.F90} | 192 ++--- ...at_mod.F90 => psb_z_cuda_dnsg_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_z_cuda_elg_mat_mod.F90} | 310 +++---- ...t_mod.F90 => psb_z_cuda_hdiag_mat_mod.F90} | 186 ++--- ...mat_mod.F90 => psb_z_cuda_hlg_mat_mod.F90} | 260 +++--- ...at_mod.F90 => psb_z_cuda_hybg_mat_mod.F90} | 192 ++--- ...u_vect_mod.F90 => psb_z_cuda_vect_mod.F90} | 772 +++++++++--------- test/{gpukern => cudakern}/Makefile | 0 test/{gpukern => cudakern}/c_file_spmv.F90 | 0 test/{gpukern => cudakern}/d_file_spmv.F90 | 0 test/{gpukern => cudakern}/data_input.f90 | 0 test/{gpukern => cudakern}/dpdegenmv.F90 | 16 +- test/{gpukern => cudakern}/s_file_spmv.F90 | 0 test/{gpukern => cudakern}/spdegenmv.F90 | 0 test/{gpukern => cudakern}/z_file_spmv.F90 | 0 357 files changed, 9566 insertions(+), 9566 deletions(-) rename cuda/impl/{psb_c_cp_csrg_from_coo.F90 => psb_c_cuda_cp_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_c_cp_csrg_from_fmt.F90 => psb_c_cuda_cp_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_c_cp_diag_from_coo.F90 => psb_c_cuda_cp_diag_from_coo.F90} (89%) rename cuda/impl/{psb_c_cp_elg_from_coo.F90 => psb_c_cuda_cp_elg_from_coo.F90} (94%) rename cuda/impl/{psb_c_cp_elg_from_fmt.F90 => psb_c_cuda_cp_elg_from_fmt.F90} (93%) rename cuda/impl/{psb_c_cp_hdiag_from_coo.F90 => psb_c_cuda_cp_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_c_cp_hlg_from_coo.F90 => psb_c_cuda_cp_hlg_from_coo.F90} (95%) rename cuda/impl/{psb_c_cp_hlg_from_fmt.F90 => psb_c_cuda_cp_hlg_from_fmt.F90} (90%) rename cuda/impl/{psb_c_cp_hybg_from_coo.F90 => psb_c_cuda_cp_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_c_cp_hybg_from_fmt.F90 => psb_c_cuda_cp_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_c_csrg_allocate_mnnz.F90 => psb_c_cuda_csrg_allocate_mnnz.F90} (89%) rename cuda/impl/{psb_c_csrg_csmm.F90 => psb_c_cuda_csrg_csmm.F90} (94%) rename cuda/impl/{psb_c_csrg_csmv.F90 => psb_c_cuda_csrg_csmv.F90} (93%) rename cuda/impl/{psb_c_csrg_from_gpu.F90 => psb_c_cuda_csrg_from_gpu.F90} (91%) rename cuda/impl/{psb_c_csrg_inner_vect_sv.F90 => psb_c_cuda_csrg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_c_csrg_mold.F90 => psb_c_cuda_csrg_mold.F90} (88%) rename cuda/impl/{psb_c_csrg_reallocate_nz.F90 => psb_c_cuda_csrg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_c_csrg_scal.F90 => psb_c_cuda_csrg_scal.F90} (90%) rename cuda/impl/{psb_c_csrg_scals.F90 => psb_c_cuda_csrg_scals.F90} (90%) rename cuda/impl/{psb_c_csrg_to_gpu.F90 => psb_c_cuda_csrg_to_gpu.F90} (98%) rename cuda/impl/{psb_c_csrg_vect_mv.F90 => psb_c_cuda_csrg_vect_mv.F90} (90%) rename cuda/impl/{psb_c_diag_csmv.F90 => psb_c_cuda_diag_csmv.F90} (92%) rename cuda/impl/{psb_c_diag_mold.F90 => psb_c_cuda_diag_mold.F90} (88%) rename cuda/impl/{psb_c_diag_to_gpu.F90 => psb_c_cuda_diag_to_gpu.F90} (91%) rename cuda/impl/{psb_c_diag_vect_mv.F90 => psb_c_cuda_diag_vect_mv.F90} (90%) rename cuda/impl/{psb_c_dnsg_mat_impl.F90 => psb_c_cuda_dnsg_mat_impl.F90} (77%) rename cuda/impl/{psb_c_elg_allocate_mnnz.F90 => psb_c_cuda_elg_allocate_mnnz.F90} (93%) rename cuda/impl/{psb_d_elg_asb.f90 => psb_c_cuda_elg_asb.f90} (92%) rename cuda/impl/{psb_c_elg_csmm.F90 => psb_c_cuda_elg_csmm.F90} (93%) rename cuda/impl/{psb_c_elg_csmv.F90 => psb_c_cuda_elg_csmv.F90} (94%) rename cuda/impl/{psb_c_elg_csput.F90 => psb_c_cuda_elg_csput.F90} (89%) rename cuda/impl/{psb_s_elg_from_gpu.F90 => psb_c_cuda_elg_from_gpu.F90} (91%) rename cuda/impl/{psb_c_elg_inner_vect_sv.F90 => psb_c_cuda_elg_inner_vect_sv.F90} (89%) rename cuda/impl/{psb_c_elg_mold.F90 => psb_c_cuda_elg_mold.F90} (89%) rename cuda/impl/{psb_c_elg_reallocate_nz.F90 => psb_c_cuda_elg_reallocate_nz.F90} (89%) rename cuda/impl/{psb_c_elg_scal.F90 => psb_c_cuda_elg_scal.F90} (91%) rename cuda/impl/{psb_c_elg_scals.F90 => psb_c_cuda_elg_scals.F90} (90%) rename cuda/impl/{psb_c_elg_to_gpu.F90 => psb_c_cuda_elg_to_gpu.F90} (93%) rename cuda/impl/{psb_d_elg_trim.f90 => psb_c_cuda_elg_trim.f90} (92%) rename cuda/impl/{psb_c_elg_vect_mv.F90 => psb_c_cuda_elg_vect_mv.F90} (91%) rename cuda/impl/{psb_c_hdiag_csmv.F90 => psb_c_cuda_hdiag_csmv.F90} (92%) rename cuda/impl/{psb_c_hdiag_mold.F90 => psb_c_cuda_hdiag_mold.F90} (88%) rename cuda/impl/{psb_c_hdiag_to_gpu.F90 => psb_c_cuda_hdiag_to_gpu.F90} (92%) rename cuda/impl/{psb_c_hdiag_vect_mv.F90 => psb_c_cuda_hdiag_vect_mv.F90} (90%) rename cuda/impl/{psb_c_hlg_allocate_mnnz.F90 => psb_c_cuda_hlg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_c_hlg_csmm.F90 => psb_c_cuda_hlg_csmm.F90} (93%) rename cuda/impl/{psb_c_hlg_csmv.F90 => psb_c_cuda_hlg_csmv.F90} (93%) rename cuda/impl/{psb_s_hlg_from_gpu.F90 => psb_c_cuda_hlg_from_gpu.F90} (92%) rename cuda/impl/{psb_c_hlg_inner_vect_sv.F90 => psb_c_cuda_hlg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_c_hlg_mold.F90 => psb_c_cuda_hlg_mold.F90} (89%) rename cuda/impl/{psb_c_hlg_reallocate_nz.F90 => psb_c_cuda_hlg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_c_hlg_scal.F90 => psb_c_cuda_hlg_scal.F90} (91%) rename cuda/impl/{psb_c_hlg_scals.F90 => psb_c_cuda_hlg_scals.F90} (91%) rename cuda/impl/{psb_c_hlg_to_gpu.F90 => psb_c_cuda_hlg_to_gpu.F90} (91%) rename cuda/impl/{psb_c_hlg_vect_mv.F90 => psb_c_cuda_hlg_vect_mv.F90} (91%) rename cuda/impl/{psb_c_hybg_allocate_mnnz.F90 => psb_c_cuda_hybg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_c_hybg_csmm.F90 => psb_c_cuda_hybg_csmm.F90} (93%) rename cuda/impl/{psb_c_hybg_csmv.F90 => psb_c_cuda_hybg_csmv.F90} (93%) rename cuda/impl/{psb_c_hybg_inner_vect_sv.F90 => psb_c_cuda_hybg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_c_hybg_mold.F90 => psb_c_cuda_hybg_mold.F90} (89%) rename cuda/impl/{psb_c_hybg_reallocate_nz.F90 => psb_c_cuda_hybg_reallocate_nz.F90} (88%) rename cuda/impl/{psb_c_hybg_scal.F90 => psb_c_cuda_hybg_scal.F90} (91%) rename cuda/impl/{psb_c_hybg_scals.F90 => psb_c_cuda_hybg_scals.F90} (91%) rename cuda/impl/{psb_c_hybg_to_gpu.F90 => psb_c_cuda_hybg_to_gpu.F90} (96%) rename cuda/impl/{psb_c_hybg_vect_mv.F90 => psb_c_cuda_hybg_vect_mv.F90} (91%) rename cuda/impl/{psb_c_mv_csrg_from_coo.F90 => psb_c_cuda_mv_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_c_mv_csrg_from_fmt.F90 => psb_c_cuda_mv_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_c_mv_diag_from_coo.F90 => psb_c_cuda_mv_diag_from_coo.F90} (89%) rename cuda/impl/{psb_c_mv_elg_from_coo.F90 => psb_c_cuda_mv_elg_from_coo.F90} (89%) rename cuda/impl/{psb_c_mv_elg_from_fmt.F90 => psb_c_cuda_mv_elg_from_fmt.F90} (92%) rename cuda/impl/{psb_c_mv_hdiag_from_coo.F90 => psb_c_cuda_mv_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_c_mv_hlg_from_coo.F90 => psb_c_cuda_mv_hlg_from_coo.F90} (88%) rename cuda/impl/{psb_c_mv_hlg_from_fmt.F90 => psb_c_cuda_mv_hlg_from_fmt.F90} (89%) rename cuda/impl/{psb_c_mv_hybg_from_coo.F90 => psb_c_cuda_mv_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_c_mv_hybg_from_fmt.F90 => psb_c_cuda_mv_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_cp_csrg_from_coo.F90 => psb_d_cuda_cp_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_d_cp_csrg_from_fmt.F90 => psb_d_cuda_cp_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_cp_diag_from_coo.F90 => psb_d_cuda_cp_diag_from_coo.F90} (89%) rename cuda/impl/{psb_d_cp_elg_from_coo.F90 => psb_d_cuda_cp_elg_from_coo.F90} (94%) rename cuda/impl/{psb_d_cp_elg_from_fmt.F90 => psb_d_cuda_cp_elg_from_fmt.F90} (93%) rename cuda/impl/{psb_d_cp_hdiag_from_coo.F90 => psb_d_cuda_cp_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_d_cp_hlg_from_coo.F90 => psb_d_cuda_cp_hlg_from_coo.F90} (95%) rename cuda/impl/{psb_d_cp_hlg_from_fmt.F90 => psb_d_cuda_cp_hlg_from_fmt.F90} (90%) rename cuda/impl/{psb_d_cp_hybg_from_coo.F90 => psb_d_cuda_cp_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_d_cp_hybg_from_fmt.F90 => psb_d_cuda_cp_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_csrg_allocate_mnnz.F90 => psb_d_cuda_csrg_allocate_mnnz.F90} (89%) rename cuda/impl/{psb_d_csrg_csmm.F90 => psb_d_cuda_csrg_csmm.F90} (94%) rename cuda/impl/{psb_d_csrg_csmv.F90 => psb_d_cuda_csrg_csmv.F90} (93%) rename cuda/impl/{psb_s_csrg_from_gpu.F90 => psb_d_cuda_csrg_from_gpu.F90} (91%) rename cuda/impl/{psb_d_csrg_inner_vect_sv.F90 => psb_d_cuda_csrg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_d_csrg_mold.F90 => psb_d_cuda_csrg_mold.F90} (88%) rename cuda/impl/{psb_d_csrg_reallocate_nz.F90 => psb_d_cuda_csrg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_d_csrg_scal.F90 => psb_d_cuda_csrg_scal.F90} (90%) rename cuda/impl/{psb_d_csrg_scals.F90 => psb_d_cuda_csrg_scals.F90} (90%) rename cuda/impl/{psb_d_csrg_to_gpu.F90 => psb_d_cuda_csrg_to_gpu.F90} (98%) rename cuda/impl/{psb_d_csrg_vect_mv.F90 => psb_d_cuda_csrg_vect_mv.F90} (90%) rename cuda/impl/{psb_d_diag_csmv.F90 => psb_d_cuda_diag_csmv.F90} (92%) rename cuda/impl/{psb_d_diag_mold.F90 => psb_d_cuda_diag_mold.F90} (88%) rename cuda/impl/{psb_d_diag_to_gpu.F90 => psb_d_cuda_diag_to_gpu.F90} (91%) rename cuda/impl/{psb_d_diag_vect_mv.F90 => psb_d_cuda_diag_vect_mv.F90} (90%) rename cuda/impl/{psb_d_dnsg_mat_impl.F90 => psb_d_cuda_dnsg_mat_impl.F90} (77%) rename cuda/impl/{psb_d_elg_allocate_mnnz.F90 => psb_d_cuda_elg_allocate_mnnz.F90} (93%) rename cuda/impl/{psb_s_elg_asb.f90 => psb_d_cuda_elg_asb.f90} (92%) rename cuda/impl/{psb_d_elg_csmm.F90 => psb_d_cuda_elg_csmm.F90} (93%) rename cuda/impl/{psb_d_elg_csmv.F90 => psb_d_cuda_elg_csmv.F90} (94%) rename cuda/impl/{psb_d_elg_csput.F90 => psb_d_cuda_elg_csput.F90} (89%) rename cuda/impl/{psb_d_elg_from_gpu.F90 => psb_d_cuda_elg_from_gpu.F90} (91%) rename cuda/impl/{psb_d_elg_inner_vect_sv.F90 => psb_d_cuda_elg_inner_vect_sv.F90} (89%) rename cuda/impl/{psb_d_elg_mold.F90 => psb_d_cuda_elg_mold.F90} (89%) rename cuda/impl/{psb_d_elg_reallocate_nz.F90 => psb_d_cuda_elg_reallocate_nz.F90} (89%) rename cuda/impl/{psb_d_elg_scal.F90 => psb_d_cuda_elg_scal.F90} (91%) rename cuda/impl/{psb_d_elg_scals.F90 => psb_d_cuda_elg_scals.F90} (90%) rename cuda/impl/{psb_d_elg_to_gpu.F90 => psb_d_cuda_elg_to_gpu.F90} (93%) rename cuda/impl/{psb_s_elg_trim.f90 => psb_d_cuda_elg_trim.f90} (92%) rename cuda/impl/{psb_d_elg_vect_mv.F90 => psb_d_cuda_elg_vect_mv.F90} (91%) rename cuda/impl/{psb_d_hdiag_csmv.F90 => psb_d_cuda_hdiag_csmv.F90} (92%) rename cuda/impl/{psb_d_hdiag_mold.F90 => psb_d_cuda_hdiag_mold.F90} (88%) rename cuda/impl/{psb_s_hdiag_to_gpu.F90 => psb_d_cuda_hdiag_to_gpu.F90} (92%) rename cuda/impl/{psb_d_hdiag_vect_mv.F90 => psb_d_cuda_hdiag_vect_mv.F90} (90%) rename cuda/impl/{psb_d_hlg_allocate_mnnz.F90 => psb_d_cuda_hlg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_d_hlg_csmm.F90 => psb_d_cuda_hlg_csmm.F90} (93%) rename cuda/impl/{psb_d_hlg_csmv.F90 => psb_d_cuda_hlg_csmv.F90} (93%) rename cuda/impl/{psb_d_hlg_from_gpu.F90 => psb_d_cuda_hlg_from_gpu.F90} (92%) rename cuda/impl/{psb_d_hlg_inner_vect_sv.F90 => psb_d_cuda_hlg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_d_hlg_mold.F90 => psb_d_cuda_hlg_mold.F90} (89%) rename cuda/impl/{psb_d_hlg_reallocate_nz.F90 => psb_d_cuda_hlg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_d_hlg_scal.F90 => psb_d_cuda_hlg_scal.F90} (91%) rename cuda/impl/{psb_d_hlg_scals.F90 => psb_d_cuda_hlg_scals.F90} (91%) rename cuda/impl/{psb_d_hlg_to_gpu.F90 => psb_d_cuda_hlg_to_gpu.F90} (91%) rename cuda/impl/{psb_d_hlg_vect_mv.F90 => psb_d_cuda_hlg_vect_mv.F90} (91%) rename cuda/impl/{psb_d_hybg_allocate_mnnz.F90 => psb_d_cuda_hybg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_d_hybg_csmm.F90 => psb_d_cuda_hybg_csmm.F90} (93%) rename cuda/impl/{psb_d_hybg_csmv.F90 => psb_d_cuda_hybg_csmv.F90} (93%) rename cuda/impl/{psb_d_hybg_inner_vect_sv.F90 => psb_d_cuda_hybg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_d_hybg_mold.F90 => psb_d_cuda_hybg_mold.F90} (89%) rename cuda/impl/{psb_d_hybg_reallocate_nz.F90 => psb_d_cuda_hybg_reallocate_nz.F90} (88%) rename cuda/impl/{psb_d_hybg_scal.F90 => psb_d_cuda_hybg_scal.F90} (91%) rename cuda/impl/{psb_d_hybg_scals.F90 => psb_d_cuda_hybg_scals.F90} (91%) rename cuda/impl/{psb_d_hybg_to_gpu.F90 => psb_d_cuda_hybg_to_gpu.F90} (96%) rename cuda/impl/{psb_d_hybg_vect_mv.F90 => psb_d_cuda_hybg_vect_mv.F90} (90%) rename cuda/impl/{psb_d_mv_csrg_from_coo.F90 => psb_d_cuda_mv_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_d_mv_csrg_from_fmt.F90 => psb_d_cuda_mv_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_mv_diag_from_coo.F90 => psb_d_cuda_mv_diag_from_coo.F90} (89%) rename cuda/impl/{psb_d_mv_elg_from_coo.F90 => psb_d_cuda_mv_elg_from_coo.F90} (89%) rename cuda/impl/{psb_d_mv_elg_from_fmt.F90 => psb_d_cuda_mv_elg_from_fmt.F90} (92%) rename cuda/impl/{psb_d_mv_hdiag_from_coo.F90 => psb_d_cuda_mv_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_d_mv_hlg_from_coo.F90 => psb_d_cuda_mv_hlg_from_coo.F90} (88%) rename cuda/impl/{psb_d_mv_hlg_from_fmt.F90 => psb_d_cuda_mv_hlg_from_fmt.F90} (89%) rename cuda/impl/{psb_d_mv_hybg_from_coo.F90 => psb_d_cuda_mv_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_d_mv_hybg_from_fmt.F90 => psb_d_cuda_mv_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_cp_csrg_from_coo.F90 => psb_s_cuda_cp_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_s_cp_csrg_from_fmt.F90 => psb_s_cuda_cp_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_cp_diag_from_coo.F90 => psb_s_cuda_cp_diag_from_coo.F90} (89%) rename cuda/impl/{psb_s_cp_elg_from_coo.F90 => psb_s_cuda_cp_elg_from_coo.F90} (94%) rename cuda/impl/{psb_s_cp_elg_from_fmt.F90 => psb_s_cuda_cp_elg_from_fmt.F90} (93%) rename cuda/impl/{psb_s_cp_hdiag_from_coo.F90 => psb_s_cuda_cp_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_s_cp_hlg_from_coo.F90 => psb_s_cuda_cp_hlg_from_coo.F90} (95%) rename cuda/impl/{psb_s_cp_hlg_from_fmt.F90 => psb_s_cuda_cp_hlg_from_fmt.F90} (90%) rename cuda/impl/{psb_s_cp_hybg_from_coo.F90 => psb_s_cuda_cp_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_s_cp_hybg_from_fmt.F90 => psb_s_cuda_cp_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_csrg_allocate_mnnz.F90 => psb_s_cuda_csrg_allocate_mnnz.F90} (89%) rename cuda/impl/{psb_s_csrg_csmm.F90 => psb_s_cuda_csrg_csmm.F90} (94%) rename cuda/impl/{psb_s_csrg_csmv.F90 => psb_s_cuda_csrg_csmv.F90} (93%) rename cuda/impl/{psb_z_csrg_from_gpu.F90 => psb_s_cuda_csrg_from_gpu.F90} (91%) rename cuda/impl/{psb_s_csrg_inner_vect_sv.F90 => psb_s_cuda_csrg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_s_csrg_mold.F90 => psb_s_cuda_csrg_mold.F90} (88%) rename cuda/impl/{psb_s_csrg_reallocate_nz.F90 => psb_s_cuda_csrg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_s_csrg_scal.F90 => psb_s_cuda_csrg_scal.F90} (90%) rename cuda/impl/{psb_s_csrg_scals.F90 => psb_s_cuda_csrg_scals.F90} (90%) rename cuda/impl/{psb_s_csrg_to_gpu.F90 => psb_s_cuda_csrg_to_gpu.F90} (98%) rename cuda/impl/{psb_s_csrg_vect_mv.F90 => psb_s_cuda_csrg_vect_mv.F90} (90%) rename cuda/impl/{psb_s_diag_csmv.F90 => psb_s_cuda_diag_csmv.F90} (92%) rename cuda/impl/{psb_s_diag_mold.F90 => psb_s_cuda_diag_mold.F90} (88%) rename cuda/impl/{psb_s_diag_to_gpu.F90 => psb_s_cuda_diag_to_gpu.F90} (91%) rename cuda/impl/{psb_s_diag_vect_mv.F90 => psb_s_cuda_diag_vect_mv.F90} (90%) rename cuda/impl/{psb_s_dnsg_mat_impl.F90 => psb_s_cuda_dnsg_mat_impl.F90} (77%) rename cuda/impl/{psb_s_elg_allocate_mnnz.F90 => psb_s_cuda_elg_allocate_mnnz.F90} (93%) rename cuda/impl/{psb_c_elg_asb.f90 => psb_s_cuda_elg_asb.f90} (92%) rename cuda/impl/{psb_s_elg_csmm.F90 => psb_s_cuda_elg_csmm.F90} (93%) rename cuda/impl/{psb_s_elg_csmv.F90 => psb_s_cuda_elg_csmv.F90} (94%) rename cuda/impl/{psb_s_elg_csput.F90 => psb_s_cuda_elg_csput.F90} (89%) rename cuda/impl/{psb_c_elg_from_gpu.F90 => psb_s_cuda_elg_from_gpu.F90} (91%) rename cuda/impl/{psb_s_elg_inner_vect_sv.F90 => psb_s_cuda_elg_inner_vect_sv.F90} (89%) rename cuda/impl/{psb_s_elg_mold.F90 => psb_s_cuda_elg_mold.F90} (89%) rename cuda/impl/{psb_s_elg_reallocate_nz.F90 => psb_s_cuda_elg_reallocate_nz.F90} (89%) rename cuda/impl/{psb_s_elg_scal.F90 => psb_s_cuda_elg_scal.F90} (91%) rename cuda/impl/{psb_s_elg_scals.F90 => psb_s_cuda_elg_scals.F90} (90%) rename cuda/impl/{psb_s_elg_to_gpu.F90 => psb_s_cuda_elg_to_gpu.F90} (93%) rename cuda/impl/{psb_z_elg_trim.f90 => psb_s_cuda_elg_trim.f90} (92%) rename cuda/impl/{psb_s_elg_vect_mv.F90 => psb_s_cuda_elg_vect_mv.F90} (91%) rename cuda/impl/{psb_s_hdiag_csmv.F90 => psb_s_cuda_hdiag_csmv.F90} (92%) rename cuda/impl/{psb_s_hdiag_mold.F90 => psb_s_cuda_hdiag_mold.F90} (88%) rename cuda/impl/{psb_z_hdiag_to_gpu.F90 => psb_s_cuda_hdiag_to_gpu.F90} (92%) rename cuda/impl/{psb_s_hdiag_vect_mv.F90 => psb_s_cuda_hdiag_vect_mv.F90} (90%) rename cuda/impl/{psb_s_hlg_allocate_mnnz.F90 => psb_s_cuda_hlg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_s_hlg_csmm.F90 => psb_s_cuda_hlg_csmm.F90} (93%) rename cuda/impl/{psb_s_hlg_csmv.F90 => psb_s_cuda_hlg_csmv.F90} (93%) rename cuda/impl/{psb_c_hlg_from_gpu.F90 => psb_s_cuda_hlg_from_gpu.F90} (92%) rename cuda/impl/{psb_s_hlg_inner_vect_sv.F90 => psb_s_cuda_hlg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_s_hlg_mold.F90 => psb_s_cuda_hlg_mold.F90} (89%) rename cuda/impl/{psb_s_hlg_reallocate_nz.F90 => psb_s_cuda_hlg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_s_hlg_scal.F90 => psb_s_cuda_hlg_scal.F90} (91%) rename cuda/impl/{psb_s_hlg_scals.F90 => psb_s_cuda_hlg_scals.F90} (91%) rename cuda/impl/{psb_s_hlg_to_gpu.F90 => psb_s_cuda_hlg_to_gpu.F90} (91%) rename cuda/impl/{psb_s_hlg_vect_mv.F90 => psb_s_cuda_hlg_vect_mv.F90} (91%) rename cuda/impl/{psb_s_hybg_allocate_mnnz.F90 => psb_s_cuda_hybg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_s_hybg_csmm.F90 => psb_s_cuda_hybg_csmm.F90} (93%) rename cuda/impl/{psb_s_hybg_csmv.F90 => psb_s_cuda_hybg_csmv.F90} (93%) rename cuda/impl/{psb_s_hybg_inner_vect_sv.F90 => psb_s_cuda_hybg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_s_hybg_mold.F90 => psb_s_cuda_hybg_mold.F90} (89%) rename cuda/impl/{psb_s_hybg_reallocate_nz.F90 => psb_s_cuda_hybg_reallocate_nz.F90} (88%) rename cuda/impl/{psb_s_hybg_scal.F90 => psb_s_cuda_hybg_scal.F90} (91%) rename cuda/impl/{psb_s_hybg_scals.F90 => psb_s_cuda_hybg_scals.F90} (91%) rename cuda/impl/{psb_s_hybg_to_gpu.F90 => psb_s_cuda_hybg_to_gpu.F90} (96%) rename cuda/impl/{psb_s_hybg_vect_mv.F90 => psb_s_cuda_hybg_vect_mv.F90} (90%) rename cuda/impl/{psb_s_mv_csrg_from_coo.F90 => psb_s_cuda_mv_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_s_mv_csrg_from_fmt.F90 => psb_s_cuda_mv_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_mv_diag_from_coo.F90 => psb_s_cuda_mv_diag_from_coo.F90} (89%) rename cuda/impl/{psb_s_mv_elg_from_coo.F90 => psb_s_cuda_mv_elg_from_coo.F90} (89%) rename cuda/impl/{psb_s_mv_elg_from_fmt.F90 => psb_s_cuda_mv_elg_from_fmt.F90} (92%) rename cuda/impl/{psb_s_mv_hdiag_from_coo.F90 => psb_s_cuda_mv_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_s_mv_hlg_from_coo.F90 => psb_s_cuda_mv_hlg_from_coo.F90} (88%) rename cuda/impl/{psb_s_mv_hlg_from_fmt.F90 => psb_s_cuda_mv_hlg_from_fmt.F90} (89%) rename cuda/impl/{psb_s_mv_hybg_from_coo.F90 => psb_s_cuda_mv_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_s_mv_hybg_from_fmt.F90 => psb_s_cuda_mv_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_cp_csrg_from_coo.F90 => psb_z_cuda_cp_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_z_cp_csrg_from_fmt.F90 => psb_z_cuda_cp_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_cp_diag_from_coo.F90 => psb_z_cuda_cp_diag_from_coo.F90} (89%) rename cuda/impl/{psb_z_cp_elg_from_coo.F90 => psb_z_cuda_cp_elg_from_coo.F90} (94%) rename cuda/impl/{psb_z_cp_elg_from_fmt.F90 => psb_z_cuda_cp_elg_from_fmt.F90} (93%) rename cuda/impl/{psb_z_cp_hdiag_from_coo.F90 => psb_z_cuda_cp_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_z_cp_hlg_from_coo.F90 => psb_z_cuda_cp_hlg_from_coo.F90} (95%) rename cuda/impl/{psb_z_cp_hlg_from_fmt.F90 => psb_z_cuda_cp_hlg_from_fmt.F90} (90%) rename cuda/impl/{psb_z_cp_hybg_from_coo.F90 => psb_z_cuda_cp_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_z_cp_hybg_from_fmt.F90 => psb_z_cuda_cp_hybg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_csrg_allocate_mnnz.F90 => psb_z_cuda_csrg_allocate_mnnz.F90} (89%) rename cuda/impl/{psb_z_csrg_csmm.F90 => psb_z_cuda_csrg_csmm.F90} (94%) rename cuda/impl/{psb_z_csrg_csmv.F90 => psb_z_cuda_csrg_csmv.F90} (93%) rename cuda/impl/{psb_d_csrg_from_gpu.F90 => psb_z_cuda_csrg_from_gpu.F90} (91%) rename cuda/impl/{psb_z_csrg_inner_vect_sv.F90 => psb_z_cuda_csrg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_z_csrg_mold.F90 => psb_z_cuda_csrg_mold.F90} (88%) rename cuda/impl/{psb_z_csrg_reallocate_nz.F90 => psb_z_cuda_csrg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_z_csrg_scal.F90 => psb_z_cuda_csrg_scal.F90} (90%) rename cuda/impl/{psb_z_csrg_scals.F90 => psb_z_cuda_csrg_scals.F90} (90%) rename cuda/impl/{psb_z_csrg_to_gpu.F90 => psb_z_cuda_csrg_to_gpu.F90} (98%) rename cuda/impl/{psb_z_csrg_vect_mv.F90 => psb_z_cuda_csrg_vect_mv.F90} (90%) rename cuda/impl/{psb_z_diag_csmv.F90 => psb_z_cuda_diag_csmv.F90} (92%) rename cuda/impl/{psb_z_diag_mold.F90 => psb_z_cuda_diag_mold.F90} (88%) rename cuda/impl/{psb_z_diag_to_gpu.F90 => psb_z_cuda_diag_to_gpu.F90} (91%) rename cuda/impl/{psb_z_diag_vect_mv.F90 => psb_z_cuda_diag_vect_mv.F90} (90%) rename cuda/impl/{psb_z_dnsg_mat_impl.F90 => psb_z_cuda_dnsg_mat_impl.F90} (77%) rename cuda/impl/{psb_z_elg_allocate_mnnz.F90 => psb_z_cuda_elg_allocate_mnnz.F90} (93%) rename cuda/impl/{psb_z_elg_asb.f90 => psb_z_cuda_elg_asb.f90} (92%) rename cuda/impl/{psb_z_elg_csmm.F90 => psb_z_cuda_elg_csmm.F90} (93%) rename cuda/impl/{psb_z_elg_csmv.F90 => psb_z_cuda_elg_csmv.F90} (94%) rename cuda/impl/{psb_z_elg_csput.F90 => psb_z_cuda_elg_csput.F90} (89%) rename cuda/impl/{psb_z_elg_from_gpu.F90 => psb_z_cuda_elg_from_gpu.F90} (91%) rename cuda/impl/{psb_z_elg_inner_vect_sv.F90 => psb_z_cuda_elg_inner_vect_sv.F90} (89%) rename cuda/impl/{psb_z_elg_mold.F90 => psb_z_cuda_elg_mold.F90} (89%) rename cuda/impl/{psb_z_elg_reallocate_nz.F90 => psb_z_cuda_elg_reallocate_nz.F90} (89%) rename cuda/impl/{psb_z_elg_scal.F90 => psb_z_cuda_elg_scal.F90} (91%) rename cuda/impl/{psb_z_elg_scals.F90 => psb_z_cuda_elg_scals.F90} (90%) rename cuda/impl/{psb_z_elg_to_gpu.F90 => psb_z_cuda_elg_to_gpu.F90} (93%) rename cuda/impl/{psb_c_elg_trim.f90 => psb_z_cuda_elg_trim.f90} (92%) rename cuda/impl/{psb_z_elg_vect_mv.F90 => psb_z_cuda_elg_vect_mv.F90} (91%) rename cuda/impl/{psb_z_hdiag_csmv.F90 => psb_z_cuda_hdiag_csmv.F90} (92%) rename cuda/impl/{psb_z_hdiag_mold.F90 => psb_z_cuda_hdiag_mold.F90} (88%) rename cuda/impl/{psb_d_hdiag_to_gpu.F90 => psb_z_cuda_hdiag_to_gpu.F90} (92%) rename cuda/impl/{psb_z_hdiag_vect_mv.F90 => psb_z_cuda_hdiag_vect_mv.F90} (90%) rename cuda/impl/{psb_z_hlg_allocate_mnnz.F90 => psb_z_cuda_hlg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_z_hlg_csmm.F90 => psb_z_cuda_hlg_csmm.F90} (93%) rename cuda/impl/{psb_z_hlg_csmv.F90 => psb_z_cuda_hlg_csmv.F90} (93%) rename cuda/impl/{psb_z_hlg_from_gpu.F90 => psb_z_cuda_hlg_from_gpu.F90} (92%) rename cuda/impl/{psb_z_hlg_inner_vect_sv.F90 => psb_z_cuda_hlg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_z_hlg_mold.F90 => psb_z_cuda_hlg_mold.F90} (89%) rename cuda/impl/{psb_z_hlg_reallocate_nz.F90 => psb_z_cuda_hlg_reallocate_nz.F90} (87%) rename cuda/impl/{psb_z_hlg_scal.F90 => psb_z_cuda_hlg_scal.F90} (91%) rename cuda/impl/{psb_z_hlg_scals.F90 => psb_z_cuda_hlg_scals.F90} (91%) rename cuda/impl/{psb_z_hlg_to_gpu.F90 => psb_z_cuda_hlg_to_gpu.F90} (91%) rename cuda/impl/{psb_z_hlg_vect_mv.F90 => psb_z_cuda_hlg_vect_mv.F90} (91%) rename cuda/impl/{psb_z_hybg_allocate_mnnz.F90 => psb_z_cuda_hybg_allocate_mnnz.F90} (90%) rename cuda/impl/{psb_z_hybg_csmm.F90 => psb_z_cuda_hybg_csmm.F90} (93%) rename cuda/impl/{psb_z_hybg_csmv.F90 => psb_z_cuda_hybg_csmv.F90} (93%) rename cuda/impl/{psb_z_hybg_inner_vect_sv.F90 => psb_z_cuda_hybg_inner_vect_sv.F90} (90%) rename cuda/impl/{psb_z_hybg_mold.F90 => psb_z_cuda_hybg_mold.F90} (89%) rename cuda/impl/{psb_z_hybg_reallocate_nz.F90 => psb_z_cuda_hybg_reallocate_nz.F90} (88%) rename cuda/impl/{psb_z_hybg_scal.F90 => psb_z_cuda_hybg_scal.F90} (91%) rename cuda/impl/{psb_z_hybg_scals.F90 => psb_z_cuda_hybg_scals.F90} (91%) rename cuda/impl/{psb_z_hybg_to_gpu.F90 => psb_z_cuda_hybg_to_gpu.F90} (96%) rename cuda/impl/{psb_z_hybg_vect_mv.F90 => psb_z_cuda_hybg_vect_mv.F90} (91%) rename cuda/impl/{psb_z_mv_csrg_from_coo.F90 => psb_z_cuda_mv_csrg_from_coo.F90} (89%) rename cuda/impl/{psb_z_mv_csrg_from_fmt.F90 => psb_z_cuda_mv_csrg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_mv_diag_from_coo.F90 => psb_z_cuda_mv_diag_from_coo.F90} (89%) rename cuda/impl/{psb_z_mv_elg_from_coo.F90 => psb_z_cuda_mv_elg_from_coo.F90} (89%) rename cuda/impl/{psb_z_mv_elg_from_fmt.F90 => psb_z_cuda_mv_elg_from_fmt.F90} (92%) rename cuda/impl/{psb_z_mv_hdiag_from_coo.F90 => psb_z_cuda_mv_hdiag_from_coo.F90} (87%) rename cuda/impl/{psb_z_mv_hlg_from_coo.F90 => psb_z_cuda_mv_hlg_from_coo.F90} (88%) rename cuda/impl/{psb_z_mv_hlg_from_fmt.F90 => psb_z_cuda_mv_hlg_from_fmt.F90} (89%) rename cuda/impl/{psb_z_mv_hybg_from_coo.F90 => psb_z_cuda_mv_hybg_from_coo.F90} (89%) rename cuda/impl/{psb_z_mv_hybg_from_fmt.F90 => psb_z_cuda_mv_hybg_from_fmt.F90} (89%) delete mode 100644 cuda/psb_c_csrg_mat_mod.F90 create mode 100644 cuda/psb_c_cuda_csrg_mat_mod.F90 rename cuda/{psb_c_diag_mat_mod.F90 => psb_c_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_c_dnsg_mat_mod.F90 => psb_c_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_c_elg_mat_mod.F90 => psb_c_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_c_hdiag_mat_mod.F90 => psb_c_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_c_hlg_mat_mod.F90 => psb_c_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_c_hybg_mat_mod.F90 => psb_c_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_c_gpu_vect_mod.F90 => psb_c_cuda_vect_mod.F90} (72%) rename cuda/{psb_gpu_env_mod.F90 => psb_cuda_env_mod.F90} (74%) rename cuda/{psb_gpu_mod.F90 => psb_cuda_mod.F90} (65%) delete mode 100644 cuda/psb_d_csrg_mat_mod.F90 create mode 100644 cuda/psb_d_cuda_csrg_mat_mod.F90 rename cuda/{psb_d_diag_mat_mod.F90 => psb_d_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_d_dnsg_mat_mod.F90 => psb_d_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_d_elg_mat_mod.F90 => psb_d_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_d_hdiag_mat_mod.F90 => psb_d_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_d_hlg_mat_mod.F90 => psb_d_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_d_hybg_mat_mod.F90 => psb_d_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_d_gpu_vect_mod.F90 => psb_d_cuda_vect_mod.F90} (72%) delete mode 100644 cuda/psb_i_csrg_mat_mod.F90 create mode 100644 cuda/psb_i_cuda_csrg_mat_mod.F90 rename cuda/{psb_i_diag_mat_mod.F90 => psb_i_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_i_dnsg_mat_mod.F90 => psb_i_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_i_elg_mat_mod.F90 => psb_i_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_i_hdiag_mat_mod.F90 => psb_i_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_i_hlg_mat_mod.F90 => psb_i_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_i_hybg_mat_mod.F90 => psb_i_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_i_gpu_vect_mod.F90 => psb_i_cuda_vect_mod.F90} (72%) delete mode 100644 cuda/psb_s_csrg_mat_mod.F90 create mode 100644 cuda/psb_s_cuda_csrg_mat_mod.F90 rename cuda/{psb_s_diag_mat_mod.F90 => psb_s_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_s_dnsg_mat_mod.F90 => psb_s_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_s_elg_mat_mod.F90 => psb_s_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_s_hdiag_mat_mod.F90 => psb_s_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_s_hlg_mat_mod.F90 => psb_s_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_s_hybg_mat_mod.F90 => psb_s_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_s_gpu_vect_mod.F90 => psb_s_cuda_vect_mod.F90} (72%) delete mode 100644 cuda/psb_z_csrg_mat_mod.F90 create mode 100644 cuda/psb_z_cuda_csrg_mat_mod.F90 rename cuda/{psb_z_diag_mat_mod.F90 => psb_z_cuda_diag_mat_mod.F90} (52%) rename cuda/{psb_z_dnsg_mat_mod.F90 => psb_z_cuda_dnsg_mat_mod.F90} (51%) rename cuda/{psb_z_elg_mat_mod.F90 => psb_z_cuda_elg_mat_mod.F90} (50%) rename cuda/{psb_z_hdiag_mat_mod.F90 => psb_z_cuda_hdiag_mat_mod.F90} (50%) rename cuda/{psb_z_hlg_mat_mod.F90 => psb_z_cuda_hlg_mat_mod.F90} (50%) rename cuda/{psb_z_hybg_mat_mod.F90 => psb_z_cuda_hybg_mat_mod.F90} (52%) rename cuda/{psb_z_gpu_vect_mod.F90 => psb_z_cuda_vect_mod.F90} (72%) rename test/{gpukern => cudakern}/Makefile (100%) rename test/{gpukern => cudakern}/c_file_spmv.F90 (100%) rename test/{gpukern => cudakern}/d_file_spmv.F90 (100%) rename test/{gpukern => cudakern}/data_input.f90 (100%) rename test/{gpukern => cudakern}/dpdegenmv.F90 (99%) rename test/{gpukern => cudakern}/s_file_spmv.F90 (100%) rename test/{gpukern => cudakern}/spdegenmv.F90 (100%) rename test/{gpukern => cudakern}/z_file_spmv.F90 (100%) diff --git a/cuda/Makefile b/cuda/Makefile index 2b0c011a..0f03e359 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -23,22 +23,22 @@ FOBJS=cusparse_mod.o base_cusparse_mod.o \ psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\ psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \ elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \ - psb_i_gpu_vect_mod.o \ - psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ - psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ - psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ - psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ - psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ - psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ - psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ - psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ - psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ - psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ - psb_gpu_env_mod.o psb_gpu_mod.o \ - psb_d_diag_mat_mod.o\ - psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ - psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ - psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o \ + psb_i_cuda_vect_mod.o \ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_cuda_env_mod.o psb_cuda_mod.o \ + psb_d_cuda_diag_mat_mod.o\ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o \ dnsdev_mod.o COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\ @@ -65,30 +65,30 @@ lib: ilib cudalib spgpulib /bin/cp -p $(LIBNAME) $(LIBDIR) dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o -psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o: psb_i_gpu_vect_mod.o -psb_i_gpu_vect_mod.o : psb_vectordev_mod.o psb_gpu_env_mod.o +psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o +psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o -psb_d_hlg_mat_mod.o: hlldev_mod.o psb_d_gpu_vect_mod.o psb_gpu_env_mod.o -psb_d_elg_mat_mod.o: elldev_mod.o psb_d_gpu_vect_mod.o -psb_d_diag_mat_mod.o: diagdev_mod.o psb_d_gpu_vect_mod.o -psb_d_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_gpu_vect_mod.o -psb_s_dnsg_mat_mod.o: dnsdev_mod.o psb_s_gpu_vect_mod.o -psb_d_dnsg_mat_mod.o: dnsdev_mod.o psb_d_gpu_vect_mod.o -psb_c_dnsg_mat_mod.o: dnsdev_mod.o psb_c_gpu_vect_mod.o -psb_z_dnsg_mat_mod.o: dnsdev_mod.o psb_z_gpu_vect_mod.o -psb_s_hlg_mat_mod.o: hlldev_mod.o psb_s_gpu_vect_mod.o psb_gpu_env_mod.o -psb_s_elg_mat_mod.o: elldev_mod.o psb_s_gpu_vect_mod.o -psb_s_diag_mat_mod.o: diagdev_mod.o psb_s_gpu_vect_mod.o -psb_s_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_gpu_vect_mod.o -psb_s_csrg_mat_mod.o psb_s_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_d_csrg_mat_mod.o psb_d_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_z_hlg_mat_mod.o: hlldev_mod.o psb_z_gpu_vect_mod.o psb_gpu_env_mod.o -psb_z_elg_mat_mod.o: elldev_mod.o psb_z_gpu_vect_mod.o -psb_c_hlg_mat_mod.o: hlldev_mod.o psb_c_gpu_vect_mod.o psb_gpu_env_mod.o -psb_c_elg_mat_mod.o: elldev_mod.o psb_c_gpu_vect_mod.o -psb_c_csrg_mat_mod.o psb_c_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o -psb_z_csrg_mat_mod.o psb_z_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_cuda_hlg_mat_mod.o: hlldev_mod.o psb_d_cuda_vect_mod.o psb_cuda_env_mod.o +psb_d_cuda_elg_mat_mod.o: elldev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_diag_mat_mod.o: diagdev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_cuda_vect_mod.o +psb_s_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_s_cuda_vect_mod.o +psb_d_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_d_cuda_vect_mod.o +psb_c_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_c_cuda_vect_mod.o +psb_z_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_z_cuda_vect_mod.o +psb_s_cuda_hlg_mat_mod.o: hlldev_mod.o psb_s_cuda_vect_mod.o psb_cuda_env_mod.o +psb_s_cuda_elg_mat_mod.o: elldev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_diag_mat_mod.o: diagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_csrg_mat_mod.o psb_s_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_cuda_csrg_mat_mod.o psb_d_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_hlg_mat_mod.o: hlldev_mod.o psb_z_cuda_vect_mod.o psb_cuda_env_mod.o +psb_z_cuda_elg_mat_mod.o: elldev_mod.o psb_z_cuda_vect_mod.o +psb_c_cuda_hlg_mat_mod.o: hlldev_mod.o psb_c_cuda_vect_mod.o psb_cuda_env_mod.o +psb_c_cuda_elg_mat_mod.o: elldev_mod.o psb_c_cuda_vect_mod.o +psb_c_cuda_csrg_mat_mod.o psb_c_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_csrg_mat_mod.o psb_z_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o vectordev.o: cuda_util.o vectordev.h @@ -101,22 +101,22 @@ svectordev.o: svectordev.h vectordev.h dvectordev.o: dvectordev.h vectordev.h cvectordev.o: cvectordev.h vectordev.h zvectordev.o: zvectordev.h vectordev.h -psb_gpu_env_mod.o: base_cusparse_mod.o -psb_gpu_mod.o: psb_gpu_env_mod.o psb_i_gpu_vect_mod.o\ - psb_d_gpu_vect_mod.o psb_s_gpu_vect_mod.o\ - psb_z_gpu_vect_mod.o psb_c_gpu_vect_mod.o\ - psb_d_elg_mat_mod.o psb_d_hlg_mat_mod.o \ - psb_d_hybg_mat_mod.o psb_d_csrg_mat_mod.o\ - psb_s_elg_mat_mod.o psb_s_hlg_mat_mod.o \ - psb_s_hybg_mat_mod.o psb_s_csrg_mat_mod.o\ - psb_c_elg_mat_mod.o psb_c_hlg_mat_mod.o \ - psb_c_hybg_mat_mod.o psb_c_csrg_mat_mod.o\ - psb_z_elg_mat_mod.o psb_z_hlg_mat_mod.o \ - psb_z_hybg_mat_mod.o psb_z_csrg_mat_mod.o\ - psb_d_diag_mat_mod.o \ - psb_d_hdiag_mat_mod.o psb_s_hdiag_mat_mod.o\ - psb_s_dnsg_mat_mod.o psb_d_dnsg_mat_mod.o \ - psb_c_dnsg_mat_mod.o psb_z_dnsg_mat_mod.o +psb_cuda_env_mod.o: base_cusparse_mod.o +psb_cuda_mod.o: psb_cuda_env_mod.o psb_i_cuda_vect_mod.o\ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_d_cuda_diag_mat_mod.o \ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o iobjs: $(FOBJS) $(MAKE) -C impl objs diff --git a/cuda/impl/Makefile b/cuda/impl/Makefile index 6ddac9a7..12bf0747 100755 --- a/cuda/impl/Makefile +++ b/cuda/impl/Makefile @@ -17,276 +17,276 @@ CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) OBJS= \ -psb_d_cp_csrg_from_coo.o \ -psb_d_cp_csrg_from_fmt.o \ -psb_d_cp_elg_from_coo.o \ -psb_d_cp_elg_from_fmt.o \ -psb_s_cp_csrg_from_coo.o \ -psb_s_cp_csrg_from_fmt.o \ -psb_s_csrg_allocate_mnnz.o \ -psb_s_csrg_csmm.o \ -psb_s_csrg_csmv.o \ -psb_s_csrg_mold.o \ -psb_s_csrg_reallocate_nz.o \ -psb_s_csrg_scal.o \ -psb_s_csrg_scals.o \ -psb_s_csrg_from_gpu.o \ -psb_s_csrg_to_gpu.o \ -psb_s_csrg_vect_mv.o \ -psb_s_csrg_inner_vect_sv.o \ -psb_d_csrg_allocate_mnnz.o \ -psb_d_csrg_csmm.o \ -psb_d_csrg_csmv.o \ -psb_d_csrg_mold.o \ -psb_d_csrg_reallocate_nz.o \ -psb_d_csrg_scal.o \ -psb_d_csrg_scals.o \ -psb_d_csrg_from_gpu.o \ -psb_d_csrg_to_gpu.o \ -psb_d_csrg_vect_mv.o \ -psb_d_csrg_inner_vect_sv.o \ -psb_d_elg_allocate_mnnz.o \ -psb_d_elg_asb.o \ -psb_d_elg_csmm.o \ -psb_d_elg_csmv.o \ -psb_d_elg_csput.o \ -psb_d_elg_from_gpu.o \ -psb_d_elg_inner_vect_sv.o \ -psb_d_elg_mold.o \ -psb_d_elg_reallocate_nz.o \ -psb_d_elg_scal.o \ -psb_d_elg_scals.o \ -psb_d_elg_to_gpu.o \ -psb_d_elg_vect_mv.o \ -psb_d_mv_csrg_from_coo.o \ -psb_d_mv_csrg_from_fmt.o \ -psb_d_mv_elg_from_coo.o \ -psb_d_mv_elg_from_fmt.o \ -psb_s_mv_csrg_from_coo.o \ -psb_s_mv_csrg_from_fmt.o \ -psb_s_cp_elg_from_coo.o \ -psb_s_cp_elg_from_fmt.o \ -psb_s_elg_allocate_mnnz.o \ -psb_s_elg_asb.o \ -psb_s_elg_csmm.o \ -psb_s_elg_csmv.o \ -psb_s_elg_csput.o \ -psb_s_elg_inner_vect_sv.o \ -psb_s_elg_mold.o \ -psb_s_elg_reallocate_nz.o \ -psb_s_elg_scal.o \ -psb_s_elg_scals.o \ -psb_s_elg_to_gpu.o \ -psb_s_elg_from_gpu.o \ -psb_s_elg_vect_mv.o \ -psb_s_mv_elg_from_coo.o \ -psb_s_mv_elg_from_fmt.o \ -psb_s_cp_hlg_from_fmt.o \ -psb_s_cp_hlg_from_coo.o \ -psb_d_cp_hlg_from_fmt.o \ -psb_d_cp_hlg_from_coo.o \ -psb_d_hlg_allocate_mnnz.o \ -psb_d_hlg_csmm.o \ -psb_d_hlg_csmv.o \ -psb_d_hlg_inner_vect_sv.o \ -psb_d_hlg_mold.o \ -psb_d_hlg_reallocate_nz.o \ -psb_d_hlg_scal.o \ -psb_d_hlg_scals.o \ -psb_d_hlg_from_gpu.o \ -psb_d_hlg_to_gpu.o \ -psb_d_hlg_vect_mv.o \ -psb_s_hlg_allocate_mnnz.o \ -psb_s_hlg_csmm.o \ -psb_s_hlg_csmv.o \ -psb_s_hlg_inner_vect_sv.o \ -psb_s_hlg_mold.o \ -psb_s_hlg_reallocate_nz.o \ -psb_s_hlg_scal.o \ -psb_s_hlg_scals.o \ -psb_s_hlg_from_gpu.o \ -psb_s_hlg_to_gpu.o \ -psb_s_hlg_vect_mv.o \ -psb_s_mv_hlg_from_coo.o \ -psb_s_cp_hlg_from_coo.o \ -psb_s_mv_hlg_from_fmt.o \ -psb_d_mv_hlg_from_coo.o \ -psb_d_cp_hlg_from_coo.o \ -psb_d_mv_hlg_from_fmt.o \ -psb_s_hybg_allocate_mnnz.o \ -psb_s_hybg_csmm.o \ -psb_s_hybg_csmv.o \ -psb_s_hybg_reallocate_nz.o \ -psb_s_hybg_scal.o \ -psb_s_hybg_scals.o \ -psb_s_hybg_to_gpu.o \ -psb_s_hybg_vect_mv.o \ -psb_s_hybg_inner_vect_sv.o \ -psb_s_cp_hybg_from_coo.o \ -psb_s_cp_hybg_from_fmt.o \ -psb_s_mv_hybg_from_fmt.o \ -psb_s_mv_hybg_from_coo.o \ -psb_s_hybg_mold.o \ -psb_d_hybg_allocate_mnnz.o \ -psb_d_hybg_csmm.o \ -psb_d_hybg_csmv.o \ -psb_d_hybg_reallocate_nz.o \ -psb_d_hybg_scal.o \ -psb_d_hybg_scals.o \ -psb_d_hybg_to_gpu.o \ -psb_d_hybg_vect_mv.o \ -psb_d_hybg_inner_vect_sv.o \ -psb_d_cp_hybg_from_coo.o \ -psb_d_cp_hybg_from_fmt.o \ -psb_d_mv_hybg_from_fmt.o \ -psb_d_mv_hybg_from_coo.o \ -psb_d_hybg_mold.o \ -psb_z_cp_csrg_from_coo.o \ -psb_z_cp_csrg_from_fmt.o \ -psb_z_cp_elg_from_coo.o \ -psb_z_cp_elg_from_fmt.o \ -psb_c_cp_csrg_from_coo.o \ -psb_c_cp_csrg_from_fmt.o \ -psb_c_csrg_allocate_mnnz.o \ -psb_c_csrg_csmm.o \ -psb_c_csrg_csmv.o \ -psb_c_csrg_mold.o \ -psb_c_csrg_reallocate_nz.o \ -psb_c_csrg_scal.o \ -psb_c_csrg_scals.o \ -psb_c_csrg_from_gpu.o \ -psb_c_csrg_to_gpu.o \ -psb_c_csrg_vect_mv.o \ -psb_c_csrg_inner_vect_sv.o \ -psb_z_csrg_allocate_mnnz.o \ -psb_z_csrg_csmm.o \ -psb_z_csrg_csmv.o \ -psb_z_csrg_mold.o \ -psb_z_csrg_reallocate_nz.o \ -psb_z_csrg_scal.o \ -psb_z_csrg_scals.o \ -psb_z_csrg_from_gpu.o \ -psb_z_csrg_to_gpu.o \ -psb_z_csrg_vect_mv.o \ -psb_z_csrg_inner_vect_sv.o \ -psb_z_elg_allocate_mnnz.o \ -psb_z_elg_asb.o \ -psb_z_elg_csmm.o \ -psb_z_elg_csmv.o \ -psb_z_elg_csput.o \ -psb_z_elg_inner_vect_sv.o \ -psb_z_elg_mold.o \ -psb_z_elg_reallocate_nz.o \ -psb_z_elg_scal.o \ -psb_z_elg_scals.o \ -psb_z_elg_to_gpu.o \ -psb_z_elg_from_gpu.o \ -psb_z_elg_vect_mv.o \ -psb_z_mv_csrg_from_coo.o \ -psb_z_mv_csrg_from_fmt.o \ -psb_z_mv_elg_from_coo.o \ -psb_z_mv_elg_from_fmt.o \ -psb_c_mv_csrg_from_coo.o \ -psb_c_mv_csrg_from_fmt.o \ -psb_c_cp_elg_from_coo.o \ -psb_c_cp_elg_from_fmt.o \ -psb_c_elg_allocate_mnnz.o \ -psb_c_elg_asb.o \ -psb_c_elg_csmm.o \ -psb_c_elg_csmv.o \ -psb_c_elg_csput.o \ -psb_c_elg_inner_vect_sv.o \ -psb_c_elg_mold.o \ -psb_c_elg_reallocate_nz.o \ -psb_c_elg_scal.o \ -psb_c_elg_scals.o \ -psb_c_elg_to_gpu.o \ -psb_c_elg_from_gpu.o \ -psb_c_elg_vect_mv.o \ -psb_c_mv_elg_from_coo.o \ -psb_c_mv_elg_from_fmt.o \ -psb_c_cp_hlg_from_fmt.o \ -psb_c_cp_hlg_from_coo.o \ -psb_z_cp_hlg_from_fmt.o \ -psb_z_cp_hlg_from_coo.o \ -psb_z_hlg_allocate_mnnz.o \ -psb_z_hlg_csmm.o \ -psb_z_hlg_csmv.o \ -psb_z_hlg_inner_vect_sv.o \ -psb_z_hlg_mold.o \ -psb_z_hlg_reallocate_nz.o \ -psb_z_hlg_scal.o \ -psb_z_hlg_scals.o \ -psb_z_hlg_from_gpu.o \ -psb_z_hlg_to_gpu.o \ -psb_z_hlg_vect_mv.o \ -psb_c_hlg_allocate_mnnz.o \ -psb_c_hlg_csmm.o \ -psb_c_hlg_csmv.o \ -psb_c_hlg_inner_vect_sv.o \ -psb_c_hlg_mold.o \ -psb_c_hlg_reallocate_nz.o \ -psb_c_hlg_scal.o \ -psb_c_hlg_scals.o \ -psb_c_hlg_from_gpu.o \ -psb_c_hlg_to_gpu.o \ -psb_c_hlg_vect_mv.o \ -psb_c_mv_hlg_from_coo.o \ -psb_c_cp_hlg_from_coo.o \ -psb_c_mv_hlg_from_fmt.o \ -psb_z_mv_hlg_from_coo.o \ -psb_z_cp_hlg_from_coo.o \ -psb_z_mv_hlg_from_fmt.o \ -psb_c_hybg_allocate_mnnz.o \ -psb_c_hybg_csmm.o \ -psb_c_hybg_csmv.o \ -psb_c_hybg_reallocate_nz.o \ -psb_c_hybg_scal.o \ -psb_c_hybg_scals.o \ -psb_c_hybg_to_gpu.o \ -psb_c_hybg_vect_mv.o \ -psb_c_hybg_inner_vect_sv.o \ -psb_c_cp_hybg_from_coo.o \ -psb_c_cp_hybg_from_fmt.o \ -psb_c_mv_hybg_from_fmt.o \ -psb_c_mv_hybg_from_coo.o \ -psb_c_hybg_mold.o \ -psb_z_hybg_allocate_mnnz.o \ -psb_z_hybg_csmm.o \ -psb_z_hybg_csmv.o \ -psb_z_hybg_reallocate_nz.o \ -psb_z_hybg_scal.o \ -psb_z_hybg_scals.o \ -psb_z_hybg_to_gpu.o \ -psb_z_hybg_vect_mv.o \ -psb_z_hybg_inner_vect_sv.o \ -psb_z_cp_hybg_from_coo.o \ -psb_z_cp_hybg_from_fmt.o \ -psb_z_mv_hybg_from_fmt.o \ -psb_z_mv_hybg_from_coo.o \ -psb_z_hybg_mold.o \ -psb_d_cp_diag_from_coo.o \ -psb_d_mv_diag_from_coo.o \ -psb_d_diag_to_gpu.o \ -psb_d_diag_csmv.o \ -psb_d_diag_mold.o \ -psb_d_diag_vect_mv.o \ -psb_d_cp_hdiag_from_coo.o \ -psb_d_mv_hdiag_from_coo.o \ -psb_d_hdiag_to_gpu.o \ -psb_d_hdiag_csmv.o \ -psb_d_hdiag_mold.o \ -psb_d_hdiag_vect_mv.o \ -psb_s_cp_hdiag_from_coo.o \ -psb_s_mv_hdiag_from_coo.o \ -psb_s_hdiag_to_gpu.o \ -psb_s_hdiag_csmv.o \ -psb_s_hdiag_mold.o \ -psb_s_hdiag_vect_mv.o \ -psb_s_dnsg_mat_impl.o \ -psb_d_dnsg_mat_impl.o \ -psb_c_dnsg_mat_impl.o \ -psb_z_dnsg_mat_impl.o +psb_d_cuda_cp_csrg_from_coo.o \ +psb_d_cuda_cp_csrg_from_fmt.o \ +psb_d_cuda_cp_elg_from_coo.o \ +psb_d_cuda_cp_elg_from_fmt.o \ +psb_s_cuda_cp_csrg_from_coo.o \ +psb_s_cuda_cp_csrg_from_fmt.o \ +psb_s_cuda_csrg_allocate_mnnz.o \ +psb_s_cuda_csrg_csmm.o \ +psb_s_cuda_csrg_csmv.o \ +psb_s_cuda_csrg_mold.o \ +psb_s_cuda_csrg_reallocate_nz.o \ +psb_s_cuda_csrg_scal.o \ +psb_s_cuda_csrg_scals.o \ +psb_s_cuda_csrg_from_gpu.o \ +psb_s_cuda_csrg_to_gpu.o \ +psb_s_cuda_csrg_vect_mv.o \ +psb_s_cuda_csrg_inner_vect_sv.o \ +psb_d_cuda_csrg_allocate_mnnz.o \ +psb_d_cuda_csrg_csmm.o \ +psb_d_cuda_csrg_csmv.o \ +psb_d_cuda_csrg_mold.o \ +psb_d_cuda_csrg_reallocate_nz.o \ +psb_d_cuda_csrg_scal.o \ +psb_d_cuda_csrg_scals.o \ +psb_d_cuda_csrg_from_gpu.o \ +psb_d_cuda_csrg_to_gpu.o \ +psb_d_cuda_csrg_vect_mv.o \ +psb_d_cuda_csrg_inner_vect_sv.o \ +psb_d_cuda_elg_allocate_mnnz.o \ +psb_d_cuda_elg_asb.o \ +psb_d_cuda_elg_csmm.o \ +psb_d_cuda_elg_csmv.o \ +psb_d_cuda_elg_csput.o \ +psb_d_cuda_elg_from_gpu.o \ +psb_d_cuda_elg_inner_vect_sv.o \ +psb_d_cuda_elg_mold.o \ +psb_d_cuda_elg_reallocate_nz.o \ +psb_d_cuda_elg_scal.o \ +psb_d_cuda_elg_scals.o \ +psb_d_cuda_elg_to_gpu.o \ +psb_d_cuda_elg_vect_mv.o \ +psb_d_cuda_mv_csrg_from_coo.o \ +psb_d_cuda_mv_csrg_from_fmt.o \ +psb_d_cuda_mv_elg_from_coo.o \ +psb_d_cuda_mv_elg_from_fmt.o \ +psb_s_cuda_mv_csrg_from_coo.o \ +psb_s_cuda_mv_csrg_from_fmt.o \ +psb_s_cuda_cp_elg_from_coo.o \ +psb_s_cuda_cp_elg_from_fmt.o \ +psb_s_cuda_elg_allocate_mnnz.o \ +psb_s_cuda_elg_asb.o \ +psb_s_cuda_elg_csmm.o \ +psb_s_cuda_elg_csmv.o \ +psb_s_cuda_elg_csput.o \ +psb_s_cuda_elg_inner_vect_sv.o \ +psb_s_cuda_elg_mold.o \ +psb_s_cuda_elg_reallocate_nz.o \ +psb_s_cuda_elg_scal.o \ +psb_s_cuda_elg_scals.o \ +psb_s_cuda_elg_to_gpu.o \ +psb_s_cuda_elg_from_gpu.o \ +psb_s_cuda_elg_vect_mv.o \ +psb_s_cuda_mv_elg_from_coo.o \ +psb_s_cuda_mv_elg_from_fmt.o \ +psb_s_cuda_cp_hlg_from_fmt.o \ +psb_s_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_cp_hlg_from_fmt.o \ +psb_d_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_hlg_allocate_mnnz.o \ +psb_d_cuda_hlg_csmm.o \ +psb_d_cuda_hlg_csmv.o \ +psb_d_cuda_hlg_inner_vect_sv.o \ +psb_d_cuda_hlg_mold.o \ +psb_d_cuda_hlg_reallocate_nz.o \ +psb_d_cuda_hlg_scal.o \ +psb_d_cuda_hlg_scals.o \ +psb_d_cuda_hlg_from_gpu.o \ +psb_d_cuda_hlg_to_gpu.o \ +psb_d_cuda_hlg_vect_mv.o \ +psb_s_cuda_hlg_allocate_mnnz.o \ +psb_s_cuda_hlg_csmm.o \ +psb_s_cuda_hlg_csmv.o \ +psb_s_cuda_hlg_inner_vect_sv.o \ +psb_s_cuda_hlg_mold.o \ +psb_s_cuda_hlg_reallocate_nz.o \ +psb_s_cuda_hlg_scal.o \ +psb_s_cuda_hlg_scals.o \ +psb_s_cuda_hlg_from_gpu.o \ +psb_s_cuda_hlg_to_gpu.o \ +psb_s_cuda_hlg_vect_mv.o \ +psb_s_cuda_mv_hlg_from_coo.o \ +psb_s_cuda_cp_hlg_from_coo.o \ +psb_s_cuda_mv_hlg_from_fmt.o \ +psb_d_cuda_mv_hlg_from_coo.o \ +psb_d_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_mv_hlg_from_fmt.o \ +psb_s_cuda_hybg_allocate_mnnz.o \ +psb_s_cuda_hybg_csmm.o \ +psb_s_cuda_hybg_csmv.o \ +psb_s_cuda_hybg_reallocate_nz.o \ +psb_s_cuda_hybg_scal.o \ +psb_s_cuda_hybg_scals.o \ +psb_s_cuda_hybg_to_gpu.o \ +psb_s_cuda_hybg_vect_mv.o \ +psb_s_cuda_hybg_inner_vect_sv.o \ +psb_s_cuda_cp_hybg_from_coo.o \ +psb_s_cuda_cp_hybg_from_fmt.o \ +psb_s_cuda_mv_hybg_from_fmt.o \ +psb_s_cuda_mv_hybg_from_coo.o \ +psb_s_cuda_hybg_mold.o \ +psb_d_cuda_hybg_allocate_mnnz.o \ +psb_d_cuda_hybg_csmm.o \ +psb_d_cuda_hybg_csmv.o \ +psb_d_cuda_hybg_reallocate_nz.o \ +psb_d_cuda_hybg_scal.o \ +psb_d_cuda_hybg_scals.o \ +psb_d_cuda_hybg_to_gpu.o \ +psb_d_cuda_hybg_vect_mv.o \ +psb_d_cuda_hybg_inner_vect_sv.o \ +psb_d_cuda_cp_hybg_from_coo.o \ +psb_d_cuda_cp_hybg_from_fmt.o \ +psb_d_cuda_mv_hybg_from_fmt.o \ +psb_d_cuda_mv_hybg_from_coo.o \ +psb_d_cuda_hybg_mold.o \ +psb_z_cuda_cp_csrg_from_coo.o \ +psb_z_cuda_cp_csrg_from_fmt.o \ +psb_z_cuda_cp_elg_from_coo.o \ +psb_z_cuda_cp_elg_from_fmt.o \ +psb_c_cuda_cp_csrg_from_coo.o \ +psb_c_cuda_cp_csrg_from_fmt.o \ +psb_c_cuda_csrg_allocate_mnnz.o \ +psb_c_cuda_csrg_csmm.o \ +psb_c_cuda_csrg_csmv.o \ +psb_c_cuda_csrg_mold.o \ +psb_c_cuda_csrg_reallocate_nz.o \ +psb_c_cuda_csrg_scal.o \ +psb_c_cuda_csrg_scals.o \ +psb_c_cuda_csrg_from_gpu.o \ +psb_c_cuda_csrg_to_gpu.o \ +psb_c_cuda_csrg_vect_mv.o \ +psb_c_cuda_csrg_inner_vect_sv.o \ +psb_z_cuda_csrg_allocate_mnnz.o \ +psb_z_cuda_csrg_csmm.o \ +psb_z_cuda_csrg_csmv.o \ +psb_z_cuda_csrg_mold.o \ +psb_z_cuda_csrg_reallocate_nz.o \ +psb_z_cuda_csrg_scal.o \ +psb_z_cuda_csrg_scals.o \ +psb_z_cuda_csrg_from_gpu.o \ +psb_z_cuda_csrg_to_gpu.o \ +psb_z_cuda_csrg_vect_mv.o \ +psb_z_cuda_csrg_inner_vect_sv.o \ +psb_z_cuda_elg_allocate_mnnz.o \ +psb_z_cuda_elg_asb.o \ +psb_z_cuda_elg_csmm.o \ +psb_z_cuda_elg_csmv.o \ +psb_z_cuda_elg_csput.o \ +psb_z_cuda_elg_inner_vect_sv.o \ +psb_z_cuda_elg_mold.o \ +psb_z_cuda_elg_reallocate_nz.o \ +psb_z_cuda_elg_scal.o \ +psb_z_cuda_elg_scals.o \ +psb_z_cuda_elg_to_gpu.o \ +psb_z_cuda_elg_from_gpu.o \ +psb_z_cuda_elg_vect_mv.o \ +psb_z_cuda_mv_csrg_from_coo.o \ +psb_z_cuda_mv_csrg_from_fmt.o \ +psb_z_cuda_mv_elg_from_coo.o \ +psb_z_cuda_mv_elg_from_fmt.o \ +psb_c_cuda_mv_csrg_from_coo.o \ +psb_c_cuda_mv_csrg_from_fmt.o \ +psb_c_cuda_cp_elg_from_coo.o \ +psb_c_cuda_cp_elg_from_fmt.o \ +psb_c_cuda_elg_allocate_mnnz.o \ +psb_c_cuda_elg_asb.o \ +psb_c_cuda_elg_csmm.o \ +psb_c_cuda_elg_csmv.o \ +psb_c_cuda_elg_csput.o \ +psb_c_cuda_elg_inner_vect_sv.o \ +psb_c_cuda_elg_mold.o \ +psb_c_cuda_elg_reallocate_nz.o \ +psb_c_cuda_elg_scal.o \ +psb_c_cuda_elg_scals.o \ +psb_c_cuda_elg_to_gpu.o \ +psb_c_cuda_elg_from_gpu.o \ +psb_c_cuda_elg_vect_mv.o \ +psb_c_cuda_mv_elg_from_coo.o \ +psb_c_cuda_mv_elg_from_fmt.o \ +psb_c_cuda_cp_hlg_from_fmt.o \ +psb_c_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_cp_hlg_from_fmt.o \ +psb_z_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_hlg_allocate_mnnz.o \ +psb_z_cuda_hlg_csmm.o \ +psb_z_cuda_hlg_csmv.o \ +psb_z_cuda_hlg_inner_vect_sv.o \ +psb_z_cuda_hlg_mold.o \ +psb_z_cuda_hlg_reallocate_nz.o \ +psb_z_cuda_hlg_scal.o \ +psb_z_cuda_hlg_scals.o \ +psb_z_cuda_hlg_from_gpu.o \ +psb_z_cuda_hlg_to_gpu.o \ +psb_z_cuda_hlg_vect_mv.o \ +psb_c_cuda_hlg_allocate_mnnz.o \ +psb_c_cuda_hlg_csmm.o \ +psb_c_cuda_hlg_csmv.o \ +psb_c_cuda_hlg_inner_vect_sv.o \ +psb_c_cuda_hlg_mold.o \ +psb_c_cuda_hlg_reallocate_nz.o \ +psb_c_cuda_hlg_scal.o \ +psb_c_cuda_hlg_scals.o \ +psb_c_cuda_hlg_from_gpu.o \ +psb_c_cuda_hlg_to_gpu.o \ +psb_c_cuda_hlg_vect_mv.o \ +psb_c_cuda_mv_hlg_from_coo.o \ +psb_c_cuda_cp_hlg_from_coo.o \ +psb_c_cuda_mv_hlg_from_fmt.o \ +psb_z_cuda_mv_hlg_from_coo.o \ +psb_z_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_mv_hlg_from_fmt.o \ +psb_c_cuda_hybg_allocate_mnnz.o \ +psb_c_cuda_hybg_csmm.o \ +psb_c_cuda_hybg_csmv.o \ +psb_c_cuda_hybg_reallocate_nz.o \ +psb_c_cuda_hybg_scal.o \ +psb_c_cuda_hybg_scals.o \ +psb_c_cuda_hybg_to_gpu.o \ +psb_c_cuda_hybg_vect_mv.o \ +psb_c_cuda_hybg_inner_vect_sv.o \ +psb_c_cuda_cp_hybg_from_coo.o \ +psb_c_cuda_cp_hybg_from_fmt.o \ +psb_c_cuda_mv_hybg_from_fmt.o \ +psb_c_cuda_mv_hybg_from_coo.o \ +psb_c_cuda_hybg_mold.o \ +psb_z_cuda_hybg_allocate_mnnz.o \ +psb_z_cuda_hybg_csmm.o \ +psb_z_cuda_hybg_csmv.o \ +psb_z_cuda_hybg_reallocate_nz.o \ +psb_z_cuda_hybg_scal.o \ +psb_z_cuda_hybg_scals.o \ +psb_z_cuda_hybg_to_gpu.o \ +psb_z_cuda_hybg_vect_mv.o \ +psb_z_cuda_hybg_inner_vect_sv.o \ +psb_z_cuda_cp_hybg_from_coo.o \ +psb_z_cuda_cp_hybg_from_fmt.o \ +psb_z_cuda_mv_hybg_from_fmt.o \ +psb_z_cuda_mv_hybg_from_coo.o \ +psb_z_cuda_hybg_mold.o \ +psb_d_cuda_cp_diag_from_coo.o \ +psb_d_cuda_mv_diag_from_coo.o \ +psb_d_cuda_diag_to_gpu.o \ +psb_d_cuda_diag_csmv.o \ +psb_d_cuda_diag_mold.o \ +psb_d_cuda_diag_vect_mv.o \ +psb_d_cuda_cp_hdiag_from_coo.o \ +psb_d_cuda_mv_hdiag_from_coo.o \ +psb_d_cuda_hdiag_to_gpu.o \ +psb_d_cuda_hdiag_csmv.o \ +psb_d_cuda_hdiag_mold.o \ +psb_d_cuda_hdiag_vect_mv.o \ +psb_s_cuda_cp_hdiag_from_coo.o \ +psb_s_cuda_mv_hdiag_from_coo.o \ +psb_s_cuda_hdiag_to_gpu.o \ +psb_s_cuda_hdiag_csmv.o \ +psb_s_cuda_hdiag_mold.o \ +psb_s_cuda_hdiag_vect_mv.o \ +psb_s_cuda_dnsg_mat_impl.o \ +psb_d_cuda_dnsg_mat_impl.o \ +psb_c_cuda_dnsg_mat_impl.o \ +psb_z_cuda_dnsg_mat_impl.o objs: $(OBJS) diff --git a/cuda/impl/psb_c_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_csrg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 index 9ab3b7f0..af3301ff 100644 --- a/cuda/impl/psb_c_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_cp_csrg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_coo + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_c_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_csrg_from_coo +end subroutine psb_c_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_c_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 index 5229244f..47845e52 100644 --- a/cuda/impl/psb_c_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_cp_csrg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_cp_csrg_from_fmt + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_cp_csrg_from_fmt +end subroutine psb_c_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_c_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_diag_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 index 8d196891..5b1eb817 100644 --- a/cuda/impl/psb_c_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_diag_from_coo(a,b,info) +subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_cp_diag_from_coo + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_c_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_diag_from_coo +end subroutine psb_c_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_c_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_c_cp_elg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 index 95193c13..fedffa22 100644 --- a/cuda/impl/psb_c_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_cp_elg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_coo + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_c_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_c_count_ell_from_coo -end subroutine psb_c_cp_elg_from_coo +end subroutine psb_c_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_c_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_c_cp_elg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 index e8be8a8d..4c44d29a 100644 --- a/cuda/impl/psb_c_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_elg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_cp_elg_from_fmt + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_c_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_cp_elg_from_fmt +end subroutine psb_c_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_c_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_c_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 index f0ec00ad..436eabaa 100644 --- a/cuda/impl/psb_c_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_cp_hdiag_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_c_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_c_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hdiag_from_coo +end subroutine psb_c_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_c_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_c_cp_hlg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 index cf305592..d30fccbf 100644 --- a/cuda/impl/psb_c_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_cp_hlg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_c_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_c_cp_hlg_from_coo +end subroutine psb_c_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_c_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_c_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 index 559c501c..259364cd 100644 --- a/cuda/impl/psb_c_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_cp_hlg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_cp_hlg_from_fmt + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_c_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hlg_from_fmt +end subroutine psb_c_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_c_cp_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_cp_hybg_from_coo.F90 rename to cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 index 00a7d4ee..7ebb5240 100644 --- a/cuda/impl/psb_c_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_cp_hybg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_coo + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_c_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_cp_hybg_from_coo +end subroutine psb_c_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_c_cp_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 index 643abf99..033ba966 100644 --- a/cuda/impl/psb_c_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_cp_hybg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_cp_hybg_from_fmt + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_c_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_cp_hybg_from_fmt +end subroutine psb_c_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_c_csrg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_c_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 index 2183ee63..d9736d23 100644 --- a/cuda/impl/psb_c_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_allocate_mnnz + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_csrg_allocate_mnnz +end subroutine psb_c_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_c_csrg_csmm.F90 b/cuda/impl/psb_c_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_c_csrg_csmm.F90 rename to cuda/impl/psb_c_cuda_csrg_csmm.F90 index cef5d288..8f2f55b9 100644 --- a/cuda/impl/psb_c_csrg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_csmm + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_csmm +end subroutine psb_c_cuda_csrg_csmm diff --git a/cuda/impl/psb_c_csrg_csmv.F90 b/cuda/impl/psb_c_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_csrg_csmv.F90 rename to cuda/impl/psb_c_cuda_csrg_csmv.F90 index 3a543da3..ba681401 100644 --- a/cuda/impl/psb_c_csrg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_csmv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_csmv' + character(len=20) :: name='c_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_csmv +end subroutine psb_c_cuda_csrg_csmv diff --git a/cuda/impl/psb_c_csrg_from_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_csrg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_csrg_from_gpu.F90 index 606606bb..b1bed7e5 100644 --- a/cuda/impl/psb_c_csrg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_csrg_from_gpu(a,info) +subroutine psb_c_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_from_gpu + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_from_gpu #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_c_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_csrg_from_gpu +end subroutine psb_c_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_c_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 index 39938752..32dec5ef 100644 --- a/cuda/impl/psb_c_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_inner_vect_sv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_inner_vect_sv' + character(len=20) :: name='c_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_csrg_inner_vect_sv +end subroutine psb_c_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_c_csrg_mold.F90 b/cuda/impl/psb_c_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_c_csrg_mold.F90 rename to cuda/impl/psb_c_cuda_csrg_mold.F90 index 8b1b616a..405f2736 100644 --- a/cuda/impl/psb_c_csrg_mold.F90 +++ b/cuda/impl/psb_c_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_csrg_mold(a,b,info) +subroutine psb_c_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_mold + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_mold implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_csrg_mold(a,b,info) return -end subroutine psb_c_csrg_mold +end subroutine psb_c_cuda_csrg_mold diff --git a/cuda/impl/psb_c_csrg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_c_csrg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 index e9db4128..22f9f118 100644 --- a/cuda/impl/psb_c_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_csrg_reallocate_nz(nz,a) +subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_reallocate_nz + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_csrg_reallocate_nz' + character(len=20) :: name='c_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_c_csrg_reallocate_nz(nz,a) return -end subroutine psb_c_csrg_reallocate_nz +end subroutine psb_c_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_c_csrg_scal.F90 b/cuda/impl/psb_c_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_scal.F90 rename to cuda/impl/psb_c_cuda_csrg_scal.F90 index f183a822..556a0ec5 100644 --- a/cuda/impl/psb_c_csrg_scal.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_scal(d,a,info,side) +subroutine psb_c_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scal + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_c_csrg_scal(d,a,info,side) return -end subroutine psb_c_csrg_scal +end subroutine psb_c_cuda_csrg_scal diff --git a/cuda/impl/psb_c_csrg_scals.F90 b/cuda/impl/psb_c_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_scals.F90 rename to cuda/impl/psb_c_cuda_csrg_scals.F90 index 13f0d707..a67e91cd 100644 --- a/cuda/impl/psb_c_csrg_scals.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_scals(d,a,info) +subroutine psb_c_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_scals + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_c_csrg_scals(d,a,info) return -end subroutine psb_c_csrg_scals +end subroutine psb_c_cuda_csrg_scals diff --git a/cuda/impl/psb_c_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_c_csrg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index a04f1bab..ea710cbc 100644 --- a/cuda/impl/psb_c_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_c_csrg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_to_gpu + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_c_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_c_csrg_to_gpu +end subroutine psb_c_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_c_csrg_vect_mv.F90 b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_csrg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_csrg_vect_mv.F90 index 0feb03fd..cb556d20 100644 --- a/cuda/impl/psb_c_csrg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_csrg_vect_mv + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_csrg_vect_mv' + character(len=20) :: name='c_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_csrg_vect_mv +end subroutine psb_c_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_c_diag_csmv.F90 b/cuda/impl/psb_c_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_c_diag_csmv.F90 rename to cuda/impl/psb_c_cuda_diag_csmv.F90 index 05ca102f..00ab742d 100644 --- a/cuda/impl/psb_c_diag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_csmv + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_diag_csmv' + character(len=20) :: name='c_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_diag_csmv +end subroutine psb_c_cuda_diag_csmv diff --git a/cuda/impl/psb_c_diag_mold.F90 b/cuda/impl/psb_c_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_c_diag_mold.F90 rename to cuda/impl/psb_c_cuda_diag_mold.F90 index 8d79e78d..4c8a3c56 100644 --- a/cuda/impl/psb_c_diag_mold.F90 +++ b/cuda/impl/psb_c_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_diag_mold(a,b,info) +subroutine psb_c_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_mold + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_mold implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_diag_mold(a,b,info) return -end subroutine psb_c_diag_mold +end subroutine psb_c_cuda_diag_mold diff --git a/cuda/impl/psb_c_diag_to_gpu.F90 b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_diag_to_gpu.F90 rename to cuda/impl/psb_c_cuda_diag_to_gpu.F90 index a60fc741..4f2c21d9 100644 --- a/cuda/impl/psb_c_diag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_diag_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_to_gpu + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_c_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_c_diag_to_gpu +end subroutine psb_c_cuda_diag_to_gpu diff --git a/cuda/impl/psb_c_diag_vect_mv.F90 b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_diag_vect_mv.F90 rename to cuda/impl/psb_c_cuda_diag_vect_mv.F90 index e680a737..02bb9587 100644 --- a/cuda/impl/psb_c_diag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_diag_vect_mv + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_diag_vect_mv' + character(len=20) :: name='c_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_diag_vect_mv +end subroutine psb_c_cuda_diag_vect_mv diff --git a/cuda/impl/psb_c_dnsg_mat_impl.F90 b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_c_dnsg_mat_impl.F90 rename to cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 index b70f383a..bb2ec97b 100644 --- a/cuda/impl/psb_c_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_vect_mv + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ complex(psb_spk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='c_dnsg_vect_mv' + character(len=20) :: name='c_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= czero) then @@ -117,21 +117,21 @@ subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_dnsg_vect_mv +end subroutine psb_c_cuda_dnsg_vect_mv -subroutine psb_c_dnsg_mold(a,b,info) +subroutine psb_c_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_mold + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(in) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_c_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_c_dnsg_mold(a,b,info) return -end subroutine psb_c_dnsg_mold +end subroutine psb_c_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_c_dnsg_to_gpu(a,info) +subroutine psb_c_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_dnsg_to_gpu + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='c_dnsg_to_gpu' + character(len=20) :: name='c_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_c_dnsg_to_gpu(a,info) return -end subroutine psb_c_dnsg_to_gpu +end subroutine psb_c_cuda_dnsg_to_gpu -subroutine psb_c_cp_dnsg_from_coo(a,b,info) +subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_coo + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_coo' + character(len=20) :: name='c_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_c_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_c_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_c_cp_dnsg_from_coo +end subroutine psb_c_cuda_cp_dnsg_from_coo -subroutine psb_c_cp_dnsg_from_fmt(a,b,info) +subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_cp_dnsg_from_fmt + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_c_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_fmt' + character(len=20) :: name='c_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_c_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_c_cp_dnsg_from_fmt +end subroutine psb_c_cuda_cp_dnsg_from_fmt -subroutine psb_c_mv_dnsg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_coo + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='c_dnsg_mv_from_coo' + character(len=20) :: name='c_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_c_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_c_mv_dnsg_from_coo +end subroutine psb_c_cuda_mv_dnsg_from_coo -subroutine psb_c_mv_dnsg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod - use psb_c_dnsg_mat_mod, psb_protect_name => psb_c_mv_dnsg_from_fmt + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt #else - use psb_c_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod #endif implicit none - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_c_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_dnsg_cp_from_fmt' + character(len=20) :: name='c_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_c_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_c_mv_dnsg_from_fmt +end subroutine psb_c_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_c_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_c_elg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 index ac9e654f..01ca7189 100644 --- a/cuda/impl/psb_c_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_allocate_mnnz + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_elg_allocate_mnnz +end subroutine psb_c_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_d_elg_asb.f90 b/cuda/impl/psb_c_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_d_elg_asb.f90 rename to cuda/impl/psb_c_cuda_elg_asb.f90 index f80537ef..24af1cc9 100644 --- a/cuda/impl/psb_d_elg_asb.f90 +++ b/cuda/impl/psb_c_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_d_elg_asb(a) +subroutine psb_c_cuda_elg_asb(a) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_asb + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_asb implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_d_elg_asb(a) return -end subroutine psb_d_elg_asb +end subroutine psb_c_cuda_elg_asb diff --git a/cuda/impl/psb_c_elg_csmm.F90 b/cuda/impl/psb_c_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_elg_csmm.F90 rename to cuda/impl/psb_c_cuda_elg_csmm.F90 index 5d355d88..a5f0e3d5 100644 --- a/cuda/impl/psb_c_elg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csmm + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csmm' + character(len=20) :: name='c_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_csmm +end subroutine psb_c_cuda_elg_csmm diff --git a/cuda/impl/psb_c_elg_csmv.F90 b/cuda/impl/psb_c_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_c_elg_csmv.F90 rename to cuda/impl/psb_c_cuda_elg_csmv.F90 index 9e377726..00f39e8c 100644 --- a/cuda/impl/psb_c_elg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csmv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_csmv +end subroutine psb_c_cuda_elg_csmv diff --git a/cuda/impl/psb_c_elg_csput.F90 b/cuda/impl/psb_c_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_c_elg_csput.F90 rename to cuda/impl/psb_c_cuda_elg_csput.F90 index 2a632f21..cc6fc024 100644 --- a/cuda/impl/psb_c_elg_csput.F90 +++ b/cuda/impl/psb_c_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csput_a + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_a #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csput_a' + character(len=20) :: name='c_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_c_elg_csput_a +end subroutine psb_c_cuda_elg_csput_a -subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_csput_v - use psb_c_gpu_vect_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v + use psb_c_cuda_vect_mod #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_csput_v' + character(len=20) :: name='c_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_c_elg_csput_v +end subroutine psb_c_cuda_elg_csput_v diff --git a/cuda/impl/psb_s_elg_from_gpu.F90 b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_elg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_elg_from_gpu.F90 index d043790d..593b52be 100644 --- a/cuda/impl/psb_s_elg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_from_gpu(a,info) +subroutine psb_c_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_from_gpu + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu #else - use psb_s_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_s_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_elg_from_gpu +end subroutine psb_c_cuda_elg_from_gpu diff --git a/cuda/impl/psb_c_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_c_elg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 index 97f0f7ff..43843dc6 100644 --- a/cuda/impl/psb_c_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_inner_vect_sv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='c_elg_inner_vect_sv' + character(len=20) :: name='c_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. complex(psb_spk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_inner_vect_sv +end subroutine psb_c_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_c_elg_mold.F90 b/cuda/impl/psb_c_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_elg_mold.F90 rename to cuda/impl/psb_c_cuda_elg_mold.F90 index 17cd2ce2..b428055c 100644 --- a/cuda/impl/psb_c_elg_mold.F90 +++ b/cuda/impl/psb_c_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_elg_mold(a,b,info) +subroutine psb_c_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_mold + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_mold implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_elg_mold(a,b,info) return -end subroutine psb_c_elg_mold +end subroutine psb_c_cuda_elg_mold diff --git a/cuda/impl/psb_c_elg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_c_elg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 index 40d94d36..b97530e1 100644 --- a/cuda/impl/psb_c_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_c_elg_reallocate_nz(nz,a) +subroutine psb_c_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_reallocate_nz + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_elg_reallocate_nz' + character(len=20) :: name='c_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_c_elg_reallocate_nz(nz,a) return -end subroutine psb_c_elg_reallocate_nz +end subroutine psb_c_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_c_elg_scal.F90 b/cuda/impl/psb_c_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_elg_scal.F90 rename to cuda/impl/psb_c_cuda_elg_scal.F90 index 63d9907e..b169451b 100644 --- a/cuda/impl/psb_c_elg_scal.F90 +++ b/cuda/impl/psb_c_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_scal(d,a,info,side) +subroutine psb_c_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scal + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_c_elg_scal(d,a,info,side) return -end subroutine psb_c_elg_scal +end subroutine psb_c_cuda_elg_scal diff --git a/cuda/impl/psb_c_elg_scals.F90 b/cuda/impl/psb_c_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_c_elg_scals.F90 rename to cuda/impl/psb_c_cuda_elg_scals.F90 index b954e0a1..d20ee568 100644 --- a/cuda/impl/psb_c_elg_scals.F90 +++ b/cuda/impl/psb_c_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_scals(d,a,info) +subroutine psb_c_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_scals + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_c_elg_scals(d,a,info) return -end subroutine psb_c_elg_scals +end subroutine psb_c_cuda_elg_scals diff --git a/cuda/impl/psb_c_elg_to_gpu.F90 b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_c_elg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_elg_to_gpu.F90 index b967a59b..5ea61a41 100644 --- a/cuda/impl/psb_c_elg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_to_gpu + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_c_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_c_elg_to_gpu +end subroutine psb_c_cuda_elg_to_gpu diff --git a/cuda/impl/psb_d_elg_trim.f90 b/cuda/impl/psb_c_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_d_elg_trim.f90 rename to cuda/impl/psb_c_cuda_elg_trim.f90 index d2a2047c..483e189d 100644 --- a/cuda/impl/psb_d_elg_trim.f90 +++ b/cuda/impl/psb_c_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_elg_trim(a) +subroutine psb_c_cuda_elg_trim(a) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_trim + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_trim implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_d_elg_trim(a) return -end subroutine psb_d_elg_trim +end subroutine psb_c_cuda_elg_trim diff --git a/cuda/impl/psb_c_elg_vect_mv.F90 b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_elg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_elg_vect_mv.F90 index ec6e5b50..b89ba5a2 100644 --- a/cuda/impl/psb_c_elg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_vect_mv + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_elg_vect_mv' + character(len=20) :: name='c_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= czero) then @@ -128,4 +128,4 @@ subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_elg_vect_mv +end subroutine psb_c_cuda_elg_vect_mv diff --git a/cuda/impl/psb_c_hdiag_csmv.F90 b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_c_hdiag_csmv.F90 rename to cuda/impl/psb_c_cuda_hdiag_csmv.F90 index 1ba58c6f..4ea2c269 100644 --- a/cuda/impl/psb_c_hdiag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_csmv + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_hdiag_csmv' + character(len=20) :: name='c_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hdiag_csmv +end subroutine psb_c_cuda_hdiag_csmv diff --git a/cuda/impl/psb_c_hdiag_mold.F90 b/cuda/impl/psb_c_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_c_hdiag_mold.F90 rename to cuda/impl/psb_c_cuda_hdiag_mold.F90 index bc913690..67e0b92e 100644 --- a/cuda/impl/psb_c_hdiag_mold.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_hdiag_mold(a,b,info) +subroutine psb_c_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_mold + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_mold implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_c_hdiag_mold(a,b,info) return -end subroutine psb_c_hdiag_mold +end subroutine psb_c_cuda_hdiag_mold diff --git a/cuda/impl/psb_c_hdiag_to_gpu.F90 b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_c_hdiag_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 index 565babe0..63ab178a 100644 --- a/cuda/impl/psb_c_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hdiag_to_gpu(a,info) +subroutine psb_c_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_to_gpu + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_to_gpu #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_c_hdiag_to_gpu(a,info) #endif -end subroutine psb_c_hdiag_to_gpu +end subroutine psb_c_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_c_hdiag_vect_mv.F90 b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_c_hdiag_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 index a891a274..fb80611f 100644 --- a/cuda/impl/psb_c_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_hdiag_vect_mv + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_vect_mv #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hdiag_sparse_mat), intent(in) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hdiag_vect_mv' + character(len=20) :: name='c_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hdiag_vect_mv +end subroutine psb_c_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_c_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_c_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 index 27e5c0b6..277b974f 100644 --- a/cuda/impl/psb_c_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_allocate_mnnz + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_allocate_mnnz #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_hlg_allocate_mnnz +end subroutine psb_c_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_c_hlg_csmm.F90 b/cuda/impl/psb_c_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_hlg_csmm.F90 rename to cuda/impl/psb_c_cuda_hlg_csmm.F90 index c33b2dde..f351ffd0 100644 --- a/cuda/impl/psb_c_hlg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_csmm + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmm #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hlg_csmm' + character(len=20) :: name='c_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_csmm +end subroutine psb_c_cuda_hlg_csmm diff --git a/cuda/impl/psb_c_hlg_csmv.F90 b/cuda/impl/psb_c_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_hlg_csmv.F90 rename to cuda/impl/psb_c_cuda_hlg_csmv.F90 index 2599e44e..d39e5f51 100644 --- a/cuda/impl/psb_c_hlg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_csmv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='c_hlg_csmv' + character(len=20) :: name='c_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_csmv +end subroutine psb_c_cuda_hlg_csmv diff --git a/cuda/impl/psb_s_hlg_from_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_s_hlg_from_gpu.F90 rename to cuda/impl/psb_c_cuda_hlg_from_gpu.F90 index 1c34b15e..f823153d 100644 --- a/cuda/impl/psb_s_hlg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_from_gpu(a,info) +subroutine psb_c_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_from_gpu + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_from_gpu #else - use psb_s_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_s_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_hlg_from_gpu +end subroutine psb_c_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_c_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 index 0955d8a1..6202885c 100644 --- a/cuda/impl/psb_c_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_inner_vect_sv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_inner_vect_sv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_inner_vect_sv +end subroutine psb_c_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_c_hlg_mold.F90 b/cuda/impl/psb_c_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_hlg_mold.F90 rename to cuda/impl/psb_c_cuda_hlg_mold.F90 index 321111f0..85453422 100644 --- a/cuda/impl/psb_c_hlg_mold.F90 +++ b/cuda/impl/psb_c_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_hlg_mold(a,b,info) +subroutine psb_c_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_mold + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_mold implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_c_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_hlg_mold +end subroutine psb_c_cuda_hlg_mold diff --git a/cuda/impl/psb_c_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_c_hlg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 index a27c3f55..848b659d 100644 --- a/cuda/impl/psb_c_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_c_hlg_reallocate_nz(nz,a) +subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_reallocate_nz + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_reallocate_nz #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_hlg_reallocate_nz' + character(len=20) :: name='c_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_c_hlg_reallocate_nz(nz,a) return -end subroutine psb_c_hlg_reallocate_nz +end subroutine psb_c_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_c_hlg_scal.F90 b/cuda/impl/psb_c_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_scal.F90 rename to cuda/impl/psb_c_cuda_hlg_scal.F90 index b2c9d30d..d768048f 100644 --- a/cuda/impl/psb_c_hlg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_scal(d,a,info,side) +subroutine psb_c_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scal + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scal #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_c_hlg_scal(d,a,info,side) return -end subroutine psb_c_hlg_scal +end subroutine psb_c_cuda_hlg_scal diff --git a/cuda/impl/psb_c_hlg_scals.F90 b/cuda/impl/psb_c_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_scals.F90 rename to cuda/impl/psb_c_cuda_hlg_scals.F90 index af2efb19..7574bf94 100644 --- a/cuda/impl/psb_c_hlg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_scals(d,a,info) +subroutine psb_c_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_scals + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scals #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_c_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_c_hlg_scals +end subroutine psb_c_cuda_hlg_scals diff --git a/cuda/impl/psb_c_hlg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hlg_to_gpu.F90 index 0d37bc24..d7fc8fb2 100644 --- a/cuda/impl/psb_c_hlg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_to_gpu + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_to_gpu #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_c_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_c_hlg_to_gpu +end subroutine psb_c_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_c_hlg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_hlg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hlg_vect_mv.F90 index bc4e2f56..2d7a679e 100644 --- a/cuda/impl/psb_c_hlg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_vect_mv + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_vect_mv #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hlg_vect_mv' + character(len=20) :: name='c_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hlg_vect_mv +end subroutine psb_c_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_c_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_c_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 index 5cd57fa2..eced26e0 100644 --- a/cuda/impl/psb_c_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_allocate_mnnz + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_allocate_mnnz #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_c_hybg_allocate_mnnz +end subroutine psb_c_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_c_hybg_csmm.F90 b/cuda/impl/psb_c_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_c_hybg_csmm.F90 rename to cuda/impl/psb_c_cuda_hybg_csmm.F90 index 7c8bb582..cc459f66 100644 --- a/cuda/impl/psb_c_hybg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_csmm + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmm #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_csmm' + character(len=20) :: name='c_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_csmm +end subroutine psb_c_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_c_hybg_csmv.F90 b/cuda/impl/psb_c_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_c_hybg_csmv.F90 rename to cuda/impl/psb_c_cuda_hybg_csmv.F90 index 5e15bc1b..ab07d756 100644 --- a/cuda/impl/psb_c_hybg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_csmv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_csmv' + character(len=20) :: name='c_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_csmv +end subroutine psb_c_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_c_hybg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_c_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 index 20bc842d..fcaf49ff 100644 --- a/cuda/impl/psb_c_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_inner_vect_sv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_inner_vect_sv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_inner_vect_sv' + character(len=20) :: name='c_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_inner_vect_sv +end subroutine psb_c_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_c_hybg_mold.F90 b/cuda/impl/psb_c_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_c_hybg_mold.F90 rename to cuda/impl/psb_c_cuda_hybg_mold.F90 index 54dd24c2..6fe4c378 100644 --- a/cuda/impl/psb_c_hybg_mold.F90 +++ b/cuda/impl/psb_c_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_mold(a,b,info) +subroutine psb_c_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_mold + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_mold implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_c_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_c_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_c_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_c_hybg_mold(a,b,info) return -end subroutine psb_c_hybg_mold +end subroutine psb_c_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_c_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_c_hybg_reallocate_nz.F90 rename to cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 index 3272b797..979eaad8 100644 --- a/cuda/impl/psb_c_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_reallocate_nz(nz,a) +subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_reallocate_nz + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_reallocate_nz #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='c_hybg_reallocate_nz' + character(len=20) :: name='c_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_c_hybg_reallocate_nz(nz,a) return -end subroutine psb_c_hybg_reallocate_nz +end subroutine psb_c_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_c_hybg_scal.F90 b/cuda/impl/psb_c_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_scal.F90 rename to cuda/impl/psb_c_cuda_hybg_scal.F90 index 1019f979..ac4d788e 100644 --- a/cuda/impl/psb_c_hybg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_scal(d,a,info,side) +subroutine psb_c_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scal + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scal #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_c_hybg_scal(d,a,info,side) return -end subroutine psb_c_hybg_scal +end subroutine psb_c_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_c_hybg_scals.F90 b/cuda/impl/psb_c_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_scals.F90 rename to cuda/impl/psb_c_cuda_hybg_scals.F90 index 1d09abbb..7def71d2 100644 --- a/cuda/impl/psb_c_hybg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_scals(d,a,info) +subroutine psb_c_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_scals + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scals #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_c_hybg_scals(d,a,info) return -end subroutine psb_c_hybg_scals +end subroutine psb_c_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_c_hybg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_c_hybg_to_gpu.F90 rename to cuda/impl/psb_c_cuda_hybg_to_gpu.F90 index 107efba9..1a77586e 100644 --- a/cuda/impl/psb_c_hybg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_to_gpu(a,info,nzrm) +subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_to_gpu + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_to_gpu #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_c_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_c_hybg_to_gpu +end subroutine psb_c_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_c_hybg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_c_hybg_vect_mv.F90 rename to cuda/impl/psb_c_cuda_hybg_vect_mv.F90 index 3ed0f7fd..da20ca41 100644 --- a/cuda/impl/psb_c_hybg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_hybg_vect_mv + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_vect_mv #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif - use psb_c_gpu_vect_mod + use psb_c_cuda_vect_mod implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='c_hybg_vect_mv' + character(len=20) :: name='c_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type(yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= czero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_c_hybg_vect_mv +end subroutine psb_c_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_c_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_csrg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 index d2533c2d..f80a8f87 100644 --- a/cuda/impl/psb_c_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_mv_csrg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_coo + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_coo #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_c_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_csrg_from_coo +end subroutine psb_c_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_c_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 index 3e898e8f..1f23a6c2 100644 --- a/cuda/impl/psb_c_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_mv_csrg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_csrg_mat_mod, psb_protect_name => psb_c_mv_csrg_from_fmt + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_fmt #else - use psb_c_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod #endif implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_c_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_c_mv_csrg_from_fmt +end subroutine psb_c_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_c_mv_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_diag_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 index 34fe69b7..e20e0b0a 100644 --- a/cuda/impl/psb_c_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_mv_diag_from_coo(a,b,info) +subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_c_diag_mat_mod, psb_protect_name => psb_c_mv_diag_from_coo + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_mv_diag_from_coo #else - use psb_c_diag_mat_mod + use psb_c_cuda_diag_mat_mod #endif implicit none - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_c_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_diag_from_coo +end subroutine psb_c_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_c_mv_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_elg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 index acf7e28c..741058cd 100644 --- a/cuda/impl/psb_c_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_elg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_coo + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_coo #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_mv_elg_from_coo(a,b,info) return -end subroutine psb_c_mv_elg_from_coo +end subroutine psb_c_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_c_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_c_mv_elg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 index fb9e3cfe..b375bc63 100644 --- a/cuda/impl/psb_c_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_elg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_mv_elg_from_fmt + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_fmt #else - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_c_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_mv_elg_from_fmt +end subroutine psb_c_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_c_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_c_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 index 1d07bddb..8826081f 100644 --- a/cuda/impl/psb_c_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_c_mv_hdiag_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_c_hdiag_mat_mod, psb_protect_name => psb_c_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_c_hdiag_mat_mod + use psb_c_cuda_hdiag_mat_mod #endif implicit none - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_c_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_c_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_hdiag_from_coo +end subroutine psb_c_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_c_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_c_mv_hlg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 index 0fa2d72d..416bbaed 100644 --- a/cuda/impl/psb_c_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_c_mv_hlg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_coo #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_c_mv_hlg_from_coo(a,b,info) return -end subroutine psb_c_mv_hlg_from_coo +end subroutine psb_c_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_c_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 index 0581c7d6..aafe692d 100644 --- a/cuda/impl/psb_c_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_c_mv_hlg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_mv_hlg_from_fmt + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_fmt #else - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_c_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_c_mv_hlg_from_fmt +end subroutine psb_c_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_c_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hybg_from_coo.F90 rename to cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 index 7aca6065..eb5ba685 100644 --- a/cuda/impl/psb_c_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_mv_hybg_from_coo(a,b,info) +subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_coo + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_coo #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_c_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_c_mv_hybg_from_coo +end subroutine psb_c_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_c_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_c_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 index 41581b85..d74e89bd 100644 --- a/cuda/impl/psb_c_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_c_mv_hybg_from_fmt(a,b,info) +subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_c_hybg_mat_mod, psb_protect_name => psb_c_mv_hybg_from_fmt + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_fmt #else - use psb_c_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod #endif implicit none - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_c_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_c_mv_hybg_from_fmt +end subroutine psb_c_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_d_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_csrg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 index ec00007e..e3383af1 100644 --- a/cuda/impl/psb_d_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_cp_csrg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_coo + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_coo #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_d_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_csrg_from_coo +end subroutine psb_d_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_d_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 index b3aabeed..28b46c76 100644 --- a/cuda/impl/psb_d_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_cp_csrg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_cp_csrg_from_fmt + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_fmt #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_cp_csrg_from_fmt +end subroutine psb_d_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_d_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_diag_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 index 06aff19d..d21bb469 100644 --- a/cuda/impl/psb_d_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_diag_from_coo(a,b,info) +subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_cp_diag_from_coo + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_cp_diag_from_coo #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_d_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_diag_from_coo +end subroutine psb_d_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_d_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_d_cp_elg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 index 381e4bfb..a4d58297 100644 --- a/cuda/impl/psb_d_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_cp_elg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_coo + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_d_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_d_count_ell_from_coo -end subroutine psb_d_cp_elg_from_coo +end subroutine psb_d_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_d_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_d_cp_elg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 index 9a6b6d41..31786c1b 100644 --- a/cuda/impl/psb_d_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_elg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_cp_elg_from_fmt + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_fmt #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_d_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_cp_elg_from_fmt +end subroutine psb_d_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_d_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_d_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 index 443452a1..efcf9d66 100644 --- a/cuda/impl/psb_d_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_cp_hdiag_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_d_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_d_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hdiag_from_coo +end subroutine psb_d_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_d_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_d_cp_hlg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 index 02855fef..2fc898b2 100644 --- a/cuda/impl/psb_d_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_cp_hlg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_coo #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_d_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_d_cp_hlg_from_coo +end subroutine psb_d_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_d_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_d_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 index 133fbb32..0796630c 100644 --- a/cuda/impl/psb_d_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_cp_hlg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_cp_hlg_from_fmt + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_fmt #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_d_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hlg_from_fmt +end subroutine psb_d_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_d_cp_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_cp_hybg_from_coo.F90 rename to cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 index a74409cb..f1f62a89 100644 --- a/cuda/impl/psb_d_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_cp_hybg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_coo + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_coo #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_d_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_cp_hybg_from_coo +end subroutine psb_d_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_d_cp_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 index 91d59060..37c9cc42 100644 --- a/cuda/impl/psb_d_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_cp_hybg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_cp_hybg_from_fmt + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_fmt #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_d_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_cp_hybg_from_fmt +end subroutine psb_d_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_d_csrg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_d_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 index 7d2d4470..3858672c 100644 --- a/cuda/impl/psb_d_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_allocate_mnnz + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_allocate_mnnz #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_csrg_allocate_mnnz +end subroutine psb_d_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_d_csrg_csmm.F90 b/cuda/impl/psb_d_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_d_csrg_csmm.F90 rename to cuda/impl/psb_d_cuda_csrg_csmm.F90 index 59c8343e..58251d9a 100644 --- a/cuda/impl/psb_d_csrg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_csmm + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmm #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_csmm +end subroutine psb_d_cuda_csrg_csmm diff --git a/cuda/impl/psb_d_csrg_csmv.F90 b/cuda/impl/psb_d_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_csrg_csmv.F90 rename to cuda/impl/psb_d_cuda_csrg_csmv.F90 index 44a6428b..269760f0 100644 --- a/cuda/impl/psb_d_csrg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_csmv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_csmv' + character(len=20) :: name='d_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_csmv +end subroutine psb_d_cuda_csrg_csmv diff --git a/cuda/impl/psb_s_csrg_from_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_csrg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_csrg_from_gpu.F90 index 23748d97..c451a99f 100644 --- a/cuda/impl/psb_s_csrg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_csrg_from_gpu(a,info) +subroutine psb_d_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_from_gpu + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_from_gpu #else - use psb_s_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_s_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_s_csrg_from_gpu +end subroutine psb_d_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_d_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 index 016d63d6..60ee541f 100644 --- a/cuda/impl/psb_d_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_inner_vect_sv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_inner_vect_sv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_inner_vect_sv' + character(len=20) :: name='d_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_csrg_inner_vect_sv +end subroutine psb_d_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_d_csrg_mold.F90 b/cuda/impl/psb_d_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_d_csrg_mold.F90 rename to cuda/impl/psb_d_cuda_csrg_mold.F90 index d7288868..eec34975 100644 --- a/cuda/impl/psb_d_csrg_mold.F90 +++ b/cuda/impl/psb_d_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_csrg_mold(a,b,info) +subroutine psb_d_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_mold + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_mold implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_csrg_mold(a,b,info) return -end subroutine psb_d_csrg_mold +end subroutine psb_d_cuda_csrg_mold diff --git a/cuda/impl/psb_d_csrg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_d_csrg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 index 083091f5..dbf34958 100644 --- a/cuda/impl/psb_d_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_csrg_reallocate_nz(nz,a) +subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_reallocate_nz + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_reallocate_nz #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_csrg_reallocate_nz' + character(len=20) :: name='d_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_d_csrg_reallocate_nz(nz,a) return -end subroutine psb_d_csrg_reallocate_nz +end subroutine psb_d_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_d_csrg_scal.F90 b/cuda/impl/psb_d_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_scal.F90 rename to cuda/impl/psb_d_cuda_csrg_scal.F90 index 60dbaecd..73e1b9f3 100644 --- a/cuda/impl/psb_d_csrg_scal.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_scal(d,a,info,side) +subroutine psb_d_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scal + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scal #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_d_csrg_scal(d,a,info,side) return -end subroutine psb_d_csrg_scal +end subroutine psb_d_cuda_csrg_scal diff --git a/cuda/impl/psb_d_csrg_scals.F90 b/cuda/impl/psb_d_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_scals.F90 rename to cuda/impl/psb_d_cuda_csrg_scals.F90 index 6d4a1f40..cf8d6270 100644 --- a/cuda/impl/psb_d_csrg_scals.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_scals(d,a,info) +subroutine psb_d_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_scals + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scals #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_d_csrg_scals(d,a,info) return -end subroutine psb_d_csrg_scals +end subroutine psb_d_cuda_csrg_scals diff --git a/cuda/impl/psb_d_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_d_csrg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index eb5d3942..a0e72cb4 100644 --- a/cuda/impl/psb_d_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_d_csrg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_to_gpu + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_to_gpu #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_d_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_d_csrg_to_gpu +end subroutine psb_d_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_d_csrg_vect_mv.F90 b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_csrg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_csrg_vect_mv.F90 index f7124bbb..b828d878 100644 --- a/cuda/impl/psb_d_csrg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_vect_mv + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_vect_mv #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_csrg_vect_mv' + character(len=20) :: name='d_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_csrg_vect_mv +end subroutine psb_d_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_d_diag_csmv.F90 b/cuda/impl/psb_d_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_d_diag_csmv.F90 rename to cuda/impl/psb_d_cuda_diag_csmv.F90 index af9ad2db..8b49769e 100644 --- a/cuda/impl/psb_d_diag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_csmv + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_csmv #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_diag_csmv' + character(len=20) :: name='d_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_diag_csmv +end subroutine psb_d_cuda_diag_csmv diff --git a/cuda/impl/psb_d_diag_mold.F90 b/cuda/impl/psb_d_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_d_diag_mold.F90 rename to cuda/impl/psb_d_cuda_diag_mold.F90 index 4b0d066a..7cea069e 100644 --- a/cuda/impl/psb_d_diag_mold.F90 +++ b/cuda/impl/psb_d_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_diag_mold(a,b,info) +subroutine psb_d_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_mold + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_mold implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_diag_mold(a,b,info) return -end subroutine psb_d_diag_mold +end subroutine psb_d_cuda_diag_mold diff --git a/cuda/impl/psb_d_diag_to_gpu.F90 b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_diag_to_gpu.F90 rename to cuda/impl/psb_d_cuda_diag_to_gpu.F90 index de244124..4903de8a 100644 --- a/cuda/impl/psb_d_diag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_diag_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_to_gpu + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_to_gpu #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_d_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_d_diag_to_gpu +end subroutine psb_d_cuda_diag_to_gpu diff --git a/cuda/impl/psb_d_diag_vect_mv.F90 b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_diag_vect_mv.F90 rename to cuda/impl/psb_d_cuda_diag_vect_mv.F90 index 3f2f5ac6..0f23d363 100644 --- a/cuda/impl/psb_d_diag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_diag_vect_mv + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_vect_mv #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_diag_vect_mv' + character(len=20) :: name='d_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_diag_vect_mv +end subroutine psb_d_cuda_diag_vect_mv diff --git a/cuda/impl/psb_d_dnsg_mat_impl.F90 b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_d_dnsg_mat_impl.F90 rename to cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 index a7915898..8d922d82 100644 --- a/cuda/impl/psb_d_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_vect_mv + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_vect_mv #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ real(psb_dpk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='d_dnsg_vect_mv' + character(len=20) :: name='d_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= dzero) then @@ -117,21 +117,21 @@ subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_dnsg_vect_mv +end subroutine psb_d_cuda_dnsg_vect_mv -subroutine psb_d_dnsg_mold(a,b,info) +subroutine psb_d_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_mold + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_mold #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(in) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_d_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_d_dnsg_mold(a,b,info) return -end subroutine psb_d_dnsg_mold +end subroutine psb_d_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end subroutine psb_d_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_d_dnsg_to_gpu(a,info) +subroutine psb_d_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_dnsg_to_gpu + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_to_gpu #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='d_dnsg_to_gpu' + character(len=20) :: name='d_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_d_dnsg_to_gpu(a,info) return -end subroutine psb_d_dnsg_to_gpu +end subroutine psb_d_cuda_dnsg_to_gpu -subroutine psb_d_cp_dnsg_from_coo(a,b,info) +subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_coo + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_coo #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_coo' + character(len=20) :: name='d_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_d_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_d_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_d_cp_dnsg_from_coo +end subroutine psb_d_cuda_cp_dnsg_from_coo -subroutine psb_d_cp_dnsg_from_fmt(a,b,info) +subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_cp_dnsg_from_fmt + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_fmt #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_d_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_fmt' + character(len=20) :: name='d_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_d_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_d_cp_dnsg_from_fmt +end subroutine psb_d_cuda_cp_dnsg_from_fmt -subroutine psb_d_mv_dnsg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_coo + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_coo #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='d_dnsg_mv_from_coo' + character(len=20) :: name='d_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_d_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_d_mv_dnsg_from_coo +end subroutine psb_d_cuda_mv_dnsg_from_coo -subroutine psb_d_mv_dnsg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod - use psb_d_dnsg_mat_mod, psb_protect_name => psb_d_mv_dnsg_from_fmt + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_fmt #else - use psb_d_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod #endif implicit none - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_d_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_dnsg_cp_from_fmt' + character(len=20) :: name='d_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_d_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_d_mv_dnsg_from_fmt +end subroutine psb_d_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_d_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_d_elg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 index 105f5617..b9308514 100644 --- a/cuda/impl/psb_d_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_allocate_mnnz + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_allocate_mnnz #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_elg_allocate_mnnz +end subroutine psb_d_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_s_elg_asb.f90 b/cuda/impl/psb_d_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_s_elg_asb.f90 rename to cuda/impl/psb_d_cuda_elg_asb.f90 index 190be710..c158ccde 100644 --- a/cuda/impl/psb_s_elg_asb.f90 +++ b/cuda/impl/psb_d_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_s_elg_asb(a) +subroutine psb_d_cuda_elg_asb(a) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_asb + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_asb implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_s_elg_asb(a) return -end subroutine psb_s_elg_asb +end subroutine psb_d_cuda_elg_asb diff --git a/cuda/impl/psb_d_elg_csmm.F90 b/cuda/impl/psb_d_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_elg_csmm.F90 rename to cuda/impl/psb_d_cuda_elg_csmm.F90 index add9c3b2..2d9883fa 100644 --- a/cuda/impl/psb_d_elg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmm + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmm #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csmm' + character(len=20) :: name='d_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_csmm +end subroutine psb_d_cuda_elg_csmm diff --git a/cuda/impl/psb_d_elg_csmv.F90 b/cuda/impl/psb_d_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_d_elg_csmv.F90 rename to cuda/impl/psb_d_cuda_elg_csmv.F90 index 5237fb75..6420e28d 100644 --- a/cuda/impl/psb_d_elg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csmv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_csmv +end subroutine psb_d_cuda_elg_csmv diff --git a/cuda/impl/psb_d_elg_csput.F90 b/cuda/impl/psb_d_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_d_elg_csput.F90 rename to cuda/impl/psb_d_cuda_elg_csput.F90 index 107c69d0..19d26c43 100644 --- a/cuda/impl/psb_d_elg_csput.F90 +++ b/cuda/impl/psb_d_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput_a + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_a #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csput_a' + character(len=20) :: name='d_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_d_elg_csput_a +end subroutine psb_d_cuda_elg_csput_a -subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_csput_v - use psb_d_gpu_vect_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_v + use psb_d_cuda_vect_mod #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_csput_v' + character(len=20) :: name='d_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_d_elg_csput_v +end subroutine psb_d_cuda_elg_csput_v diff --git a/cuda/impl/psb_d_elg_from_gpu.F90 b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_elg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_elg_from_gpu.F90 index c1da9584..b532a83c 100644 --- a/cuda/impl/psb_d_elg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_from_gpu(a,info) +subroutine psb_d_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_from_gpu + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_from_gpu #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_d_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_elg_from_gpu +end subroutine psb_d_cuda_elg_from_gpu diff --git a/cuda/impl/psb_d_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_d_elg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 index 333946bf..c262969f 100644 --- a/cuda/impl/psb_d_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_inner_vect_sv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_inner_vect_sv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='d_elg_inner_vect_sv' + character(len=20) :: name='d_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. real(psb_dpk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_inner_vect_sv +end subroutine psb_d_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_d_elg_mold.F90 b/cuda/impl/psb_d_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_elg_mold.F90 rename to cuda/impl/psb_d_cuda_elg_mold.F90 index 3fd6d071..f887f96f 100644 --- a/cuda/impl/psb_d_elg_mold.F90 +++ b/cuda/impl/psb_d_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_elg_mold(a,b,info) +subroutine psb_d_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_mold + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_mold implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_elg_mold(a,b,info) return -end subroutine psb_d_elg_mold +end subroutine psb_d_cuda_elg_mold diff --git a/cuda/impl/psb_d_elg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_d_elg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 index 70b3705c..66c583e1 100644 --- a/cuda/impl/psb_d_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_d_elg_reallocate_nz(nz,a) +subroutine psb_d_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_reallocate_nz + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_reallocate_nz #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_elg_reallocate_nz' + character(len=20) :: name='d_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_d_elg_reallocate_nz(nz,a) return -end subroutine psb_d_elg_reallocate_nz +end subroutine psb_d_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_d_elg_scal.F90 b/cuda/impl/psb_d_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_elg_scal.F90 rename to cuda/impl/psb_d_cuda_elg_scal.F90 index 53ab82d7..7aa21c93 100644 --- a/cuda/impl/psb_d_elg_scal.F90 +++ b/cuda/impl/psb_d_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_scal(d,a,info,side) +subroutine psb_d_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scal + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scal #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_d_elg_scal(d,a,info,side) return -end subroutine psb_d_elg_scal +end subroutine psb_d_cuda_elg_scal diff --git a/cuda/impl/psb_d_elg_scals.F90 b/cuda/impl/psb_d_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_d_elg_scals.F90 rename to cuda/impl/psb_d_cuda_elg_scals.F90 index f85780ce..1950b366 100644 --- a/cuda/impl/psb_d_elg_scals.F90 +++ b/cuda/impl/psb_d_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_scals(d,a,info) +subroutine psb_d_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_scals + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scals #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_d_elg_scals(d,a,info) return -end subroutine psb_d_elg_scals +end subroutine psb_d_cuda_elg_scals diff --git a/cuda/impl/psb_d_elg_to_gpu.F90 b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_d_elg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_elg_to_gpu.F90 index 28e61606..b589ec2d 100644 --- a/cuda/impl/psb_d_elg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_elg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_to_gpu + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_to_gpu #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_d_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_d_elg_to_gpu +end subroutine psb_d_cuda_elg_to_gpu diff --git a/cuda/impl/psb_s_elg_trim.f90 b/cuda/impl/psb_d_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_s_elg_trim.f90 rename to cuda/impl/psb_d_cuda_elg_trim.f90 index f3bd3b2f..be573c8c 100644 --- a/cuda/impl/psb_s_elg_trim.f90 +++ b/cuda/impl/psb_d_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_elg_trim(a) +subroutine psb_d_cuda_elg_trim(a) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_trim + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_trim implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_s_elg_trim(a) return -end subroutine psb_s_elg_trim +end subroutine psb_d_cuda_elg_trim diff --git a/cuda/impl/psb_d_elg_vect_mv.F90 b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_d_elg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_elg_vect_mv.F90 index e46f84da..1be57d22 100644 --- a/cuda/impl/psb_d_elg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_elg_vect_mv + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_vect_mv #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_elg_vect_mv' + character(len=20) :: name='d_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= dzero) then @@ -128,4 +128,4 @@ subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_elg_vect_mv +end subroutine psb_d_cuda_elg_vect_mv diff --git a/cuda/impl/psb_d_hdiag_csmv.F90 b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_d_hdiag_csmv.F90 rename to cuda/impl/psb_d_cuda_hdiag_csmv.F90 index 6f6bcedf..4bcd6e7a 100644 --- a/cuda/impl/psb_d_hdiag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_csmv + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmv #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_hdiag_csmv' + character(len=20) :: name='d_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hdiag_csmv +end subroutine psb_d_cuda_hdiag_csmv diff --git a/cuda/impl/psb_d_hdiag_mold.F90 b/cuda/impl/psb_d_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_d_hdiag_mold.F90 rename to cuda/impl/psb_d_cuda_hdiag_mold.F90 index b6c254e9..c5028c07 100644 --- a/cuda/impl/psb_d_hdiag_mold.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_hdiag_mold(a,b,info) +subroutine psb_d_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_mold + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_mold implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_d_hdiag_mold(a,b,info) return -end subroutine psb_d_hdiag_mold +end subroutine psb_d_cuda_hdiag_mold diff --git a/cuda/impl/psb_s_hdiag_to_gpu.F90 b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_s_hdiag_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 index ade1c080..ca79b9fa 100644 --- a/cuda/impl/psb_s_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hdiag_to_gpu(a,info) +subroutine psb_d_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_to_gpu + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_to_gpu #else - use psb_s_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_s_hdiag_to_gpu(a,info) #endif -end subroutine psb_s_hdiag_to_gpu +end subroutine psb_d_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_d_hdiag_vect_mv.F90 b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_hdiag_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 index db7ec9c6..74233f90 100644 --- a/cuda/impl/psb_d_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_vect_mv + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_vect_mv #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hdiag_sparse_mat), intent(in) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hdiag_vect_mv' + character(len=20) :: name='d_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hdiag_vect_mv +end subroutine psb_d_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_d_hlg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_d_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 index 6f327e81..3382327f 100644 --- a/cuda/impl/psb_d_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_allocate_mnnz + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_allocate_mnnz #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_hlg_allocate_mnnz +end subroutine psb_d_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_d_hlg_csmm.F90 b/cuda/impl/psb_d_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_hlg_csmm.F90 rename to cuda/impl/psb_d_cuda_hlg_csmm.F90 index 120f3e06..a223aace 100644 --- a/cuda/impl/psb_d_hlg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_csmm + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmm #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hlg_csmm' + character(len=20) :: name='d_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_csmm +end subroutine psb_d_cuda_hlg_csmm diff --git a/cuda/impl/psb_d_hlg_csmv.F90 b/cuda/impl/psb_d_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_hlg_csmv.F90 rename to cuda/impl/psb_d_cuda_hlg_csmv.F90 index 4584826d..04779296 100644 --- a/cuda/impl/psb_d_hlg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_csmv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='d_hlg_csmv' + character(len=20) :: name='d_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_csmv +end subroutine psb_d_cuda_hlg_csmv diff --git a/cuda/impl/psb_d_hlg_from_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_d_hlg_from_gpu.F90 rename to cuda/impl/psb_d_cuda_hlg_from_gpu.F90 index eec714f4..7c1a2de8 100644 --- a/cuda/impl/psb_d_hlg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_from_gpu(a,info) +subroutine psb_d_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_from_gpu + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_from_gpu #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_d_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_hlg_from_gpu +end subroutine psb_d_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_d_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 index 0ad867a3..c6bd68b5 100644 --- a/cuda/impl/psb_d_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_inner_vect_sv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_inner_vect_sv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_inner_vect_sv +end subroutine psb_d_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_d_hlg_mold.F90 b/cuda/impl/psb_d_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_hlg_mold.F90 rename to cuda/impl/psb_d_cuda_hlg_mold.F90 index 3ce9f33a..dddce134 100644 --- a/cuda/impl/psb_d_hlg_mold.F90 +++ b/cuda/impl/psb_d_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_d_hlg_mold(a,b,info) +subroutine psb_d_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_mold + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_mold implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_d_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_hlg_mold +end subroutine psb_d_cuda_hlg_mold diff --git a/cuda/impl/psb_d_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_d_hlg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 index c9fa4771..aa2954d6 100644 --- a/cuda/impl/psb_d_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_d_hlg_reallocate_nz(nz,a) +subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_reallocate_nz + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_reallocate_nz #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_hlg_reallocate_nz' + character(len=20) :: name='d_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_d_hlg_reallocate_nz(nz,a) return -end subroutine psb_d_hlg_reallocate_nz +end subroutine psb_d_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_d_hlg_scal.F90 b/cuda/impl/psb_d_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_scal.F90 rename to cuda/impl/psb_d_cuda_hlg_scal.F90 index b487303d..3cbfada0 100644 --- a/cuda/impl/psb_d_hlg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_hlg_scal(d,a,info,side) +subroutine psb_d_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scal + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scal #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_d_hlg_scal(d,a,info,side) return -end subroutine psb_d_hlg_scal +end subroutine psb_d_cuda_hlg_scal diff --git a/cuda/impl/psb_d_hlg_scals.F90 b/cuda/impl/psb_d_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_scals.F90 rename to cuda/impl/psb_d_cuda_hlg_scals.F90 index e3f676e9..1ddf764f 100644 --- a/cuda/impl/psb_d_hlg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_scals(d,a,info) +subroutine psb_d_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_scals + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scals #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_d_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_d_hlg_scals +end subroutine psb_d_cuda_hlg_scals diff --git a/cuda/impl/psb_d_hlg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hlg_to_gpu.F90 index 5e3b3558..82737315 100644 --- a/cuda/impl/psb_d_hlg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_to_gpu + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_to_gpu #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_d_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_d_hlg_to_gpu +end subroutine psb_d_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_d_hlg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_d_hlg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hlg_vect_mv.F90 index cd5e95e5..9d0741c4 100644 --- a/cuda/impl/psb_d_hlg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_hlg_vect_mv + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_vect_mv #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hlg_vect_mv' + character(len=20) :: name='d_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hlg_vect_mv +end subroutine psb_d_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_d_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 index 1565a719..b0bff6c0 100644 --- a/cuda/impl/psb_d_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_allocate_mnnz + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_allocate_mnnz #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_d_hybg_allocate_mnnz +end subroutine psb_d_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_d_hybg_csmm.F90 b/cuda/impl/psb_d_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_d_hybg_csmm.F90 rename to cuda/impl/psb_d_cuda_hybg_csmm.F90 index abc0e0c2..3fcfd17f 100644 --- a/cuda/impl/psb_d_hybg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_csmm + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmm #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_csmm' + character(len=20) :: name='d_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_csmm +end subroutine psb_d_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_d_hybg_csmv.F90 b/cuda/impl/psb_d_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_d_hybg_csmv.F90 rename to cuda/impl/psb_d_cuda_hybg_csmv.F90 index c636ec8a..5e06f633 100644 --- a/cuda/impl/psb_d_hybg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_csmv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_csmv' + character(len=20) :: name='d_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_csmv +end subroutine psb_d_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_d_hybg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 index 82b536ca..a30c1abe 100644 --- a/cuda/impl/psb_d_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_inner_vect_sv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_inner_vect_sv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_inner_vect_sv' + character(len=20) :: name='d_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_inner_vect_sv +end subroutine psb_d_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_d_hybg_mold.F90 b/cuda/impl/psb_d_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_d_hybg_mold.F90 rename to cuda/impl/psb_d_cuda_hybg_mold.F90 index 27390db0..fa239696 100644 --- a/cuda/impl/psb_d_hybg_mold.F90 +++ b/cuda/impl/psb_d_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_mold(a,b,info) +subroutine psb_d_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_mold + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_mold implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_d_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_d_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_d_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_d_hybg_mold(a,b,info) return -end subroutine psb_d_hybg_mold +end subroutine psb_d_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_d_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_d_hybg_reallocate_nz.F90 rename to cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 index 537101e9..cadce8d3 100644 --- a/cuda/impl/psb_d_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_reallocate_nz(nz,a) +subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_reallocate_nz + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_reallocate_nz #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='d_hybg_reallocate_nz' + character(len=20) :: name='d_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_d_hybg_reallocate_nz(nz,a) return -end subroutine psb_d_hybg_reallocate_nz +end subroutine psb_d_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_d_hybg_scal.F90 b/cuda/impl/psb_d_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_d_hybg_scal.F90 rename to cuda/impl/psb_d_cuda_hybg_scal.F90 index 32ef2da0..126e25cb 100644 --- a/cuda/impl/psb_d_hybg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_scal(d,a,info,side) +subroutine psb_d_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scal + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scal #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_d_hybg_scal(d,a,info,side) return -end subroutine psb_d_hybg_scal +end subroutine psb_d_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_d_hybg_scals.F90 b/cuda/impl/psb_d_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_d_hybg_scals.F90 rename to cuda/impl/psb_d_cuda_hybg_scals.F90 index 8c38328a..88b7e05c 100644 --- a/cuda/impl/psb_d_hybg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_scals(d,a,info) +subroutine psb_d_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_scals + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scals #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_d_hybg_scals(d,a,info) return -end subroutine psb_d_hybg_scals +end subroutine psb_d_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_d_hybg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_d_hybg_to_gpu.F90 rename to cuda/impl/psb_d_cuda_hybg_to_gpu.F90 index 33bf55b8..d94a75c7 100644 --- a/cuda/impl/psb_d_hybg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_to_gpu(a,info,nzrm) +subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_to_gpu + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_to_gpu #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_d_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_d_hybg_to_gpu +end subroutine psb_d_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_d_hybg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_d_hybg_vect_mv.F90 rename to cuda/impl/psb_d_cuda_hybg_vect_mv.F90 index d9653a48..9d0aedb7 100644 --- a/cuda/impl/psb_d_hybg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_hybg_vect_mv + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_vect_mv #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif - use psb_d_gpu_vect_mod + use psb_d_cuda_vect_mod implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='d_hybg_vect_mv' + character(len=20) :: name='d_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type(yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_d_hybg_vect_mv +end subroutine psb_d_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_d_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_csrg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 index 8c59e6d1..18e7c636 100644 --- a/cuda/impl/psb_d_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_mv_csrg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_coo + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_coo #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_d_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_csrg_from_coo +end subroutine psb_d_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_d_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 index 30c133e4..837c78c1 100644 --- a/cuda/impl/psb_d_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_mv_csrg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_mv_csrg_from_fmt + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_fmt #else - use psb_d_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_d_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_d_mv_csrg_from_fmt +end subroutine psb_d_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_d_mv_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_diag_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 index f37a5523..8d33c459 100644 --- a/cuda/impl/psb_d_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_mv_diag_from_coo(a,b,info) +subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_d_diag_mat_mod, psb_protect_name => psb_d_mv_diag_from_coo + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_mv_diag_from_coo #else - use psb_d_diag_mat_mod + use psb_d_cuda_diag_mat_mod #endif implicit none - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_d_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_diag_from_coo +end subroutine psb_d_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_d_mv_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_elg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 index 73216cfa..ad9e7f10 100644 --- a/cuda/impl/psb_d_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_elg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_coo + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_coo #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_mv_elg_from_coo(a,b,info) return -end subroutine psb_d_mv_elg_from_coo +end subroutine psb_d_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_d_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_d_mv_elg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 index 5038c50e..9cdf790e 100644 --- a/cuda/impl/psb_d_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_elg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_elg_mat_mod, psb_protect_name => psb_d_mv_elg_from_fmt + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_fmt #else - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod #endif implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_d_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_mv_elg_from_fmt +end subroutine psb_d_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_d_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_d_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 index ee0e983f..aff5e0c0 100644 --- a/cuda/impl/psb_d_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_d_mv_hdiag_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_d_hdiag_mat_mod + use psb_d_cuda_hdiag_mat_mod #endif implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_d_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_d_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_hdiag_from_coo +end subroutine psb_d_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_d_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_d_mv_hlg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 index fe030415..a2b358c4 100644 --- a/cuda/impl/psb_d_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_d_mv_hlg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_coo #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_d_mv_hlg_from_coo(a,b,info) return -end subroutine psb_d_mv_hlg_from_coo +end subroutine psb_d_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_d_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 index e538b017..130d88c2 100644 --- a/cuda/impl/psb_d_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_mv_hlg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_d_hlg_mat_mod, psb_protect_name => psb_d_mv_hlg_from_fmt + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_fmt #else - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod #endif implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_d_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_d_mv_hlg_from_fmt +end subroutine psb_d_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_d_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hybg_from_coo.F90 rename to cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 index 4fe76c72..8b0ad032 100644 --- a/cuda/impl/psb_d_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_mv_hybg_from_coo(a,b,info) +subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_coo + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_coo #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_d_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_d_mv_hybg_from_coo +end subroutine psb_d_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_d_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_d_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 index 454533d0..71badfc5 100644 --- a/cuda/impl/psb_d_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_d_mv_hybg_from_fmt(a,b,info) +subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_d_hybg_mat_mod, psb_protect_name => psb_d_mv_hybg_from_fmt + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_fmt #else - use psb_d_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod #endif implicit none - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_d_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_d_mv_hybg_from_fmt +end subroutine psb_d_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_s_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_csrg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 index 4a714d41..b7bebc95 100644 --- a/cuda/impl/psb_s_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_cp_csrg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_coo + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_coo #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_s_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_csrg_from_coo +end subroutine psb_s_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_s_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 index 962a8c9d..7ab9283d 100644 --- a/cuda/impl/psb_s_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_cp_csrg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_cp_csrg_from_fmt + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_fmt #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_cp_csrg_from_fmt +end subroutine psb_s_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_s_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_diag_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 index 6b105ef2..9f038a09 100644 --- a/cuda/impl/psb_s_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_diag_from_coo(a,b,info) +subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_cp_diag_from_coo + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_cp_diag_from_coo #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_s_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_diag_from_coo +end subroutine psb_s_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_s_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_s_cp_elg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 index af8c7d28..f6e1ba42 100644 --- a/cuda/impl/psb_s_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_cp_elg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_coo + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_s_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_s_count_ell_from_coo -end subroutine psb_s_cp_elg_from_coo +end subroutine psb_s_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_s_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_s_cp_elg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 index c3d973e1..0c811426 100644 --- a/cuda/impl/psb_s_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_elg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_cp_elg_from_fmt + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_fmt #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_s_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_cp_elg_from_fmt +end subroutine psb_s_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_s_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_s_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 index 0509706d..07b56fa6 100644 --- a/cuda/impl/psb_s_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_cp_hdiag_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_s_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_s_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hdiag_from_coo +end subroutine psb_s_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_s_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_s_cp_hlg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 index 5988c8dd..055fa046 100644 --- a/cuda/impl/psb_s_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_cp_hlg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_coo #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_s_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_s_cp_hlg_from_coo +end subroutine psb_s_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_s_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_s_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 index 41c20866..b49be761 100644 --- a/cuda/impl/psb_s_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_cp_hlg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_cp_hlg_from_fmt + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_fmt #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_s_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hlg_from_fmt +end subroutine psb_s_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_s_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_cp_hybg_from_coo.F90 rename to cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 index 92dc4a68..ab135944 100644 --- a/cuda/impl/psb_s_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_cp_hybg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_coo + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_coo #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_s_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_cp_hybg_from_coo +end subroutine psb_s_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_s_cp_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 index 53143776..62a54759 100644 --- a/cuda/impl/psb_s_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_cp_hybg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_cp_hybg_from_fmt + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_fmt #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_s_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_cp_hybg_from_fmt +end subroutine psb_s_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_s_csrg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_s_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 index e93452d2..53ca8f12 100644 --- a/cuda/impl/psb_s_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_allocate_mnnz + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_allocate_mnnz #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_csrg_allocate_mnnz +end subroutine psb_s_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_s_csrg_csmm.F90 b/cuda/impl/psb_s_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_s_csrg_csmm.F90 rename to cuda/impl/psb_s_cuda_csrg_csmm.F90 index 55087053..c8ff4a9e 100644 --- a/cuda/impl/psb_s_csrg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_csmm + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmm #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_csmm +end subroutine psb_s_cuda_csrg_csmm diff --git a/cuda/impl/psb_s_csrg_csmv.F90 b/cuda/impl/psb_s_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_csrg_csmv.F90 rename to cuda/impl/psb_s_cuda_csrg_csmv.F90 index 42520843..72658c28 100644 --- a/cuda/impl/psb_s_csrg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_csmv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_csmv' + character(len=20) :: name='s_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_csmv +end subroutine psb_s_cuda_csrg_csmv diff --git a/cuda/impl/psb_z_csrg_from_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_csrg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_csrg_from_gpu.F90 index f8a6951a..7811f746 100644 --- a/cuda/impl/psb_z_csrg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_csrg_from_gpu(a,info) +subroutine psb_s_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_from_gpu + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_from_gpu #else - use psb_z_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_z_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_csrg_from_gpu +end subroutine psb_s_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_s_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 index 133a6350..7f9965d8 100644 --- a/cuda/impl/psb_s_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_inner_vect_sv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_inner_vect_sv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_inner_vect_sv' + character(len=20) :: name='s_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_csrg_inner_vect_sv +end subroutine psb_s_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_s_csrg_mold.F90 b/cuda/impl/psb_s_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_s_csrg_mold.F90 rename to cuda/impl/psb_s_cuda_csrg_mold.F90 index 6ac4cc3d..5e33850b 100644 --- a/cuda/impl/psb_s_csrg_mold.F90 +++ b/cuda/impl/psb_s_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_csrg_mold(a,b,info) +subroutine psb_s_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_mold + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_mold implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_csrg_mold(a,b,info) return -end subroutine psb_s_csrg_mold +end subroutine psb_s_cuda_csrg_mold diff --git a/cuda/impl/psb_s_csrg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_s_csrg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 index dd9a50d0..fed3b0e7 100644 --- a/cuda/impl/psb_s_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_csrg_reallocate_nz(nz,a) +subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_reallocate_nz + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_reallocate_nz #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_csrg_reallocate_nz' + character(len=20) :: name='s_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_s_csrg_reallocate_nz(nz,a) return -end subroutine psb_s_csrg_reallocate_nz +end subroutine psb_s_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_s_csrg_scal.F90 b/cuda/impl/psb_s_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_scal.F90 rename to cuda/impl/psb_s_cuda_csrg_scal.F90 index 5e0fbcf0..826ab2dd 100644 --- a/cuda/impl/psb_s_csrg_scal.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_scal(d,a,info,side) +subroutine psb_s_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scal + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scal #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_s_csrg_scal(d,a,info,side) return -end subroutine psb_s_csrg_scal +end subroutine psb_s_cuda_csrg_scal diff --git a/cuda/impl/psb_s_csrg_scals.F90 b/cuda/impl/psb_s_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_scals.F90 rename to cuda/impl/psb_s_cuda_csrg_scals.F90 index 54b299a1..04f4c29a 100644 --- a/cuda/impl/psb_s_csrg_scals.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_scals(d,a,info) +subroutine psb_s_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_scals + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scals #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_s_csrg_scals(d,a,info) return -end subroutine psb_s_csrg_scals +end subroutine psb_s_cuda_csrg_scals diff --git a/cuda/impl/psb_s_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_s_csrg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index f90ae4ea..eadca5df 100644 --- a/cuda/impl/psb_s_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_s_csrg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_to_gpu + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_to_gpu #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_s_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_s_csrg_to_gpu +end subroutine psb_s_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_s_csrg_vect_mv.F90 b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_csrg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_csrg_vect_mv.F90 index ff88bf89..38e2dfc0 100644 --- a/cuda/impl/psb_s_csrg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_csrg_vect_mv + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_vect_mv #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_csrg_vect_mv' + character(len=20) :: name='s_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_csrg_vect_mv +end subroutine psb_s_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_s_diag_csmv.F90 b/cuda/impl/psb_s_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_s_diag_csmv.F90 rename to cuda/impl/psb_s_cuda_diag_csmv.F90 index 4cf14d12..214cf6f8 100644 --- a/cuda/impl/psb_s_diag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_csmv + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_csmv #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_diag_csmv' + character(len=20) :: name='s_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_diag_csmv +end subroutine psb_s_cuda_diag_csmv diff --git a/cuda/impl/psb_s_diag_mold.F90 b/cuda/impl/psb_s_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_s_diag_mold.F90 rename to cuda/impl/psb_s_cuda_diag_mold.F90 index a7690f62..9e6c58a6 100644 --- a/cuda/impl/psb_s_diag_mold.F90 +++ b/cuda/impl/psb_s_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_diag_mold(a,b,info) +subroutine psb_s_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_mold + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_mold implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_diag_mold(a,b,info) return -end subroutine psb_s_diag_mold +end subroutine psb_s_cuda_diag_mold diff --git a/cuda/impl/psb_s_diag_to_gpu.F90 b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_diag_to_gpu.F90 rename to cuda/impl/psb_s_cuda_diag_to_gpu.F90 index bb09b127..c1ee7401 100644 --- a/cuda/impl/psb_s_diag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_diag_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_to_gpu + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_to_gpu #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_s_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_s_diag_to_gpu +end subroutine psb_s_cuda_diag_to_gpu diff --git a/cuda/impl/psb_s_diag_vect_mv.F90 b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_diag_vect_mv.F90 rename to cuda/impl/psb_s_cuda_diag_vect_mv.F90 index 31976247..ab655b7c 100644 --- a/cuda/impl/psb_s_diag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_diag_vect_mv + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_vect_mv #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_diag_vect_mv' + character(len=20) :: name='s_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_diag_vect_mv +end subroutine psb_s_cuda_diag_vect_mv diff --git a/cuda/impl/psb_s_dnsg_mat_impl.F90 b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_s_dnsg_mat_impl.F90 rename to cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 index 13c58985..861724aa 100644 --- a/cuda/impl/psb_s_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_vect_mv + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_vect_mv #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ real(psb_spk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='s_dnsg_vect_mv' + character(len=20) :: name='s_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= szero) then @@ -117,21 +117,21 @@ subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_dnsg_vect_mv +end subroutine psb_s_cuda_dnsg_vect_mv -subroutine psb_s_dnsg_mold(a,b,info) +subroutine psb_s_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_mold + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_mold #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(in) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_s_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_s_dnsg_mold(a,b,info) return -end subroutine psb_s_dnsg_mold +end subroutine psb_s_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end subroutine psb_s_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_s_dnsg_to_gpu(a,info) +subroutine psb_s_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_dnsg_to_gpu + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_to_gpu #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='s_dnsg_to_gpu' + character(len=20) :: name='s_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_s_dnsg_to_gpu(a,info) return -end subroutine psb_s_dnsg_to_gpu +end subroutine psb_s_cuda_dnsg_to_gpu -subroutine psb_s_cp_dnsg_from_coo(a,b,info) +subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_coo + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_coo #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_coo' + character(len=20) :: name='s_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_s_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_s_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_s_cp_dnsg_from_coo +end subroutine psb_s_cuda_cp_dnsg_from_coo -subroutine psb_s_cp_dnsg_from_fmt(a,b,info) +subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_cp_dnsg_from_fmt + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_fmt #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_s_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_fmt' + character(len=20) :: name='s_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_s_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_s_cp_dnsg_from_fmt +end subroutine psb_s_cuda_cp_dnsg_from_fmt -subroutine psb_s_mv_dnsg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_coo + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_coo #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='s_dnsg_mv_from_coo' + character(len=20) :: name='s_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_s_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_s_mv_dnsg_from_coo +end subroutine psb_s_cuda_mv_dnsg_from_coo -subroutine psb_s_mv_dnsg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod - use psb_s_dnsg_mat_mod, psb_protect_name => psb_s_mv_dnsg_from_fmt + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_fmt #else - use psb_s_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod #endif implicit none - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_s_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_dnsg_cp_from_fmt' + character(len=20) :: name='s_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_s_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_s_mv_dnsg_from_fmt +end subroutine psb_s_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_s_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_s_elg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 index f3b1d743..63c41644 100644 --- a/cuda/impl/psb_s_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_allocate_mnnz + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_allocate_mnnz #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_elg_allocate_mnnz +end subroutine psb_s_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_c_elg_asb.f90 b/cuda/impl/psb_s_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_c_elg_asb.f90 rename to cuda/impl/psb_s_cuda_elg_asb.f90 index f2a8c641..0d53c26a 100644 --- a/cuda/impl/psb_c_elg_asb.f90 +++ b/cuda/impl/psb_s_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_c_elg_asb(a) +subroutine psb_s_cuda_elg_asb(a) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_asb + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_asb implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_c_elg_asb(a) return -end subroutine psb_c_elg_asb +end subroutine psb_s_cuda_elg_asb diff --git a/cuda/impl/psb_s_elg_csmm.F90 b/cuda/impl/psb_s_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_elg_csmm.F90 rename to cuda/impl/psb_s_cuda_elg_csmm.F90 index 8bda23e3..e7f88a2e 100644 --- a/cuda/impl/psb_s_elg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csmm + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmm #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csmm' + character(len=20) :: name='s_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_csmm +end subroutine psb_s_cuda_elg_csmm diff --git a/cuda/impl/psb_s_elg_csmv.F90 b/cuda/impl/psb_s_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_s_elg_csmv.F90 rename to cuda/impl/psb_s_cuda_elg_csmv.F90 index 29e345c2..1844d338 100644 --- a/cuda/impl/psb_s_elg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csmv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_csmv +end subroutine psb_s_cuda_elg_csmv diff --git a/cuda/impl/psb_s_elg_csput.F90 b/cuda/impl/psb_s_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_s_elg_csput.F90 rename to cuda/impl/psb_s_cuda_elg_csput.F90 index ff2b0ff3..036eabb2 100644 --- a/cuda/impl/psb_s_elg_csput.F90 +++ b/cuda/impl/psb_s_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csput_a + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_a #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csput_a' + character(len=20) :: name='s_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_s_elg_csput_a +end subroutine psb_s_cuda_elg_csput_a -subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_csput_v - use psb_s_gpu_vect_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_v + use psb_s_cuda_vect_mod #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_csput_v' + character(len=20) :: name='s_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_s_elg_csput_v +end subroutine psb_s_cuda_elg_csput_v diff --git a/cuda/impl/psb_c_elg_from_gpu.F90 b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_c_elg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_elg_from_gpu.F90 index eda65380..bdc55790 100644 --- a/cuda/impl/psb_c_elg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_elg_from_gpu(a,info) +subroutine psb_s_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_from_gpu + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_from_gpu #else - use psb_c_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_c_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_elg_from_gpu +end subroutine psb_s_cuda_elg_from_gpu diff --git a/cuda/impl/psb_s_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_s_elg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 index 83c79cf3..79e546f5 100644 --- a/cuda/impl/psb_s_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_inner_vect_sv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_inner_vect_sv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='s_elg_inner_vect_sv' + character(len=20) :: name='s_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. real(psb_spk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_inner_vect_sv +end subroutine psb_s_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_s_elg_mold.F90 b/cuda/impl/psb_s_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_elg_mold.F90 rename to cuda/impl/psb_s_cuda_elg_mold.F90 index a481d605..dc8730bb 100644 --- a/cuda/impl/psb_s_elg_mold.F90 +++ b/cuda/impl/psb_s_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_elg_mold(a,b,info) +subroutine psb_s_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_mold + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_mold implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_elg_mold(a,b,info) return -end subroutine psb_s_elg_mold +end subroutine psb_s_cuda_elg_mold diff --git a/cuda/impl/psb_s_elg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_s_elg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 index 22916852..3f34fcec 100644 --- a/cuda/impl/psb_s_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_s_elg_reallocate_nz(nz,a) +subroutine psb_s_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_reallocate_nz + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_reallocate_nz #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_elg_reallocate_nz' + character(len=20) :: name='s_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_s_elg_reallocate_nz(nz,a) return -end subroutine psb_s_elg_reallocate_nz +end subroutine psb_s_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_s_elg_scal.F90 b/cuda/impl/psb_s_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_elg_scal.F90 rename to cuda/impl/psb_s_cuda_elg_scal.F90 index 913ae47e..cd6e1a5b 100644 --- a/cuda/impl/psb_s_elg_scal.F90 +++ b/cuda/impl/psb_s_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_scal(d,a,info,side) +subroutine psb_s_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scal + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scal #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_s_elg_scal(d,a,info,side) return -end subroutine psb_s_elg_scal +end subroutine psb_s_cuda_elg_scal diff --git a/cuda/impl/psb_s_elg_scals.F90 b/cuda/impl/psb_s_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_s_elg_scals.F90 rename to cuda/impl/psb_s_cuda_elg_scals.F90 index 8261fc94..4ee8a64d 100644 --- a/cuda/impl/psb_s_elg_scals.F90 +++ b/cuda/impl/psb_s_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_scals(d,a,info) +subroutine psb_s_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_scals + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scals #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_s_elg_scals(d,a,info) return -end subroutine psb_s_elg_scals +end subroutine psb_s_cuda_elg_scals diff --git a/cuda/impl/psb_s_elg_to_gpu.F90 b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_s_elg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_elg_to_gpu.F90 index bf86343b..7d04d2b0 100644 --- a/cuda/impl/psb_s_elg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_elg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_to_gpu + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_to_gpu #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_s_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_s_elg_to_gpu +end subroutine psb_s_cuda_elg_to_gpu diff --git a/cuda/impl/psb_z_elg_trim.f90 b/cuda/impl/psb_s_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_z_elg_trim.f90 rename to cuda/impl/psb_s_cuda_elg_trim.f90 index 9bd43312..516aebc4 100644 --- a/cuda/impl/psb_z_elg_trim.f90 +++ b/cuda/impl/psb_s_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_elg_trim(a) +subroutine psb_s_cuda_elg_trim(a) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_trim + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_trim implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_z_elg_trim(a) return -end subroutine psb_z_elg_trim +end subroutine psb_s_cuda_elg_trim diff --git a/cuda/impl/psb_s_elg_vect_mv.F90 b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_s_elg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_elg_vect_mv.F90 index f8d297d1..dad62418 100644 --- a/cuda/impl/psb_s_elg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_elg_vect_mv + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_vect_mv #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_elg_vect_mv' + character(len=20) :: name='s_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= szero) then @@ -128,4 +128,4 @@ subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_elg_vect_mv +end subroutine psb_s_cuda_elg_vect_mv diff --git a/cuda/impl/psb_s_hdiag_csmv.F90 b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_s_hdiag_csmv.F90 rename to cuda/impl/psb_s_cuda_hdiag_csmv.F90 index 3320901c..8e7e4931 100644 --- a/cuda/impl/psb_s_hdiag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_csmv + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmv #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_hdiag_csmv' + character(len=20) :: name='s_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hdiag_csmv +end subroutine psb_s_cuda_hdiag_csmv diff --git a/cuda/impl/psb_s_hdiag_mold.F90 b/cuda/impl/psb_s_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_s_hdiag_mold.F90 rename to cuda/impl/psb_s_cuda_hdiag_mold.F90 index 1486b17e..e662b07b 100644 --- a/cuda/impl/psb_s_hdiag_mold.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_hdiag_mold(a,b,info) +subroutine psb_s_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_mold + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_mold implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_s_hdiag_mold(a,b,info) return -end subroutine psb_s_hdiag_mold +end subroutine psb_s_cuda_hdiag_mold diff --git a/cuda/impl/psb_z_hdiag_to_gpu.F90 b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_z_hdiag_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 index 622a0141..5fe493aa 100644 --- a/cuda/impl/psb_z_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hdiag_to_gpu(a,info) +subroutine psb_s_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_to_gpu + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_to_gpu #else - use psb_z_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_z_hdiag_to_gpu(a,info) #endif -end subroutine psb_z_hdiag_to_gpu +end subroutine psb_s_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_s_hdiag_vect_mv.F90 b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_hdiag_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 index ac261e92..3496a637 100644 --- a/cuda/impl/psb_s_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_hdiag_vect_mv + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_vect_mv #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hdiag_sparse_mat), intent(in) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hdiag_vect_mv' + character(len=20) :: name='s_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hdiag_vect_mv +end subroutine psb_s_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_s_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_s_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 index c7e430f1..3f2765c4 100644 --- a/cuda/impl/psb_s_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_allocate_mnnz + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_allocate_mnnz #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_hlg_allocate_mnnz +end subroutine psb_s_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_s_hlg_csmm.F90 b/cuda/impl/psb_s_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_hlg_csmm.F90 rename to cuda/impl/psb_s_cuda_hlg_csmm.F90 index 126b17e6..2e274c22 100644 --- a/cuda/impl/psb_s_hlg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_csmm + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmm #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hlg_csmm' + character(len=20) :: name='s_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_csmm +end subroutine psb_s_cuda_hlg_csmm diff --git a/cuda/impl/psb_s_hlg_csmv.F90 b/cuda/impl/psb_s_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_hlg_csmv.F90 rename to cuda/impl/psb_s_cuda_hlg_csmv.F90 index 2a7f5a4d..56ea8cdb 100644 --- a/cuda/impl/psb_s_hlg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_csmv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='s_hlg_csmv' + character(len=20) :: name='s_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_csmv +end subroutine psb_s_cuda_hlg_csmv diff --git a/cuda/impl/psb_c_hlg_from_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_c_hlg_from_gpu.F90 rename to cuda/impl/psb_s_cuda_hlg_from_gpu.F90 index 85f337ff..14ab19b7 100644 --- a/cuda/impl/psb_c_hlg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_c_hlg_from_gpu(a,info) +subroutine psb_s_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_c_hlg_mat_mod, psb_protect_name => psb_c_hlg_from_gpu + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_from_gpu #else - use psb_c_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_c_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_c_hlg_from_gpu +end subroutine psb_s_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_s_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 index d545eb02..a9f4f743 100644 --- a/cuda/impl/psb_s_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_inner_vect_sv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_inner_vect_sv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_inner_vect_sv +end subroutine psb_s_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_s_hlg_mold.F90 b/cuda/impl/psb_s_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_hlg_mold.F90 rename to cuda/impl/psb_s_cuda_hlg_mold.F90 index c5dc4774..90e9cebf 100644 --- a/cuda/impl/psb_s_hlg_mold.F90 +++ b/cuda/impl/psb_s_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_s_hlg_mold(a,b,info) +subroutine psb_s_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_mold + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_mold implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_s_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_hlg_mold +end subroutine psb_s_cuda_hlg_mold diff --git a/cuda/impl/psb_s_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_s_hlg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 index 19cd95df..d5b9333c 100644 --- a/cuda/impl/psb_s_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_s_hlg_reallocate_nz(nz,a) +subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_reallocate_nz + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_reallocate_nz #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_hlg_reallocate_nz' + character(len=20) :: name='s_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_s_hlg_reallocate_nz(nz,a) return -end subroutine psb_s_hlg_reallocate_nz +end subroutine psb_s_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_s_hlg_scal.F90 b/cuda/impl/psb_s_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_scal.F90 rename to cuda/impl/psb_s_cuda_hlg_scal.F90 index cd389baa..e803a63d 100644 --- a/cuda/impl/psb_s_hlg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_hlg_scal(d,a,info,side) +subroutine psb_s_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scal + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scal #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_s_hlg_scal(d,a,info,side) return -end subroutine psb_s_hlg_scal +end subroutine psb_s_cuda_hlg_scal diff --git a/cuda/impl/psb_s_hlg_scals.F90 b/cuda/impl/psb_s_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_scals.F90 rename to cuda/impl/psb_s_cuda_hlg_scals.F90 index 256fac3e..eec592e1 100644 --- a/cuda/impl/psb_s_hlg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_scals(d,a,info) +subroutine psb_s_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_scals + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scals #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_s_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_s_hlg_scals +end subroutine psb_s_cuda_hlg_scals diff --git a/cuda/impl/psb_s_hlg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hlg_to_gpu.F90 index 139482c2..14a2a629 100644 --- a/cuda/impl/psb_s_hlg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_to_gpu + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_to_gpu #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_s_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_s_hlg_to_gpu +end subroutine psb_s_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_s_hlg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_s_hlg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hlg_vect_mv.F90 index 52f322aa..2b964f91 100644 --- a/cuda/impl/psb_s_hlg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_hlg_vect_mv + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_vect_mv #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hlg_vect_mv' + character(len=20) :: name='s_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hlg_vect_mv +end subroutine psb_s_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_s_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 index f2b79c77..0cf1e2bc 100644 --- a/cuda/impl/psb_s_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_allocate_mnnz + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_allocate_mnnz #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_s_hybg_allocate_mnnz +end subroutine psb_s_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_s_hybg_csmm.F90 b/cuda/impl/psb_s_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_s_hybg_csmm.F90 rename to cuda/impl/psb_s_cuda_hybg_csmm.F90 index 9de67633..f89df384 100644 --- a/cuda/impl/psb_s_hybg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_csmm + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmm #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_csmm' + character(len=20) :: name='s_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_csmm +end subroutine psb_s_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_s_hybg_csmv.F90 b/cuda/impl/psb_s_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_s_hybg_csmv.F90 rename to cuda/impl/psb_s_cuda_hybg_csmv.F90 index d20740a6..01642146 100644 --- a/cuda/impl/psb_s_hybg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_csmv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_csmv' + character(len=20) :: name='s_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_csmv +end subroutine psb_s_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_s_hybg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 index 95920fc9..f0006f5c 100644 --- a/cuda/impl/psb_s_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_inner_vect_sv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_inner_vect_sv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_inner_vect_sv' + character(len=20) :: name='s_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_inner_vect_sv +end subroutine psb_s_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_s_hybg_mold.F90 b/cuda/impl/psb_s_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_s_hybg_mold.F90 rename to cuda/impl/psb_s_cuda_hybg_mold.F90 index 882990c0..4a1fc64e 100644 --- a/cuda/impl/psb_s_hybg_mold.F90 +++ b/cuda/impl/psb_s_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_mold(a,b,info) +subroutine psb_s_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_mold + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_mold implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_s_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_s_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_s_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_s_hybg_mold(a,b,info) return -end subroutine psb_s_hybg_mold +end subroutine psb_s_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_s_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_s_hybg_reallocate_nz.F90 rename to cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 index 46079a92..7ee15f52 100644 --- a/cuda/impl/psb_s_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_reallocate_nz(nz,a) +subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_reallocate_nz + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_reallocate_nz #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='s_hybg_reallocate_nz' + character(len=20) :: name='s_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_s_hybg_reallocate_nz(nz,a) return -end subroutine psb_s_hybg_reallocate_nz +end subroutine psb_s_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_s_hybg_scal.F90 b/cuda/impl/psb_s_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_s_hybg_scal.F90 rename to cuda/impl/psb_s_cuda_hybg_scal.F90 index a55a8b2c..7a3978b7 100644 --- a/cuda/impl/psb_s_hybg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_scal(d,a,info,side) +subroutine psb_s_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scal + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scal #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_s_hybg_scal(d,a,info,side) return -end subroutine psb_s_hybg_scal +end subroutine psb_s_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_s_hybg_scals.F90 b/cuda/impl/psb_s_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_s_hybg_scals.F90 rename to cuda/impl/psb_s_cuda_hybg_scals.F90 index ae92166f..a19ae3f6 100644 --- a/cuda/impl/psb_s_hybg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_scals(d,a,info) +subroutine psb_s_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_scals + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scals #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_s_hybg_scals(d,a,info) return -end subroutine psb_s_hybg_scals +end subroutine psb_s_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_s_hybg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_s_hybg_to_gpu.F90 rename to cuda/impl/psb_s_cuda_hybg_to_gpu.F90 index bfb9b261..ec415176 100644 --- a/cuda/impl/psb_s_hybg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_to_gpu(a,info,nzrm) +subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_to_gpu + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_to_gpu #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_s_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_s_hybg_to_gpu +end subroutine psb_s_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_s_hybg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_s_hybg_vect_mv.F90 rename to cuda/impl/psb_s_cuda_hybg_vect_mv.F90 index 5fe102f6..a83c4561 100644 --- a/cuda/impl/psb_s_hybg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_hybg_vect_mv + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_vect_mv #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif - use psb_s_gpu_vect_mod + use psb_s_cuda_vect_mod implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='s_hybg_vect_mv' + character(len=20) :: name='s_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type(yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= szero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_s_hybg_vect_mv +end subroutine psb_s_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_s_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_csrg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 index 01c9db06..a9e297bd 100644 --- a/cuda/impl/psb_s_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_mv_csrg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_coo + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_coo #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_s_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_csrg_from_coo +end subroutine psb_s_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_s_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 index 0ac28af3..54bc0ae4 100644 --- a/cuda/impl/psb_s_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_s_mv_csrg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_csrg_mat_mod, psb_protect_name => psb_s_mv_csrg_from_fmt + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_fmt #else - use psb_s_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod #endif implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_s_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_s_mv_csrg_from_fmt +end subroutine psb_s_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_s_mv_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_diag_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 index f51607e5..fda60d96 100644 --- a/cuda/impl/psb_s_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_mv_diag_from_coo(a,b,info) +subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_s_diag_mat_mod, psb_protect_name => psb_s_mv_diag_from_coo + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_mv_diag_from_coo #else - use psb_s_diag_mat_mod + use psb_s_cuda_diag_mat_mod #endif implicit none - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_s_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_diag_from_coo +end subroutine psb_s_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_s_mv_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_elg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 index ac153f6c..447e2971 100644 --- a/cuda/impl/psb_s_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_elg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_coo + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_coo #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_mv_elg_from_coo(a,b,info) return -end subroutine psb_s_mv_elg_from_coo +end subroutine psb_s_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_s_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_s_mv_elg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 index 9238544c..e88080dd 100644 --- a/cuda/impl/psb_s_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_elg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_s_elg_mat_mod, psb_protect_name => psb_s_mv_elg_from_fmt + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_fmt #else - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod #endif implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_s_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_mv_elg_from_fmt +end subroutine psb_s_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_s_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_s_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 index dcbcfe4d..f3252eb2 100644 --- a/cuda/impl/psb_s_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_s_mv_hdiag_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_s_hdiag_mat_mod, psb_protect_name => psb_s_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_s_hdiag_mat_mod + use psb_s_cuda_hdiag_mat_mod #endif implicit none - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_s_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_s_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_hdiag_from_coo +end subroutine psb_s_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_s_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_s_mv_hlg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 index dc72a135..9810a85e 100644 --- a/cuda/impl/psb_s_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_s_mv_hlg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_coo #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_s_mv_hlg_from_coo(a,b,info) return -end subroutine psb_s_mv_hlg_from_coo +end subroutine psb_s_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_s_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 index bbe42e4a..700dc151 100644 --- a/cuda/impl/psb_s_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_s_mv_hlg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_s_hlg_mat_mod, psb_protect_name => psb_s_mv_hlg_from_fmt + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_fmt #else - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod #endif implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_s_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_s_mv_hlg_from_fmt +end subroutine psb_s_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_s_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hybg_from_coo.F90 rename to cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 index 7d3197a8..ca9f34c1 100644 --- a/cuda/impl/psb_s_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_mv_hybg_from_coo(a,b,info) +subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_coo + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_coo #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_s_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_s_mv_hybg_from_coo +end subroutine psb_s_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_s_mv_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_s_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 index 51d8a2e6..5ba606af 100644 --- a/cuda/impl/psb_s_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_s_mv_hybg_from_fmt(a,b,info) +subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_s_hybg_mat_mod, psb_protect_name => psb_s_mv_hybg_from_fmt + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_fmt #else - use psb_s_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod #endif implicit none - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_s_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_s_mv_hybg_from_fmt +end subroutine psb_s_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_z_cp_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_csrg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 index c3b0eebd..186190ac 100644 --- a/cuda/impl/psb_z_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_cp_csrg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_coo + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_coo #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_z_cp_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_csrg_from_coo +end subroutine psb_z_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_z_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_cp_csrg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 index 218d6c7b..d1e1a82d 100644 --- a/cuda/impl/psb_z_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_cp_csrg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_cp_csrg_from_fmt + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_fmt #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif !use iso_c_binding implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_cp_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_cp_csrg_from_fmt +end subroutine psb_z_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_z_cp_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_diag_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 index 013e88cd..c303b127 100644 --- a/cuda/impl/psb_z_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_diag_from_coo(a,b,info) +subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_cp_diag_from_coo + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_cp_diag_from_coo #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -61,4 +61,4 @@ subroutine psb_z_cp_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_diag_from_coo +end subroutine psb_z_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_z_cp_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 similarity index 94% rename from cuda/impl/psb_z_cp_elg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 index c9b61a99..4b18b89b 100644 --- a/cuda/impl/psb_z_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_cp_elg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_coo + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_coo use psi_ext_util_mod - use psb_gpu_env_mod + use psb_cuda_env_mod #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -58,7 +58,7 @@ subroutine psb_z_cp_elg_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - hacksize = max(1,psb_gpu_WarpSize()) + hacksize = max(1,psb_cuda_WarpSize()) #else hacksize = 1 #endif @@ -181,4 +181,4 @@ contains end subroutine psi_z_count_ell_from_coo -end subroutine psb_z_cp_elg_from_coo +end subroutine psb_z_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_z_cp_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 similarity index 93% rename from cuda/impl/psb_z_cp_elg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 index 23468b8a..6fa91de6 100644 --- a/cuda/impl/psb_z_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_elg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_cp_elg_from_fmt + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_fmt #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -98,4 +98,4 @@ subroutine psb_z_cp_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_cp_elg_from_fmt +end subroutine psb_z_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_z_cp_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_z_cp_hdiag_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 index b44c2854..c94d8824 100644 --- a/cuda/impl/psb_z_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_cp_hdiag_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_cp_hdiag_from_coo - use psb_gpu_env_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_z_cp_hdiag_from_coo(a,b,info) info = psb_success_ #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info) @@ -70,4 +70,4 @@ subroutine psb_z_cp_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hdiag_from_coo +end subroutine psb_z_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_z_cp_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 similarity index 95% rename from cuda/impl/psb_z_cp_hlg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 index 51d0c8e6..1607f1b6 100644 --- a/cuda/impl/psb_z_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_cp_hlg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_coo + use psb_cuda_env_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_coo #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -62,7 +62,7 @@ subroutine psb_z_cp_hlg_from_coo(a,b,info) debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() #ifdef HAVE_SPGPU - hksz = max(1,psb_gpu_WarpSize()) + hksz = max(1,psb_cuda_WarpSize()) #else hksz = psi_get_hksz() #endif @@ -195,4 +195,4 @@ contains !!$ write(*,*) 'End of psi_comput_hckoff ',info end subroutine psi_compute_hckoff_from_coo -end subroutine psb_z_cp_hlg_from_coo +end subroutine psb_z_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_z_cp_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 similarity index 90% rename from cuda/impl/psb_z_cp_hlg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 index a6dd5970..e8c1f95d 100644 --- a/cuda/impl/psb_z_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_cp_hlg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_cp_hlg_from_fmt + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_fmt #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -65,4 +65,4 @@ subroutine psb_z_cp_hlg_from_fmt(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hlg_from_fmt +end subroutine psb_z_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_z_cp_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_cp_hybg_from_coo.F90 rename to cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 index ebb6f60a..6031526a 100644 --- a/cuda/impl/psb_z_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_cp_hybg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_coo + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_coo #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info @@ -60,5 +60,5 @@ subroutine psb_z_cp_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_cp_hybg_from_coo +end subroutine psb_z_cuda_cp_hybg_from_coo #endif diff --git a/cuda/impl/psb_z_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_cp_hybg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 index 82f2ac65..0202ef24 100644 --- a/cuda/impl/psb_z_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_cp_hybg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_cp_hybg_from_fmt + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_fmt #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_z_cp_hybg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_cp_hybg_from_fmt +end subroutine psb_z_cuda_cp_hybg_from_fmt #endif diff --git a/cuda/impl/psb_z_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 similarity index 89% rename from cuda/impl/psb_z_csrg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 index 8cb2ccb1..a7533e59 100644 --- a/cuda/impl/psb_z_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_allocate_mnnz + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_allocate_mnnz #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,4 +65,4 @@ subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_csrg_allocate_mnnz +end subroutine psb_z_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_z_csrg_csmm.F90 b/cuda/impl/psb_z_cuda_csrg_csmm.F90 similarity index 94% rename from cuda/impl/psb_z_csrg_csmm.F90 rename to cuda/impl/psb_z_cuda_csrg_csmm.F90 index eb8a4d7f..49fb9fcf 100644 --- a/cuda/impl/psb_z_csrg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmm.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_csmm + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmm #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -131,4 +131,4 @@ subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_csmm +end subroutine psb_z_cuda_csrg_csmm diff --git a/cuda/impl/psb_z_csrg_csmv.F90 b/cuda/impl/psb_z_cuda_csrg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_csrg_csmv.F90 rename to cuda/impl/psb_z_cuda_csrg_csmv.F90 index 10546eb1..54ad6f4f 100644 --- a/cuda/impl/psb_z_csrg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_csmv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_csmv' + character(len=20) :: name='z_cuda_csrg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -136,4 +136,4 @@ subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_csmv +end subroutine psb_z_cuda_csrg_csmv diff --git a/cuda/impl/psb_d_csrg_from_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_d_csrg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_csrg_from_gpu.F90 index 9c0237f0..bb3b49d5 100644 --- a/cuda/impl/psb_d_csrg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_d_csrg_from_gpu(a,info) +subroutine psb_z_cuda_csrg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_d_csrg_mat_mod, psb_protect_name => psb_d_csrg_from_gpu + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_from_gpu #else - use psb_d_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, n, nz @@ -70,4 +70,4 @@ subroutine psb_d_csrg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_d_csrg_from_gpu +end subroutine psb_z_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_z_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 index 75d6800b..6c7b1fcb 100644 --- a/cuda/impl/psb_z_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 @@ -29,19 +29,19 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_inner_vect_sv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_inner_vect_sv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -51,7 +51,7 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_inner_vect_sv' + character(len=20) :: name='z_cuda_csrg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -83,9 +83,9 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -133,4 +133,4 @@ subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_csrg_inner_vect_sv +end subroutine psb_z_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_z_csrg_mold.F90 b/cuda/impl/psb_z_cuda_csrg_mold.F90 similarity index 88% rename from cuda/impl/psb_z_csrg_mold.F90 rename to cuda/impl/psb_z_cuda_csrg_mold.F90 index e83deb3f..23bb658a 100644 --- a/cuda/impl/psb_z_csrg_mold.F90 +++ b/cuda/impl/psb_z_cuda_csrg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_csrg_mold(a,b,info) +subroutine psb_z_cuda_csrg_mold(a,b,info) use psb_base_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_mold + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_mold implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_csrg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_csrg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_csrg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_csrg_mold(a,b,info) return -end subroutine psb_z_csrg_mold +end subroutine psb_z_cuda_csrg_mold diff --git a/cuda/impl/psb_z_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_z_csrg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 index c2509c22..61ae0f59 100644 --- a/cuda/impl/psb_z_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_csrg_reallocate_nz(nz,a) +subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_reallocate_nz + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_reallocate_nz #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_csrg_reallocate_nz' + character(len=20) :: name='z_cuda_csrg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,4 +67,4 @@ subroutine psb_z_csrg_reallocate_nz(nz,a) return -end subroutine psb_z_csrg_reallocate_nz +end subroutine psb_z_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_z_csrg_scal.F90 b/cuda/impl/psb_z_cuda_csrg_scal.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_scal.F90 rename to cuda/impl/psb_z_cuda_csrg_scal.F90 index d8ab0ca3..a2099933 100644 --- a/cuda/impl/psb_z_csrg_scal.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scal.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_scal(d,a,info,side) +subroutine psb_z_cuda_csrg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scal + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scal #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -70,4 +70,4 @@ subroutine psb_z_csrg_scal(d,a,info,side) return -end subroutine psb_z_csrg_scal +end subroutine psb_z_cuda_csrg_scal diff --git a/cuda/impl/psb_z_csrg_scals.F90 b/cuda/impl/psb_z_cuda_csrg_scals.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_scals.F90 rename to cuda/impl/psb_z_cuda_csrg_scals.F90 index 3d14998d..72fee99b 100644 --- a/cuda/impl/psb_z_csrg_scals.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scals.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_scals(d,a,info) +subroutine psb_z_cuda_csrg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_scals + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scals #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -68,4 +68,4 @@ subroutine psb_z_csrg_scals(d,a,info) return -end subroutine psb_z_csrg_scals +end subroutine psb_z_cuda_csrg_scals diff --git a/cuda/impl/psb_z_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 similarity index 98% rename from cuda/impl/psb_z_csrg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index 4548935d..c6f217ab 100644 --- a/cuda/impl/psb_z_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -30,17 +30,17 @@ ! -subroutine psb_z_csrg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_to_gpu + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_to_gpu #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -322,4 +322,4 @@ subroutine psb_z_csrg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_z_csrg_to_gpu +end subroutine psb_z_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_z_csrg_vect_mv.F90 b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_csrg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_csrg_vect_mv.F90 index 0770d448..964134fb 100644 --- a/cuda/impl/psb_z_csrg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_csrg_vect_mv + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_vect_mv #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -54,7 +54,7 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_csrg_vect_mv' + character(len=20) :: name='z_cuda_csrg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -122,4 +122,4 @@ subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_csrg_vect_mv +end subroutine psb_z_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_z_diag_csmv.F90 b/cuda/impl/psb_z_cuda_diag_csmv.F90 similarity index 92% rename from cuda/impl/psb_z_diag_csmv.F90 rename to cuda/impl/psb_z_cuda_diag_csmv.F90 index 667e1a1f..2e86f0f8 100644 --- a/cuda/impl/psb_z_diag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_diag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_csmv + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_csmv #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_diag_csmv' + character(len=20) :: name='z_cuda_diag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_diag_csmv +end subroutine psb_z_cuda_diag_csmv diff --git a/cuda/impl/psb_z_diag_mold.F90 b/cuda/impl/psb_z_cuda_diag_mold.F90 similarity index 88% rename from cuda/impl/psb_z_diag_mold.F90 rename to cuda/impl/psb_z_cuda_diag_mold.F90 index 5cd752ce..5b11b41c 100644 --- a/cuda/impl/psb_z_diag_mold.F90 +++ b/cuda/impl/psb_z_cuda_diag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_diag_mold(a,b,info) +subroutine psb_z_cuda_diag_mold(a,b,info) use psb_base_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_mold + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_mold implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_diag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_diag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_diag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_diag_mold(a,b,info) return -end subroutine psb_z_diag_mold +end subroutine psb_z_cuda_diag_mold diff --git a/cuda/impl/psb_z_diag_to_gpu.F90 b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_diag_to_gpu.F90 rename to cuda/impl/psb_z_cuda_diag_to_gpu.F90 index 40913624..a28858b5 100644 --- a/cuda/impl/psb_z_diag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_diag_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_to_gpu + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_to_gpu #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif use iso_c_binding implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -71,4 +71,4 @@ subroutine psb_z_diag_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_z_diag_to_gpu +end subroutine psb_z_cuda_diag_to_gpu diff --git a/cuda/impl/psb_z_diag_vect_mv.F90 b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_diag_vect_mv.F90 rename to cuda/impl/psb_z_cuda_diag_vect_mv.F90 index b8946491..12f3c3e7 100644 --- a/cuda/impl/psb_z_diag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_diag_vect_mv + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_vect_mv #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_diag_vect_mv' + character(len=20) :: name='z_cuda_diag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_diag_vect_mv +end subroutine psb_z_cuda_diag_vect_mv diff --git a/cuda/impl/psb_z_dnsg_mat_impl.F90 b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 similarity index 77% rename from cuda/impl/psb_z_dnsg_mat_impl.F90 rename to cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 index 407deaa2..c2a641b6 100644 --- a/cuda/impl/psb_z_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 @@ -29,18 +29,18 @@ ! POSSIBILITY OF SUCH DAMAGE. ! -subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_vect_mv + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_vect_mv #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -50,7 +50,7 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) character :: trans_ complex(psb_dpk_), allocatable :: rx(:), ry(:) Integer(Psb_ipk_) :: err_act, m, n, k - character(len=20) :: name='z_dnsg_vect_mv' + character(len=20) :: name='z_cuda_dnsg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -76,9 +76,9 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) k = a%get_nrows() end if select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= zzero) then @@ -117,21 +117,21 @@ subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_dnsg_vect_mv +end subroutine psb_z_cuda_dnsg_vect_mv -subroutine psb_z_dnsg_mold(a,b,info) +subroutine psb_z_cuda_dnsg_mold(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_mold + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_mold #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(in) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -145,7 +145,7 @@ subroutine psb_z_dnsg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_dnsg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_dnsg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -158,54 +158,54 @@ subroutine psb_z_dnsg_mold(a,b,info) return -end subroutine psb_z_dnsg_mold +end subroutine psb_z_cuda_dnsg_mold !!$ !!$ interface -!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz !!$ end interface -subroutine psb_z_dnsg_to_gpu(a,info) +subroutine psb_z_cuda_dnsg_to_gpu(a,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_dnsg_to_gpu + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_to_gpu #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda logical, parameter :: debug=.false. - character(len=20) :: name='z_dnsg_to_gpu' + character(len=20) :: name='z_cuda_dnsg_to_gpu' call psb_erractionsave(err_act) info = psb_success_ @@ -226,27 +226,27 @@ subroutine psb_z_dnsg_to_gpu(a,info) return -end subroutine psb_z_dnsg_to_gpu +end subroutine psb_z_cuda_dnsg_to_gpu -subroutine psb_z_cp_dnsg_from_coo(a,b,info) +subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_coo + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_coo #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_coo' + character(len=20) :: name='z_cuda_dnsg_cp_from_coo' integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. type(psb_z_coo_sparse_mat) :: tmp @@ -267,27 +267,27 @@ subroutine psb_z_cp_dnsg_from_coo(a,b,info) return -end subroutine psb_z_cp_dnsg_from_coo +end subroutine psb_z_cuda_cp_dnsg_from_coo -subroutine psb_z_cp_dnsg_from_fmt(a,b,info) +subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_cp_dnsg_from_fmt + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_fmt #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info type(psb_z_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_fmt' + character(len=20) :: name='z_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -341,29 +341,29 @@ subroutine psb_z_cp_dnsg_from_fmt(a,b,info) return -end subroutine psb_z_cp_dnsg_from_fmt +end subroutine psb_z_cuda_cp_dnsg_from_fmt -subroutine psb_z_mv_dnsg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_coo + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_coo #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act logical, parameter :: debug=.false. - character(len=20) :: name='z_dnsg_mv_from_coo' + character(len=20) :: name='z_cuda_dnsg_mv_from_coo' call psb_erractionsave(err_act) info = psb_success_ @@ -382,28 +382,28 @@ subroutine psb_z_mv_dnsg_from_coo(a,b,info) return -end subroutine psb_z_mv_dnsg_from_coo +end subroutine psb_z_cuda_mv_dnsg_from_coo -subroutine psb_z_mv_dnsg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod #ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod - use psb_z_dnsg_mat_mod, psb_protect_name => psb_z_mv_dnsg_from_fmt + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_fmt #else - use psb_z_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod #endif implicit none - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info type(psb_z_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_dnsg_cp_from_fmt' + character(len=20) :: name='z_cuda_dnsg_cp_from_fmt' call psb_erractionsave(err_act) info = psb_success_ @@ -458,4 +458,4 @@ subroutine psb_z_mv_dnsg_from_fmt(a,b,info) return -end subroutine psb_z_mv_dnsg_from_fmt +end subroutine psb_z_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_z_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 similarity index 93% rename from cuda/impl/psb_z_elg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 index 39d14dd2..a2e36feb 100644 --- a/cuda/impl/psb_z_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_allocate_mnnz + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_allocate_mnnz #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -110,4 +110,4 @@ subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_elg_allocate_mnnz +end subroutine psb_z_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_z_elg_asb.f90 b/cuda/impl/psb_z_cuda_elg_asb.f90 similarity index 92% rename from cuda/impl/psb_z_elg_asb.f90 rename to cuda/impl/psb_z_cuda_elg_asb.f90 index 515f579a..511183f5 100644 --- a/cuda/impl/psb_z_elg_asb.f90 +++ b/cuda/impl/psb_z_cuda_elg_asb.f90 @@ -30,13 +30,13 @@ ! -subroutine psb_z_elg_asb(a) +subroutine psb_z_cuda_elg_asb(a) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_asb + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_asb implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: err_act, info character(len=20) :: name='elg_asb' @@ -62,4 +62,4 @@ subroutine psb_z_elg_asb(a) return -end subroutine psb_z_elg_asb +end subroutine psb_z_cuda_elg_asb diff --git a/cuda/impl/psb_z_elg_csmm.F90 b/cuda/impl/psb_z_cuda_elg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_elg_csmm.F90 rename to cuda/impl/psb_z_cuda_elg_csmm.F90 index aa27419c..d4034b65 100644 --- a/cuda/impl/psb_z_elg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csmm + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmm #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csmm' + character(len=20) :: name='z_cuda_elg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,4 +131,4 @@ subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_csmm +end subroutine psb_z_cuda_elg_csmm diff --git a/cuda/impl/psb_z_elg_csmv.F90 b/cuda/impl/psb_z_cuda_elg_csmv.F90 similarity index 94% rename from cuda/impl/psb_z_elg_csmv.F90 rename to cuda/impl/psb_z_cuda_elg_csmv.F90 index 46bc9615..eba12d16 100644 --- a/cuda/impl/psb_z_elg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csmv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -133,4 +133,4 @@ subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_csmv +end subroutine psb_z_cuda_elg_csmv diff --git a/cuda/impl/psb_z_elg_csput.F90 b/cuda/impl/psb_z_cuda_elg_csput.F90 similarity index 89% rename from cuda/impl/psb_z_elg_csput.F90 rename to cuda/impl/psb_z_cuda_elg_csput.F90 index 51080f5b..5a52f4f8 100644 --- a/cuda/impl/psb_z_elg_csput.F90 +++ b/cuda/impl/psb_z_cuda_elg_csput.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csput_a + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_a #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csput_a' + character(len=20) :: name='z_cuda_elg_csput_a' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit real(psb_dpk_) :: t1,t2,t3 @@ -120,24 +120,24 @@ subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_z_elg_csput_a +end subroutine psb_z_cuda_elg_csput_a -subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding #ifdef HAVE_SPGPU use elldev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_csput_v - use psb_z_gpu_vect_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_v + use psb_z_cuda_vect_mod #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax @@ -145,7 +145,7 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_csput_v' + character(len=20) :: name='z_cuda_elg_csput_v' logical, parameter :: debug=.false. integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw logical :: gpu_invoked @@ -199,11 +199,11 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) t1=psb_wtime() gpu_invoked = .false. select type (ia) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (ja) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type (val) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (val%is_host()) call val%sync() if (ia%is_host()) call ia%sync() @@ -245,4 +245,4 @@ subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) return -end subroutine psb_z_elg_csput_v +end subroutine psb_z_cuda_elg_csput_v diff --git a/cuda/impl/psb_z_elg_from_gpu.F90 b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_elg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_elg_from_gpu.F90 index e8670cd4..ffed4349 100644 --- a/cuda/impl/psb_z_elg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_from_gpu(a,info) +subroutine psb_z_cuda_elg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_from_gpu + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_from_gpu #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize @@ -71,4 +71,4 @@ subroutine psb_z_elg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_elg_from_gpu +end subroutine psb_z_cuda_elg_from_gpu diff --git a/cuda/impl/psb_z_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 similarity index 89% rename from cuda/impl/psb_z_elg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 index 66d7eed8..7564d5dd 100644 --- a/cuda/impl/psb_z_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 @@ -30,26 +30,26 @@ ! -subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_inner_vect_sv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_inner_vect_sv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans integer(psb_ipk_) :: err_act - character(len=20) :: name='z_elg_inner_vect_sv' + character(len=20) :: name='z_cuda_elg_inner_vect_sv' logical, parameter :: debug=.false. complex(psb_dpk_), allocatable :: rx(:), ry(:) @@ -86,4 +86,4 @@ subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_inner_vect_sv +end subroutine psb_z_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_z_elg_mold.F90 b/cuda/impl/psb_z_cuda_elg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_elg_mold.F90 rename to cuda/impl/psb_z_cuda_elg_mold.F90 index 1a5ebe54..e027c9f2 100644 --- a/cuda/impl/psb_z_elg_mold.F90 +++ b/cuda/impl/psb_z_cuda_elg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_elg_mold(a,b,info) +subroutine psb_z_cuda_elg_mold(a,b,info) use psb_base_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_mold + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_mold implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_elg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_elg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_elg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_elg_mold(a,b,info) return -end subroutine psb_z_elg_mold +end subroutine psb_z_cuda_elg_mold diff --git a/cuda/impl/psb_z_elg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 similarity index 89% rename from cuda/impl/psb_z_elg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 index f6bc194f..16cebe70 100644 --- a/cuda/impl/psb_z_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_z_elg_reallocate_nz(nz,a) +subroutine psb_z_cuda_elg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_reallocate_nz + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_reallocate_nz #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_elg_reallocate_nz' + character(len=20) :: name='z_cuda_elg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -76,4 +76,4 @@ subroutine psb_z_elg_reallocate_nz(nz,a) return -end subroutine psb_z_elg_reallocate_nz +end subroutine psb_z_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_z_elg_scal.F90 b/cuda/impl/psb_z_cuda_elg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_elg_scal.F90 rename to cuda/impl/psb_z_cuda_elg_scal.F90 index eed9007a..4802aaaa 100644 --- a/cuda/impl/psb_z_elg_scal.F90 +++ b/cuda/impl/psb_z_cuda_elg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_scal(d,a,info,side) +subroutine psb_z_cuda_elg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scal + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scal #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -75,4 +75,4 @@ subroutine psb_z_elg_scal(d,a,info,side) return -end subroutine psb_z_elg_scal +end subroutine psb_z_cuda_elg_scal diff --git a/cuda/impl/psb_z_elg_scals.F90 b/cuda/impl/psb_z_cuda_elg_scals.F90 similarity index 90% rename from cuda/impl/psb_z_elg_scals.F90 rename to cuda/impl/psb_z_cuda_elg_scals.F90 index 1e3f3682..5db823da 100644 --- a/cuda/impl/psb_z_elg_scals.F90 +++ b/cuda/impl/psb_z_cuda_elg_scals.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_scals(d,a,info) +subroutine psb_z_cuda_elg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_scals + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scals #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_z_elg_scals(d,a,info) return -end subroutine psb_z_elg_scals +end subroutine psb_z_cuda_elg_scals diff --git a/cuda/impl/psb_z_elg_to_gpu.F90 b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 similarity index 93% rename from cuda/impl/psb_z_elg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_elg_to_gpu.F90 index 71a5ec66..6d86bdd9 100644 --- a/cuda/impl/psb_z_elg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_elg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_to_gpu + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_to_gpu #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -90,4 +90,4 @@ subroutine psb_z_elg_to_gpu(a,info,nzrm) call a%set_sync() #endif -end subroutine psb_z_elg_to_gpu +end subroutine psb_z_cuda_elg_to_gpu diff --git a/cuda/impl/psb_c_elg_trim.f90 b/cuda/impl/psb_z_cuda_elg_trim.f90 similarity index 92% rename from cuda/impl/psb_c_elg_trim.f90 rename to cuda/impl/psb_z_cuda_elg_trim.f90 index bc0c0696..3d261150 100644 --- a/cuda/impl/psb_c_elg_trim.f90 +++ b/cuda/impl/psb_z_cuda_elg_trim.f90 @@ -30,12 +30,12 @@ ! -subroutine psb_c_elg_trim(a) +subroutine psb_z_cuda_elg_trim(a) use psb_base_mod - use psb_c_elg_mat_mod, psb_protect_name => psb_c_elg_trim + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_trim implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld character(len=20) :: name='trim' logical, parameter :: debug=.false. @@ -59,4 +59,4 @@ subroutine psb_c_elg_trim(a) return -end subroutine psb_c_elg_trim +end subroutine psb_z_cuda_elg_trim diff --git a/cuda/impl/psb_z_elg_vect_mv.F90 b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_elg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_elg_vect_mv.F90 index 5cd72e44..4bd1b3ed 100644 --- a/cuda/impl/psb_z_elg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_elg_vect_mv + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_vect_mv #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_elg_vect_mv' + character(len=20) :: name='z_cuda_elg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (a%is_host()) call a%sync() if (xx%is_host()) call xx%sync() if (beta /= zzero) then @@ -128,4 +128,4 @@ subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_elg_vect_mv +end subroutine psb_z_cuda_elg_vect_mv diff --git a/cuda/impl/psb_z_hdiag_csmv.F90 b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 similarity index 92% rename from cuda/impl/psb_z_hdiag_csmv.F90 rename to cuda/impl/psb_z_cuda_hdiag_csmv.F90 index baf730a2..8be14704 100644 --- a/cuda/impl/psb_z_hdiag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_csmv + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmv #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_hdiag_csmv' + character(len=20) :: name='z_cuda_hdiag_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -133,4 +133,4 @@ subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hdiag_csmv +end subroutine psb_z_cuda_hdiag_csmv diff --git a/cuda/impl/psb_z_hdiag_mold.F90 b/cuda/impl/psb_z_cuda_hdiag_mold.F90 similarity index 88% rename from cuda/impl/psb_z_hdiag_mold.F90 rename to cuda/impl/psb_z_cuda_hdiag_mold.F90 index b656ed0f..33fdd8eb 100644 --- a/cuda/impl/psb_z_hdiag_mold.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_hdiag_mold(a,b,info) +subroutine psb_z_cuda_hdiag_mold(a,b,info) use psb_base_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_mold + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_mold implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hdiag_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hdiag_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hdiag_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,4 +62,4 @@ subroutine psb_z_hdiag_mold(a,b,info) return -end subroutine psb_z_hdiag_mold +end subroutine psb_z_cuda_hdiag_mold diff --git a/cuda/impl/psb_d_hdiag_to_gpu.F90 b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 similarity index 92% rename from cuda/impl/psb_d_hdiag_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 index fb013586..47126aca 100644 --- a/cuda/impl/psb_d_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_d_hdiag_to_gpu(a,info) +subroutine psb_z_cuda_hdiag_to_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_d_hdiag_mat_mod, psb_protect_name => psb_d_hdiag_to_gpu + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_to_gpu #else - use psb_d_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif use iso_c_binding implicit none - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight #ifdef HAVE_SPGPU @@ -83,4 +83,4 @@ subroutine psb_d_hdiag_to_gpu(a,info) #endif -end subroutine psb_d_hdiag_to_gpu +end subroutine psb_z_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_z_hdiag_vect_mv.F90 b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 similarity index 90% rename from cuda/impl/psb_z_hdiag_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 index 3e1c859e..cf0b3457 100644 --- a/cuda/impl/psb_z_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_hdiag_vect_mv + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_vect_mv #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hdiag_sparse_mat), intent(in) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hdiag_vect_mv' + character(len=20) :: name='z_cuda_hdiag_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -82,9 +82,9 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -123,4 +123,4 @@ subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hdiag_vect_mv +end subroutine psb_z_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_z_hlg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_z_hlg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 index e3c05ec1..228244f1 100644 --- a/cuda/impl/psb_z_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_allocate_mnnz + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_allocate_mnnz #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -68,4 +68,4 @@ subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_hlg_allocate_mnnz +end subroutine psb_z_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_z_hlg_csmm.F90 b/cuda/impl/psb_z_cuda_hlg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_hlg_csmm.F90 rename to cuda/impl/psb_z_cuda_hlg_csmm.F90 index 3432c177..325ab0d0 100644 --- a/cuda/impl/psb_z_hlg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmm.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_csmm + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmm #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hlg_csmm' + character(len=20) :: name='z_cuda_hlg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -129,4 +129,4 @@ subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_csmm +end subroutine psb_z_cuda_hlg_csmm diff --git a/cuda/impl/psb_z_hlg_csmv.F90 b/cuda/impl/psb_z_cuda_hlg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_hlg_csmv.F90 rename to cuda/impl/psb_z_cuda_hlg_csmv.F90 index b9b79e0c..ac84190e 100644 --- a/cuda/impl/psb_z_hlg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmv.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_csmv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer, intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer :: err_act - character(len=20) :: name='z_hlg_csmv' + character(len=20) :: name='z_cuda_hlg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -132,4 +132,4 @@ subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_csmv +end subroutine psb_z_cuda_hlg_csmv diff --git a/cuda/impl/psb_z_hlg_from_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 similarity index 92% rename from cuda/impl/psb_z_hlg_from_gpu.F90 rename to cuda/impl/psb_z_cuda_hlg_from_gpu.F90 index f582e506..4db6c3ce 100644 --- a/cuda/impl/psb_z_hlg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_from_gpu(a,info) +subroutine psb_z_cuda_hlg_from_gpu(a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_from_gpu + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_from_gpu #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr @@ -73,4 +73,4 @@ subroutine psb_z_hlg_from_gpu(a,info) call a%set_sync() #endif -end subroutine psb_z_hlg_from_gpu +end subroutine psb_z_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_z_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_hlg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 index 5a7b1031..f99a5a9e 100644 --- a/cuda/impl/psb_z_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_inner_vect_sv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_inner_vect_sv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -78,4 +78,4 @@ subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_inner_vect_sv +end subroutine psb_z_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_z_hlg_mold.F90 b/cuda/impl/psb_z_cuda_hlg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_hlg_mold.F90 rename to cuda/impl/psb_z_cuda_hlg_mold.F90 index f9ff0c7a..cc9ad510 100644 --- a/cuda/impl/psb_z_hlg_mold.F90 +++ b/cuda/impl/psb_z_cuda_hlg_mold.F90 @@ -30,12 +30,12 @@ ! -subroutine psb_z_hlg_mold(a,b,info) +subroutine psb_z_cuda_hlg_mold(a,b,info) use psb_base_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_mold + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_mold implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer, intent(out) :: info Integer :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hlg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hlg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hlg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -61,4 +61,4 @@ subroutine psb_z_hlg_mold(a,b,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_hlg_mold +end subroutine psb_z_cuda_hlg_mold diff --git a/cuda/impl/psb_z_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 similarity index 87% rename from cuda/impl/psb_z_hlg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 index f3d50626..aaba9be5 100644 --- a/cuda/impl/psb_z_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 @@ -30,22 +30,22 @@ ! -subroutine psb_z_hlg_reallocate_nz(nz,a) +subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_reallocate_nz + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_reallocate_nz #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_hlg_reallocate_nz' + character(len=20) :: name='z_cuda_hlg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -64,4 +64,4 @@ subroutine psb_z_hlg_reallocate_nz(nz,a) return -end subroutine psb_z_hlg_reallocate_nz +end subroutine psb_z_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_z_hlg_scal.F90 b/cuda/impl/psb_z_cuda_hlg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_scal.F90 rename to cuda/impl/psb_z_cuda_hlg_scal.F90 index 8aa85500..3ffda36a 100644 --- a/cuda/impl/psb_z_hlg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scal.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_hlg_scal(d,a,info,side) +subroutine psb_z_cuda_hlg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scal + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scal #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,4 +72,4 @@ subroutine psb_z_hlg_scal(d,a,info,side) return -end subroutine psb_z_hlg_scal +end subroutine psb_z_cuda_hlg_scal diff --git a/cuda/impl/psb_z_hlg_scals.F90 b/cuda/impl/psb_z_cuda_hlg_scals.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_scals.F90 rename to cuda/impl/psb_z_cuda_hlg_scals.F90 index d5689c06..bae50c7c 100644 --- a/cuda/impl/psb_z_hlg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scals.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_scals(d,a,info) +subroutine psb_z_cuda_hlg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_scals + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scals #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -70,4 +70,4 @@ subroutine psb_z_hlg_scals(d,a,info) 9999 call psb_error_handler(err_act) return -end subroutine psb_z_hlg_scals +end subroutine psb_z_cuda_hlg_scals diff --git a/cuda/impl/psb_z_hlg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hlg_to_gpu.F90 index d63aee9c..93c9f043 100644 --- a/cuda/impl/psb_z_hlg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_to_gpu + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_to_gpu #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif use iso_c_binding implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -65,4 +65,4 @@ subroutine psb_z_hlg_to_gpu(a,info,nzrm) ! if (info /= 0) goto 9999 #endif -end subroutine psb_z_hlg_to_gpu +end subroutine psb_z_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_z_hlg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_hlg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hlg_vect_mv.F90 index 9efefc0a..f377efec 100644 --- a/cuda/impl/psb_z_hlg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_hlg_vect_mv + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_vect_mv #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -52,7 +52,7 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hlg_vect_mv' + character(len=20) :: name='z_cuda_hlg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= dzero) then if (yy%is_host()) call yy%sync() @@ -126,4 +126,4 @@ subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hlg_vect_mv +end subroutine psb_z_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_z_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 similarity index 90% rename from cuda/impl/psb_z_hybg_allocate_mnnz.F90 rename to cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 index 2c38c536..0c6f9aa9 100644 --- a/cuda/impl/psb_z_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) +subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_allocate_mnnz + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_allocate_mnnz #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' @@ -65,5 +65,5 @@ subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) return -end subroutine psb_z_hybg_allocate_mnnz +end subroutine psb_z_cuda_hybg_allocate_mnnz #endif diff --git a/cuda/impl/psb_z_hybg_csmm.F90 b/cuda/impl/psb_z_cuda_hybg_csmm.F90 similarity index 93% rename from cuda/impl/psb_z_hybg_csmm.F90 rename to cuda/impl/psb_z_cuda_hybg_csmm.F90 index 5ec9701b..d4a32420 100644 --- a/cuda/impl/psb_z_hybg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmm.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_csmm + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmm #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info @@ -53,7 +53,7 @@ subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpX, gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_csmm' + character(len=20) :: name='z_cuda_hybg_csmm' logical, parameter :: debug=.false. info = psb_success_ @@ -131,5 +131,5 @@ subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_csmm +end subroutine psb_z_cuda_hybg_csmm #endif diff --git a/cuda/impl/psb_z_hybg_csmv.F90 b/cuda/impl/psb_z_cuda_hybg_csmv.F90 similarity index 93% rename from cuda/impl/psb_z_hybg_csmv.F90 rename to cuda/impl/psb_z_cuda_hybg_csmv.F90 index e7f39cb6..180a8ae1 100644 --- a/cuda/impl/psb_z_hybg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_csmv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info @@ -54,7 +54,7 @@ subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) type(c_ptr) :: gpY logical :: tra Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_csmv' + character(len=20) :: name='z_cuda_hybg_csmv' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -134,5 +134,5 @@ subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_csmv +end subroutine psb_z_cuda_hybg_csmv #endif diff --git a/cuda/impl/psb_z_hybg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 similarity index 90% rename from cuda/impl/psb_z_hybg_inner_vect_sv.F90 rename to cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 index 35c55ec7..1df47788 100644 --- a/cuda/impl/psb_z_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 @@ -30,19 +30,19 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_inner_vect_sv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_inner_vect_sv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info @@ -52,7 +52,7 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ integer(psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_inner_vect_sv' + character(len=20) :: name='z_cuda_hybg_inner_vect_sv' logical, parameter :: debug=.false. call psb_get_erraction(err_act) @@ -84,9 +84,9 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%set_host() else select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -134,5 +134,5 @@ subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_inner_vect_sv +end subroutine psb_z_cuda_hybg_inner_vect_sv #endif diff --git a/cuda/impl/psb_z_hybg_mold.F90 b/cuda/impl/psb_z_cuda_hybg_mold.F90 similarity index 89% rename from cuda/impl/psb_z_hybg_mold.F90 rename to cuda/impl/psb_z_cuda_hybg_mold.F90 index 3a17dbd2..5a13ff19 100644 --- a/cuda/impl/psb_z_hybg_mold.F90 +++ b/cuda/impl/psb_z_cuda_hybg_mold.F90 @@ -30,12 +30,12 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_mold(a,b,info) +subroutine psb_z_cuda_hybg_mold(a,b,info) use psb_base_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_mold + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_mold implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act @@ -49,7 +49,7 @@ subroutine psb_z_hybg_mold(a,b,info) call b%free() deallocate(b,stat=info) end if - if (info == 0) allocate(psb_z_hybg_sparse_mat :: b, stat=info) + if (info == 0) allocate(psb_z_cuda_hybg_sparse_mat :: b, stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ @@ -62,5 +62,5 @@ subroutine psb_z_hybg_mold(a,b,info) return -end subroutine psb_z_hybg_mold +end subroutine psb_z_cuda_hybg_mold #endif diff --git a/cuda/impl/psb_z_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 similarity index 88% rename from cuda/impl/psb_z_hybg_reallocate_nz.F90 rename to cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 index 79d81911..5278ba35 100644 --- a/cuda/impl/psb_z_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 @@ -30,21 +30,21 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_reallocate_nz(nz,a) +subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_reallocate_nz + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_reallocate_nz #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none integer(psb_ipk_), intent(in) :: nz - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: m, nzrm,ld Integer(Psb_ipk_) :: err_act, info - character(len=20) :: name='z_hybg_reallocate_nz' + character(len=20) :: name='z_cuda_hybg_reallocate_nz' logical, parameter :: debug=.false. call psb_erractionsave(err_act) @@ -67,5 +67,5 @@ subroutine psb_z_hybg_reallocate_nz(nz,a) return -end subroutine psb_z_hybg_reallocate_nz +end subroutine psb_z_cuda_hybg_reallocate_nz #endif diff --git a/cuda/impl/psb_z_hybg_scal.F90 b/cuda/impl/psb_z_cuda_hybg_scal.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_scal.F90 rename to cuda/impl/psb_z_cuda_hybg_scal.F90 index c8179bf2..cd436e76 100644 --- a/cuda/impl/psb_z_hybg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scal.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_scal(d,a,info,side) +subroutine psb_z_cuda_hybg_scal(d,a,info,side) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scal + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scal #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side @@ -72,5 +72,5 @@ subroutine psb_z_hybg_scal(d,a,info,side) return -end subroutine psb_z_hybg_scal +end subroutine psb_z_cuda_hybg_scal #endif diff --git a/cuda/impl/psb_z_hybg_scals.F90 b/cuda/impl/psb_z_cuda_hybg_scals.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_scals.F90 rename to cuda/impl/psb_z_cuda_hybg_scals.F90 index 3729412d..0a9ee79d 100644 --- a/cuda/impl/psb_z_hybg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scals.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_scals(d,a,info) +subroutine psb_z_cuda_hybg_scals(d,a,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_scals + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scals #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info @@ -72,5 +72,5 @@ subroutine psb_z_hybg_scals(d,a,info) return -end subroutine psb_z_hybg_scals +end subroutine psb_z_cuda_hybg_scals #endif diff --git a/cuda/impl/psb_z_hybg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 similarity index 96% rename from cuda/impl/psb_z_hybg_to_gpu.F90 rename to cuda/impl/psb_z_cuda_hybg_to_gpu.F90 index 4a2a9b1c..107b5049 100644 --- a/cuda/impl/psb_z_hybg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 @@ -30,17 +30,17 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_to_gpu(a,info,nzrm) +subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_to_gpu + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_to_gpu #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm @@ -150,5 +150,5 @@ subroutine psb_z_hybg_to_gpu(a,info,nzrm) end if #endif -end subroutine psb_z_hybg_to_gpu +end subroutine psb_z_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_z_hybg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 similarity index 91% rename from cuda/impl/psb_z_hybg_vect_mv.F90 rename to cuda/impl/psb_z_cuda_hybg_vect_mv.F90 index f3b6695e..22751f2d 100644 --- a/cuda/impl/psb_z_hybg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 @@ -30,20 +30,20 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) +subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_hybg_vect_mv + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_vect_mv #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_z_gpu_vect_mod + use psb_z_cuda_vect_mod implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y @@ -53,7 +53,7 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) logical :: tra character :: trans_ Integer(Psb_ipk_) :: err_act - character(len=20) :: name='z_hybg_vect_mv' + character(len=20) :: name='z_cuda_hybg_vect_mv' call psb_erractionsave(err_act) info = psb_success_ @@ -83,9 +83,9 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) else if (a%is_host()) call a%sync() select type (xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type(yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (beta /= zzero) then if (yy%is_host()) call yy%sync() @@ -123,5 +123,5 @@ subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) return -end subroutine psb_z_hybg_vect_mv +end subroutine psb_z_cuda_hybg_vect_mv #endif diff --git a/cuda/impl/psb_z_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_csrg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 index 21771b89..d5390ee3 100644 --- a/cuda/impl/psb_z_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_mv_csrg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_coo + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_coo #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -62,4 +62,4 @@ subroutine psb_z_mv_csrg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_csrg_from_coo +end subroutine psb_z_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_z_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_csrg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 index 31408214..e2bfdb73 100644 --- a/cuda/impl/psb_z_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 @@ -30,18 +30,18 @@ ! -subroutine psb_z_mv_csrg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_csrg_mat_mod, psb_protect_name => psb_z_mv_csrg_from_fmt + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_fmt #else - use psb_z_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #endif implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer, intent(out) :: info @@ -60,4 +60,4 @@ subroutine psb_z_mv_csrg_from_fmt(a,b,info) #endif end select -end subroutine psb_z_mv_csrg_from_fmt +end subroutine psb_z_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_z_mv_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_diag_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 index 8872c890..b61813bf 100644 --- a/cuda/impl/psb_z_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_mv_diag_from_coo(a,b,info) +subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod - use psb_z_diag_mat_mod, psb_protect_name => psb_z_mv_diag_from_coo + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_mv_diag_from_coo #else - use psb_z_diag_mat_mod + use psb_z_cuda_diag_mat_mod #endif implicit none - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -66,4 +66,4 @@ subroutine psb_z_mv_diag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_diag_from_coo +end subroutine psb_z_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_z_mv_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_elg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 index 2d78edc6..e3ff4036 100644 --- a/cuda/impl/psb_z_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_elg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_coo + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_coo #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_mv_elg_from_coo(a,b,info) return -end subroutine psb_z_mv_elg_from_coo +end subroutine psb_z_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_z_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 similarity index 92% rename from cuda/impl/psb_z_mv_elg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 index 3bf663b3..07a80173 100644 --- a/cuda/impl/psb_z_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_elg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod - use psb_z_elg_mat_mod, psb_protect_name => psb_z_mv_elg_from_fmt + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_fmt #else - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod #endif implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -96,4 +96,4 @@ subroutine psb_z_mv_elg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_mv_elg_from_fmt +end subroutine psb_z_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_z_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 similarity index 87% rename from cuda/impl/psb_z_mv_hdiag_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 index e1df9cc4..f25e6370 100644 --- a/cuda/impl/psb_z_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 @@ -30,21 +30,21 @@ ! -subroutine psb_z_mv_hdiag_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod - use psb_z_hdiag_mat_mod, psb_protect_name => psb_z_mv_hdiag_from_coo - use psb_gpu_env_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod #else - use psb_z_hdiag_mat_mod + use psb_z_cuda_hdiag_mat_mod #endif implicit none - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -55,7 +55,7 @@ subroutine psb_z_mv_hdiag_from_coo(a,b,info) #ifdef HAVE_SPGPU - a%hacksize = psb_gpu_WarpSize() + a%hacksize = psb_cuda_WarpSize() #endif call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info) @@ -71,4 +71,4 @@ subroutine psb_z_mv_hdiag_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_hdiag_from_coo +end subroutine psb_z_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_z_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 similarity index 88% rename from cuda/impl/psb_z_mv_hlg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 index ce037be2..3bc630de 100644 --- a/cuda/impl/psb_z_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 @@ -30,20 +30,20 @@ ! -subroutine psb_z_mv_hlg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_gpu_env_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_coo + use psb_cuda_env_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_coo #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,4 +58,4 @@ subroutine psb_z_mv_hlg_from_coo(a,b,info) return -end subroutine psb_z_mv_hlg_from_coo +end subroutine psb_z_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_z_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hlg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 index 4ea1b385..d746a341 100644 --- a/cuda/impl/psb_z_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 @@ -30,19 +30,19 @@ ! -subroutine psb_z_mv_hlg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod - use psb_z_hlg_mat_mod, psb_protect_name => psb_z_mv_hlg_from_fmt + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_fmt #else - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod #endif implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -59,4 +59,4 @@ subroutine psb_z_mv_hlg_from_fmt(a,b,info) if (info == psb_success_) call a%mv_from_coo(tmp,info) end select -end subroutine psb_z_mv_hlg_from_fmt +end subroutine psb_z_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_z_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hybg_from_coo.F90 rename to cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 index 3424caea..7d0d9eec 100644 --- a/cuda/impl/psb_z_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_mv_hybg_from_coo(a,b,info) +subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_coo + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_coo #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -61,5 +61,5 @@ subroutine psb_z_mv_hybg_from_coo(a,b,info) info = psb_err_alloc_dealloc_ return -end subroutine psb_z_mv_hybg_from_coo +end subroutine psb_z_cuda_mv_hybg_from_coo #endif diff --git a/cuda/impl/psb_z_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 similarity index 89% rename from cuda/impl/psb_z_mv_hybg_from_fmt.F90 rename to cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 index 90c35897..7bfc27e3 100644 --- a/cuda/impl/psb_z_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 @@ -30,18 +30,18 @@ ! #if CUDA_SHORT_VERSION <= 10 -subroutine psb_z_mv_hybg_from_fmt(a,b,info) +subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod #ifdef HAVE_SPGPU use cusparse_mod - use psb_z_hybg_mat_mod, psb_protect_name => psb_z_mv_hybg_from_fmt + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_fmt #else - use psb_z_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif implicit none - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info @@ -58,5 +58,5 @@ subroutine psb_z_mv_hybg_from_fmt(a,b,info) call a%to_gpu(info) #endif end select -end subroutine psb_z_mv_hybg_from_fmt +end subroutine psb_z_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/psb_c_csrg_mat_mod.F90 b/cuda/psb_c_csrg_mat_mod.F90 deleted file mode 100644 index 203a6dbf..00000000 --- a/cuda/psb_c_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_c_csrg_mat_mod - - use iso_c_binding - use psb_c_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_c_csr_sparse_mat) :: psb_c_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(c_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => c_csrg_get_fmt - procedure, pass(a) :: sizeof => c_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_c_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_csrg_csmm - procedure, pass(a) :: csmv => psb_c_csrg_csmv - procedure, pass(a) :: scals => psb_c_csrg_scals - procedure, pass(a) :: scalv => psb_c_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_c_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_csrg_from_fmt - procedure, pass(a) :: free => c_csrg_free - procedure, pass(a) :: mold => psb_c_csrg_mold - procedure, pass(a) :: is_host => c_csrg_is_host - procedure, pass(a) :: is_dev => c_csrg_is_dev - procedure, pass(a) :: is_sync => c_csrg_is_sync - procedure, pass(a) :: set_host => c_csrg_set_host - procedure, pass(a) :: set_dev => c_csrg_set_dev - procedure, pass(a) :: set_sync => c_csrg_set_sync - procedure, pass(a) :: sync => c_csrg_sync - procedure, pass(a) :: to_gpu => psb_c_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_c_csrg_from_gpu - final :: c_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_csrg_mold -#endif - end type psb_c_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: c_csrg_get_nzeros, c_csrg_free, c_csrg_get_fmt, & - & c_csrg_get_size, c_csrg_sizeof, c_csrg_get_nz_row - - - interface - subroutine psb_c_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_c_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_vect_mv - end interface - - interface - subroutine psb_c_csrg_reallocate_nz(nz,a) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_c_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_c_csrg_reallocate_nz - end interface - - interface - subroutine psb_c_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_csrg_allocate_mnnz - end interface - - interface - subroutine psb_c_csrg_mold(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_mold - end interface - - interface - subroutine psb_c_csrg_to_gpu(a,info, nzrm) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_csrg_to_gpu - end interface - - interface - subroutine psb_c_csrg_from_gpu(a,info) - import :: psb_c_csrg_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_from_gpu - end interface - - interface - subroutine psb_c_cp_csrg_from_coo(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_csrg_from_coo - end interface - - interface - subroutine psb_c_cp_csrg_from_fmt(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_csrg_from_fmt - end interface - - interface - subroutine psb_c_mv_csrg_from_coo(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_csrg_from_coo - end interface - - interface - subroutine psb_c_mv_csrg_from_fmt(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - class(psb_c_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_csrg_from_fmt - end interface - - interface - subroutine psb_c_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_csmv - end interface - interface - subroutine psb_c_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_csrg_csmm - end interface - - interface - subroutine psb_c_csrg_scal(d,a,info,side) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - complex(psb_spk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_c_csrg_scal - end interface - - interface - subroutine psb_c_csrg_scals(d,a,info) - import :: psb_c_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(inout) :: a - complex(psb_spk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function c_csrg_sizeof(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + (2*psb_sizeof_sp) * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function c_csrg_sizeof - - function c_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function c_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine c_csrg_set_host(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine c_csrg_set_host - - subroutine c_csrg_set_dev(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine c_csrg_set_dev - - subroutine c_csrg_set_sync(a) - implicit none - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine c_csrg_set_sync - - function c_csrg_is_dev(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function c_csrg_is_dev - - function c_csrg_is_host(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function c_csrg_is_host - - function c_csrg_is_sync(a) result(res) - implicit none - class(psb_c_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function c_csrg_is_sync - - - subroutine c_csrg_sync(a) - implicit none - class(psb_c_csrg_sparse_mat), target, intent(in) :: a - class(psb_c_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine c_csrg_sync - - subroutine c_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_c_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_c_csr_sparse_mat%free() - - return - - end subroutine c_csrg_free - - subroutine c_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_c_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine c_csrg_finalize - -#else - interface - subroutine psb_c_csrg_mold(a,b,info) - import :: psb_c_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_csrg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_csrg_mold - end interface - -#endif - -end module psb_c_csrg_mat_mod diff --git a/cuda/psb_c_cuda_csrg_mat_mod.F90 b/cuda/psb_c_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..a98d7e99 --- /dev/null +++ b/cuda/psb_c_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_csrg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(c_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_c_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_c_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => c_cuda_csrg_free + procedure, pass(a) :: mold => psb_c_cuda_csrg_mold + procedure, pass(a) :: is_host => c_cuda_csrg_is_host + procedure, pass(a) :: is_dev => c_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => c_cuda_csrg_is_sync + procedure, pass(a) :: set_host => c_cuda_csrg_set_host + procedure, pass(a) :: set_dev => c_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => c_cuda_csrg_set_sync + procedure, pass(a) :: sync => c_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_c_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_c_cuda_csrg_from_gpu + final :: c_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_c_cuda_csrg_mold +#endif + end type psb_c_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: c_cuda_csrg_get_nzeros, c_cuda_csrg_free, c_cuda_csrg_get_fmt, & + & c_cuda_csrg_get_size, c_cuda_csrg_sizeof, c_cuda_csrg_get_nz_row + + + interface + subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_csrg_mold(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_mold + end interface + + interface + subroutine psb_c_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_c_cuda_csrg_from_gpu(a,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_csmv + end interface + interface + subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_csmm + end interface + + interface + subroutine psb_c_cuda_csrg_scal(d,a,info,side) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_csrg_scal + end interface + + interface + subroutine psb_c_cuda_csrg_scals(d,a,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_csrg_sizeof + + function c_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function c_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine c_cuda_csrg_set_host(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_cuda_csrg_set_host + + subroutine c_cuda_csrg_set_dev(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_cuda_csrg_set_dev + + subroutine c_cuda_csrg_set_sync(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_cuda_csrg_set_sync + + function c_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_cuda_csrg_is_dev + + function c_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_cuda_csrg_is_host + + function c_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_cuda_csrg_is_sync + + + subroutine c_cuda_csrg_sync(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_cuda_csrg_sync + + subroutine c_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_cuda_csrg_free + + subroutine c_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine c_cuda_csrg_finalize + +#else + interface + subroutine psb_c_cuda_csrg_mold(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_mold + end interface + +#endif + +end module psb_c_cuda_csrg_mat_mod diff --git a/cuda/psb_c_diag_mat_mod.F90 b/cuda/psb_c_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_c_diag_mat_mod.F90 rename to cuda/psb_c_cuda_diag_mat_mod.F90 index a7ab2fbb..1d5db05b 100644 --- a/cuda/psb_c_diag_mat_mod.F90 +++ b/cuda/psb_c_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_c_diag_mat_mod +module psb_c_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_c_dia_mat_mod - type, extends(psb_c_dia_sparse_mat) :: psb_c_diag_sparse_mat + type, extends(psb_c_dia_sparse_mat) :: psb_c_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_c_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_diag_get_fmt - procedure, pass(a) :: sizeof => c_diag_sizeof - procedure, pass(a) :: vect_mv => psb_c_diag_vect_mv -! procedure, pass(a) :: csmm => psb_c_diag_csmm - procedure, pass(a) :: csmv => psb_c_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_c_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_c_diag_scals -! procedure, pass(a) :: scalv => psb_c_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_c_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_c_diag_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => c_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_c_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_c_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_c_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_c_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_c_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_c_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_c_mv_diag_from_fmt - procedure, pass(a) :: free => c_diag_free - procedure, pass(a) :: mold => psb_c_diag_mold - procedure, pass(a) :: to_gpu => psb_c_diag_to_gpu - final :: c_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => c_cuda_diag_free + procedure, pass(a) :: mold => psb_c_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_diag_to_gpu + final :: c_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_c_diag_mold + procedure, pass(a) :: mold => psb_c_cuda_diag_mold #endif - end type psb_c_diag_sparse_mat + end type psb_c_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: c_diag_get_nzeros, c_diag_free, c_diag_get_fmt, & - & c_diag_get_size, c_diag_sizeof, c_diag_get_nz_row + private :: c_cuda_diag_get_nzeros, c_cuda_diag_free, c_cuda_diag_get_fmt, & + & c_cuda_diag_get_size, c_cuda_diag_sizeof, c_cuda_diag_get_nz_row interface - subroutine psb_c_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_vect_mv + end subroutine psb_c_cuda_diag_vect_mv end interface interface - subroutine psb_c_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_diag_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_inner_vect_sv + end subroutine psb_c_cuda_diag_inner_vect_sv end interface interface - subroutine psb_c_diag_reallocate_nz(nz,a) - import :: psb_c_diag_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_diag_reallocate_nz(nz,a) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_diag_sparse_mat), intent(inout) :: a - end subroutine psb_c_diag_reallocate_nz + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_diag_reallocate_nz end interface interface - subroutine psb_c_diag_allocate_mnnz(m,n,a,nz) - import :: psb_c_diag_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_diag_allocate_mnnz + end subroutine psb_c_cuda_diag_allocate_mnnz end interface interface - subroutine psb_c_diag_mold(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_mold(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_mold + end subroutine psb_c_cuda_diag_mold end interface interface - subroutine psb_c_diag_to_gpu(a,info, nzrm) - import :: psb_c_diag_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_diag_to_gpu + end subroutine psb_c_cuda_diag_to_gpu end interface interface - subroutine psb_c_cp_diag_from_coo(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_diag_from_coo + end subroutine psb_c_cuda_cp_diag_from_coo end interface interface - subroutine psb_c_cp_diag_from_fmt(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_diag_from_fmt + end subroutine psb_c_cuda_cp_diag_from_fmt end interface interface - subroutine psb_c_mv_diag_from_coo(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_diag_from_coo + end subroutine psb_c_cuda_mv_diag_from_coo end interface interface - subroutine psb_c_mv_diag_from_fmt(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_diag_from_fmt + end subroutine psb_c_cuda_mv_diag_from_fmt end interface interface - subroutine psb_c_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_csmv + end subroutine psb_c_cuda_diag_csmv end interface interface - subroutine psb_c_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_diag_csmm + end subroutine psb_c_cuda_diag_csmm end interface interface - subroutine psb_c_diag_scal(d,a,info, side) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_scal(d,a,info, side) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_diag_scal + end subroutine psb_c_cuda_diag_scal end interface interface - subroutine psb_c_diag_scals(d,a,info) - import :: psb_c_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_diag_scals(d,a,info) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_scals + end subroutine psb_c_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function c_diag_sizeof(a) result(res) + function c_cuda_diag_sizeof(a) result(res) implicit none - class(psb_c_diag_sparse_mat), intent(in) :: a + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_diag_sizeof + end function c_cuda_diag_sizeof - function c_diag_get_fmt() result(res) + function c_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function c_diag_get_fmt + end function c_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine c_diag_free(a) + subroutine c_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_diag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine c_diag_free + end subroutine c_cuda_diag_free - subroutine c_diag_finalize(a) + subroutine c_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_c_diag_sparse_mat), intent(inout) :: a + type(psb_c_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_diag_finalize + end subroutine c_cuda_diag_finalize #else interface - subroutine psb_c_diag_mold(a,b,info) - import :: psb_c_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_diag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_diag_mold(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_diag_mold + end subroutine psb_c_cuda_diag_mold end interface #endif -end module psb_c_diag_mat_mod +end module psb_c_cuda_diag_mat_mod diff --git a/cuda/psb_c_dnsg_mat_mod.F90 b/cuda/psb_c_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_c_dnsg_mat_mod.F90 rename to cuda/psb_c_cuda_dnsg_mat_mod.F90 index 7fe5fdda..e89e117b 100644 --- a/cuda/psb_c_dnsg_mat_mod.F90 +++ b/cuda/psb_c_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_c_dnsg_mat_mod +module psb_c_cuda_dnsg_mat_mod use iso_c_binding use psb_c_mat_mod use psb_c_dns_mat_mod use dnsdev_mod - type, extends(psb_c_dns_sparse_mat) :: psb_c_dnsg_sparse_mat + type, extends(psb_c_dns_sparse_mat) :: psb_c_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_c_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => c_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_c_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_c_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_c_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_c_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_c_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_c_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_c_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_c_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_c_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_c_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_c_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_c_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_c_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_c_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_c_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_c_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_dnsg_from_fmt - procedure, pass(a) :: free => c_dnsg_free - procedure, pass(a) :: mold => psb_c_dnsg_mold - procedure, pass(a) :: to_gpu => psb_c_dnsg_to_gpu - final :: c_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => c_cuda_dnsg_free + procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_dnsg_to_gpu + final :: c_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_c_dnsg_mold + procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold #endif - end type psb_c_dnsg_sparse_mat + end type psb_c_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: c_dnsg_get_nzeros, c_dnsg_free, c_dnsg_get_fmt, & - & c_dnsg_get_size, c_dnsg_get_nz_row + private :: c_cuda_dnsg_get_nzeros, c_cuda_dnsg_free, c_cuda_dnsg_get_fmt, & + & c_cuda_dnsg_get_size, c_cuda_dnsg_get_nz_row interface - subroutine psb_c_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_dnsg_vect_mv + end subroutine psb_c_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_inner_vect_sv +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_dnsg_reallocate_nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_dnsg_allocate_mnnz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_c_dnsg_mold(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_mold(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_mold + end subroutine psb_c_cuda_dnsg_mold end interface interface - subroutine psb_c_dnsg_to_gpu(a,info) - import :: psb_c_dnsg_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_dnsg_to_gpu(a,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_to_gpu + end subroutine psb_c_cuda_dnsg_to_gpu end interface interface - subroutine psb_c_cp_dnsg_from_coo(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_dnsg_from_coo + end subroutine psb_c_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_c_cp_dnsg_from_fmt(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_dnsg_from_fmt + end subroutine psb_c_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_c_mv_dnsg_from_coo(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_dnsg_from_coo + end subroutine psb_c_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_c_mv_dnsg_from_fmt(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_dnsg_from_fmt + end subroutine psb_c_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_c_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) !!$ complex(psb_spk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_csmv +!!$ end subroutine psb_c_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_c_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_dnsg_csmm +!!$ end subroutine psb_c_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_scal(d,a,info, side) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_c_dnsg_scal +!!$ end subroutine psb_c_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_dnsg_scals(d,a,info) -!!$ import :: psb_c_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_dnsg_scals +!!$ end subroutine psb_c_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function c_dnsg_get_fmt() result(res) + function c_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function c_dnsg_get_fmt + end function c_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine c_dnsg_free(a) + subroutine c_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine c_dnsg_free + end subroutine c_cuda_dnsg_free - subroutine c_dnsg_finalize(a) + subroutine c_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_c_dnsg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_dnsg_finalize + end subroutine c_cuda_dnsg_finalize #else interface - subroutine psb_c_dnsg_mold(a,b,info) - import :: psb_c_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_dnsg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_dnsg_mold(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_dnsg_mold + end subroutine psb_c_cuda_dnsg_mold end interface #endif -end module psb_c_dnsg_mat_mod +end module psb_c_cuda_dnsg_mat_mod diff --git a/cuda/psb_c_elg_mat_mod.F90 b/cuda/psb_c_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_c_elg_mat_mod.F90 rename to cuda/psb_c_cuda_elg_mat_mod.F90 index 83355b9d..43250ce3 100644 --- a/cuda/psb_c_elg_mat_mod.F90 +++ b/cuda/psb_c_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_c_elg_mat_mod +module psb_c_cuda_elg_mat_mod use iso_c_binding use psb_c_mat_mod use psb_c_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_ell_sparse_mat) :: psb_c_elg_sparse_mat + type, extends(psb_c_ell_sparse_mat) :: psb_c_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_c_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => c_elg_get_fmt - procedure, pass(a) :: sizeof => c_elg_sizeof - procedure, pass(a) :: vect_mv => psb_c_elg_vect_mv - procedure, pass(a) :: csmm => psb_c_elg_csmm - procedure, pass(a) :: csmv => psb_c_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_c_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_c_elg_scals - procedure, pass(a) :: scalv => psb_c_elg_scal - procedure, pass(a) :: reallocate_nz => psb_c_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_elg_allocate_mnnz - procedure, pass(a) :: reinit => c_elg_reinit + procedure, nopass :: get_fmt => c_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_elg_scals + procedure, pass(a) :: scalv => psb_c_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => c_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_elg_from_fmt - procedure, pass(a) :: free => c_elg_free - procedure, pass(a) :: mold => psb_c_elg_mold - procedure, pass(a) :: csput_a => psb_c_elg_csput_a - procedure, pass(a) :: csput_v => psb_c_elg_csput_v - procedure, pass(a) :: is_host => c_elg_is_host - procedure, pass(a) :: is_dev => c_elg_is_dev - procedure, pass(a) :: is_sync => c_elg_is_sync - procedure, pass(a) :: set_host => c_elg_set_host - procedure, pass(a) :: set_dev => c_elg_set_dev - procedure, pass(a) :: set_sync => c_elg_set_sync - procedure, pass(a) :: sync => c_elg_sync - procedure, pass(a) :: from_gpu => psb_c_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_c_elg_to_gpu - procedure, pass(a) :: asb => psb_c_elg_asb - final :: c_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => c_cuda_elg_free + procedure, pass(a) :: mold => psb_c_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_c_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_c_cuda_elg_csput_v + procedure, pass(a) :: is_host => c_cuda_elg_is_host + procedure, pass(a) :: is_dev => c_cuda_elg_is_dev + procedure, pass(a) :: is_sync => c_cuda_elg_is_sync + procedure, pass(a) :: set_host => c_cuda_elg_set_host + procedure, pass(a) :: set_dev => c_cuda_elg_set_dev + procedure, pass(a) :: set_sync => c_cuda_elg_set_sync + procedure, pass(a) :: sync => c_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_c_cuda_elg_asb + final :: c_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_c_elg_mold - procedure, pass(a) :: asb => psb_c_elg_asb + procedure, pass(a) :: mold => psb_c_cuda_elg_mold + procedure, pass(a) :: asb => psb_c_cuda_elg_asb #endif - end type psb_c_elg_sparse_mat + end type psb_c_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: c_elg_get_nzeros, c_elg_free, c_elg_get_fmt, & - & c_elg_get_size, c_elg_sizeof, c_elg_get_nz_row, c_elg_sync + private :: c_cuda_elg_get_nzeros, c_cuda_elg_free, c_cuda_elg_get_fmt, & + & c_cuda_elg_get_size, c_cuda_elg_sizeof, c_cuda_elg_get_nz_row, c_cuda_elg_sync interface - subroutine psb_c_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_vect_mv + end subroutine psb_c_cuda_elg_vect_mv end interface interface - subroutine psb_c_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_elg_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_inner_vect_sv + end subroutine psb_c_cuda_elg_inner_vect_sv end interface interface - subroutine psb_c_elg_reallocate_nz(nz,a) - import :: psb_c_elg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_elg_reallocate_nz(nz,a) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_reallocate_nz + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_reallocate_nz end interface interface - subroutine psb_c_elg_allocate_mnnz(m,n,a,nz) - import :: psb_c_elg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_elg_allocate_mnnz + end subroutine psb_c_cuda_elg_allocate_mnnz end interface interface - subroutine psb_c_elg_mold(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_mold(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_mold + end subroutine psb_c_cuda_elg_mold end interface interface - subroutine psb_c_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_csput_a + end subroutine psb_c_cuda_elg_csput_a end interface interface - subroutine psb_c_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_c_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& + subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& & psb_i_base_vect_type - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_csput_v + end subroutine psb_c_cuda_elg_csput_v end interface interface - subroutine psb_c_elg_from_gpu(a,info) - import :: psb_c_elg_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_from_gpu(a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_from_gpu + end subroutine psb_c_cuda_elg_from_gpu end interface interface - subroutine psb_c_elg_to_gpu(a,info, nzrm) - import :: psb_c_elg_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_elg_to_gpu + end subroutine psb_c_cuda_elg_to_gpu end interface interface - subroutine psb_c_cp_elg_from_coo(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_elg_from_coo + end subroutine psb_c_cuda_cp_elg_from_coo end interface interface - subroutine psb_c_cp_elg_from_fmt(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_elg_from_fmt + end subroutine psb_c_cuda_cp_elg_from_fmt end interface interface - subroutine psb_c_mv_elg_from_coo(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_elg_from_coo + end subroutine psb_c_cuda_mv_elg_from_coo end interface interface - subroutine psb_c_mv_elg_from_fmt(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_elg_from_fmt + end subroutine psb_c_cuda_mv_elg_from_fmt end interface interface - subroutine psb_c_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_csmv + end subroutine psb_c_cuda_elg_csmv end interface interface - subroutine psb_c_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_elg_csmm + end subroutine psb_c_cuda_elg_csmm end interface interface - subroutine psb_c_elg_scal(d,a,info, side) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_scal(d,a,info, side) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_elg_scal + end subroutine psb_c_cuda_elg_scal end interface interface - subroutine psb_c_elg_scals(d,a,info) - import :: psb_c_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_elg_scals(d,a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_scals + end subroutine psb_c_cuda_elg_scals end interface interface - subroutine psb_c_elg_asb(a) - import :: psb_c_elg_sparse_mat - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_asb + subroutine psb_c_cuda_elg_asb(a) + import :: psb_c_cuda_elg_sparse_mat + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function c_elg_sizeof(a) result(res) + function c_cuda_elg_sizeof(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_elg_sizeof + end function c_cuda_elg_sizeof - function c_elg_get_fmt() result(res) + function c_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function c_elg_get_fmt + end function c_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine c_elg_reinit(a,clear) + subroutine c_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine c_elg_reinit + end subroutine c_cuda_elg_reinit - subroutine c_elg_free(a) + subroutine c_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine c_elg_free + end subroutine c_cuda_elg_free - subroutine c_elg_sync(a) + subroutine c_cuda_elg_sync(a) implicit none - class(psb_c_elg_sparse_mat), target, intent(in) :: a - class(psb_c_elg_sparse_mat), pointer :: tmpa + class(psb_c_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine c_elg_sync + end subroutine c_cuda_elg_sync - subroutine c_elg_set_host(a) + subroutine c_cuda_elg_set_host(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine c_elg_set_host + end subroutine c_cuda_elg_set_host - subroutine c_elg_set_dev(a) + subroutine c_cuda_elg_set_dev(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine c_elg_set_dev + end subroutine c_cuda_elg_set_dev - subroutine c_elg_set_sync(a) + subroutine c_cuda_elg_set_sync(a) implicit none - class(psb_c_elg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine c_elg_set_sync + end subroutine c_cuda_elg_set_sync - function c_elg_is_dev(a) result(res) + function c_cuda_elg_is_dev(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function c_elg_is_dev + end function c_cuda_elg_is_dev - function c_elg_is_host(a) result(res) + function c_cuda_elg_is_host(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function c_elg_is_host + end function c_cuda_elg_is_host - function c_elg_is_sync(a) result(res) + function c_cuda_elg_is_sync(a) result(res) implicit none - class(psb_c_elg_sparse_mat), intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function c_elg_is_sync + end function c_cuda_elg_is_sync - subroutine c_elg_finalize(a) + subroutine c_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_c_elg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_elg_finalize + end subroutine c_cuda_elg_finalize #else interface - subroutine psb_c_elg_asb(a) - import :: psb_c_elg_sparse_mat - class(psb_c_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_elg_asb + subroutine psb_c_cuda_elg_asb(a) + import :: psb_c_cuda_elg_sparse_mat + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_asb end interface interface - subroutine psb_c_elg_mold(a,b,info) - import :: psb_c_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_elg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_elg_mold(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_elg_mold + end subroutine psb_c_cuda_elg_mold end interface #endif -end module psb_c_elg_mat_mod +end module psb_c_cuda_elg_mat_mod diff --git a/cuda/psb_c_hdiag_mat_mod.F90 b/cuda/psb_c_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_c_hdiag_mat_mod.F90 rename to cuda/psb_c_cuda_hdiag_mat_mod.F90 index 8206abed..54f47684 100644 --- a/cuda/psb_c_hdiag_mat_mod.F90 +++ b/cuda/psb_c_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_c_hdiag_mat_mod +module psb_c_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_c_hdia_mat_mod - type, extends(psb_c_hdia_sparse_mat) :: psb_c_hdiag_sparse_mat + type, extends(psb_c_hdia_sparse_mat) :: psb_c_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => c_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => c_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_c_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_c_hdiag_csmm - procedure, pass(a) :: csmv => psb_c_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_c_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_c_hdiag_scals - ! procedure, pass(a) :: scalv => psb_c_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_c_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_c_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_c_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_c_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_c_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_c_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_c_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdiag_from_fmt - procedure, pass(a) :: free => c_hdiag_free - procedure, pass(a) :: mold => psb_c_hdiag_mold - procedure, pass(a) :: to_gpu => psb_c_hdiag_to_gpu - final :: c_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => c_cuda_hdiag_free + procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hdiag_to_gpu + final :: c_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_c_hdiag_mold + procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold #endif - end type psb_c_hdiag_sparse_mat + end type psb_c_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: c_hdiag_get_nzeros, c_hdiag_free, c_hdiag_get_fmt, & - & c_hdiag_get_size, c_hdiag_sizeof, c_hdiag_get_nz_row + private :: c_cuda_hdiag_get_nzeros, c_cuda_hdiag_free, c_cuda_hdiag_get_fmt, & + & c_cuda_hdiag_get_size, c_cuda_hdiag_sizeof, c_cuda_hdiag_get_nz_row interface - subroutine psb_c_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hdiag_vect_mv + end subroutine psb_c_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_c_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_c_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type -!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_c_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_hdiag_inner_vect_sv +!!$ end subroutine psb_c_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_c_hdiag_reallocate_nz +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_c_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_c_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_c_hdiag_allocate_mnnz +!!$ end subroutine psb_c_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_c_hdiag_mold(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_mold(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_mold + end subroutine psb_c_cuda_hdiag_mold end interface interface - subroutine psb_c_hdiag_to_gpu(a,info) - import :: psb_c_hdiag_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hdiag_to_gpu(a,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_to_gpu + end subroutine psb_c_cuda_hdiag_to_gpu end interface interface - subroutine psb_c_cp_hdiag_from_coo(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hdiag_from_coo + end subroutine psb_c_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_c_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_c_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_cp_hdiag_from_fmt +!!$ end subroutine psb_c_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_c_mv_hdiag_from_coo(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hdiag_from_coo + end subroutine psb_c_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_c_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_c_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_mv_hdiag_from_fmt +!!$ end subroutine psb_c_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_c_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hdiag_csmv + end subroutine psb_c_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_c_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_c_hdiag_csmm +!!$ end subroutine psb_c_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_scal(d,a,info, side) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_c_hdiag_scal +!!$ end subroutine psb_c_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_c_hdiag_scals(d,a,info) -!!$ import :: psb_c_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_c_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_c_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_c_hdiag_scals +!!$ end subroutine psb_c_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function c_hdiag_get_fmt() result(res) + function c_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function c_hdiag_get_fmt + end function c_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine c_hdiag_free(a) + subroutine c_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine c_hdiag_free + end subroutine c_cuda_hdiag_free - subroutine c_hdiag_finalize(a) + subroutine c_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_c_hdiag_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_c_hdia_sparse_mat%free() return - end subroutine c_hdiag_finalize + end subroutine c_cuda_hdiag_finalize #else interface - subroutine psb_c_hdiag_mold(a,b,info) - import :: psb_c_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hdiag_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hdiag_mold(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hdiag_mold + end subroutine psb_c_cuda_hdiag_mold end interface #endif -end module psb_c_hdiag_mat_mod +end module psb_c_cuda_hdiag_mat_mod diff --git a/cuda/psb_c_hlg_mat_mod.F90 b/cuda/psb_c_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_c_hlg_mat_mod.F90 rename to cuda/psb_c_cuda_hlg_mat_mod.F90 index 9236a202..74284f30 100644 --- a/cuda/psb_c_hlg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_c_hlg_mat_mod +module psb_c_cuda_hlg_mat_mod use iso_c_binding use psb_c_mat_mod @@ -41,7 +41,7 @@ module psb_c_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_hll_sparse_mat) :: psb_c_hlg_sparse_mat + type, extends(psb_c_hll_sparse_mat) :: psb_c_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_c_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => c_hlg_get_fmt - procedure, pass(a) :: sizeof => c_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_c_hlg_vect_mv - procedure, pass(a) :: csmm => psb_c_hlg_csmm - procedure, pass(a) :: csmv => psb_c_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_c_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_c_hlg_scals - procedure, pass(a) :: scalv => psb_c_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_c_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_hlg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_hlg_from_fmt - procedure, pass(a) :: free => c_hlg_free - procedure, pass(a) :: mold => psb_c_hlg_mold - procedure, pass(a) :: is_host => c_hlg_is_host - procedure, pass(a) :: is_dev => c_hlg_is_dev - procedure, pass(a) :: is_sync => c_hlg_is_sync - procedure, pass(a) :: set_host => c_hlg_set_host - procedure, pass(a) :: set_dev => c_hlg_set_dev - procedure, pass(a) :: set_sync => c_hlg_set_sync - procedure, pass(a) :: sync => c_hlg_sync - procedure, pass(a) :: from_gpu => psb_c_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_c_hlg_to_gpu - final :: c_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => c_cuda_hlg_free + procedure, pass(a) :: mold => psb_c_cuda_hlg_mold + procedure, pass(a) :: is_host => c_cuda_hlg_is_host + procedure, pass(a) :: is_dev => c_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => c_cuda_hlg_is_sync + procedure, pass(a) :: set_host => c_cuda_hlg_set_host + procedure, pass(a) :: set_dev => c_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => c_cuda_hlg_set_sync + procedure, pass(a) :: sync => c_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_hlg_to_gpu + final :: c_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_c_hlg_mold + procedure, pass(a) :: mold => psb_c_cuda_hlg_mold #endif - end type psb_c_hlg_sparse_mat + end type psb_c_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: c_hlg_get_nzeros, c_hlg_free, c_hlg_get_fmt, & - & c_hlg_get_size, c_hlg_sizeof, c_hlg_get_nz_row + private :: c_cuda_hlg_get_nzeros, c_cuda_hlg_free, c_cuda_hlg_get_fmt, & + & c_cuda_hlg_get_size, c_cuda_hlg_sizeof, c_cuda_hlg_get_nz_row interface - subroutine psb_c_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_vect_mv + end subroutine psb_c_cuda_hlg_vect_mv end interface interface - subroutine psb_c_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_c_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_inner_vect_sv + end subroutine psb_c_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_c_hlg_reallocate_nz(nz,a) - import :: psb_c_hlg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_c_hlg_reallocate_nz + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hlg_reallocate_nz end interface interface - subroutine psb_c_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_c_hlg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_hlg_allocate_mnnz + end subroutine psb_c_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_c_hlg_mold(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_mold(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_mold + end subroutine psb_c_cuda_hlg_mold end interface interface - subroutine psb_c_hlg_from_gpu(a,info) - import :: psb_c_hlg_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_from_gpu(a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_from_gpu + end subroutine psb_c_cuda_hlg_from_gpu end interface interface - subroutine psb_c_hlg_to_gpu(a,info, nzrm) - import :: psb_c_hlg_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_hlg_to_gpu + end subroutine psb_c_cuda_hlg_to_gpu end interface interface - subroutine psb_c_cp_hlg_from_coo(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hlg_from_coo + end subroutine psb_c_cuda_cp_hlg_from_coo end interface interface - subroutine psb_c_cp_hlg_from_fmt(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hlg_from_fmt + end subroutine psb_c_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_c_mv_hlg_from_coo(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hlg_from_coo + end subroutine psb_c_cuda_mv_hlg_from_coo end interface interface - subroutine psb_c_mv_hlg_from_fmt(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hlg_from_fmt + end subroutine psb_c_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_c_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_csmv + end subroutine psb_c_cuda_hlg_csmv end interface interface - subroutine psb_c_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hlg_csmm + end subroutine psb_c_cuda_hlg_csmm end interface interface - subroutine psb_c_hlg_scal(d,a,info, side) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_scal(d,a,info, side) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_hlg_scal + end subroutine psb_c_cuda_hlg_scal end interface interface - subroutine psb_c_hlg_scals(d,a,info) - import :: psb_c_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hlg_scals(d,a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_scals + end subroutine psb_c_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function c_hlg_sizeof(a) result(res) + function c_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_hlg_sizeof + end function c_cuda_hlg_sizeof - function c_hlg_get_fmt() result(res) + function c_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function c_hlg_get_fmt + end function c_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine c_hlg_free(a) + subroutine c_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine c_hlg_free + end subroutine c_cuda_hlg_free - subroutine c_hlg_sync(a) + subroutine c_cuda_hlg_sync(a) implicit none - class(psb_c_hlg_sparse_mat), target, intent(in) :: a - class(psb_c_hlg_sparse_mat), pointer :: tmpa + class(psb_c_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine c_hlg_sync + end subroutine c_cuda_hlg_sync - subroutine c_hlg_set_host(a) + subroutine c_cuda_hlg_set_host(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine c_hlg_set_host + end subroutine c_cuda_hlg_set_host - subroutine c_hlg_set_dev(a) + subroutine c_cuda_hlg_set_dev(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine c_hlg_set_dev + end subroutine c_cuda_hlg_set_dev - subroutine c_hlg_set_sync(a) + subroutine c_cuda_hlg_set_sync(a) implicit none - class(psb_c_hlg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine c_hlg_set_sync + end subroutine c_cuda_hlg_set_sync - function c_hlg_is_dev(a) result(res) + function c_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function c_hlg_is_dev + end function c_cuda_hlg_is_dev - function c_hlg_is_host(a) result(res) + function c_cuda_hlg_is_host(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function c_hlg_is_host + end function c_cuda_hlg_is_host - function c_hlg_is_sync(a) result(res) + function c_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_c_hlg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function c_hlg_is_sync + end function c_cuda_hlg_is_sync - subroutine c_hlg_finalize(a) + subroutine c_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_c_hlg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine c_hlg_finalize + end subroutine c_cuda_hlg_finalize #else interface - subroutine psb_c_hlg_mold(a,b,info) - import :: psb_c_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hlg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hlg_mold(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hlg_mold + end subroutine psb_c_cuda_hlg_mold end interface #endif -end module psb_c_hlg_mat_mod +end module psb_c_cuda_hlg_mat_mod diff --git a/cuda/psb_c_hybg_mat_mod.F90 b/cuda/psb_c_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_c_hybg_mat_mod.F90 rename to cuda/psb_c_cuda_hybg_mat_mod.F90 index d5c605ec..d16988ba 100644 --- a/cuda/psb_c_hybg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_c_hybg_mat_mod +module psb_c_cuda_hybg_mat_mod use iso_c_binding use psb_c_mat_mod use cusparse_mod - type, extends(psb_c_csr_sparse_mat) :: psb_c_hybg_sparse_mat + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_c_hybg_mat_mod type(c_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => c_hybg_get_fmt - procedure, pass(a) :: sizeof => c_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_c_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_hybg_csmm - procedure, pass(a) :: csmv => psb_c_hybg_csmv - procedure, pass(a) :: scals => psb_c_hybg_scals - procedure, pass(a) :: scalv => psb_c_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_c_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_hybg_allocate_mnnz + procedure, nopass :: get_fmt => c_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_c_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_c_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_mv_hybg_from_fmt - procedure, pass(a) :: free => c_hybg_free - procedure, pass(a) :: mold => psb_c_hybg_mold - procedure, pass(a) :: to_gpu => psb_c_hybg_to_gpu - final :: c_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => c_cuda_hybg_free + procedure, pass(a) :: mold => psb_c_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hybg_to_gpu + final :: c_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_c_hybg_mold + procedure, pass(a) :: mold => psb_c_cuda_hybg_mold #endif - end type psb_c_hybg_sparse_mat + end type psb_c_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: c_hybg_get_nzeros, c_hybg_free, c_hybg_get_fmt, & - & c_hybg_get_size, c_hybg_sizeof, c_hybg_get_nz_row + private :: c_cuda_hybg_get_nzeros, c_cuda_hybg_free, c_cuda_hybg_get_fmt, & + & c_cuda_hybg_get_size, c_cuda_hybg_sizeof, c_cuda_hybg_get_nz_row interface - subroutine psb_c_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_inner_vect_sv + end subroutine psb_c_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_c_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_vect_mv + end subroutine psb_c_cuda_hybg_vect_mv end interface interface - subroutine psb_c_hybg_reallocate_nz(nz,a) - import :: psb_c_hybg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_c_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_c_hybg_reallocate_nz + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hybg_reallocate_nz end interface interface - subroutine psb_c_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_c_hybg_sparse_mat, psb_ipk_ + subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_c_hybg_allocate_mnnz + end subroutine psb_c_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_c_hybg_mold(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_mold(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_mold + end subroutine psb_c_cuda_hybg_mold end interface interface - subroutine psb_c_hybg_to_gpu(a,info, nzrm) - import :: psb_c_hybg_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_c_hybg_to_gpu + end subroutine psb_c_cuda_hybg_to_gpu end interface interface - subroutine psb_c_cp_hybg_from_coo(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hybg_from_coo + end subroutine psb_c_cuda_cp_hybg_from_coo end interface interface - subroutine psb_c_cp_hybg_from_fmt(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cp_hybg_from_fmt + end subroutine psb_c_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_c_mv_hybg_from_coo(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hybg_from_coo + end subroutine psb_c_cuda_mv_hybg_from_coo end interface interface - subroutine psb_c_mv_hybg_from_fmt(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_mv_hybg_from_fmt + end subroutine psb_c_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_c_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) complex(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_csmv + end subroutine psb_c_cuda_hybg_csmv end interface interface - subroutine psb_c_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) complex(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_c_hybg_csmm + end subroutine psb_c_cuda_hybg_csmm end interface interface - subroutine psb_c_hybg_scal(d,a,info,side) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_scal(d,a,info,side) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_c_hybg_scal + end subroutine psb_c_cuda_hybg_scal end interface interface - subroutine psb_c_hybg_scals(d,a,info) - import :: psb_c_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(inout) :: a + subroutine psb_c_cuda_hybg_scals(d,a,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_scals + end subroutine psb_c_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function c_hybg_sizeof(a) result(res) + function c_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_c_hybg_sparse_mat), intent(in) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + (2*psb_sizeof_sp) * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function c_hybg_sizeof + end function c_cuda_hybg_sizeof - function c_hybg_get_fmt() result(res) + function c_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function c_hybg_get_fmt + end function c_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine c_hybg_free(a) + subroutine c_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_c_hybg_sparse_mat), intent(inout) :: a + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_c_csr_sparse_mat%free() return - end subroutine c_hybg_free + end subroutine c_cuda_hybg_free - subroutine c_hybg_finalize(a) + subroutine c_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_c_hybg_sparse_mat), intent(inout) :: a + type(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine c_hybg_finalize + end subroutine c_cuda_hybg_finalize #else interface - subroutine psb_c_hybg_mold(a,b,info) - import :: psb_c_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_hybg_sparse_mat), intent(in) :: a + subroutine psb_c_cuda_hybg_mold(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_hybg_mold + end subroutine psb_c_cuda_hybg_mold end interface #endif -end module psb_c_hybg_mat_mod +end module psb_c_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_c_gpu_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_c_gpu_vect_mod.F90 rename to cuda/psb_c_cuda_vect_mod.F90 index 4c31154f..be06167e 100644 --- a/cuda/psb_c_gpu_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_c_gpu_vect_mod +module psb_c_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_c_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_c_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_c_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_base_vect_type) :: psb_c_vect_gpu + type, extends(psb_c_base_vect_type) :: psb_c_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_c_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => c_gpu_get_nrows - procedure, nopass :: get_fmt => c_gpu_get_fmt - - procedure, pass(x) :: all => c_gpu_all - procedure, pass(x) :: zero => c_gpu_zero - procedure, pass(x) :: asb_m => c_gpu_asb_m - procedure, pass(x) :: sync => c_gpu_sync - procedure, pass(x) :: sync_space => c_gpu_sync_space - procedure, pass(x) :: bld_x => c_gpu_bld_x - procedure, pass(x) :: bld_mn => c_gpu_bld_mn - procedure, pass(x) :: free => c_gpu_free - procedure, pass(x) :: ins_a => c_gpu_ins_a - procedure, pass(x) :: ins_v => c_gpu_ins_v - procedure, pass(x) :: is_host => c_gpu_is_host - procedure, pass(x) :: is_dev => c_gpu_is_dev - procedure, pass(x) :: is_sync => c_gpu_is_sync - procedure, pass(x) :: set_host => c_gpu_set_host - procedure, pass(x) :: set_dev => c_gpu_set_dev - procedure, pass(x) :: set_sync => c_gpu_set_sync - procedure, pass(x) :: set_scal => c_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => c_gpu_set_vect - procedure, pass(x) :: gthzv_x => c_gpu_gthzv_x - procedure, pass(y) :: sctb => c_gpu_sctb - procedure, pass(y) :: sctb_x => c_gpu_sctb_x - procedure, pass(x) :: gthzbuf => c_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => c_gpu_sctb_buf - procedure, pass(x) :: new_buffer => c_gpu_new_buffer - procedure, nopass :: device_wait => c_gpu_device_wait - procedure, pass(x) :: free_buffer => c_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => c_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => c_gpu_dot_v - procedure, pass(x) :: dot_a => c_gpu_dot_a - procedure, pass(y) :: axpby_v => c_gpu_axpby_v - procedure, pass(y) :: axpby_a => c_gpu_axpby_a - procedure, pass(y) :: mlt_v => c_gpu_mlt_v - procedure, pass(y) :: mlt_a => c_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => c_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => c_gpu_mlt_v_2 - procedure, pass(x) :: scal => c_gpu_scal - procedure, pass(x) :: nrm2 => c_gpu_nrm2 - procedure, pass(x) :: amax => c_gpu_amax - procedure, pass(x) :: asum => c_gpu_asum - procedure, pass(x) :: absval1 => c_gpu_absval1 - procedure, pass(x) :: absval2 => c_gpu_absval2 - - final :: c_gpu_vect_finalize + procedure, pass(x) :: get_nrows => c_cuda_get_nrows + procedure, nopass :: get_fmt => c_cuda_get_fmt + + procedure, pass(x) :: all => c_cuda_all + procedure, pass(x) :: zero => c_cuda_zero + procedure, pass(x) :: asb_m => c_cuda_asb_m + procedure, pass(x) :: sync => c_cuda_sync + procedure, pass(x) :: sync_space => c_cuda_sync_space + procedure, pass(x) :: bld_x => c_cuda_bld_x + procedure, pass(x) :: bld_mn => c_cuda_bld_mn + procedure, pass(x) :: free => c_cuda_free + procedure, pass(x) :: ins_a => c_cuda_ins_a + procedure, pass(x) :: ins_v => c_cuda_ins_v + procedure, pass(x) :: is_host => c_cuda_is_host + procedure, pass(x) :: is_dev => c_cuda_is_dev + procedure, pass(x) :: is_sync => c_cuda_is_sync + procedure, pass(x) :: set_host => c_cuda_set_host + procedure, pass(x) :: set_dev => c_cuda_set_dev + procedure, pass(x) :: set_sync => c_cuda_set_sync + procedure, pass(x) :: set_scal => c_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => c_cuda_set_vect + procedure, pass(x) :: gthzv_x => c_cuda_gthzv_x + procedure, pass(y) :: sctb => c_cuda_sctb + procedure, pass(y) :: sctb_x => c_cuda_sctb_x + procedure, pass(x) :: gthzbuf => c_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => c_cuda_sctb_buf + procedure, pass(x) :: new_buffer => c_cuda_new_buffer + procedure, nopass :: device_wait => c_cuda_device_wait + procedure, pass(x) :: free_buffer => c_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => c_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => c_cuda_dot_v + procedure, pass(x) :: dot_a => c_cuda_dot_a + procedure, pass(y) :: axpby_v => c_cuda_axpby_v + procedure, pass(y) :: axpby_a => c_cuda_axpby_a + procedure, pass(y) :: mlt_v => c_cuda_mlt_v + procedure, pass(y) :: mlt_a => c_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => c_cuda_mlt_v_2 + procedure, pass(x) :: scal => c_cuda_scal + procedure, pass(x) :: nrm2 => c_cuda_nrm2 + procedure, pass(x) :: amax => c_cuda_amax + procedure, pass(x) :: asum => c_cuda_asum + procedure, pass(x) :: absval1 => c_cuda_absval1 + procedure, pass(x) :: absval2 => c_cuda_absval2 + + final :: c_cuda_vect_finalize #endif - end type psb_c_vect_gpu + end type psb_c_vect_cuda - public :: psb_c_vect_gpu_ + public :: psb_c_vect_cuda_ private :: constructor - interface psb_c_vect_gpu_ + interface psb_c_vect_cuda_ module procedure constructor - end interface psb_c_vect_gpu_ + end interface psb_c_vect_cuda_ contains function constructor(x) result(this) complex(psb_spk_) :: x(:) - type(psb_c_vect_gpu) :: this + type(psb_c_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine c_gpu_device_wait() + subroutine c_cuda_device_wait() call psb_cudaSync() - end subroutine c_gpu_device_wait + end subroutine c_cuda_device_wait - subroutine c_gpu_new_buffer(n,x,info) + subroutine c_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine c_gpu_gthzv_x + end subroutine c_cuda_gthzv_x - subroutine c_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine c_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_c_vect_gpu) :: x + class(psb_c_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine c_gpu_gthzbuf + end subroutine c_cuda_gthzbuf - subroutine c_gpu_sctb(n,idx,x,beta,y) + subroutine c_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta, x(:) - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_c_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine c_gpu_sctb + end subroutine c_cuda_sctb - subroutine c_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine c_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta, x(:) - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine c_gpu_sctb_x + end subroutine c_cuda_sctb_x - subroutine c_gpu_sctb_buf(i,n,idx,beta,y) + subroutine c_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta - class(psb_c_vect_gpu) :: y + class(psb_c_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine c_gpu_sctb_buf + end subroutine c_cuda_sctb_buf - subroutine c_gpu_bld_x(x,this) + subroutine c_cuda_bld_x(x,this) use psb_base_mod complex(psb_spk_), intent(in) :: this(:) - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_bld_x',& + call psb_errpush(info,'c_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine c_gpu_bld_x + end subroutine c_cuda_bld_x - subroutine c_gpu_bld_mn(x,n) + subroutine c_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'c_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'c_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine c_gpu_bld_mn + end subroutine c_cuda_bld_mn - subroutine c_gpu_set_host(x) + subroutine c_cuda_set_host(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine c_gpu_set_host + end subroutine c_cuda_set_host - subroutine c_gpu_set_dev(x) + subroutine c_cuda_set_dev(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine c_gpu_set_dev + end subroutine c_cuda_set_dev - subroutine c_gpu_set_sync(x) + subroutine c_cuda_set_sync(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine c_gpu_set_sync + end subroutine c_cuda_set_sync - function c_gpu_is_dev(x) result(res) + function c_cuda_is_dev(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function c_gpu_is_dev + end function c_cuda_is_dev - function c_gpu_is_host(x) result(res) + function c_cuda_is_host(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function c_gpu_is_host + end function c_cuda_is_host - function c_gpu_is_sync(x) result(res) + function c_cuda_is_sync(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function c_gpu_is_sync + end function c_cuda_is_sync - function c_gpu_get_nrows(x) result(res) + function c_cuda_get_nrows(x) result(res) implicit none - class(psb_c_vect_gpu), intent(in) :: x + class(psb_c_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function c_gpu_get_nrows + end function c_cuda_get_nrows - function c_gpu_get_fmt() result(res) + function c_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'cGPU' - end function c_gpu_get_fmt + end function c_cuda_get_fmt - subroutine c_gpu_all(n, x, info) + subroutine c_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_c_vect_gpu), intent(out) :: x + class(psb_c_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_all',& + call psb_errpush(info,'c_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine c_gpu_all + end subroutine c_cuda_all - subroutine c_gpu_zero(x) + subroutine c_cuda_zero(x) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=czero call x%set_host() - end subroutine c_gpu_zero + end subroutine c_cuda_zero - subroutine c_gpu_asb_m(n, x, info) + subroutine c_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine c_gpu_asb_m + end subroutine c_cuda_asb_m - subroutine c_gpu_sync_space(x,info) + subroutine c_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine c_gpu_sync_space + end subroutine c_cuda_sync_space - subroutine c_gpu_sync(x) + subroutine c_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'c_gpu_sync') + call psb_errpush(info,'c_cuda_sync') end if - end subroutine c_gpu_sync + end subroutine c_cuda_sync - subroutine c_gpu_free(x, info) + subroutine c_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine c_gpu_free + end subroutine c_cuda_free - subroutine c_gpu_set_scal(x,val,first,last) - class(psb_c_vect_gpu), intent(inout) :: x + subroutine c_cuda_set_scal(x,val,first,last) + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine c_gpu_set_scal + end subroutine c_cuda_set_scal !!$ -!!$ subroutine c_gpu_set_vect(x,val) -!!$ class(psb_c_vect_gpu), intent(inout) :: x +!!$ subroutine c_cuda_set_vect(x,val) +!!$ class(psb_c_vect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_c_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine c_gpu_set_vect +!!$ end subroutine c_cuda_set_vect - function c_gpu_dot_v(n,x,y) result(res) + function c_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n complex(psb_spk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_c_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'c_gpu_dot_v') + call psb_errpush(info,'c_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function c_gpu_dot_v + end function c_cuda_dot_v - function c_gpu_dot_a(n,x,y) result(res) + function c_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n complex(psb_spk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function c_gpu_dot_a + end function c_cuda_dot_a - subroutine c_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine c_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y complex(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) ! Do something different here if ((beta /= czero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine c_gpu_axpby_v + end subroutine c_cuda_axpby_v - subroutine c_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine c_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m complex(psb_spk_), intent(in) :: x(:) - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y complex(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine c_gpu_axpby_a + end subroutine c_cuda_axpby_a - subroutine c_gpu_mlt_v(x, y, info) + subroutine c_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_c_base_vect_type), intent(inout) :: x - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine c_gpu_mlt_v + end subroutine c_cuda_mlt_v - subroutine c_gpu_mlt_a(x, y, info) + subroutine c_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none complex(psb_spk_), intent(in) :: x(:) - class(psb_c_vect_gpu), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_c_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine c_gpu_mlt_a + end subroutine c_cuda_mlt_a - subroutine c_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine c_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none complex(psb_spk_), intent(in) :: alpha,beta complex(psb_spk_), intent(in) :: x(:) complex(psb_spk_), intent(in) :: y(:) - class(psb_c_vect_gpu), intent(inout) :: z + class(psb_c_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine c_gpu_mlt_a_2 + end subroutine c_cuda_mlt_a_2 - subroutine c_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine c_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none complex(psb_spk_), intent(in) :: alpha,beta class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y - class(psb_c_vect_gpu), intent(inout) :: z + class(psb_c_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) select type (yy => y) - type is (psb_c_vect_gpu) + type is (psb_c_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= czero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine c_gpu_mlt_v_2 + end subroutine c_cuda_mlt_v_2 - subroutine c_gpu_scal(alpha, x) + subroutine c_cuda_scal(alpha, x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x complex(psb_spk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine c_gpu_scal + end subroutine c_cuda_scal - function c_gpu_nrm2(n,x) result(res) + function c_cuda_nrm2(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_nrm2 + end function c_cuda_nrm2 - function c_gpu_amax(n,x) result(res) + function c_cuda_amax(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_amax + end function c_cuda_amax - function c_gpu_asum(n,x) result(res) + function c_cuda_asum(n,x) result(res) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDeviceComplex(res,n,x%deviceVect) - end function c_gpu_asum + end function c_cuda_asum - subroutine c_gpu_absval1(x) + subroutine c_cuda_absval1(x) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,cone,x%deviceVect) - end subroutine c_gpu_absval1 + end subroutine c_cuda_absval1 - subroutine c_gpu_absval2(x,y) + subroutine c_cuda_absval2(x,y) implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,cone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_c_base_vect_type%absval(y) end select - end subroutine c_gpu_absval2 + end subroutine c_cuda_absval2 - subroutine c_gpu_vect_finalize(x) + subroutine c_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_c_vect_gpu), intent(inout) :: x + type(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine c_gpu_vect_finalize + end subroutine c_cuda_vect_finalize - subroutine c_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine c_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_c_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_c_vect_gpu) + class is (psb_c_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceFloatComplex(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine c_gpu_ins_v + end subroutine c_cuda_ins_v - subroutine c_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine c_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_vect_gpu), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_spk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_c_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine c_gpu_ins_a + end subroutine c_cuda_ins_a #endif -end module psb_c_gpu_vect_mod +end module psb_c_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_c_gpu_vect_mod -module psb_c_gpu_multivect_mod +module psb_c_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_c_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_c_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_c_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_c_base_multivect_type) :: psb_c_multivect_gpu + type, extends(psb_c_base_multivect_type) :: psb_c_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_c_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => c_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => c_gpu_multi_get_ncols - procedure, nopass :: get_fmt => c_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => c_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => c_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => c_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => c_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => c_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => c_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => c_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => c_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => c_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => c_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => c_gpu_multi_amax -!!$ procedure, pass(x) :: asum => c_gpu_multi_asum - procedure, pass(x) :: all => c_gpu_multi_all - procedure, pass(x) :: zero => c_gpu_multi_zero - procedure, pass(x) :: asb => c_gpu_multi_asb - procedure, pass(x) :: sync => c_gpu_multi_sync - procedure, pass(x) :: sync_space => c_gpu_multi_sync_space - procedure, pass(x) :: bld_x => c_gpu_multi_bld_x - procedure, pass(x) :: bld_n => c_gpu_multi_bld_n - procedure, pass(x) :: free => c_gpu_multi_free - procedure, pass(x) :: ins => c_gpu_multi_ins - procedure, pass(x) :: is_host => c_gpu_multi_is_host - procedure, pass(x) :: is_dev => c_gpu_multi_is_dev - procedure, pass(x) :: is_sync => c_gpu_multi_is_sync - procedure, pass(x) :: set_host => c_gpu_multi_set_host - procedure, pass(x) :: set_dev => c_gpu_multi_set_dev - procedure, pass(x) :: set_sync => c_gpu_multi_set_sync - procedure, pass(x) :: set_scal => c_gpu_multi_set_scal - procedure, pass(x) :: set_vect => c_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => c_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => c_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => c_gpu_multi_sctb_x - final :: c_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => c_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => c_cuda_multi_get_ncols + procedure, nopass :: get_fmt => c_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => c_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => c_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => c_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => c_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => c_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => c_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => c_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => c_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => c_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => c_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => c_cuda_multi_amax +!!$ procedure, pass(x) :: asum => c_cuda_multi_asum + procedure, pass(x) :: all => c_cuda_multi_all + procedure, pass(x) :: zero => c_cuda_multi_zero + procedure, pass(x) :: asb => c_cuda_multi_asb + procedure, pass(x) :: sync => c_cuda_multi_sync + procedure, pass(x) :: sync_space => c_cuda_multi_sync_space + procedure, pass(x) :: bld_x => c_cuda_multi_bld_x + procedure, pass(x) :: bld_n => c_cuda_multi_bld_n + procedure, pass(x) :: free => c_cuda_multi_free + procedure, pass(x) :: ins => c_cuda_multi_ins + procedure, pass(x) :: is_host => c_cuda_multi_is_host + procedure, pass(x) :: is_dev => c_cuda_multi_is_dev + procedure, pass(x) :: is_sync => c_cuda_multi_is_sync + procedure, pass(x) :: set_host => c_cuda_multi_set_host + procedure, pass(x) :: set_dev => c_cuda_multi_set_dev + procedure, pass(x) :: set_sync => c_cuda_multi_set_sync + procedure, pass(x) :: set_scal => c_cuda_multi_set_scal + procedure, pass(x) :: set_vect => c_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => c_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => c_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => c_cuda_multi_sctb_x + final :: c_cuda_multi_vect_finalize #endif - end type psb_c_multivect_gpu + end type psb_c_multivect_cuda - public :: psb_c_multivect_gpu + public :: psb_c_multivect_cuda private :: constructor - interface psb_c_multivect_gpu + interface psb_c_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) complex(psb_spk_) :: x(:,:) - type(psb_c_multivect_gpu) :: this + type(psb_c_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine c_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_spk_) :: y(:) -!!$ class(psb_c_multivect_gpu) :: x +!!$ class(psb_c_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine c_gpu_multi_gthzv_x +!!$ end subroutine c_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine c_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine c_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ complex(psb_spk_) :: beta, x(:) -!!$ class(psb_c_multivect_gpu) :: y +!!$ class(psb_c_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_c_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine c_gpu_multi_sctb +!!$ end subroutine c_cuda_multi_sctb !!$ -!!$ subroutine c_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine c_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_spk_) :: beta, x(:) -!!$ class(psb_c_multivect_gpu) :: y +!!$ class(psb_c_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine c_gpu_multi_sctb_x +!!$ end subroutine c_cuda_multi_sctb_x - subroutine c_gpu_multi_bld_x(x,this) + subroutine c_cuda_multi_bld_x(x,this) use psb_base_mod complex(psb_spk_), intent(in) :: this(:,:) - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_multi_bld_x',& + call psb_errpush(info,'c_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine c_gpu_multi_bld_x + end subroutine c_cuda_multi_bld_x - subroutine c_gpu_multi_bld_n(x,m,n) + subroutine c_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'c_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'c_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine c_gpu_multi_bld_n + end subroutine c_cuda_multi_bld_n - subroutine c_gpu_multi_set_host(x) + subroutine c_cuda_multi_set_host(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine c_gpu_multi_set_host + end subroutine c_cuda_multi_set_host - subroutine c_gpu_multi_set_dev(x) + subroutine c_cuda_multi_set_dev(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine c_gpu_multi_set_dev + end subroutine c_cuda_multi_set_dev - subroutine c_gpu_multi_set_sync(x) + subroutine c_cuda_multi_set_sync(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine c_gpu_multi_set_sync + end subroutine c_cuda_multi_set_sync - function c_gpu_multi_is_dev(x) result(res) + function c_cuda_multi_is_dev(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function c_gpu_multi_is_dev + end function c_cuda_multi_is_dev - function c_gpu_multi_is_host(x) result(res) + function c_cuda_multi_is_host(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function c_gpu_multi_is_host + end function c_cuda_multi_is_host - function c_gpu_multi_is_sync(x) result(res) + function c_cuda_multi_is_sync(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function c_gpu_multi_is_sync + end function c_cuda_multi_is_sync - function c_gpu_multi_get_nrows(x) result(res) + function c_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function c_gpu_multi_get_nrows + end function c_cuda_multi_get_nrows - function c_gpu_multi_get_ncols(x) result(res) + function c_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_c_multivect_gpu), intent(in) :: x + class(psb_c_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function c_gpu_multi_get_ncols + end function c_cuda_multi_get_ncols - function c_gpu_multi_get_fmt() result(res) + function c_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'cGPU' - end function c_gpu_multi_get_fmt + end function c_cuda_multi_get_fmt -!!$ function c_gpu_multi_dot_v(n,x,y) result(res) +!!$ function c_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ class(psb_c_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_spk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_c_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'c_gpu_multi_dot_v') +!!$ call psb_errpush(info,'c_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function c_gpu_multi_dot_v +!!$ end function c_cuda_multi_dot_v !!$ -!!$ function c_gpu_multi_dot_a(n,x,y) result(res) +!!$ function c_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_spk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function c_gpu_multi_dot_a +!!$ end function c_cuda_multi_dot_a !!$ -!!$ subroutine c_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine c_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_c_base_multivect_type), intent(inout) :: x -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ complex(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine c_gpu_multi_axpby_v +!!$ end subroutine c_cuda_multi_axpby_v !!$ -!!$ subroutine c_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine c_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ complex(psb_spk_), intent(in) :: x(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ complex(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine c_gpu_multi_axpby_a +!!$ end subroutine c_cuda_multi_axpby_a !!$ -!!$ subroutine c_gpu_multi_mlt_v(x, y, info) +!!$ subroutine c_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_c_base_multivect_type), intent(inout) :: x -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine c_gpu_multi_mlt_v +!!$ end subroutine c_cuda_multi_mlt_v !!$ -!!$ subroutine c_gpu_multi_mlt_a(x, y, info) +!!$ subroutine c_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: x(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_c_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine c_gpu_multi_mlt_a +!!$ end subroutine c_cuda_multi_mlt_a !!$ -!!$ subroutine c_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine c_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta !!$ complex(psb_spk_), intent(in) :: x(:) !!$ complex(psb_spk_), intent(in) :: y(:) -!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ class(psb_c_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine c_gpu_multi_mlt_a_2 +!!$ end subroutine c_cuda_multi_mlt_a_2 !!$ -!!$ subroutine c_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine c_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ complex(psb_spk_), intent(in) :: alpha,beta !!$ class(psb_c_base_multivect_type), intent(inout) :: x !!$ class(psb_c_base_multivect_type), intent(inout) :: y -!!$ class(psb_c_multivect_gpu), intent(inout) :: z +!!$ class(psb_c_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_c_multivect_gpu) +!!$ type is (psb_c_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine c_gpu_multi_mlt_v_2 +!!$ end subroutine c_cuda_multi_mlt_v_2 - subroutine c_gpu_multi_set_scal(x,val) - class(psb_c_multivect_gpu), intent(inout) :: x + subroutine c_cuda_multi_set_scal(x,val) + class(psb_c_multivect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_c_base_multivect_type%set_scal(val) call x%set_host() - end subroutine c_gpu_multi_set_scal + end subroutine c_cuda_multi_set_scal - subroutine c_gpu_multi_set_vect(x,val) - class(psb_c_multivect_gpu), intent(inout) :: x + subroutine c_cuda_multi_set_vect(x,val) + class(psb_c_multivect_cuda), intent(inout) :: x complex(psb_spk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_c_base_multivect_type%set_vect(val) call x%set_host() - end subroutine c_gpu_multi_set_vect + end subroutine c_cuda_multi_set_vect -!!$ subroutine c_gpu_multi_scal(alpha, x) +!!$ subroutine c_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ complex(psb_spk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_c_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine c_gpu_multi_scal +!!$ end subroutine c_cuda_multi_scal !!$ !!$ -!!$ function c_gpu_multi_nrm2(n,x) result(res) +!!$ function c_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function c_gpu_multi_nrm2 +!!$ end function c_cuda_multi_nrm2 !!$ -!!$ function c_gpu_multi_amax(n,x) result(res) +!!$ function c_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function c_gpu_multi_amax +!!$ end function c_cuda_multi_amax !!$ -!!$ function c_gpu_multi_asum(n,x) result(res) +!!$ function c_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_c_multivect_gpu), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function c_gpu_multi_asum +!!$ end function c_cuda_multi_asum - subroutine c_gpu_multi_all(m,n, x, info) + subroutine c_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(out) :: x + class(psb_c_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=czero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'c_gpu_multi_all',& + call psb_errpush(info,'c_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine c_gpu_multi_all + end subroutine c_cuda_multi_all - subroutine c_gpu_multi_zero(x) + subroutine c_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine c_gpu_multi_zero + end subroutine c_cuda_multi_zero - subroutine c_gpu_multi_asb(m,n, x, info) + subroutine c_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine c_gpu_multi_asb + end subroutine c_cuda_multi_asb - subroutine c_gpu_multi_sync_space(x,info) + subroutine c_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine c_gpu_multi_sync_space + end subroutine c_cuda_multi_sync_space - subroutine c_gpu_multi_sync(x) + subroutine c_cuda_multi_sync(x) implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'c_gpu_multi_sync') + call psb_errpush(info,'c_cuda_multi_sync') end if - end subroutine c_gpu_multi_sync + end subroutine c_cuda_multi_sync - subroutine c_gpu_multi_free(x, info) + subroutine c_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine c_gpu_multi_free + end subroutine c_cuda_multi_free - subroutine c_gpu_multi_vect_finalize(x) + subroutine c_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_c_multivect_gpu), intent(inout) :: x + type(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine c_gpu_multi_vect_finalize + end subroutine c_cuda_multi_vect_finalize - subroutine c_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine c_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_c_multivect_gpu), intent(inout) :: x + class(psb_c_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_spk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_c_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine c_gpu_multi_ins + end subroutine c_cuda_multi_ins #endif -end module psb_c_gpu_multivect_mod +end module psb_c_cuda_multivect_mod diff --git a/cuda/psb_gpu_env_mod.F90 b/cuda/psb_cuda_env_mod.F90 similarity index 74% rename from cuda/psb_gpu_env_mod.F90 rename to cuda/psb_cuda_env_mod.F90 index 0473f4ac..0d1d4ced 100644 --- a/cuda/psb_gpu_env_mod.F90 +++ b/cuda/psb_cuda_env_mod.F90 @@ -30,30 +30,30 @@ ! -module psb_gpu_env_mod +module psb_cuda_env_mod use psb_const_mod use iso_c_binding use base_cusparse_mod -! interface psb_gpu_init -! module procedure psb_gpu_init +! interface psb_cuda_init +! module procedure psb_cuda_init ! end interface #if defined(HAVE_CUDA) use core_mod interface - function psb_gpuGetHandle() & - & result(res) bind(c,name='psb_gpuGetHandle') + function psb_cudaGetHandle() & + & result(res) bind(c,name='psb_cudaGetHandle') use iso_c_binding type(c_ptr) :: res - end function psb_gpuGetHandle + end function psb_cudaGetHandle end interface interface - function psb_gpuGetStream() & - & result(res) bind(c,name='psb_gpuGetStream') + function psb_cudaGetStream() & + & result(res) bind(c,name='psb_cudaGetStream') use iso_c_binding type(c_ptr) :: res - end function psb_gpuGetStream + end function psb_cudaGetStream end interface interface @@ -66,11 +66,11 @@ module psb_gpu_env_mod end interface interface - function psb_cuda_getDeviceCount() & + function psb_cuda_inner_getDeviceCount() & & result(res) bind(c,name='getDeviceCount') use iso_c_binding integer(c_int) :: res - end function psb_cuda_getDeviceCount + end function psb_cuda_inner_getDeviceCount end interface interface @@ -92,39 +92,39 @@ module psb_gpu_env_mod interface - subroutine psb_gpuCreateHandle() & - & bind(c,name='psb_gpuCreateHandle') + subroutine psb_cudaCreateHandle() & + & bind(c,name='psb_cudaCreateHandle') use iso_c_binding - end subroutine psb_gpuCreateHandle + end subroutine psb_cudaCreateHandle end interface interface - subroutine psb_gpuSetStream(handle,stream) & - & bind(c,name='psb_gpuSetStream') + subroutine psb_cudaSetStream(handle,stream) & + & bind(c,name='psb_cudaSetStream') use iso_c_binding type(c_ptr), value :: handle, stream - end subroutine psb_gpuSetStream + end subroutine psb_cudaSetStream end interface interface - subroutine psb_gpuDestroyHandle() & - & bind(c,name='psb_gpuDestroyHandle') + subroutine psb_cudaDestroyHandle() & + & bind(c,name='psb_cudaDestroyHandle') use iso_c_binding - end subroutine psb_gpuDestroyHandle + end subroutine psb_cudaDestroyHandle end interface interface - subroutine psb_cudaReset() & + subroutine psb_cuda_innerReset() & & bind(c,name='cudaReset') use iso_c_binding - end subroutine psb_cudaReset + end subroutine psb_cuda_innerReset end interface interface - subroutine psb_gpuClose() & + subroutine psb_cuda_innerClose() & & bind(c,name='gpuClose') use iso_c_binding - end subroutine psb_gpuClose + end subroutine psb_cuda_innerClose end interface #endif @@ -180,15 +180,15 @@ module psb_gpu_env_mod Contains - function psb_gpu_get_maybe_free_buffer() result(res) + function psb_cuda_get_maybe_free_buffer() result(res) logical :: res res = gpu_do_maybe_free_buffer - end function psb_gpu_get_maybe_free_buffer + end function psb_cuda_get_maybe_free_buffer - subroutine psb_gpu_set_maybe_free_buffer(val) + subroutine psb_cuda_set_maybe_free_buffer(val) logical, intent(in) :: val gpu_do_maybe_free_buffer = val - end subroutine psb_gpu_set_maybe_free_buffer + end subroutine psb_cuda_set_maybe_free_buffer ! !!!!!!!!!!!!!!!!!!!!!! ! @@ -197,7 +197,7 @@ Contains ! !!!!!!!!!!!!!!!!!!!!!! - subroutine psb_gpu_init(ctxt,dev) + subroutine psb_cuda_init(ctxt,dev) use psb_penv_mod use psb_const_mod use psb_error_mod @@ -230,10 +230,10 @@ Contains end if if (info == 0) info = initFcusparse() if (info /= 0) then - call psb_errpush(psb_err_internal_error_,'psb_gpu_init') + call psb_errpush(psb_err_internal_error_,'psb_cuda_init') goto 9999 end if - call psb_gpuCreateHandle() + call psb_cudaCreateHandle() #endif call psb_erractionrestore(err_act) return @@ -241,80 +241,80 @@ Contains return - end subroutine psb_gpu_init + end subroutine psb_cuda_init - subroutine psb_gpu_DeviceSync() + subroutine psb_cuda_DeviceSync() #if defined(HAVE_CUDA) call psb_cudaSync() #endif - end subroutine psb_gpu_DeviceSync + end subroutine psb_cuda_DeviceSync - function psb_gpu_getDeviceCount() result(res) + function psb_cuda_getDeviceCount() result(res) integer :: res #if defined(HAVE_CUDA) - res = psb_cuda_getDeviceCount() + res = psb_cuda_inner_getDeviceCount() #else res = 0 #endif - end function psb_gpu_getDeviceCount + end function psb_cuda_getDeviceCount - subroutine psb_gpu_exit() + subroutine psb_cuda_exit() integer :: res res = closeFcusparse() - call psb_gpuClose() - call psb_cudaReset() - end subroutine psb_gpu_exit + call psb_cuda_innerClose() + call psb_cuda_innerReset() + end subroutine psb_cuda_exit - function psb_gpu_DeviceHasUVA() result(res) + function psb_cuda_DeviceHasUVA() result(res) logical :: res res = (psb_C_DeviceHasUVA() == 1) - end function psb_gpu_DeviceHasUVA + end function psb_cuda_DeviceHasUVA - function psb_gpu_MultiProcessors() result(res) + function psb_cuda_MultiProcessors() result(res) integer(psb_ipk_) :: res res = psb_C_get_MultiProcessors() - end function psb_gpu_MultiProcessors + end function psb_cuda_MultiProcessors - function psb_gpu_MaxRegistersPerBlock() result(res) + function psb_cuda_MaxRegistersPerBlock() result(res) integer(psb_ipk_) :: res res = psb_C_get_MaxRegistersPerBlock() - end function psb_gpu_MaxRegistersPerBlock + end function psb_cuda_MaxRegistersPerBlock - function psb_gpu_MaxThreadsPerMP() result(res) + function psb_cuda_MaxThreadsPerMP() result(res) integer(psb_ipk_) :: res res = psb_C_get_MaxThreadsPerMP() - end function psb_gpu_MaxThreadsPerMP + end function psb_cuda_MaxThreadsPerMP - function psb_gpu_WarpSize() result(res) + function psb_cuda_WarpSize() result(res) integer(psb_ipk_) :: res res = psb_C_get_WarpSize() - end function psb_gpu_WarpSize + end function psb_cuda_WarpSize - function psb_gpu_MemoryClockRate() result(res) + function psb_cuda_MemoryClockRate() result(res) integer(psb_ipk_) :: res res = psb_C_get_MemoryClockRate() - end function psb_gpu_MemoryClockRate + end function psb_cuda_MemoryClockRate - function psb_gpu_MemoryBusWidth() result(res) + function psb_cuda_MemoryBusWidth() result(res) integer(psb_ipk_) :: res res = psb_C_get_MemoryBusWidth() - end function psb_gpu_MemoryBusWidth + end function psb_cuda_MemoryBusWidth - function psb_gpu_MemoryPeakBandwidth() result(res) + function psb_cuda_MemoryPeakBandwidth() result(res) real(psb_dpk_) :: res ! Formula here: 2*ClockRate(KHz)*BusWidth(bit) ! normalization: bit/byte, KHz/MHz ! output: MBytes/s res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate() - end function psb_gpu_MemoryPeakBandwidth + end function psb_cuda_MemoryPeakBandwidth - function psb_gpu_DeviceName() result(res) + function psb_cuda_DeviceName() result(res) character(len=256) :: res character :: cstring(256) call psb_C_cpy_NameString(cstring) call stringc2f(cstring,res) - end function psb_gpu_DeviceName + end function psb_cuda_DeviceName subroutine stringc2f(cstring,fstring) @@ -337,4 +337,4 @@ Contains return end subroutine stringc2f -end module psb_gpu_env_mod +end module psb_cuda_env_mod diff --git a/cuda/psb_gpu_mod.F90 b/cuda/psb_cuda_mod.F90 similarity index 65% rename from cuda/psb_gpu_mod.F90 rename to cuda/psb_cuda_mod.F90 index 7eba8062..81ce3e31 100644 --- a/cuda/psb_gpu_mod.F90 +++ b/cuda/psb_cuda_mod.F90 @@ -30,60 +30,60 @@ ! -module psb_gpu_mod +module psb_cuda_mod use psb_const_mod - use psb_gpu_env_mod + use psb_cuda_env_mod - use psb_i_gpu_vect_mod - use psb_s_gpu_vect_mod - use psb_d_gpu_vect_mod - use psb_c_gpu_vect_mod - use psb_z_gpu_vect_mod + use psb_i_cuda_vect_mod + use psb_s_cuda_vect_mod + use psb_d_cuda_vect_mod + use psb_c_cuda_vect_mod + use psb_z_cuda_vect_mod - use psb_i_gpu_multivect_mod - use psb_s_gpu_multivect_mod - use psb_d_gpu_multivect_mod - use psb_c_gpu_multivect_mod - use psb_z_gpu_multivect_mod + use psb_i_cuda_multivect_mod + use psb_s_cuda_multivect_mod + use psb_d_cuda_multivect_mod + use psb_c_cuda_multivect_mod + use psb_z_cuda_multivect_mod use psb_d_ell_mat_mod - use psb_d_elg_mat_mod + use psb_d_cuda_elg_mat_mod use psb_s_ell_mat_mod - use psb_s_elg_mat_mod + use psb_s_cuda_elg_mat_mod use psb_z_ell_mat_mod - use psb_z_elg_mat_mod + use psb_z_cuda_elg_mat_mod use psb_c_ell_mat_mod - use psb_c_elg_mat_mod + use psb_c_cuda_elg_mat_mod use psb_s_hll_mat_mod - use psb_s_hlg_mat_mod + use psb_s_cuda_hlg_mat_mod use psb_d_hll_mat_mod - use psb_d_hlg_mat_mod + use psb_d_cuda_hlg_mat_mod use psb_c_hll_mat_mod - use psb_c_hlg_mat_mod + use psb_c_cuda_hlg_mat_mod use psb_z_hll_mat_mod - use psb_z_hlg_mat_mod + use psb_z_cuda_hlg_mat_mod - use psb_s_csrg_mat_mod - use psb_d_csrg_mat_mod - use psb_c_csrg_mat_mod - use psb_z_csrg_mat_mod + use psb_s_cuda_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod #if CUDA_SHORT_VERSION <= 10 - use psb_s_hybg_mat_mod - use psb_d_hybg_mat_mod - use psb_c_hybg_mat_mod - use psb_z_hybg_mat_mod + use psb_s_cuda_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod #endif - use psb_d_diag_mat_mod - use psb_d_hdiag_mat_mod + use psb_d_cuda_diag_mat_mod + use psb_d_cuda_hdiag_mat_mod - use psb_s_dnsg_mat_mod - use psb_d_dnsg_mat_mod - use psb_c_dnsg_mat_mod - use psb_z_dnsg_mat_mod + use psb_s_cuda_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod - use psb_s_hdiag_mat_mod - ! use psb_s_diag_mat_mod + use psb_s_cuda_hdiag_mat_mod + ! use psb_s_cuda_diag_mat_mod -end module psb_gpu_mod +end module psb_cuda_mod diff --git a/cuda/psb_d_csrg_mat_mod.F90 b/cuda/psb_d_csrg_mat_mod.F90 deleted file mode 100644 index 177c7440..00000000 --- a/cuda/psb_d_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_d_csrg_mat_mod - - use iso_c_binding - use psb_d_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_csr_sparse_mat) :: psb_d_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(d_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => d_csrg_get_fmt - procedure, pass(a) :: sizeof => d_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_d_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_csrg_csmm - procedure, pass(a) :: csmv => psb_d_csrg_csmv - procedure, pass(a) :: scals => psb_d_csrg_scals - procedure, pass(a) :: scalv => psb_d_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_d_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_csrg_from_fmt - procedure, pass(a) :: free => d_csrg_free - procedure, pass(a) :: mold => psb_d_csrg_mold - procedure, pass(a) :: is_host => d_csrg_is_host - procedure, pass(a) :: is_dev => d_csrg_is_dev - procedure, pass(a) :: is_sync => d_csrg_is_sync - procedure, pass(a) :: set_host => d_csrg_set_host - procedure, pass(a) :: set_dev => d_csrg_set_dev - procedure, pass(a) :: set_sync => d_csrg_set_sync - procedure, pass(a) :: sync => d_csrg_sync - procedure, pass(a) :: to_gpu => psb_d_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_d_csrg_from_gpu - final :: d_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_csrg_mold -#endif - end type psb_d_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: d_csrg_get_nzeros, d_csrg_free, d_csrg_get_fmt, & - & d_csrg_get_size, d_csrg_sizeof, d_csrg_get_nz_row - - - interface - subroutine psb_d_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_d_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_vect_mv - end interface - - interface - subroutine psb_d_csrg_reallocate_nz(nz,a) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_d_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_d_csrg_reallocate_nz - end interface - - interface - subroutine psb_d_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_csrg_allocate_mnnz - end interface - - interface - subroutine psb_d_csrg_mold(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_mold - end interface - - interface - subroutine psb_d_csrg_to_gpu(a,info, nzrm) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_csrg_to_gpu - end interface - - interface - subroutine psb_d_csrg_from_gpu(a,info) - import :: psb_d_csrg_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_from_gpu - end interface - - interface - subroutine psb_d_cp_csrg_from_coo(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_csrg_from_coo - end interface - - interface - subroutine psb_d_cp_csrg_from_fmt(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_csrg_from_fmt - end interface - - interface - subroutine psb_d_mv_csrg_from_coo(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_csrg_from_coo - end interface - - interface - subroutine psb_d_mv_csrg_from_fmt(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_csrg_from_fmt - end interface - - interface - subroutine psb_d_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_csmv - end interface - interface - subroutine psb_d_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_csrg_csmm - end interface - - interface - subroutine psb_d_csrg_scal(d,a,info,side) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_d_csrg_scal - end interface - - interface - subroutine psb_d_csrg_scals(d,a,info) - import :: psb_d_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function d_csrg_sizeof(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function d_csrg_sizeof - - function d_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function d_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine d_csrg_set_host(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_csrg_set_host - - subroutine d_csrg_set_dev(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_csrg_set_dev - - subroutine d_csrg_set_sync(a) - implicit none - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_csrg_set_sync - - function d_csrg_is_dev(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_csrg_is_dev - - function d_csrg_is_host(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_csrg_is_host - - function d_csrg_is_sync(a) result(res) - implicit none - class(psb_d_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_csrg_is_sync - - - subroutine d_csrg_sync(a) - implicit none - class(psb_d_csrg_sparse_mat), target, intent(in) :: a - class(psb_d_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine d_csrg_sync - - subroutine d_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_d_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_d_csr_sparse_mat%free() - - return - - end subroutine d_csrg_free - - subroutine d_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_d_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine d_csrg_finalize - -#else - interface - subroutine psb_d_csrg_mold(a,b,info) - import :: psb_d_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_csrg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_csrg_mold - end interface - -#endif - -end module psb_d_csrg_mat_mod diff --git a/cuda/psb_d_cuda_csrg_mat_mod.F90 b/cuda/psb_d_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..465c16a7 --- /dev/null +++ b/cuda/psb_d_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_csrg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(d_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_d_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_d_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => d_cuda_csrg_free + procedure, pass(a) :: mold => psb_d_cuda_csrg_mold + procedure, pass(a) :: is_host => d_cuda_csrg_is_host + procedure, pass(a) :: is_dev => d_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => d_cuda_csrg_is_sync + procedure, pass(a) :: set_host => d_cuda_csrg_set_host + procedure, pass(a) :: set_dev => d_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => d_cuda_csrg_set_sync + procedure, pass(a) :: sync => d_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_d_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_d_cuda_csrg_from_gpu + final :: d_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_d_cuda_csrg_mold +#endif + end type psb_d_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: d_cuda_csrg_get_nzeros, d_cuda_csrg_free, d_cuda_csrg_get_fmt, & + & d_cuda_csrg_get_size, d_cuda_csrg_sizeof, d_cuda_csrg_get_nz_row + + + interface + subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_csrg_mold(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_mold + end interface + + interface + subroutine psb_d_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_d_cuda_csrg_from_gpu(a,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_csmv + end interface + interface + subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_csmm + end interface + + interface + subroutine psb_d_cuda_csrg_scal(d,a,info,side) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_csrg_scal + end interface + + interface + subroutine psb_d_cuda_csrg_scals(d,a,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_csrg_sizeof + + function d_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function d_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine d_cuda_csrg_set_host(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_cuda_csrg_set_host + + subroutine d_cuda_csrg_set_dev(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_cuda_csrg_set_dev + + subroutine d_cuda_csrg_set_sync(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_cuda_csrg_set_sync + + function d_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_cuda_csrg_is_dev + + function d_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_cuda_csrg_is_host + + function d_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_cuda_csrg_is_sync + + + subroutine d_cuda_csrg_sync(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_cuda_csrg_sync + + subroutine d_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_cuda_csrg_free + + subroutine d_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine d_cuda_csrg_finalize + +#else + interface + subroutine psb_d_cuda_csrg_mold(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_mold + end interface + +#endif + +end module psb_d_cuda_csrg_mat_mod diff --git a/cuda/psb_d_diag_mat_mod.F90 b/cuda/psb_d_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_d_diag_mat_mod.F90 rename to cuda/psb_d_cuda_diag_mat_mod.F90 index 564f7a13..1d55faa0 100644 --- a/cuda/psb_d_diag_mat_mod.F90 +++ b/cuda/psb_d_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_d_diag_mat_mod +module psb_d_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_d_dia_mat_mod - type, extends(psb_d_dia_sparse_mat) :: psb_d_diag_sparse_mat + type, extends(psb_d_dia_sparse_mat) :: psb_d_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_d_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_diag_get_fmt - procedure, pass(a) :: sizeof => d_diag_sizeof - procedure, pass(a) :: vect_mv => psb_d_diag_vect_mv -! procedure, pass(a) :: csmm => psb_d_diag_csmm - procedure, pass(a) :: csmv => psb_d_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_d_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_d_diag_scals -! procedure, pass(a) :: scalv => psb_d_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_d_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_d_diag_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => d_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_d_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_d_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_d_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_d_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_d_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_d_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_d_mv_diag_from_fmt - procedure, pass(a) :: free => d_diag_free - procedure, pass(a) :: mold => psb_d_diag_mold - procedure, pass(a) :: to_gpu => psb_d_diag_to_gpu - final :: d_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => d_cuda_diag_free + procedure, pass(a) :: mold => psb_d_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_diag_to_gpu + final :: d_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_d_diag_mold + procedure, pass(a) :: mold => psb_d_cuda_diag_mold #endif - end type psb_d_diag_sparse_mat + end type psb_d_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: d_diag_get_nzeros, d_diag_free, d_diag_get_fmt, & - & d_diag_get_size, d_diag_sizeof, d_diag_get_nz_row + private :: d_cuda_diag_get_nzeros, d_cuda_diag_free, d_cuda_diag_get_fmt, & + & d_cuda_diag_get_size, d_cuda_diag_sizeof, d_cuda_diag_get_nz_row interface - subroutine psb_d_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_vect_mv + end subroutine psb_d_cuda_diag_vect_mv end interface interface - subroutine psb_d_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_inner_vect_sv + end subroutine psb_d_cuda_diag_inner_vect_sv end interface interface - subroutine psb_d_diag_reallocate_nz(nz,a) - import :: psb_d_diag_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_diag_reallocate_nz(nz,a) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_diag_sparse_mat), intent(inout) :: a - end subroutine psb_d_diag_reallocate_nz + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_diag_reallocate_nz end interface interface - subroutine psb_d_diag_allocate_mnnz(m,n,a,nz) - import :: psb_d_diag_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_diag_allocate_mnnz + end subroutine psb_d_cuda_diag_allocate_mnnz end interface interface - subroutine psb_d_diag_mold(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_mold(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_mold + end subroutine psb_d_cuda_diag_mold end interface interface - subroutine psb_d_diag_to_gpu(a,info, nzrm) - import :: psb_d_diag_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_diag_to_gpu + end subroutine psb_d_cuda_diag_to_gpu end interface interface - subroutine psb_d_cp_diag_from_coo(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_diag_from_coo + end subroutine psb_d_cuda_cp_diag_from_coo end interface interface - subroutine psb_d_cp_diag_from_fmt(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_diag_from_fmt + end subroutine psb_d_cuda_cp_diag_from_fmt end interface interface - subroutine psb_d_mv_diag_from_coo(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_diag_from_coo + end subroutine psb_d_cuda_mv_diag_from_coo end interface interface - subroutine psb_d_mv_diag_from_fmt(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_diag_from_fmt + end subroutine psb_d_cuda_mv_diag_from_fmt end interface interface - subroutine psb_d_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_csmv + end subroutine psb_d_cuda_diag_csmv end interface interface - subroutine psb_d_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_diag_csmm + end subroutine psb_d_cuda_diag_csmm end interface interface - subroutine psb_d_diag_scal(d,a,info, side) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_scal(d,a,info, side) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_diag_scal + end subroutine psb_d_cuda_diag_scal end interface interface - subroutine psb_d_diag_scals(d,a,info) - import :: psb_d_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_diag_scals(d,a,info) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_scals + end subroutine psb_d_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function d_diag_sizeof(a) result(res) + function d_cuda_diag_sizeof(a) result(res) implicit none - class(psb_d_diag_sparse_mat), intent(in) :: a + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_diag_sizeof + end function d_cuda_diag_sizeof - function d_diag_get_fmt() result(res) + function d_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function d_diag_get_fmt + end function d_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine d_diag_free(a) + subroutine d_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_diag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine d_diag_free + end subroutine d_cuda_diag_free - subroutine d_diag_finalize(a) + subroutine d_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_d_diag_sparse_mat), intent(inout) :: a + type(psb_d_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_diag_finalize + end subroutine d_cuda_diag_finalize #else interface - subroutine psb_d_diag_mold(a,b,info) - import :: psb_d_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_diag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_diag_mold(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_diag_mold + end subroutine psb_d_cuda_diag_mold end interface #endif -end module psb_d_diag_mat_mod +end module psb_d_cuda_diag_mat_mod diff --git a/cuda/psb_d_dnsg_mat_mod.F90 b/cuda/psb_d_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_d_dnsg_mat_mod.F90 rename to cuda/psb_d_cuda_dnsg_mat_mod.F90 index 966c2311..bb24eb1a 100644 --- a/cuda/psb_d_dnsg_mat_mod.F90 +++ b/cuda/psb_d_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_d_dnsg_mat_mod +module psb_d_cuda_dnsg_mat_mod use iso_c_binding use psb_d_mat_mod use psb_d_dns_mat_mod use dnsdev_mod - type, extends(psb_d_dns_sparse_mat) :: psb_d_dnsg_sparse_mat + type, extends(psb_d_dns_sparse_mat) :: psb_d_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_d_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => d_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_d_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_d_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_d_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_d_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_d_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_d_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_d_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_d_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_d_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_d_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_d_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_d_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_d_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_d_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_d_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_d_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_dnsg_from_fmt - procedure, pass(a) :: free => d_dnsg_free - procedure, pass(a) :: mold => psb_d_dnsg_mold - procedure, pass(a) :: to_gpu => psb_d_dnsg_to_gpu - final :: d_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => d_cuda_dnsg_free + procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_dnsg_to_gpu + final :: d_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_d_dnsg_mold + procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold #endif - end type psb_d_dnsg_sparse_mat + end type psb_d_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: d_dnsg_get_nzeros, d_dnsg_free, d_dnsg_get_fmt, & - & d_dnsg_get_size, d_dnsg_get_nz_row + private :: d_cuda_dnsg_get_nzeros, d_cuda_dnsg_free, d_cuda_dnsg_get_fmt, & + & d_cuda_dnsg_get_size, d_cuda_dnsg_get_nz_row interface - subroutine psb_d_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_dnsg_vect_mv + end subroutine psb_d_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_inner_vect_sv +!!$ end subroutine psb_d_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_dnsg_reallocate_nz +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_dnsg_allocate_mnnz +!!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_d_dnsg_mold(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_mold(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_mold + end subroutine psb_d_cuda_dnsg_mold end interface interface - subroutine psb_d_dnsg_to_gpu(a,info) - import :: psb_d_dnsg_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_dnsg_to_gpu(a,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_to_gpu + end subroutine psb_d_cuda_dnsg_to_gpu end interface interface - subroutine psb_d_cp_dnsg_from_coo(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_dnsg_from_coo + end subroutine psb_d_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_d_cp_dnsg_from_fmt(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_dnsg_from_fmt + end subroutine psb_d_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_d_mv_dnsg_from_coo(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_dnsg_from_coo + end subroutine psb_d_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_d_mv_dnsg_from_fmt(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_dnsg_from_fmt + end subroutine psb_d_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_d_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) !!$ real(psb_dpk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_csmv +!!$ end subroutine psb_d_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_d_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_dnsg_csmm +!!$ end subroutine psb_d_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_scal(d,a,info, side) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_d_dnsg_scal +!!$ end subroutine psb_d_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_dnsg_scals(d,a,info) -!!$ import :: psb_d_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_dnsg_scals +!!$ end subroutine psb_d_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function d_dnsg_get_fmt() result(res) + function d_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function d_dnsg_get_fmt + end function d_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine d_dnsg_free(a) + subroutine d_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine d_dnsg_free + end subroutine d_cuda_dnsg_free - subroutine d_dnsg_finalize(a) + subroutine d_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_d_dnsg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_dnsg_finalize + end subroutine d_cuda_dnsg_finalize #else interface - subroutine psb_d_dnsg_mold(a,b,info) - import :: psb_d_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_dnsg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_dnsg_mold(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_dnsg_mold + end subroutine psb_d_cuda_dnsg_mold end interface #endif -end module psb_d_dnsg_mat_mod +end module psb_d_cuda_dnsg_mat_mod diff --git a/cuda/psb_d_elg_mat_mod.F90 b/cuda/psb_d_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_d_elg_mat_mod.F90 rename to cuda/psb_d_cuda_elg_mat_mod.F90 index eac7bb36..1ac47664 100644 --- a/cuda/psb_d_elg_mat_mod.F90 +++ b/cuda/psb_d_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_d_elg_mat_mod +module psb_d_cuda_elg_mat_mod use iso_c_binding use psb_d_mat_mod use psb_d_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_ell_sparse_mat) :: psb_d_elg_sparse_mat + type, extends(psb_d_ell_sparse_mat) :: psb_d_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_d_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => d_elg_get_fmt - procedure, pass(a) :: sizeof => d_elg_sizeof - procedure, pass(a) :: vect_mv => psb_d_elg_vect_mv - procedure, pass(a) :: csmm => psb_d_elg_csmm - procedure, pass(a) :: csmv => psb_d_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_d_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_d_elg_scals - procedure, pass(a) :: scalv => psb_d_elg_scal - procedure, pass(a) :: reallocate_nz => psb_d_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_elg_allocate_mnnz - procedure, pass(a) :: reinit => d_elg_reinit + procedure, nopass :: get_fmt => d_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_elg_scals + procedure, pass(a) :: scalv => psb_d_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => d_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_elg_from_fmt - procedure, pass(a) :: free => d_elg_free - procedure, pass(a) :: mold => psb_d_elg_mold - procedure, pass(a) :: csput_a => psb_d_elg_csput_a - procedure, pass(a) :: csput_v => psb_d_elg_csput_v - procedure, pass(a) :: is_host => d_elg_is_host - procedure, pass(a) :: is_dev => d_elg_is_dev - procedure, pass(a) :: is_sync => d_elg_is_sync - procedure, pass(a) :: set_host => d_elg_set_host - procedure, pass(a) :: set_dev => d_elg_set_dev - procedure, pass(a) :: set_sync => d_elg_set_sync - procedure, pass(a) :: sync => d_elg_sync - procedure, pass(a) :: from_gpu => psb_d_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_d_elg_to_gpu - procedure, pass(a) :: asb => psb_d_elg_asb - final :: d_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => d_cuda_elg_free + procedure, pass(a) :: mold => psb_d_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_d_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_d_cuda_elg_csput_v + procedure, pass(a) :: is_host => d_cuda_elg_is_host + procedure, pass(a) :: is_dev => d_cuda_elg_is_dev + procedure, pass(a) :: is_sync => d_cuda_elg_is_sync + procedure, pass(a) :: set_host => d_cuda_elg_set_host + procedure, pass(a) :: set_dev => d_cuda_elg_set_dev + procedure, pass(a) :: set_sync => d_cuda_elg_set_sync + procedure, pass(a) :: sync => d_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_d_cuda_elg_asb + final :: d_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_d_elg_mold - procedure, pass(a) :: asb => psb_d_elg_asb + procedure, pass(a) :: mold => psb_d_cuda_elg_mold + procedure, pass(a) :: asb => psb_d_cuda_elg_asb #endif - end type psb_d_elg_sparse_mat + end type psb_d_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: d_elg_get_nzeros, d_elg_free, d_elg_get_fmt, & - & d_elg_get_size, d_elg_sizeof, d_elg_get_nz_row, d_elg_sync + private :: d_cuda_elg_get_nzeros, d_cuda_elg_free, d_cuda_elg_get_fmt, & + & d_cuda_elg_get_size, d_cuda_elg_sizeof, d_cuda_elg_get_nz_row, d_cuda_elg_sync interface - subroutine psb_d_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_vect_mv + end subroutine psb_d_cuda_elg_vect_mv end interface interface - subroutine psb_d_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_inner_vect_sv + end subroutine psb_d_cuda_elg_inner_vect_sv end interface interface - subroutine psb_d_elg_reallocate_nz(nz,a) - import :: psb_d_elg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_elg_reallocate_nz(nz,a) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_reallocate_nz + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_reallocate_nz end interface interface - subroutine psb_d_elg_allocate_mnnz(m,n,a,nz) - import :: psb_d_elg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_elg_allocate_mnnz + end subroutine psb_d_cuda_elg_allocate_mnnz end interface interface - subroutine psb_d_elg_mold(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_mold(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_mold + end subroutine psb_d_cuda_elg_mold end interface interface - subroutine psb_d_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_csput_a + end subroutine psb_d_cuda_elg_csput_a end interface interface - subroutine psb_d_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& + subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& & psb_i_base_vect_type - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_csput_v + end subroutine psb_d_cuda_elg_csput_v end interface interface - subroutine psb_d_elg_from_gpu(a,info) - import :: psb_d_elg_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_from_gpu(a,info) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_from_gpu + end subroutine psb_d_cuda_elg_from_gpu end interface interface - subroutine psb_d_elg_to_gpu(a,info, nzrm) - import :: psb_d_elg_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_elg_to_gpu + end subroutine psb_d_cuda_elg_to_gpu end interface interface - subroutine psb_d_cp_elg_from_coo(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_elg_from_coo + end subroutine psb_d_cuda_cp_elg_from_coo end interface interface - subroutine psb_d_cp_elg_from_fmt(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_elg_from_fmt + end subroutine psb_d_cuda_cp_elg_from_fmt end interface interface - subroutine psb_d_mv_elg_from_coo(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_elg_from_coo + end subroutine psb_d_cuda_mv_elg_from_coo end interface interface - subroutine psb_d_mv_elg_from_fmt(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_elg_from_fmt + end subroutine psb_d_cuda_mv_elg_from_fmt end interface interface - subroutine psb_d_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_csmv + end subroutine psb_d_cuda_elg_csmv end interface interface - subroutine psb_d_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_elg_csmm + end subroutine psb_d_cuda_elg_csmm end interface interface - subroutine psb_d_elg_scal(d,a,info, side) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_scal(d,a,info, side) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_elg_scal + end subroutine psb_d_cuda_elg_scal end interface interface - subroutine psb_d_elg_scals(d,a,info) - import :: psb_d_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_elg_scals(d,a,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_scals + end subroutine psb_d_cuda_elg_scals end interface interface - subroutine psb_d_elg_asb(a) - import :: psb_d_elg_sparse_mat - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_asb + subroutine psb_d_cuda_elg_asb(a) + import :: psb_d_cuda_elg_sparse_mat + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function d_elg_sizeof(a) result(res) + function d_cuda_elg_sizeof(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_elg_sizeof + end function d_cuda_elg_sizeof - function d_elg_get_fmt() result(res) + function d_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function d_elg_get_fmt + end function d_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine d_elg_reinit(a,clear) + subroutine d_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine d_elg_reinit + end subroutine d_cuda_elg_reinit - subroutine d_elg_free(a) + subroutine d_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine d_elg_free + end subroutine d_cuda_elg_free - subroutine d_elg_sync(a) + subroutine d_cuda_elg_sync(a) implicit none - class(psb_d_elg_sparse_mat), target, intent(in) :: a - class(psb_d_elg_sparse_mat), pointer :: tmpa + class(psb_d_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine d_elg_sync + end subroutine d_cuda_elg_sync - subroutine d_elg_set_host(a) + subroutine d_cuda_elg_set_host(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine d_elg_set_host + end subroutine d_cuda_elg_set_host - subroutine d_elg_set_dev(a) + subroutine d_cuda_elg_set_dev(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine d_elg_set_dev + end subroutine d_cuda_elg_set_dev - subroutine d_elg_set_sync(a) + subroutine d_cuda_elg_set_sync(a) implicit none - class(psb_d_elg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine d_elg_set_sync + end subroutine d_cuda_elg_set_sync - function d_elg_is_dev(a) result(res) + function d_cuda_elg_is_dev(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function d_elg_is_dev + end function d_cuda_elg_is_dev - function d_elg_is_host(a) result(res) + function d_cuda_elg_is_host(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function d_elg_is_host + end function d_cuda_elg_is_host - function d_elg_is_sync(a) result(res) + function d_cuda_elg_is_sync(a) result(res) implicit none - class(psb_d_elg_sparse_mat), intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function d_elg_is_sync + end function d_cuda_elg_is_sync - subroutine d_elg_finalize(a) + subroutine d_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_d_elg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_elg_finalize + end subroutine d_cuda_elg_finalize #else interface - subroutine psb_d_elg_asb(a) - import :: psb_d_elg_sparse_mat - class(psb_d_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_elg_asb + subroutine psb_d_cuda_elg_asb(a) + import :: psb_d_cuda_elg_sparse_mat + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_asb end interface interface - subroutine psb_d_elg_mold(a,b,info) - import :: psb_d_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_elg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_elg_mold(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_elg_mold + end subroutine psb_d_cuda_elg_mold end interface #endif -end module psb_d_elg_mat_mod +end module psb_d_cuda_elg_mat_mod diff --git a/cuda/psb_d_hdiag_mat_mod.F90 b/cuda/psb_d_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_d_hdiag_mat_mod.F90 rename to cuda/psb_d_cuda_hdiag_mat_mod.F90 index 1bc70c8c..17bacffe 100644 --- a/cuda/psb_d_hdiag_mat_mod.F90 +++ b/cuda/psb_d_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_d_hdiag_mat_mod +module psb_d_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_d_hdia_mat_mod - type, extends(psb_d_hdia_sparse_mat) :: psb_d_hdiag_sparse_mat + type, extends(psb_d_hdia_sparse_mat) :: psb_d_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => d_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => d_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_d_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_d_hdiag_csmm - procedure, pass(a) :: csmv => psb_d_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_d_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_d_hdiag_scals - ! procedure, pass(a) :: scalv => psb_d_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_d_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_d_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_d_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_d_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_d_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_d_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_d_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdiag_from_fmt - procedure, pass(a) :: free => d_hdiag_free - procedure, pass(a) :: mold => psb_d_hdiag_mold - procedure, pass(a) :: to_gpu => psb_d_hdiag_to_gpu - final :: d_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => d_cuda_hdiag_free + procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hdiag_to_gpu + final :: d_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_d_hdiag_mold + procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold #endif - end type psb_d_hdiag_sparse_mat + end type psb_d_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: d_hdiag_get_nzeros, d_hdiag_free, d_hdiag_get_fmt, & - & d_hdiag_get_size, d_hdiag_sizeof, d_hdiag_get_nz_row + private :: d_cuda_hdiag_get_nzeros, d_cuda_hdiag_free, d_cuda_hdiag_get_fmt, & + & d_cuda_hdiag_get_size, d_cuda_hdiag_sizeof, d_cuda_hdiag_get_nz_row interface - subroutine psb_d_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hdiag_vect_mv + end subroutine psb_d_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_d_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_d_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type -!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_d_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_hdiag_inner_vect_sv +!!$ end subroutine psb_d_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_d_hdiag_reallocate_nz +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_d_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_d_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_d_hdiag_allocate_mnnz +!!$ end subroutine psb_d_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_d_hdiag_mold(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_mold(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_mold + end subroutine psb_d_cuda_hdiag_mold end interface interface - subroutine psb_d_hdiag_to_gpu(a,info) - import :: psb_d_hdiag_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hdiag_to_gpu(a,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_to_gpu + end subroutine psb_d_cuda_hdiag_to_gpu end interface interface - subroutine psb_d_cp_hdiag_from_coo(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hdiag_from_coo + end subroutine psb_d_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_d_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_d_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_cp_hdiag_from_fmt +!!$ end subroutine psb_d_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_d_mv_hdiag_from_coo(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hdiag_from_coo + end subroutine psb_d_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_d_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_d_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_mv_hdiag_from_fmt +!!$ end subroutine psb_d_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_d_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hdiag_csmv + end subroutine psb_d_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_d_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_d_hdiag_csmm +!!$ end subroutine psb_d_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_scal(d,a,info, side) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_d_hdiag_scal +!!$ end subroutine psb_d_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_d_hdiag_scals(d,a,info) -!!$ import :: psb_d_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_d_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_d_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_d_hdiag_scals +!!$ end subroutine psb_d_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function d_hdiag_get_fmt() result(res) + function d_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function d_hdiag_get_fmt + end function d_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine d_hdiag_free(a) + subroutine d_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine d_hdiag_free + end subroutine d_cuda_hdiag_free - subroutine d_hdiag_finalize(a) + subroutine d_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_d_hdiag_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_d_hdia_sparse_mat%free() return - end subroutine d_hdiag_finalize + end subroutine d_cuda_hdiag_finalize #else interface - subroutine psb_d_hdiag_mold(a,b,info) - import :: psb_d_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hdiag_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hdiag_mold(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hdiag_mold + end subroutine psb_d_cuda_hdiag_mold end interface #endif -end module psb_d_hdiag_mat_mod +end module psb_d_cuda_hdiag_mat_mod diff --git a/cuda/psb_d_hlg_mat_mod.F90 b/cuda/psb_d_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_d_hlg_mat_mod.F90 rename to cuda/psb_d_cuda_hlg_mat_mod.F90 index 756d13aa..19ecb62b 100644 --- a/cuda/psb_d_hlg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_d_hlg_mat_mod +module psb_d_cuda_hlg_mat_mod use iso_c_binding use psb_d_mat_mod @@ -41,7 +41,7 @@ module psb_d_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_hll_sparse_mat) :: psb_d_hlg_sparse_mat + type, extends(psb_d_hll_sparse_mat) :: psb_d_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_d_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => d_hlg_get_fmt - procedure, pass(a) :: sizeof => d_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_d_hlg_vect_mv - procedure, pass(a) :: csmm => psb_d_hlg_csmm - procedure, pass(a) :: csmv => psb_d_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_d_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_d_hlg_scals - procedure, pass(a) :: scalv => psb_d_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_d_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_hlg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_hlg_from_fmt - procedure, pass(a) :: free => d_hlg_free - procedure, pass(a) :: mold => psb_d_hlg_mold - procedure, pass(a) :: is_host => d_hlg_is_host - procedure, pass(a) :: is_dev => d_hlg_is_dev - procedure, pass(a) :: is_sync => d_hlg_is_sync - procedure, pass(a) :: set_host => d_hlg_set_host - procedure, pass(a) :: set_dev => d_hlg_set_dev - procedure, pass(a) :: set_sync => d_hlg_set_sync - procedure, pass(a) :: sync => d_hlg_sync - procedure, pass(a) :: from_gpu => psb_d_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_d_hlg_to_gpu - final :: d_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => d_cuda_hlg_free + procedure, pass(a) :: mold => psb_d_cuda_hlg_mold + procedure, pass(a) :: is_host => d_cuda_hlg_is_host + procedure, pass(a) :: is_dev => d_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => d_cuda_hlg_is_sync + procedure, pass(a) :: set_host => d_cuda_hlg_set_host + procedure, pass(a) :: set_dev => d_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => d_cuda_hlg_set_sync + procedure, pass(a) :: sync => d_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_hlg_to_gpu + final :: d_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_d_hlg_mold + procedure, pass(a) :: mold => psb_d_cuda_hlg_mold #endif - end type psb_d_hlg_sparse_mat + end type psb_d_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: d_hlg_get_nzeros, d_hlg_free, d_hlg_get_fmt, & - & d_hlg_get_size, d_hlg_sizeof, d_hlg_get_nz_row + private :: d_cuda_hlg_get_nzeros, d_cuda_hlg_free, d_cuda_hlg_get_fmt, & + & d_cuda_hlg_get_size, d_cuda_hlg_sizeof, d_cuda_hlg_get_nz_row interface - subroutine psb_d_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_vect_mv + end subroutine psb_d_cuda_hlg_vect_mv end interface interface - subroutine psb_d_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_d_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_inner_vect_sv + end subroutine psb_d_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_d_hlg_reallocate_nz(nz,a) - import :: psb_d_hlg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_d_hlg_reallocate_nz + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hlg_reallocate_nz end interface interface - subroutine psb_d_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_d_hlg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_hlg_allocate_mnnz + end subroutine psb_d_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_d_hlg_mold(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_mold(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_mold + end subroutine psb_d_cuda_hlg_mold end interface interface - subroutine psb_d_hlg_from_gpu(a,info) - import :: psb_d_hlg_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_from_gpu(a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_from_gpu + end subroutine psb_d_cuda_hlg_from_gpu end interface interface - subroutine psb_d_hlg_to_gpu(a,info, nzrm) - import :: psb_d_hlg_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_hlg_to_gpu + end subroutine psb_d_cuda_hlg_to_gpu end interface interface - subroutine psb_d_cp_hlg_from_coo(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hlg_from_coo + end subroutine psb_d_cuda_cp_hlg_from_coo end interface interface - subroutine psb_d_cp_hlg_from_fmt(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hlg_from_fmt + end subroutine psb_d_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_d_mv_hlg_from_coo(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hlg_from_coo + end subroutine psb_d_cuda_mv_hlg_from_coo end interface interface - subroutine psb_d_mv_hlg_from_fmt(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hlg_from_fmt + end subroutine psb_d_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_d_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_csmv + end subroutine psb_d_cuda_hlg_csmv end interface interface - subroutine psb_d_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hlg_csmm + end subroutine psb_d_cuda_hlg_csmm end interface interface - subroutine psb_d_hlg_scal(d,a,info, side) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_scal(d,a,info, side) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_hlg_scal + end subroutine psb_d_cuda_hlg_scal end interface interface - subroutine psb_d_hlg_scals(d,a,info) - import :: psb_d_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hlg_scals(d,a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_scals + end subroutine psb_d_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function d_hlg_sizeof(a) result(res) + function d_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_hlg_sizeof + end function d_cuda_hlg_sizeof - function d_hlg_get_fmt() result(res) + function d_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function d_hlg_get_fmt + end function d_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine d_hlg_free(a) + subroutine d_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine d_hlg_free + end subroutine d_cuda_hlg_free - subroutine d_hlg_sync(a) + subroutine d_cuda_hlg_sync(a) implicit none - class(psb_d_hlg_sparse_mat), target, intent(in) :: a - class(psb_d_hlg_sparse_mat), pointer :: tmpa + class(psb_d_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine d_hlg_sync + end subroutine d_cuda_hlg_sync - subroutine d_hlg_set_host(a) + subroutine d_cuda_hlg_set_host(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine d_hlg_set_host + end subroutine d_cuda_hlg_set_host - subroutine d_hlg_set_dev(a) + subroutine d_cuda_hlg_set_dev(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine d_hlg_set_dev + end subroutine d_cuda_hlg_set_dev - subroutine d_hlg_set_sync(a) + subroutine d_cuda_hlg_set_sync(a) implicit none - class(psb_d_hlg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine d_hlg_set_sync + end subroutine d_cuda_hlg_set_sync - function d_hlg_is_dev(a) result(res) + function d_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function d_hlg_is_dev + end function d_cuda_hlg_is_dev - function d_hlg_is_host(a) result(res) + function d_cuda_hlg_is_host(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function d_hlg_is_host + end function d_cuda_hlg_is_host - function d_hlg_is_sync(a) result(res) + function d_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_d_hlg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function d_hlg_is_sync + end function d_cuda_hlg_is_sync - subroutine d_hlg_finalize(a) + subroutine d_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_d_hlg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine d_hlg_finalize + end subroutine d_cuda_hlg_finalize #else interface - subroutine psb_d_hlg_mold(a,b,info) - import :: psb_d_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hlg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hlg_mold(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hlg_mold + end subroutine psb_d_cuda_hlg_mold end interface #endif -end module psb_d_hlg_mat_mod +end module psb_d_cuda_hlg_mat_mod diff --git a/cuda/psb_d_hybg_mat_mod.F90 b/cuda/psb_d_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_d_hybg_mat_mod.F90 rename to cuda/psb_d_cuda_hybg_mat_mod.F90 index d764daa7..be4c8392 100644 --- a/cuda/psb_d_hybg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_d_hybg_mat_mod +module psb_d_cuda_hybg_mat_mod use iso_c_binding use psb_d_mat_mod use cusparse_mod - type, extends(psb_d_csr_sparse_mat) :: psb_d_hybg_sparse_mat + type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_d_hybg_mat_mod type(d_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => d_hybg_get_fmt - procedure, pass(a) :: sizeof => d_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_d_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_hybg_csmm - procedure, pass(a) :: csmv => psb_d_hybg_csmv - procedure, pass(a) :: scals => psb_d_hybg_scals - procedure, pass(a) :: scalv => psb_d_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_d_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_hybg_allocate_mnnz + procedure, nopass :: get_fmt => d_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_d_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_d_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_mv_hybg_from_fmt - procedure, pass(a) :: free => d_hybg_free - procedure, pass(a) :: mold => psb_d_hybg_mold - procedure, pass(a) :: to_gpu => psb_d_hybg_to_gpu - final :: d_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => d_cuda_hybg_free + procedure, pass(a) :: mold => psb_d_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hybg_to_gpu + final :: d_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_d_hybg_mold + procedure, pass(a) :: mold => psb_d_cuda_hybg_mold #endif - end type psb_d_hybg_sparse_mat + end type psb_d_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: d_hybg_get_nzeros, d_hybg_free, d_hybg_get_fmt, & - & d_hybg_get_size, d_hybg_sizeof, d_hybg_get_nz_row + private :: d_cuda_hybg_get_nzeros, d_cuda_hybg_free, d_cuda_hybg_get_fmt, & + & d_cuda_hybg_get_size, d_cuda_hybg_sizeof, d_cuda_hybg_get_nz_row interface - subroutine psb_d_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_inner_vect_sv + end subroutine psb_d_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_d_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_vect_mv + end subroutine psb_d_cuda_hybg_vect_mv end interface interface - subroutine psb_d_hybg_reallocate_nz(nz,a) - import :: psb_d_hybg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_d_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_d_hybg_reallocate_nz + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hybg_reallocate_nz end interface interface - subroutine psb_d_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_d_hybg_sparse_mat, psb_ipk_ + subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_hybg_allocate_mnnz + end subroutine psb_d_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_d_hybg_mold(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_mold(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_mold + end subroutine psb_d_cuda_hybg_mold end interface interface - subroutine psb_d_hybg_to_gpu(a,info, nzrm) - import :: psb_d_hybg_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_d_hybg_to_gpu + end subroutine psb_d_cuda_hybg_to_gpu end interface interface - subroutine psb_d_cp_hybg_from_coo(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hybg_from_coo + end subroutine psb_d_cuda_cp_hybg_from_coo end interface interface - subroutine psb_d_cp_hybg_from_fmt(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cp_hybg_from_fmt + end subroutine psb_d_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_d_mv_hybg_from_coo(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hybg_from_coo + end subroutine psb_d_cuda_mv_hybg_from_coo end interface interface - subroutine psb_d_mv_hybg_from_fmt(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_mv_hybg_from_fmt + end subroutine psb_d_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_d_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_csmv + end subroutine psb_d_cuda_hybg_csmv end interface interface - subroutine psb_d_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_d_hybg_csmm + end subroutine psb_d_cuda_hybg_csmm end interface interface - subroutine psb_d_hybg_scal(d,a,info,side) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_scal(d,a,info,side) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_d_hybg_scal + end subroutine psb_d_cuda_hybg_scal end interface interface - subroutine psb_d_hybg_scals(d,a,info) - import :: psb_d_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(inout) :: a + subroutine psb_d_cuda_hybg_scals(d,a,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_scals + end subroutine psb_d_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function d_hybg_sizeof(a) result(res) + function d_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_d_hybg_sparse_mat), intent(in) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_dp * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function d_hybg_sizeof + end function d_cuda_hybg_sizeof - function d_hybg_get_fmt() result(res) + function d_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function d_hybg_get_fmt + end function d_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine d_hybg_free(a) + subroutine d_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_d_hybg_sparse_mat), intent(inout) :: a + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_d_csr_sparse_mat%free() return - end subroutine d_hybg_free + end subroutine d_cuda_hybg_free - subroutine d_hybg_finalize(a) + subroutine d_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_d_hybg_sparse_mat), intent(inout) :: a + type(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine d_hybg_finalize + end subroutine d_cuda_hybg_finalize #else interface - subroutine psb_d_hybg_mold(a,b,info) - import :: psb_d_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_hybg_sparse_mat), intent(in) :: a + subroutine psb_d_cuda_hybg_mold(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_hybg_mold + end subroutine psb_d_cuda_hybg_mold end interface #endif -end module psb_d_hybg_mat_mod +end module psb_d_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_d_gpu_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_d_gpu_vect_mod.F90 rename to cuda/psb_d_cuda_vect_mod.F90 index cd3757c3..83ec108b 100644 --- a/cuda/psb_d_gpu_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_d_gpu_vect_mod +module psb_d_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_d_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_d_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_d_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_base_vect_type) :: psb_d_vect_gpu + type, extends(psb_d_base_vect_type) :: psb_d_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_d_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => d_gpu_get_nrows - procedure, nopass :: get_fmt => d_gpu_get_fmt - - procedure, pass(x) :: all => d_gpu_all - procedure, pass(x) :: zero => d_gpu_zero - procedure, pass(x) :: asb_m => d_gpu_asb_m - procedure, pass(x) :: sync => d_gpu_sync - procedure, pass(x) :: sync_space => d_gpu_sync_space - procedure, pass(x) :: bld_x => d_gpu_bld_x - procedure, pass(x) :: bld_mn => d_gpu_bld_mn - procedure, pass(x) :: free => d_gpu_free - procedure, pass(x) :: ins_a => d_gpu_ins_a - procedure, pass(x) :: ins_v => d_gpu_ins_v - procedure, pass(x) :: is_host => d_gpu_is_host - procedure, pass(x) :: is_dev => d_gpu_is_dev - procedure, pass(x) :: is_sync => d_gpu_is_sync - procedure, pass(x) :: set_host => d_gpu_set_host - procedure, pass(x) :: set_dev => d_gpu_set_dev - procedure, pass(x) :: set_sync => d_gpu_set_sync - procedure, pass(x) :: set_scal => d_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => d_gpu_set_vect - procedure, pass(x) :: gthzv_x => d_gpu_gthzv_x - procedure, pass(y) :: sctb => d_gpu_sctb - procedure, pass(y) :: sctb_x => d_gpu_sctb_x - procedure, pass(x) :: gthzbuf => d_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => d_gpu_sctb_buf - procedure, pass(x) :: new_buffer => d_gpu_new_buffer - procedure, nopass :: device_wait => d_gpu_device_wait - procedure, pass(x) :: free_buffer => d_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => d_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => d_gpu_dot_v - procedure, pass(x) :: dot_a => d_gpu_dot_a - procedure, pass(y) :: axpby_v => d_gpu_axpby_v - procedure, pass(y) :: axpby_a => d_gpu_axpby_a - procedure, pass(y) :: mlt_v => d_gpu_mlt_v - procedure, pass(y) :: mlt_a => d_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => d_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => d_gpu_mlt_v_2 - procedure, pass(x) :: scal => d_gpu_scal - procedure, pass(x) :: nrm2 => d_gpu_nrm2 - procedure, pass(x) :: amax => d_gpu_amax - procedure, pass(x) :: asum => d_gpu_asum - procedure, pass(x) :: absval1 => d_gpu_absval1 - procedure, pass(x) :: absval2 => d_gpu_absval2 - - final :: d_gpu_vect_finalize + procedure, pass(x) :: get_nrows => d_cuda_get_nrows + procedure, nopass :: get_fmt => d_cuda_get_fmt + + procedure, pass(x) :: all => d_cuda_all + procedure, pass(x) :: zero => d_cuda_zero + procedure, pass(x) :: asb_m => d_cuda_asb_m + procedure, pass(x) :: sync => d_cuda_sync + procedure, pass(x) :: sync_space => d_cuda_sync_space + procedure, pass(x) :: bld_x => d_cuda_bld_x + procedure, pass(x) :: bld_mn => d_cuda_bld_mn + procedure, pass(x) :: free => d_cuda_free + procedure, pass(x) :: ins_a => d_cuda_ins_a + procedure, pass(x) :: ins_v => d_cuda_ins_v + procedure, pass(x) :: is_host => d_cuda_is_host + procedure, pass(x) :: is_dev => d_cuda_is_dev + procedure, pass(x) :: is_sync => d_cuda_is_sync + procedure, pass(x) :: set_host => d_cuda_set_host + procedure, pass(x) :: set_dev => d_cuda_set_dev + procedure, pass(x) :: set_sync => d_cuda_set_sync + procedure, pass(x) :: set_scal => d_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => d_cuda_set_vect + procedure, pass(x) :: gthzv_x => d_cuda_gthzv_x + procedure, pass(y) :: sctb => d_cuda_sctb + procedure, pass(y) :: sctb_x => d_cuda_sctb_x + procedure, pass(x) :: gthzbuf => d_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => d_cuda_sctb_buf + procedure, pass(x) :: new_buffer => d_cuda_new_buffer + procedure, nopass :: device_wait => d_cuda_device_wait + procedure, pass(x) :: free_buffer => d_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => d_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => d_cuda_dot_v + procedure, pass(x) :: dot_a => d_cuda_dot_a + procedure, pass(y) :: axpby_v => d_cuda_axpby_v + procedure, pass(y) :: axpby_a => d_cuda_axpby_a + procedure, pass(y) :: mlt_v => d_cuda_mlt_v + procedure, pass(y) :: mlt_a => d_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => d_cuda_mlt_v_2 + procedure, pass(x) :: scal => d_cuda_scal + procedure, pass(x) :: nrm2 => d_cuda_nrm2 + procedure, pass(x) :: amax => d_cuda_amax + procedure, pass(x) :: asum => d_cuda_asum + procedure, pass(x) :: absval1 => d_cuda_absval1 + procedure, pass(x) :: absval2 => d_cuda_absval2 + + final :: d_cuda_vect_finalize #endif - end type psb_d_vect_gpu + end type psb_d_vect_cuda - public :: psb_d_vect_gpu_ + public :: psb_d_vect_cuda_ private :: constructor - interface psb_d_vect_gpu_ + interface psb_d_vect_cuda_ module procedure constructor - end interface psb_d_vect_gpu_ + end interface psb_d_vect_cuda_ contains function constructor(x) result(this) real(psb_dpk_) :: x(:) - type(psb_d_vect_gpu) :: this + type(psb_d_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine d_gpu_device_wait() + subroutine d_cuda_device_wait() call psb_cudaSync() - end subroutine d_gpu_device_wait + end subroutine d_cuda_device_wait - subroutine d_gpu_new_buffer(n,x,info) + subroutine d_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine d_gpu_gthzv_x + end subroutine d_cuda_gthzv_x - subroutine d_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine d_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_d_vect_gpu) :: x + class(psb_d_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine d_gpu_gthzbuf + end subroutine d_cuda_gthzbuf - subroutine d_gpu_sctb(n,idx,x,beta,y) + subroutine d_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_d_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine d_gpu_sctb + end subroutine d_cuda_sctb - subroutine d_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine d_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine d_gpu_sctb_x + end subroutine d_cuda_sctb_x - subroutine d_gpu_sctb_buf(i,n,idx,beta,y) + subroutine d_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta - class(psb_d_vect_gpu) :: y + class(psb_d_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine d_gpu_sctb_buf + end subroutine d_cuda_sctb_buf - subroutine d_gpu_bld_x(x,this) + subroutine d_cuda_bld_x(x,this) use psb_base_mod real(psb_dpk_), intent(in) :: this(:) - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_bld_x',& + call psb_errpush(info,'d_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine d_gpu_bld_x + end subroutine d_cuda_bld_x - subroutine d_gpu_bld_mn(x,n) + subroutine d_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'d_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'d_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine d_gpu_bld_mn + end subroutine d_cuda_bld_mn - subroutine d_gpu_set_host(x) + subroutine d_cuda_set_host(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine d_gpu_set_host + end subroutine d_cuda_set_host - subroutine d_gpu_set_dev(x) + subroutine d_cuda_set_dev(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine d_gpu_set_dev + end subroutine d_cuda_set_dev - subroutine d_gpu_set_sync(x) + subroutine d_cuda_set_sync(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine d_gpu_set_sync + end subroutine d_cuda_set_sync - function d_gpu_is_dev(x) result(res) + function d_cuda_is_dev(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function d_gpu_is_dev + end function d_cuda_is_dev - function d_gpu_is_host(x) result(res) + function d_cuda_is_host(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function d_gpu_is_host + end function d_cuda_is_host - function d_gpu_is_sync(x) result(res) + function d_cuda_is_sync(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function d_gpu_is_sync + end function d_cuda_is_sync - function d_gpu_get_nrows(x) result(res) + function d_cuda_get_nrows(x) result(res) implicit none - class(psb_d_vect_gpu), intent(in) :: x + class(psb_d_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function d_gpu_get_nrows + end function d_cuda_get_nrows - function d_gpu_get_fmt() result(res) + function d_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'dGPU' - end function d_gpu_get_fmt + end function d_cuda_get_fmt - subroutine d_gpu_all(n, x, info) + subroutine d_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_d_vect_gpu), intent(out) :: x + class(psb_d_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_all',& + call psb_errpush(info,'d_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine d_gpu_all + end subroutine d_cuda_all - subroutine d_gpu_zero(x) + subroutine d_cuda_zero(x) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine d_gpu_zero + end subroutine d_cuda_zero - subroutine d_gpu_asb_m(n, x, info) + subroutine d_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine d_gpu_asb_m + end subroutine d_cuda_asb_m - subroutine d_gpu_sync_space(x,info) + subroutine d_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine d_gpu_sync_space + end subroutine d_cuda_sync_space - subroutine d_gpu_sync(x) + subroutine d_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'d_gpu_sync') + call psb_errpush(info,'d_cuda_sync') end if - end subroutine d_gpu_sync + end subroutine d_cuda_sync - subroutine d_gpu_free(x, info) + subroutine d_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine d_gpu_free + end subroutine d_cuda_free - subroutine d_gpu_set_scal(x,val,first,last) - class(psb_d_vect_gpu), intent(inout) :: x + subroutine d_cuda_set_scal(x,val,first,last) + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine d_gpu_set_scal + end subroutine d_cuda_set_scal !!$ -!!$ subroutine d_gpu_set_vect(x,val) -!!$ class(psb_d_vect_gpu), intent(inout) :: x +!!$ subroutine d_cuda_set_vect(x,val) +!!$ class(psb_d_vect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_d_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine d_gpu_set_vect +!!$ end subroutine d_cuda_set_vect - function d_gpu_dot_v(n,x,y) result(res) + function d_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_d_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'d_gpu_dot_v') + call psb_errpush(info,'d_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function d_gpu_dot_v + end function d_cuda_dot_v - function d_gpu_dot_a(n,x,y) result(res) + function d_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function d_gpu_dot_a + end function d_cuda_dot_a - subroutine d_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine d_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) ! Do something different here if ((beta /= dzero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine d_gpu_axpby_v + end subroutine d_cuda_axpby_v - subroutine d_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine d_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y real(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine d_gpu_axpby_a + end subroutine d_cuda_axpby_a - subroutine d_gpu_mlt_v(x, y, info) + subroutine d_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine d_gpu_mlt_v + end subroutine d_cuda_mlt_v - subroutine d_gpu_mlt_a(x, y, info) + subroutine d_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_gpu), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_d_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine d_gpu_mlt_a + end subroutine d_cuda_mlt_a - subroutine d_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine d_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none real(psb_dpk_), intent(in) :: alpha,beta real(psb_dpk_), intent(in) :: x(:) real(psb_dpk_), intent(in) :: y(:) - class(psb_d_vect_gpu), intent(inout) :: z + class(psb_d_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine d_gpu_mlt_a_2 + end subroutine d_cuda_mlt_a_2 - subroutine d_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine d_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none real(psb_dpk_), intent(in) :: alpha,beta class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_gpu), intent(inout) :: z + class(psb_d_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) select type (yy => y) - type is (psb_d_vect_gpu) + type is (psb_d_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= dzero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine d_gpu_mlt_v_2 + end subroutine d_cuda_mlt_v_2 - subroutine d_gpu_scal(alpha, x) + subroutine d_cuda_scal(alpha, x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x real(psb_dpk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine d_gpu_scal + end subroutine d_cuda_scal - function d_gpu_nrm2(n,x) result(res) + function d_cuda_nrm2(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDevice(res,n,x%deviceVect) - end function d_gpu_nrm2 + end function d_cuda_nrm2 - function d_gpu_amax(n,x) result(res) + function d_cuda_amax(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDevice(res,n,x%deviceVect) - end function d_gpu_amax + end function d_cuda_amax - function d_gpu_asum(n,x) result(res) + function d_cuda_asum(n,x) result(res) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDevice(res,n,x%deviceVect) - end function d_gpu_asum + end function d_cuda_asum - subroutine d_gpu_absval1(x) + subroutine d_cuda_absval1(x) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,done,x%deviceVect) - end subroutine d_gpu_absval1 + end subroutine d_cuda_absval1 - subroutine d_gpu_absval2(x,y) + subroutine d_cuda_absval2(x,y) implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,done,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_d_base_vect_type%absval(y) end select - end subroutine d_gpu_absval2 + end subroutine d_cuda_absval2 - subroutine d_gpu_vect_finalize(x) + subroutine d_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_d_vect_gpu), intent(inout) :: x + type(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine d_gpu_vect_finalize + end subroutine d_cuda_vect_finalize - subroutine d_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine d_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_d_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_d_vect_gpu) + class is (psb_d_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceDouble(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine d_gpu_ins_v + end subroutine d_cuda_ins_v - subroutine d_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine d_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_vect_gpu), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_dpk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_d_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine d_gpu_ins_a + end subroutine d_cuda_ins_a #endif -end module psb_d_gpu_vect_mod +end module psb_d_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_d_gpu_vect_mod -module psb_d_gpu_multivect_mod +module psb_d_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_d_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_d_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_d_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_d_base_multivect_type) :: psb_d_multivect_gpu + type, extends(psb_d_base_multivect_type) :: psb_d_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_d_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => d_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => d_gpu_multi_get_ncols - procedure, nopass :: get_fmt => d_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => d_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => d_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => d_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => d_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => d_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => d_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => d_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => d_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => d_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => d_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => d_gpu_multi_amax -!!$ procedure, pass(x) :: asum => d_gpu_multi_asum - procedure, pass(x) :: all => d_gpu_multi_all - procedure, pass(x) :: zero => d_gpu_multi_zero - procedure, pass(x) :: asb => d_gpu_multi_asb - procedure, pass(x) :: sync => d_gpu_multi_sync - procedure, pass(x) :: sync_space => d_gpu_multi_sync_space - procedure, pass(x) :: bld_x => d_gpu_multi_bld_x - procedure, pass(x) :: bld_n => d_gpu_multi_bld_n - procedure, pass(x) :: free => d_gpu_multi_free - procedure, pass(x) :: ins => d_gpu_multi_ins - procedure, pass(x) :: is_host => d_gpu_multi_is_host - procedure, pass(x) :: is_dev => d_gpu_multi_is_dev - procedure, pass(x) :: is_sync => d_gpu_multi_is_sync - procedure, pass(x) :: set_host => d_gpu_multi_set_host - procedure, pass(x) :: set_dev => d_gpu_multi_set_dev - procedure, pass(x) :: set_sync => d_gpu_multi_set_sync - procedure, pass(x) :: set_scal => d_gpu_multi_set_scal - procedure, pass(x) :: set_vect => d_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => d_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => d_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => d_gpu_multi_sctb_x - final :: d_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => d_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => d_cuda_multi_get_ncols + procedure, nopass :: get_fmt => d_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => d_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => d_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => d_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => d_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => d_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => d_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => d_cuda_multi_amax +!!$ procedure, pass(x) :: asum => d_cuda_multi_asum + procedure, pass(x) :: all => d_cuda_multi_all + procedure, pass(x) :: zero => d_cuda_multi_zero + procedure, pass(x) :: asb => d_cuda_multi_asb + procedure, pass(x) :: sync => d_cuda_multi_sync + procedure, pass(x) :: sync_space => d_cuda_multi_sync_space + procedure, pass(x) :: bld_x => d_cuda_multi_bld_x + procedure, pass(x) :: bld_n => d_cuda_multi_bld_n + procedure, pass(x) :: free => d_cuda_multi_free + procedure, pass(x) :: ins => d_cuda_multi_ins + procedure, pass(x) :: is_host => d_cuda_multi_is_host + procedure, pass(x) :: is_dev => d_cuda_multi_is_dev + procedure, pass(x) :: is_sync => d_cuda_multi_is_sync + procedure, pass(x) :: set_host => d_cuda_multi_set_host + procedure, pass(x) :: set_dev => d_cuda_multi_set_dev + procedure, pass(x) :: set_sync => d_cuda_multi_set_sync + procedure, pass(x) :: set_scal => d_cuda_multi_set_scal + procedure, pass(x) :: set_vect => d_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => d_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => d_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => d_cuda_multi_sctb_x + final :: d_cuda_multi_vect_finalize #endif - end type psb_d_multivect_gpu + end type psb_d_multivect_cuda - public :: psb_d_multivect_gpu + public :: psb_d_multivect_cuda private :: constructor - interface psb_d_multivect_gpu + interface psb_d_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) real(psb_dpk_) :: x(:,:) - type(psb_d_multivect_gpu) :: this + type(psb_d_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine d_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_dpk_) :: y(:) -!!$ class(psb_d_multivect_gpu) :: x +!!$ class(psb_d_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine d_gpu_multi_gthzv_x +!!$ end subroutine d_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine d_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine d_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_multivect_gpu) :: y +!!$ class(psb_d_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_d_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine d_gpu_multi_sctb +!!$ end subroutine d_cuda_multi_sctb !!$ -!!$ subroutine d_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine d_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_multivect_gpu) :: y +!!$ class(psb_d_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine d_gpu_multi_sctb_x +!!$ end subroutine d_cuda_multi_sctb_x - subroutine d_gpu_multi_bld_x(x,this) + subroutine d_cuda_multi_bld_x(x,this) use psb_base_mod real(psb_dpk_), intent(in) :: this(:,:) - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_multi_bld_x',& + call psb_errpush(info,'d_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine d_gpu_multi_bld_x + end subroutine d_cuda_multi_bld_x - subroutine d_gpu_multi_bld_n(x,m,n) + subroutine d_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'d_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'d_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine d_gpu_multi_bld_n + end subroutine d_cuda_multi_bld_n - subroutine d_gpu_multi_set_host(x) + subroutine d_cuda_multi_set_host(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine d_gpu_multi_set_host + end subroutine d_cuda_multi_set_host - subroutine d_gpu_multi_set_dev(x) + subroutine d_cuda_multi_set_dev(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine d_gpu_multi_set_dev + end subroutine d_cuda_multi_set_dev - subroutine d_gpu_multi_set_sync(x) + subroutine d_cuda_multi_set_sync(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine d_gpu_multi_set_sync + end subroutine d_cuda_multi_set_sync - function d_gpu_multi_is_dev(x) result(res) + function d_cuda_multi_is_dev(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function d_gpu_multi_is_dev + end function d_cuda_multi_is_dev - function d_gpu_multi_is_host(x) result(res) + function d_cuda_multi_is_host(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function d_gpu_multi_is_host + end function d_cuda_multi_is_host - function d_gpu_multi_is_sync(x) result(res) + function d_cuda_multi_is_sync(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function d_gpu_multi_is_sync + end function d_cuda_multi_is_sync - function d_gpu_multi_get_nrows(x) result(res) + function d_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function d_gpu_multi_get_nrows + end function d_cuda_multi_get_nrows - function d_gpu_multi_get_ncols(x) result(res) + function d_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_d_multivect_gpu), intent(in) :: x + class(psb_d_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function d_gpu_multi_get_ncols + end function d_cuda_multi_get_ncols - function d_gpu_multi_get_fmt() result(res) + function d_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'dGPU' - end function d_gpu_multi_get_fmt + end function d_cuda_multi_get_fmt -!!$ function d_gpu_multi_dot_v(n,x,y) result(res) +!!$ function d_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ class(psb_d_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_d_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'d_gpu_multi_dot_v') +!!$ call psb_errpush(info,'d_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function d_gpu_multi_dot_v +!!$ end function d_cuda_multi_dot_v !!$ -!!$ function d_gpu_multi_dot_a(n,x,y) result(res) +!!$ function d_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function d_gpu_multi_dot_a +!!$ end function d_cuda_multi_dot_a !!$ -!!$ subroutine d_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine d_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_d_base_multivect_type), intent(inout) :: x -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ real(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine d_gpu_multi_axpby_v +!!$ end subroutine d_cuda_multi_axpby_v !!$ -!!$ subroutine d_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine d_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ real(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine d_gpu_multi_axpby_a +!!$ end subroutine d_cuda_multi_axpby_a !!$ -!!$ subroutine d_gpu_multi_mlt_v(x, y, info) +!!$ subroutine d_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_d_base_multivect_type), intent(inout) :: x -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine d_gpu_multi_mlt_v +!!$ end subroutine d_cuda_multi_mlt_v !!$ -!!$ subroutine d_gpu_multi_mlt_a(x, y, info) +!!$ subroutine d_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_d_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine d_gpu_multi_mlt_a +!!$ end subroutine d_cuda_multi_mlt_a !!$ -!!$ subroutine d_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine d_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta !!$ real(psb_dpk_), intent(in) :: x(:) !!$ real(psb_dpk_), intent(in) :: y(:) -!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ class(psb_d_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine d_gpu_multi_mlt_a_2 +!!$ end subroutine d_cuda_multi_mlt_a_2 !!$ -!!$ subroutine d_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine d_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ real(psb_dpk_), intent(in) :: alpha,beta !!$ class(psb_d_base_multivect_type), intent(inout) :: x !!$ class(psb_d_base_multivect_type), intent(inout) :: y -!!$ class(psb_d_multivect_gpu), intent(inout) :: z +!!$ class(psb_d_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_d_multivect_gpu) +!!$ type is (psb_d_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine d_gpu_multi_mlt_v_2 +!!$ end subroutine d_cuda_multi_mlt_v_2 - subroutine d_gpu_multi_set_scal(x,val) - class(psb_d_multivect_gpu), intent(inout) :: x + subroutine d_cuda_multi_set_scal(x,val) + class(psb_d_multivect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_d_base_multivect_type%set_scal(val) call x%set_host() - end subroutine d_gpu_multi_set_scal + end subroutine d_cuda_multi_set_scal - subroutine d_gpu_multi_set_vect(x,val) - class(psb_d_multivect_gpu), intent(inout) :: x + subroutine d_cuda_multi_set_vect(x,val) + class(psb_d_multivect_cuda), intent(inout) :: x real(psb_dpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_d_base_multivect_type%set_vect(val) call x%set_host() - end subroutine d_gpu_multi_set_vect + end subroutine d_cuda_multi_set_vect -!!$ subroutine d_gpu_multi_scal(alpha, x) +!!$ subroutine d_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ real(psb_dpk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_d_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine d_gpu_multi_scal +!!$ end subroutine d_cuda_multi_scal !!$ !!$ -!!$ function d_gpu_multi_nrm2(n,x) result(res) +!!$ function d_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function d_gpu_multi_nrm2 +!!$ end function d_cuda_multi_nrm2 !!$ -!!$ function d_gpu_multi_amax(n,x) result(res) +!!$ function d_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function d_gpu_multi_amax +!!$ end function d_cuda_multi_amax !!$ -!!$ function d_gpu_multi_asum(n,x) result(res) +!!$ function d_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_d_multivect_gpu), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function d_gpu_multi_asum +!!$ end function d_cuda_multi_asum - subroutine d_gpu_multi_all(m,n, x, info) + subroutine d_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(out) :: x + class(psb_d_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=dzero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'d_gpu_multi_all',& + call psb_errpush(info,'d_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine d_gpu_multi_all + end subroutine d_cuda_multi_all - subroutine d_gpu_multi_zero(x) + subroutine d_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine d_gpu_multi_zero + end subroutine d_cuda_multi_zero - subroutine d_gpu_multi_asb(m,n, x, info) + subroutine d_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine d_gpu_multi_asb + end subroutine d_cuda_multi_asb - subroutine d_gpu_multi_sync_space(x,info) + subroutine d_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine d_gpu_multi_sync_space + end subroutine d_cuda_multi_sync_space - subroutine d_gpu_multi_sync(x) + subroutine d_cuda_multi_sync(x) implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'d_gpu_multi_sync') + call psb_errpush(info,'d_cuda_multi_sync') end if - end subroutine d_gpu_multi_sync + end subroutine d_cuda_multi_sync - subroutine d_gpu_multi_free(x, info) + subroutine d_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine d_gpu_multi_free + end subroutine d_cuda_multi_free - subroutine d_gpu_multi_vect_finalize(x) + subroutine d_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_d_multivect_gpu), intent(inout) :: x + type(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine d_gpu_multi_vect_finalize + end subroutine d_cuda_multi_vect_finalize - subroutine d_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine d_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_d_multivect_gpu), intent(inout) :: x + class(psb_d_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_dpk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_d_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine d_gpu_multi_ins + end subroutine d_cuda_multi_ins #endif -end module psb_d_gpu_multivect_mod +end module psb_d_cuda_multivect_mod diff --git a/cuda/psb_i_csrg_mat_mod.F90 b/cuda/psb_i_csrg_mat_mod.F90 deleted file mode 100644 index 9a4a3852..00000000 --- a/cuda/psb_i_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_i_csrg_mat_mod - - use iso_c_binding - use psb_i_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_i_csr_sparse_mat) :: psb_i_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(i_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => i_csrg_get_fmt - procedure, pass(a) :: sizeof => i_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_i_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_i_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_i_csrg_csmm - procedure, pass(a) :: csmv => psb_i_csrg_csmv - procedure, pass(a) :: scals => psb_i_csrg_scals - procedure, pass(a) :: scalv => psb_i_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_i_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_csrg_from_fmt - procedure, pass(a) :: free => i_csrg_free - procedure, pass(a) :: mold => psb_i_csrg_mold - procedure, pass(a) :: is_host => i_csrg_is_host - procedure, pass(a) :: is_dev => i_csrg_is_dev - procedure, pass(a) :: is_sync => i_csrg_is_sync - procedure, pass(a) :: set_host => i_csrg_set_host - procedure, pass(a) :: set_dev => i_csrg_set_dev - procedure, pass(a) :: set_sync => i_csrg_set_sync - procedure, pass(a) :: sync => i_csrg_sync - procedure, pass(a) :: to_gpu => psb_i_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_i_csrg_from_gpu - final :: i_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_csrg_mold -#endif - end type psb_i_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: i_csrg_get_nzeros, i_csrg_free, i_csrg_get_fmt, & - & i_csrg_get_size, i_csrg_sizeof, i_csrg_get_nz_row - - - interface - subroutine psb_i_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta - class(psb_i_base_vect_type), intent(inout) :: x - class(psb_i_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_i_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta - class(psb_i_base_vect_type), intent(inout) :: x - class(psb_i_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_vect_mv - end interface - - interface - subroutine psb_i_csrg_reallocate_nz(nz,a) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_i_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_i_csrg_reallocate_nz - end interface - - interface - subroutine psb_i_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_csrg_allocate_mnnz - end interface - - interface - subroutine psb_i_csrg_mold(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_mold - end interface - - interface - subroutine psb_i_csrg_to_gpu(a,info, nzrm) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_csrg_to_gpu - end interface - - interface - subroutine psb_i_csrg_from_gpu(a,info) - import :: psb_i_csrg_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_from_gpu - end interface - - interface - subroutine psb_i_cp_csrg_from_coo(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_csrg_from_coo - end interface - - interface - subroutine psb_i_cp_csrg_from_fmt(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_csrg_from_fmt - end interface - - interface - subroutine psb_i_mv_csrg_from_coo(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_csrg_from_coo - end interface - - interface - subroutine psb_i_mv_csrg_from_fmt(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - class(psb_i_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_csrg_from_fmt - end interface - - interface - subroutine psb_i_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta, x(:) - integer(psb_ipk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_csmv - end interface - interface - subroutine psb_i_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) - integer(psb_ipk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_i_csrg_csmm - end interface - - interface - subroutine psb_i_csrg_scal(d,a,info,side) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_i_csrg_scal - end interface - - interface - subroutine psb_i_csrg_scals(d,a,info) - import :: psb_i_csrg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function i_csrg_sizeof(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_ip * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function i_csrg_sizeof - - function i_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function i_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine i_csrg_set_host(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine i_csrg_set_host - - subroutine i_csrg_set_dev(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine i_csrg_set_dev - - subroutine i_csrg_set_sync(a) - implicit none - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine i_csrg_set_sync - - function i_csrg_is_dev(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function i_csrg_is_dev - - function i_csrg_is_host(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function i_csrg_is_host - - function i_csrg_is_sync(a) result(res) - implicit none - class(psb_i_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function i_csrg_is_sync - - - subroutine i_csrg_sync(a) - implicit none - class(psb_i_csrg_sparse_mat), target, intent(in) :: a - class(psb_i_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine i_csrg_sync - - subroutine i_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_i_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_i_csr_sparse_mat%free() - - return - - end subroutine i_csrg_free - - subroutine i_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_i_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine i_csrg_finalize - -#else - interface - subroutine psb_i_csrg_mold(a,b,info) - import :: psb_i_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_csrg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_csrg_mold - end interface - -#endif - -end module psb_i_csrg_mat_mod diff --git a/cuda/psb_i_cuda_csrg_mat_mod.F90 b/cuda/psb_i_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..de0eac09 --- /dev/null +++ b/cuda/psb_i_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_csrg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(i_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_i_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_i_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => i_cuda_csrg_free + procedure, pass(a) :: mold => psb_i_cuda_csrg_mold + procedure, pass(a) :: is_host => i_cuda_csrg_is_host + procedure, pass(a) :: is_dev => i_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => i_cuda_csrg_is_sync + procedure, pass(a) :: set_host => i_cuda_csrg_set_host + procedure, pass(a) :: set_dev => i_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => i_cuda_csrg_set_sync + procedure, pass(a) :: sync => i_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_i_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_i_cuda_csrg_from_gpu + final :: i_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_i_cuda_csrg_mold +#endif + end type psb_i_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: i_cuda_csrg_get_nzeros, i_cuda_csrg_free, i_cuda_csrg_get_fmt, & + & i_cuda_csrg_get_size, i_cuda_csrg_sizeof, i_cuda_csrg_get_nz_row + + + interface + subroutine psb_i_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_i_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_i_cuda_csrg_reallocate_nz(nz,a) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_csrg_mold(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_mold + end interface + + interface + subroutine psb_i_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_i_cuda_csrg_from_gpu(a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_csmv + end interface + interface + subroutine psb_i_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_csmm + end interface + + interface + subroutine psb_i_cuda_csrg_scal(d,a,info,side) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_cuda_csrg_scal + end interface + + interface + subroutine psb_i_cuda_csrg_scals(d,a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_ip * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_cuda_csrg_sizeof + + function i_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function i_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine i_cuda_csrg_set_host(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_cuda_csrg_set_host + + subroutine i_cuda_csrg_set_dev(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_cuda_csrg_set_dev + + subroutine i_cuda_csrg_set_sync(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_cuda_csrg_set_sync + + function i_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_cuda_csrg_is_dev + + function i_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_cuda_csrg_is_host + + function i_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_cuda_csrg_is_sync + + + subroutine i_cuda_csrg_sync(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_cuda_csrg_sync + + subroutine i_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_cuda_csrg_free + + subroutine i_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine i_cuda_csrg_finalize + +#else + interface + subroutine psb_i_cuda_csrg_mold(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_mold + end interface + +#endif + +end module psb_i_cuda_csrg_mat_mod diff --git a/cuda/psb_i_diag_mat_mod.F90 b/cuda/psb_i_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_i_diag_mat_mod.F90 rename to cuda/psb_i_cuda_diag_mat_mod.F90 index b54ee8d5..94a3cc3e 100644 --- a/cuda/psb_i_diag_mat_mod.F90 +++ b/cuda/psb_i_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_i_diag_mat_mod +module psb_i_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_i_dia_mat_mod - type, extends(psb_i_dia_sparse_mat) :: psb_i_diag_sparse_mat + type, extends(psb_i_dia_sparse_mat) :: psb_i_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_i_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_diag_get_fmt - procedure, pass(a) :: sizeof => i_diag_sizeof - procedure, pass(a) :: vect_mv => psb_i_diag_vect_mv -! procedure, pass(a) :: csmm => psb_i_diag_csmm - procedure, pass(a) :: csmv => psb_i_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_i_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_i_diag_scals -! procedure, pass(a) :: scalv => psb_i_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_i_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_i_diag_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => i_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_i_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_i_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_i_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_i_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_i_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_i_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_i_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_i_mv_diag_from_fmt - procedure, pass(a) :: free => i_diag_free - procedure, pass(a) :: mold => psb_i_diag_mold - procedure, pass(a) :: to_gpu => psb_i_diag_to_gpu - final :: i_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => i_cuda_diag_free + procedure, pass(a) :: mold => psb_i_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_diag_to_gpu + final :: i_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_i_diag_mold + procedure, pass(a) :: mold => psb_i_cuda_diag_mold #endif - end type psb_i_diag_sparse_mat + end type psb_i_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: i_diag_get_nzeros, i_diag_free, i_diag_get_fmt, & - & i_diag_get_size, i_diag_sizeof, i_diag_get_nz_row + private :: i_cuda_diag_get_nzeros, i_cuda_diag_free, i_cuda_diag_get_fmt, & + & i_cuda_diag_get_size, i_cuda_diag_sizeof, i_cuda_diag_get_nz_row interface - subroutine psb_i_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_vect_mv + end subroutine psb_i_cuda_diag_vect_mv end interface interface - subroutine psb_i_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_inner_vect_sv + end subroutine psb_i_cuda_diag_inner_vect_sv end interface interface - subroutine psb_i_diag_reallocate_nz(nz,a) - import :: psb_i_diag_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_diag_reallocate_nz(nz,a) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_diag_sparse_mat), intent(inout) :: a - end subroutine psb_i_diag_reallocate_nz + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_diag_reallocate_nz end interface interface - subroutine psb_i_diag_allocate_mnnz(m,n,a,nz) - import :: psb_i_diag_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_diag_allocate_mnnz + end subroutine psb_i_cuda_diag_allocate_mnnz end interface interface - subroutine psb_i_diag_mold(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_mold(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_mold + end subroutine psb_i_cuda_diag_mold end interface interface - subroutine psb_i_diag_to_gpu(a,info, nzrm) - import :: psb_i_diag_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_diag_to_gpu + end subroutine psb_i_cuda_diag_to_gpu end interface interface - subroutine psb_i_cp_diag_from_coo(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_diag_from_coo(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_diag_from_coo + end subroutine psb_i_cuda_cp_diag_from_coo end interface interface - subroutine psb_i_cp_diag_from_fmt(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_diag_from_fmt + end subroutine psb_i_cuda_cp_diag_from_fmt end interface interface - subroutine psb_i_mv_diag_from_coo(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_diag_from_coo(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_diag_from_coo + end subroutine psb_i_cuda_mv_diag_from_coo end interface interface - subroutine psb_i_mv_diag_from_fmt(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_diag_from_fmt + end subroutine psb_i_cuda_mv_diag_from_fmt end interface interface - subroutine psb_i_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_csmv + end subroutine psb_i_cuda_diag_csmv end interface interface - subroutine psb_i_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_diag_csmm + end subroutine psb_i_cuda_diag_csmm end interface interface - subroutine psb_i_diag_scal(d,a,info, side) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_scal(d,a,info, side) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_diag_scal + end subroutine psb_i_cuda_diag_scal end interface interface - subroutine psb_i_diag_scals(d,a,info) - import :: psb_i_diag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_diag_scals(d,a,info) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_scals + end subroutine psb_i_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function i_diag_sizeof(a) result(res) + function i_cuda_diag_sizeof(a) result(res) implicit none - class(psb_i_diag_sparse_mat), intent(in) :: a + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_diag_sizeof + end function i_cuda_diag_sizeof - function i_diag_get_fmt() result(res) + function i_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function i_diag_get_fmt + end function i_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine i_diag_free(a) + subroutine i_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_diag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine i_diag_free + end subroutine i_cuda_diag_free - subroutine i_diag_finalize(a) + subroutine i_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_i_diag_sparse_mat), intent(inout) :: a + type(psb_i_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_diag_finalize + end subroutine i_cuda_diag_finalize #else interface - subroutine psb_i_diag_mold(a,b,info) - import :: psb_i_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_diag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_diag_mold(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_diag_mold + end subroutine psb_i_cuda_diag_mold end interface #endif -end module psb_i_diag_mat_mod +end module psb_i_cuda_diag_mat_mod diff --git a/cuda/psb_i_dnsg_mat_mod.F90 b/cuda/psb_i_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_i_dnsg_mat_mod.F90 rename to cuda/psb_i_cuda_dnsg_mat_mod.F90 index 978996ae..f357977e 100644 --- a/cuda/psb_i_dnsg_mat_mod.F90 +++ b/cuda/psb_i_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_i_dnsg_mat_mod +module psb_i_cuda_dnsg_mat_mod use iso_c_binding use psb_i_mat_mod use psb_i_dns_mat_mod use dnsdev_mod - type, extends(psb_i_dns_sparse_mat) :: psb_i_dnsg_sparse_mat + type, extends(psb_i_dns_sparse_mat) :: psb_i_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_i_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => i_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_i_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_i_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_i_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_i_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_i_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_i_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_i_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_i_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => i_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_i_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_i_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_i_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_i_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_i_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_i_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_i_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_i_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_dnsg_from_fmt - procedure, pass(a) :: free => i_dnsg_free - procedure, pass(a) :: mold => psb_i_dnsg_mold - procedure, pass(a) :: to_gpu => psb_i_dnsg_to_gpu - final :: i_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => i_cuda_dnsg_free + procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_dnsg_to_gpu + final :: i_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_i_dnsg_mold + procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold #endif - end type psb_i_dnsg_sparse_mat + end type psb_i_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: i_dnsg_get_nzeros, i_dnsg_free, i_dnsg_get_fmt, & - & i_dnsg_get_size, i_dnsg_get_nz_row + private :: i_cuda_dnsg_get_nzeros, i_cuda_dnsg_free, i_cuda_dnsg_get_fmt, & + & i_cuda_dnsg_get_size, i_cuda_dnsg_get_nz_row interface - subroutine psb_i_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_dnsg_vect_mv + end subroutine psb_i_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_i_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta !!$ class(psb_i_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_inner_vect_sv +!!$ end subroutine psb_i_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_i_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_i_dnsg_reallocate_nz +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_i_dnsg_allocate_mnnz +!!$ end subroutine psb_i_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_i_dnsg_mold(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_mold(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_mold + end subroutine psb_i_cuda_dnsg_mold end interface interface - subroutine psb_i_dnsg_to_gpu(a,info) - import :: psb_i_dnsg_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_dnsg_to_gpu(a,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_to_gpu + end subroutine psb_i_cuda_dnsg_to_gpu end interface interface - subroutine psb_i_cp_dnsg_from_coo(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_dnsg_from_coo + end subroutine psb_i_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_i_cp_dnsg_from_fmt(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_dnsg_from_fmt + end subroutine psb_i_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_i_mv_dnsg_from_coo(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_dnsg_from_coo + end subroutine psb_i_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_i_mv_dnsg_from_fmt(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_dnsg_from_fmt + end subroutine psb_i_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_i_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:) !!$ integer(psb_ipk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_csmv +!!$ end subroutine psb_i_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_i_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) !!$ integer(psb_ipk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_dnsg_csmm +!!$ end subroutine psb_i_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_scal(d,a,info, side) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_i_dnsg_scal +!!$ end subroutine psb_i_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_dnsg_scals(d,a,info) -!!$ import :: psb_i_dnsg_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_dnsg_scals +!!$ end subroutine psb_i_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function i_dnsg_get_fmt() result(res) + function i_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function i_dnsg_get_fmt + end function i_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine i_dnsg_free(a) + subroutine i_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine i_dnsg_free + end subroutine i_cuda_dnsg_free - subroutine i_dnsg_finalize(a) + subroutine i_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_i_dnsg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_dnsg_finalize + end subroutine i_cuda_dnsg_finalize #else interface - subroutine psb_i_dnsg_mold(a,b,info) - import :: psb_i_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_dnsg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_dnsg_mold(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_dnsg_mold + end subroutine psb_i_cuda_dnsg_mold end interface #endif -end module psb_i_dnsg_mat_mod +end module psb_i_cuda_dnsg_mat_mod diff --git a/cuda/psb_i_elg_mat_mod.F90 b/cuda/psb_i_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_i_elg_mat_mod.F90 rename to cuda/psb_i_cuda_elg_mat_mod.F90 index a421e611..aa3e2d4d 100644 --- a/cuda/psb_i_elg_mat_mod.F90 +++ b/cuda/psb_i_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_i_elg_mat_mod +module psb_i_cuda_elg_mat_mod use iso_c_binding use psb_i_mat_mod use psb_i_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_ell_sparse_mat) :: psb_i_elg_sparse_mat + type, extends(psb_i_ell_sparse_mat) :: psb_i_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_i_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => i_elg_get_fmt - procedure, pass(a) :: sizeof => i_elg_sizeof - procedure, pass(a) :: vect_mv => psb_i_elg_vect_mv - procedure, pass(a) :: csmm => psb_i_elg_csmm - procedure, pass(a) :: csmv => psb_i_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_i_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_i_elg_scals - procedure, pass(a) :: scalv => psb_i_elg_scal - procedure, pass(a) :: reallocate_nz => psb_i_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_elg_allocate_mnnz - procedure, pass(a) :: reinit => i_elg_reinit + procedure, nopass :: get_fmt => i_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_i_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_cuda_elg_scals + procedure, pass(a) :: scalv => psb_i_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => i_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_elg_from_fmt - procedure, pass(a) :: free => i_elg_free - procedure, pass(a) :: mold => psb_i_elg_mold - procedure, pass(a) :: csput_a => psb_i_elg_csput_a - procedure, pass(a) :: csput_v => psb_i_elg_csput_v - procedure, pass(a) :: is_host => i_elg_is_host - procedure, pass(a) :: is_dev => i_elg_is_dev - procedure, pass(a) :: is_sync => i_elg_is_sync - procedure, pass(a) :: set_host => i_elg_set_host - procedure, pass(a) :: set_dev => i_elg_set_dev - procedure, pass(a) :: set_sync => i_elg_set_sync - procedure, pass(a) :: sync => i_elg_sync - procedure, pass(a) :: from_gpu => psb_i_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_i_elg_to_gpu - procedure, pass(a) :: asb => psb_i_elg_asb - final :: i_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => i_cuda_elg_free + procedure, pass(a) :: mold => psb_i_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_i_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_i_cuda_elg_csput_v + procedure, pass(a) :: is_host => i_cuda_elg_is_host + procedure, pass(a) :: is_dev => i_cuda_elg_is_dev + procedure, pass(a) :: is_sync => i_cuda_elg_is_sync + procedure, pass(a) :: set_host => i_cuda_elg_set_host + procedure, pass(a) :: set_dev => i_cuda_elg_set_dev + procedure, pass(a) :: set_sync => i_cuda_elg_set_sync + procedure, pass(a) :: sync => i_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_i_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_i_cuda_elg_asb + final :: i_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_i_elg_mold - procedure, pass(a) :: asb => psb_i_elg_asb + procedure, pass(a) :: mold => psb_i_cuda_elg_mold + procedure, pass(a) :: asb => psb_i_cuda_elg_asb #endif - end type psb_i_elg_sparse_mat + end type psb_i_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: i_elg_get_nzeros, i_elg_free, i_elg_get_fmt, & - & i_elg_get_size, i_elg_sizeof, i_elg_get_nz_row, i_elg_sync + private :: i_cuda_elg_get_nzeros, i_cuda_elg_free, i_cuda_elg_get_fmt, & + & i_cuda_elg_get_size, i_cuda_elg_sizeof, i_cuda_elg_get_nz_row, i_cuda_elg_sync interface - subroutine psb_i_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_vect_mv + end subroutine psb_i_cuda_elg_vect_mv end interface interface - subroutine psb_i_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_inner_vect_sv + end subroutine psb_i_cuda_elg_inner_vect_sv end interface interface - subroutine psb_i_elg_reallocate_nz(nz,a) - import :: psb_i_elg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_elg_reallocate_nz(nz,a) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_reallocate_nz + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_reallocate_nz end interface interface - subroutine psb_i_elg_allocate_mnnz(m,n,a,nz) - import :: psb_i_elg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_elg_allocate_mnnz + end subroutine psb_i_cuda_elg_allocate_mnnz end interface interface - subroutine psb_i_elg_mold(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_mold(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_mold + end subroutine psb_i_cuda_elg_mold end interface interface - subroutine psb_i_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_csput_a + end subroutine psb_i_cuda_elg_csput_a end interface interface - subroutine psb_i_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_i_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& + subroutine psb_i_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& & psb_i_base_vect_type - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_csput_v + end subroutine psb_i_cuda_elg_csput_v end interface interface - subroutine psb_i_elg_from_gpu(a,info) - import :: psb_i_elg_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_from_gpu(a,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_from_gpu + end subroutine psb_i_cuda_elg_from_gpu end interface interface - subroutine psb_i_elg_to_gpu(a,info, nzrm) - import :: psb_i_elg_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_elg_to_gpu + end subroutine psb_i_cuda_elg_to_gpu end interface interface - subroutine psb_i_cp_elg_from_coo(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_elg_from_coo(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_elg_from_coo + end subroutine psb_i_cuda_cp_elg_from_coo end interface interface - subroutine psb_i_cp_elg_from_fmt(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_elg_from_fmt + end subroutine psb_i_cuda_cp_elg_from_fmt end interface interface - subroutine psb_i_mv_elg_from_coo(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_elg_from_coo(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_elg_from_coo + end subroutine psb_i_cuda_mv_elg_from_coo end interface interface - subroutine psb_i_mv_elg_from_fmt(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_elg_from_fmt + end subroutine psb_i_cuda_mv_elg_from_fmt end interface interface - subroutine psb_i_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_csmv + end subroutine psb_i_cuda_elg_csmv end interface interface - subroutine psb_i_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_elg_csmm + end subroutine psb_i_cuda_elg_csmm end interface interface - subroutine psb_i_elg_scal(d,a,info, side) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_scal(d,a,info, side) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_elg_scal + end subroutine psb_i_cuda_elg_scal end interface interface - subroutine psb_i_elg_scals(d,a,info) - import :: psb_i_elg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_elg_scals(d,a,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_scals + end subroutine psb_i_cuda_elg_scals end interface interface - subroutine psb_i_elg_asb(a) - import :: psb_i_elg_sparse_mat - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_asb + subroutine psb_i_cuda_elg_asb(a) + import :: psb_i_cuda_elg_sparse_mat + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function i_elg_sizeof(a) result(res) + function i_cuda_elg_sizeof(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_elg_sizeof + end function i_cuda_elg_sizeof - function i_elg_get_fmt() result(res) + function i_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function i_elg_get_fmt + end function i_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine i_elg_reinit(a,clear) + subroutine i_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine i_elg_reinit + end subroutine i_cuda_elg_reinit - subroutine i_elg_free(a) + subroutine i_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine i_elg_free + end subroutine i_cuda_elg_free - subroutine i_elg_sync(a) + subroutine i_cuda_elg_sync(a) implicit none - class(psb_i_elg_sparse_mat), target, intent(in) :: a - class(psb_i_elg_sparse_mat), pointer :: tmpa + class(psb_i_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine i_elg_sync + end subroutine i_cuda_elg_sync - subroutine i_elg_set_host(a) + subroutine i_cuda_elg_set_host(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine i_elg_set_host + end subroutine i_cuda_elg_set_host - subroutine i_elg_set_dev(a) + subroutine i_cuda_elg_set_dev(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine i_elg_set_dev + end subroutine i_cuda_elg_set_dev - subroutine i_elg_set_sync(a) + subroutine i_cuda_elg_set_sync(a) implicit none - class(psb_i_elg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine i_elg_set_sync + end subroutine i_cuda_elg_set_sync - function i_elg_is_dev(a) result(res) + function i_cuda_elg_is_dev(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function i_elg_is_dev + end function i_cuda_elg_is_dev - function i_elg_is_host(a) result(res) + function i_cuda_elg_is_host(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function i_elg_is_host + end function i_cuda_elg_is_host - function i_elg_is_sync(a) result(res) + function i_cuda_elg_is_sync(a) result(res) implicit none - class(psb_i_elg_sparse_mat), intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function i_elg_is_sync + end function i_cuda_elg_is_sync - subroutine i_elg_finalize(a) + subroutine i_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_i_elg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_elg_finalize + end subroutine i_cuda_elg_finalize #else interface - subroutine psb_i_elg_asb(a) - import :: psb_i_elg_sparse_mat - class(psb_i_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_elg_asb + subroutine psb_i_cuda_elg_asb(a) + import :: psb_i_cuda_elg_sparse_mat + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_asb end interface interface - subroutine psb_i_elg_mold(a,b,info) - import :: psb_i_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_elg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_elg_mold(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_elg_mold + end subroutine psb_i_cuda_elg_mold end interface #endif -end module psb_i_elg_mat_mod +end module psb_i_cuda_elg_mat_mod diff --git a/cuda/psb_i_hdiag_mat_mod.F90 b/cuda/psb_i_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_i_hdiag_mat_mod.F90 rename to cuda/psb_i_cuda_hdiag_mat_mod.F90 index a42030b8..03ff573b 100644 --- a/cuda/psb_i_hdiag_mat_mod.F90 +++ b/cuda/psb_i_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_i_hdiag_mat_mod +module psb_i_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_i_hdia_mat_mod - type, extends(psb_i_hdia_sparse_mat) :: psb_i_hdiag_sparse_mat + type, extends(psb_i_hdia_sparse_mat) :: psb_i_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => i_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => i_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_i_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_i_hdiag_csmm - procedure, pass(a) :: csmv => psb_i_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_i_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_i_hdiag_scals - ! procedure, pass(a) :: scalv => psb_i_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_i_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_i_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => i_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_i_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_i_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_i_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_i_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_i_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_i_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_i_mv_hdiag_from_fmt - procedure, pass(a) :: free => i_hdiag_free - procedure, pass(a) :: mold => psb_i_hdiag_mold - procedure, pass(a) :: to_gpu => psb_i_hdiag_to_gpu - final :: i_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => i_cuda_hdiag_free + procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_hdiag_to_gpu + final :: i_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_i_hdiag_mold + procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold #endif - end type psb_i_hdiag_sparse_mat + end type psb_i_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: i_hdiag_get_nzeros, i_hdiag_free, i_hdiag_get_fmt, & - & i_hdiag_get_size, i_hdiag_sizeof, i_hdiag_get_nz_row + private :: i_cuda_hdiag_get_nzeros, i_cuda_hdiag_free, i_cuda_hdiag_get_fmt, & + & i_cuda_hdiag_get_size, i_cuda_hdiag_sizeof, i_cuda_hdiag_get_nz_row interface - subroutine psb_i_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hdiag_vect_mv + end subroutine psb_i_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_i_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_i_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type -!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta !!$ class(psb_i_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_hdiag_inner_vect_sv +!!$ end subroutine psb_i_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_i_hdiag_reallocate_nz +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_i_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_i_hdiag_allocate_mnnz +!!$ end subroutine psb_i_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_i_hdiag_mold(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_mold(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_mold + end subroutine psb_i_cuda_hdiag_mold end interface interface - subroutine psb_i_hdiag_to_gpu(a,info) - import :: psb_i_hdiag_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hdiag_to_gpu(a,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_to_gpu + end subroutine psb_i_cuda_hdiag_to_gpu end interface interface - subroutine psb_i_cp_hdiag_from_coo(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hdiag_from_coo + end subroutine psb_i_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_i_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_i_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_cp_hdiag_from_fmt +!!$ end subroutine psb_i_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_i_mv_hdiag_from_coo(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hdiag_from_coo + end subroutine psb_i_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_i_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_i_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_mv_hdiag_from_fmt +!!$ end subroutine psb_i_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_i_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hdiag_csmv + end subroutine psb_i_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_i_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_i_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a !!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) !!$ integer(psb_ipk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_i_hdiag_csmm +!!$ end subroutine psb_i_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_scal(d,a,info, side) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_i_hdiag_scal +!!$ end subroutine psb_i_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_i_hdiag_scals(d,a,info) -!!$ import :: psb_i_hdiag_sparse_mat, psb_ipk_, psb_ipk_ -!!$ class(psb_i_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_i_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_i_hdiag_scals +!!$ end subroutine psb_i_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function i_hdiag_get_fmt() result(res) + function i_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function i_hdiag_get_fmt + end function i_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine i_hdiag_free(a) + subroutine i_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine i_hdiag_free + end subroutine i_cuda_hdiag_free - subroutine i_hdiag_finalize(a) + subroutine i_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_i_hdiag_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_i_hdia_sparse_mat%free() return - end subroutine i_hdiag_finalize + end subroutine i_cuda_hdiag_finalize #else interface - subroutine psb_i_hdiag_mold(a,b,info) - import :: psb_i_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hdiag_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hdiag_mold(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hdiag_mold + end subroutine psb_i_cuda_hdiag_mold end interface #endif -end module psb_i_hdiag_mat_mod +end module psb_i_cuda_hdiag_mat_mod diff --git a/cuda/psb_i_hlg_mat_mod.F90 b/cuda/psb_i_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_i_hlg_mat_mod.F90 rename to cuda/psb_i_cuda_hlg_mat_mod.F90 index 2ec881ce..f97470d2 100644 --- a/cuda/psb_i_hlg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_i_hlg_mat_mod +module psb_i_cuda_hlg_mat_mod use iso_c_binding use psb_i_mat_mod @@ -41,7 +41,7 @@ module psb_i_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_hll_sparse_mat) :: psb_i_hlg_sparse_mat + type, extends(psb_i_hll_sparse_mat) :: psb_i_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_i_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => i_hlg_get_fmt - procedure, pass(a) :: sizeof => i_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_i_hlg_vect_mv - procedure, pass(a) :: csmm => psb_i_hlg_csmm - procedure, pass(a) :: csmv => psb_i_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_i_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_i_hlg_scals - procedure, pass(a) :: scalv => psb_i_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_i_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_hlg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_i_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_i_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_hlg_from_fmt - procedure, pass(a) :: free => i_hlg_free - procedure, pass(a) :: mold => psb_i_hlg_mold - procedure, pass(a) :: is_host => i_hlg_is_host - procedure, pass(a) :: is_dev => i_hlg_is_dev - procedure, pass(a) :: is_sync => i_hlg_is_sync - procedure, pass(a) :: set_host => i_hlg_set_host - procedure, pass(a) :: set_dev => i_hlg_set_dev - procedure, pass(a) :: set_sync => i_hlg_set_sync - procedure, pass(a) :: sync => i_hlg_sync - procedure, pass(a) :: from_gpu => psb_i_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_i_hlg_to_gpu - final :: i_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => i_cuda_hlg_free + procedure, pass(a) :: mold => psb_i_cuda_hlg_mold + procedure, pass(a) :: is_host => i_cuda_hlg_is_host + procedure, pass(a) :: is_dev => i_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => i_cuda_hlg_is_sync + procedure, pass(a) :: set_host => i_cuda_hlg_set_host + procedure, pass(a) :: set_dev => i_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => i_cuda_hlg_set_sync + procedure, pass(a) :: sync => i_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_i_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_cuda_hlg_to_gpu + final :: i_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_i_hlg_mold + procedure, pass(a) :: mold => psb_i_cuda_hlg_mold #endif - end type psb_i_hlg_sparse_mat + end type psb_i_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: i_hlg_get_nzeros, i_hlg_free, i_hlg_get_fmt, & - & i_hlg_get_size, i_hlg_sizeof, i_hlg_get_nz_row + private :: i_cuda_hlg_get_nzeros, i_cuda_hlg_free, i_cuda_hlg_get_fmt, & + & i_cuda_hlg_get_size, i_cuda_hlg_sizeof, i_cuda_hlg_get_nz_row interface - subroutine psb_i_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_vect_mv + end subroutine psb_i_cuda_hlg_vect_mv end interface interface - subroutine psb_i_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_i_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_inner_vect_sv + end subroutine psb_i_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_i_hlg_reallocate_nz(nz,a) - import :: psb_i_hlg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hlg_reallocate_nz(nz,a) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_i_hlg_reallocate_nz + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_hlg_reallocate_nz end interface interface - subroutine psb_i_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_i_hlg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_hlg_allocate_mnnz + end subroutine psb_i_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_i_hlg_mold(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_mold(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_mold + end subroutine psb_i_cuda_hlg_mold end interface interface - subroutine psb_i_hlg_from_gpu(a,info) - import :: psb_i_hlg_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_from_gpu(a,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_from_gpu + end subroutine psb_i_cuda_hlg_from_gpu end interface interface - subroutine psb_i_hlg_to_gpu(a,info, nzrm) - import :: psb_i_hlg_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_hlg_to_gpu + end subroutine psb_i_cuda_hlg_to_gpu end interface interface - subroutine psb_i_cp_hlg_from_coo(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hlg_from_coo + end subroutine psb_i_cuda_cp_hlg_from_coo end interface interface - subroutine psb_i_cp_hlg_from_fmt(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hlg_from_fmt + end subroutine psb_i_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_i_mv_hlg_from_coo(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hlg_from_coo + end subroutine psb_i_cuda_mv_hlg_from_coo end interface interface - subroutine psb_i_mv_hlg_from_fmt(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hlg_from_fmt + end subroutine psb_i_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_i_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_csmv + end subroutine psb_i_cuda_hlg_csmv end interface interface - subroutine psb_i_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hlg_csmm + end subroutine psb_i_cuda_hlg_csmm end interface interface - subroutine psb_i_hlg_scal(d,a,info, side) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_scal(d,a,info, side) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_hlg_scal + end subroutine psb_i_cuda_hlg_scal end interface interface - subroutine psb_i_hlg_scals(d,a,info) - import :: psb_i_hlg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hlg_scals(d,a,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_scals + end subroutine psb_i_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function i_hlg_sizeof(a) result(res) + function i_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_hlg_sizeof + end function i_cuda_hlg_sizeof - function i_hlg_get_fmt() result(res) + function i_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function i_hlg_get_fmt + end function i_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine i_hlg_free(a) + subroutine i_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine i_hlg_free + end subroutine i_cuda_hlg_free - subroutine i_hlg_sync(a) + subroutine i_cuda_hlg_sync(a) implicit none - class(psb_i_hlg_sparse_mat), target, intent(in) :: a - class(psb_i_hlg_sparse_mat), pointer :: tmpa + class(psb_i_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine i_hlg_sync + end subroutine i_cuda_hlg_sync - subroutine i_hlg_set_host(a) + subroutine i_cuda_hlg_set_host(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine i_hlg_set_host + end subroutine i_cuda_hlg_set_host - subroutine i_hlg_set_dev(a) + subroutine i_cuda_hlg_set_dev(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine i_hlg_set_dev + end subroutine i_cuda_hlg_set_dev - subroutine i_hlg_set_sync(a) + subroutine i_cuda_hlg_set_sync(a) implicit none - class(psb_i_hlg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine i_hlg_set_sync + end subroutine i_cuda_hlg_set_sync - function i_hlg_is_dev(a) result(res) + function i_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function i_hlg_is_dev + end function i_cuda_hlg_is_dev - function i_hlg_is_host(a) result(res) + function i_cuda_hlg_is_host(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function i_hlg_is_host + end function i_cuda_hlg_is_host - function i_hlg_is_sync(a) result(res) + function i_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_i_hlg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function i_hlg_is_sync + end function i_cuda_hlg_is_sync - subroutine i_hlg_finalize(a) + subroutine i_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_i_hlg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine i_hlg_finalize + end subroutine i_cuda_hlg_finalize #else interface - subroutine psb_i_hlg_mold(a,b,info) - import :: psb_i_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hlg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hlg_mold(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hlg_mold + end subroutine psb_i_cuda_hlg_mold end interface #endif -end module psb_i_hlg_mat_mod +end module psb_i_cuda_hlg_mat_mod diff --git a/cuda/psb_i_hybg_mat_mod.F90 b/cuda/psb_i_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_i_hybg_mat_mod.F90 rename to cuda/psb_i_cuda_hybg_mat_mod.F90 index 388a8801..10333c24 100644 --- a/cuda/psb_i_hybg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_i_hybg_mat_mod +module psb_i_cuda_hybg_mat_mod use iso_c_binding use psb_i_mat_mod use cusparse_mod - type, extends(psb_i_csr_sparse_mat) :: psb_i_hybg_sparse_mat + type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_i_hybg_mat_mod type(i_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => i_hybg_get_fmt - procedure, pass(a) :: sizeof => i_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_i_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_i_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_i_hybg_csmm - procedure, pass(a) :: csmv => psb_i_hybg_csmv - procedure, pass(a) :: scals => psb_i_hybg_scals - procedure, pass(a) :: scalv => psb_i_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_i_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_i_hybg_allocate_mnnz + procedure, nopass :: get_fmt => i_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_i_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_i_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_i_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_i_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_i_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_i_mv_hybg_from_fmt - procedure, pass(a) :: free => i_hybg_free - procedure, pass(a) :: mold => psb_i_hybg_mold - procedure, pass(a) :: to_gpu => psb_i_hybg_to_gpu - final :: i_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => i_cuda_hybg_free + procedure, pass(a) :: mold => psb_i_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_hybg_to_gpu + final :: i_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_i_hybg_mold + procedure, pass(a) :: mold => psb_i_cuda_hybg_mold #endif - end type psb_i_hybg_sparse_mat + end type psb_i_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: i_hybg_get_nzeros, i_hybg_free, i_hybg_get_fmt, & - & i_hybg_get_size, i_hybg_sizeof, i_hybg_get_nz_row + private :: i_cuda_hybg_get_nzeros, i_cuda_hybg_free, i_cuda_hybg_get_fmt, & + & i_cuda_hybg_get_size, i_cuda_hybg_sizeof, i_cuda_hybg_get_nz_row interface - subroutine psb_i_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_inner_vect_sv + end subroutine psb_i_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_i_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_vect_mv + end subroutine psb_i_cuda_hybg_vect_mv end interface interface - subroutine psb_i_hybg_reallocate_nz(nz,a) - import :: psb_i_hybg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hybg_reallocate_nz(nz,a) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_i_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_i_hybg_reallocate_nz + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_hybg_reallocate_nz end interface interface - subroutine psb_i_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_i_hybg_sparse_mat, psb_ipk_ + subroutine psb_i_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_i_hybg_allocate_mnnz + end subroutine psb_i_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_i_hybg_mold(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_mold(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_mold + end subroutine psb_i_cuda_hybg_mold end interface interface - subroutine psb_i_hybg_to_gpu(a,info, nzrm) - import :: psb_i_hybg_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_i_hybg_to_gpu + end subroutine psb_i_cuda_hybg_to_gpu end interface interface - subroutine psb_i_cp_hybg_from_coo(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hybg_from_coo + end subroutine psb_i_cuda_cp_hybg_from_coo end interface interface - subroutine psb_i_cp_hybg_from_fmt(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cp_hybg_from_fmt + end subroutine psb_i_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_i_mv_hybg_from_coo(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hybg_from_coo + end subroutine psb_i_cuda_mv_hybg_from_coo end interface interface - subroutine psb_i_mv_hybg_from_fmt(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_i_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_mv_hybg_from_fmt + end subroutine psb_i_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_i_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:) integer(psb_ipk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_csmv + end subroutine psb_i_cuda_hybg_csmv end interface interface - subroutine psb_i_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) integer(psb_ipk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_i_hybg_csmm + end subroutine psb_i_cuda_hybg_csmm end interface interface - subroutine psb_i_hybg_scal(d,a,info,side) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_scal(d,a,info,side) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_i_hybg_scal + end subroutine psb_i_cuda_hybg_scal end interface interface - subroutine psb_i_hybg_scals(d,a,info) - import :: psb_i_hybg_sparse_mat, psb_ipk_, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(inout) :: a + subroutine psb_i_cuda_hybg_scals(d,a,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_scals + end subroutine psb_i_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function i_hybg_sizeof(a) result(res) + function i_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_i_hybg_sparse_mat), intent(in) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_ip * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function i_hybg_sizeof + end function i_cuda_hybg_sizeof - function i_hybg_get_fmt() result(res) + function i_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function i_hybg_get_fmt + end function i_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine i_hybg_free(a) + subroutine i_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_i_hybg_sparse_mat), intent(inout) :: a + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_i_csr_sparse_mat%free() return - end subroutine i_hybg_free + end subroutine i_cuda_hybg_free - subroutine i_hybg_finalize(a) + subroutine i_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_i_hybg_sparse_mat), intent(inout) :: a + type(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine i_hybg_finalize + end subroutine i_cuda_hybg_finalize #else interface - subroutine psb_i_hybg_mold(a,b,info) - import :: psb_i_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_hybg_sparse_mat), intent(in) :: a + subroutine psb_i_cuda_hybg_mold(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a class(psb_i_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_hybg_mold + end subroutine psb_i_cuda_hybg_mold end interface #endif -end module psb_i_hybg_mat_mod +end module psb_i_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_i_gpu_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_i_gpu_vect_mod.F90 rename to cuda/psb_i_cuda_vect_mod.F90 index ca4950a0..8d940513 100644 --- a/cuda/psb_i_gpu_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_i_gpu_vect_mod +module psb_i_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod + use psb_cuda_env_mod use psb_i_vectordev_mod #endif @@ -44,7 +44,7 @@ module psb_i_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_base_vect_type) :: psb_i_vect_gpu + type, extends(psb_i_base_vect_type) :: psb_i_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -56,52 +56,52 @@ module psb_i_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => i_gpu_get_nrows - procedure, nopass :: get_fmt => i_gpu_get_fmt - - procedure, pass(x) :: all => i_gpu_all - procedure, pass(x) :: zero => i_gpu_zero - procedure, pass(x) :: asb_m => i_gpu_asb_m - procedure, pass(x) :: sync => i_gpu_sync - procedure, pass(x) :: sync_space => i_gpu_sync_space - procedure, pass(x) :: bld_x => i_gpu_bld_x - procedure, pass(x) :: bld_mn => i_gpu_bld_mn - procedure, pass(x) :: free => i_gpu_free - procedure, pass(x) :: ins_a => i_gpu_ins_a - procedure, pass(x) :: ins_v => i_gpu_ins_v - procedure, pass(x) :: is_host => i_gpu_is_host - procedure, pass(x) :: is_dev => i_gpu_is_dev - procedure, pass(x) :: is_sync => i_gpu_is_sync - procedure, pass(x) :: set_host => i_gpu_set_host - procedure, pass(x) :: set_dev => i_gpu_set_dev - procedure, pass(x) :: set_sync => i_gpu_set_sync - procedure, pass(x) :: set_scal => i_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => i_gpu_set_vect - procedure, pass(x) :: gthzv_x => i_gpu_gthzv_x - procedure, pass(y) :: sctb => i_gpu_sctb - procedure, pass(y) :: sctb_x => i_gpu_sctb_x - procedure, pass(x) :: gthzbuf => i_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => i_gpu_sctb_buf - procedure, pass(x) :: new_buffer => i_gpu_new_buffer - procedure, nopass :: device_wait => i_gpu_device_wait - procedure, pass(x) :: free_buffer => i_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => i_gpu_maybe_free_buffer - - final :: i_gpu_vect_finalize + procedure, pass(x) :: get_nrows => i_cuda_get_nrows + procedure, nopass :: get_fmt => i_cuda_get_fmt + + procedure, pass(x) :: all => i_cuda_all + procedure, pass(x) :: zero => i_cuda_zero + procedure, pass(x) :: asb_m => i_cuda_asb_m + procedure, pass(x) :: sync => i_cuda_sync + procedure, pass(x) :: sync_space => i_cuda_sync_space + procedure, pass(x) :: bld_x => i_cuda_bld_x + procedure, pass(x) :: bld_mn => i_cuda_bld_mn + procedure, pass(x) :: free => i_cuda_free + procedure, pass(x) :: ins_a => i_cuda_ins_a + procedure, pass(x) :: ins_v => i_cuda_ins_v + procedure, pass(x) :: is_host => i_cuda_is_host + procedure, pass(x) :: is_dev => i_cuda_is_dev + procedure, pass(x) :: is_sync => i_cuda_is_sync + procedure, pass(x) :: set_host => i_cuda_set_host + procedure, pass(x) :: set_dev => i_cuda_set_dev + procedure, pass(x) :: set_sync => i_cuda_set_sync + procedure, pass(x) :: set_scal => i_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => i_cuda_set_vect + procedure, pass(x) :: gthzv_x => i_cuda_gthzv_x + procedure, pass(y) :: sctb => i_cuda_sctb + procedure, pass(y) :: sctb_x => i_cuda_sctb_x + procedure, pass(x) :: gthzbuf => i_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => i_cuda_sctb_buf + procedure, pass(x) :: new_buffer => i_cuda_new_buffer + procedure, nopass :: device_wait => i_cuda_device_wait + procedure, pass(x) :: free_buffer => i_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_cuda_maybe_free_buffer + + final :: i_cuda_vect_finalize #endif - end type psb_i_vect_gpu + end type psb_i_vect_cuda - public :: psb_i_vect_gpu_ + public :: psb_i_vect_cuda_ private :: constructor - interface psb_i_vect_gpu_ + interface psb_i_vect_cuda_ module procedure constructor - end interface psb_i_vect_gpu_ + end interface psb_i_vect_cuda_ contains function constructor(x) result(this) integer(psb_ipk_) :: x(:) - type(psb_i_vect_gpu) :: this + type(psb_i_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -111,20 +111,20 @@ contains #ifdef HAVE_SPGPU - subroutine i_gpu_device_wait() + subroutine i_cuda_device_wait() call psb_cudaSync() - end subroutine i_gpu_device_wait + end subroutine i_cuda_device_wait - subroutine i_gpu_new_buffer(n,x,info) + subroutine i_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -314,14 +314,14 @@ contains end select - end subroutine i_gpu_gthzv_x + end subroutine i_cuda_gthzv_x - subroutine i_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine i_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_i_vect_gpu) :: x + class(psb_i_vect_cuda) :: x integer :: info, ni info = 0 @@ -332,11 +332,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -367,14 +367,14 @@ contains end select - end subroutine i_gpu_gthzbuf + end subroutine i_cuda_gthzbuf - subroutine i_gpu_sctb(n,idx,x,beta,y) + subroutine i_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -384,24 +384,24 @@ contains call y%psb_i_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine i_gpu_sctb + end subroutine i_cuda_sctb - subroutine i_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine i_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -489,16 +489,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine i_gpu_sctb_x + end subroutine i_cuda_sctb_x - subroutine i_gpu_sctb_buf(i,n,idx,beta,y) + subroutine i_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta - class(psb_i_vect_gpu) :: y + class(psb_i_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -509,11 +509,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -540,106 +540,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine i_gpu_sctb_buf + end subroutine i_cuda_sctb_buf - subroutine i_gpu_bld_x(x,this) + subroutine i_cuda_bld_x(x,this) use psb_base_mod integer(psb_ipk_), intent(in) :: this(:) - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_bld_x',& + call psb_errpush(info,'i_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine i_gpu_bld_x + end subroutine i_cuda_bld_x - subroutine i_gpu_bld_mn(x,n) + subroutine i_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'i_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'i_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine i_gpu_bld_mn + end subroutine i_cuda_bld_mn - subroutine i_gpu_set_host(x) + subroutine i_cuda_set_host(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine i_gpu_set_host + end subroutine i_cuda_set_host - subroutine i_gpu_set_dev(x) + subroutine i_cuda_set_dev(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine i_gpu_set_dev + end subroutine i_cuda_set_dev - subroutine i_gpu_set_sync(x) + subroutine i_cuda_set_sync(x) implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine i_gpu_set_sync + end subroutine i_cuda_set_sync - function i_gpu_is_dev(x) result(res) + function i_cuda_is_dev(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function i_gpu_is_dev + end function i_cuda_is_dev - function i_gpu_is_host(x) result(res) + function i_cuda_is_host(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function i_gpu_is_host + end function i_cuda_is_host - function i_gpu_is_sync(x) result(res) + function i_cuda_is_sync(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function i_gpu_is_sync + end function i_cuda_is_sync - function i_gpu_get_nrows(x) result(res) + function i_cuda_get_nrows(x) result(res) implicit none - class(psb_i_vect_gpu), intent(in) :: x + class(psb_i_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function i_gpu_get_nrows + end function i_cuda_get_nrows - function i_gpu_get_fmt() result(res) + function i_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'iGPU' - end function i_gpu_get_fmt + end function i_cuda_get_fmt - subroutine i_gpu_all(n, x, info) + subroutine i_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_i_vect_gpu), intent(out) :: x + class(psb_i_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -647,26 +647,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_all',& + call psb_errpush(info,'i_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine i_gpu_all + end subroutine i_cuda_all - subroutine i_gpu_zero(x) + subroutine i_cuda_zero(x) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=izero call x%set_host() - end subroutine i_gpu_zero + end subroutine i_cuda_zero - subroutine i_gpu_asb_m(n, x, info) + subroutine i_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -686,12 +686,12 @@ contains end if end if - end subroutine i_gpu_asb_m + end subroutine i_cuda_asb_m - subroutine i_gpu_sync_space(x,info) + subroutine i_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -730,12 +730,12 @@ contains end if end if - end subroutine i_gpu_sync_space + end subroutine i_cuda_sync_space - subroutine i_gpu_sync(x) + subroutine i_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -761,31 +761,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'i_gpu_sync') + call psb_errpush(info,'i_cuda_sync') end if - end subroutine i_gpu_sync + end subroutine i_cuda_sync - subroutine i_gpu_free(x, info) + subroutine i_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine i_gpu_free + end subroutine i_cuda_free - subroutine i_gpu_set_scal(x,val,first,last) - class(psb_i_vect_gpu), intent(inout) :: x + subroutine i_cuda_set_scal(x,val,first,last) + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -800,10 +800,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine i_gpu_set_scal + end subroutine i_cuda_set_scal !!$ -!!$ subroutine i_gpu_set_vect(x,val) -!!$ class(psb_i_vect_gpu), intent(inout) :: x +!!$ subroutine i_cuda_set_vect(x,val) +!!$ class(psb_i_vect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -812,68 +812,68 @@ contains !!$ call x%psb_i_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine i_gpu_set_vect +!!$ end subroutine i_cuda_set_vect - subroutine i_gpu_vect_finalize(x) + subroutine i_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_i_vect_gpu), intent(inout) :: x + type(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine i_gpu_vect_finalize + end subroutine i_cuda_vect_finalize - subroutine i_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine i_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_i_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceInt(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine i_gpu_ins_v + end subroutine i_cuda_ins_v - subroutine i_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine i_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_vect_gpu), intent(inout) :: x + class(psb_i_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) integer(psb_ipk_), intent(in) :: val(:) @@ -886,11 +886,11 @@ contains call x%psb_i_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine i_gpu_ins_a + end subroutine i_cuda_ins_a #endif -end module psb_i_gpu_vect_mod +end module psb_i_cuda_vect_mod ! @@ -899,7 +899,7 @@ end module psb_i_gpu_vect_mod -module psb_i_gpu_multivect_mod +module psb_i_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -914,7 +914,7 @@ module psb_i_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_i_base_multivect_type) :: psb_i_multivect_gpu + type, extends(psb_i_base_multivect_type) :: psb_i_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -922,48 +922,48 @@ module psb_i_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => i_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => i_gpu_multi_get_ncols - procedure, nopass :: get_fmt => i_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => i_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => i_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => i_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => i_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => i_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => i_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => i_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => i_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => i_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => i_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => i_gpu_multi_amax -!!$ procedure, pass(x) :: asum => i_gpu_multi_asum - procedure, pass(x) :: all => i_gpu_multi_all - procedure, pass(x) :: zero => i_gpu_multi_zero - procedure, pass(x) :: asb => i_gpu_multi_asb - procedure, pass(x) :: sync => i_gpu_multi_sync - procedure, pass(x) :: sync_space => i_gpu_multi_sync_space - procedure, pass(x) :: bld_x => i_gpu_multi_bld_x - procedure, pass(x) :: bld_n => i_gpu_multi_bld_n - procedure, pass(x) :: free => i_gpu_multi_free - procedure, pass(x) :: ins => i_gpu_multi_ins - procedure, pass(x) :: is_host => i_gpu_multi_is_host - procedure, pass(x) :: is_dev => i_gpu_multi_is_dev - procedure, pass(x) :: is_sync => i_gpu_multi_is_sync - procedure, pass(x) :: set_host => i_gpu_multi_set_host - procedure, pass(x) :: set_dev => i_gpu_multi_set_dev - procedure, pass(x) :: set_sync => i_gpu_multi_set_sync - procedure, pass(x) :: set_scal => i_gpu_multi_set_scal - procedure, pass(x) :: set_vect => i_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => i_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => i_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => i_gpu_multi_sctb_x - final :: i_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => i_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => i_cuda_multi_get_ncols + procedure, nopass :: get_fmt => i_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => i_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => i_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => i_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => i_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => i_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => i_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => i_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => i_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => i_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => i_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => i_cuda_multi_amax +!!$ procedure, pass(x) :: asum => i_cuda_multi_asum + procedure, pass(x) :: all => i_cuda_multi_all + procedure, pass(x) :: zero => i_cuda_multi_zero + procedure, pass(x) :: asb => i_cuda_multi_asb + procedure, pass(x) :: sync => i_cuda_multi_sync + procedure, pass(x) :: sync_space => i_cuda_multi_sync_space + procedure, pass(x) :: bld_x => i_cuda_multi_bld_x + procedure, pass(x) :: bld_n => i_cuda_multi_bld_n + procedure, pass(x) :: free => i_cuda_multi_free + procedure, pass(x) :: ins => i_cuda_multi_ins + procedure, pass(x) :: is_host => i_cuda_multi_is_host + procedure, pass(x) :: is_dev => i_cuda_multi_is_dev + procedure, pass(x) :: is_sync => i_cuda_multi_is_sync + procedure, pass(x) :: set_host => i_cuda_multi_set_host + procedure, pass(x) :: set_dev => i_cuda_multi_set_dev + procedure, pass(x) :: set_sync => i_cuda_multi_set_sync + procedure, pass(x) :: set_scal => i_cuda_multi_set_scal + procedure, pass(x) :: set_vect => i_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => i_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => i_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => i_cuda_multi_sctb_x + final :: i_cuda_multi_vect_finalize #endif - end type psb_i_multivect_gpu + end type psb_i_multivect_cuda - public :: psb_i_multivect_gpu + public :: psb_i_multivect_cuda private :: constructor - interface psb_i_multivect_gpu + interface psb_i_multivect_cuda module procedure constructor end interface @@ -971,7 +971,7 @@ contains function constructor(x) result(this) integer(psb_ipk_) :: x(:,:) - type(psb_i_multivect_gpu) :: this + type(psb_i_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -981,15 +981,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine i_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ integer(psb_ipk_) :: y(:) -!!$ class(psb_i_multivect_gpu) :: x +!!$ class(psb_i_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1014,16 +1014,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine i_gpu_multi_gthzv_x +!!$ end subroutine i_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine i_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine i_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ integer(psb_ipk_) :: beta, x(:) -!!$ class(psb_i_multivect_gpu) :: y +!!$ class(psb_i_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1033,17 +1033,17 @@ contains !!$ call y%psb_i_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine i_gpu_multi_sctb +!!$ end subroutine i_cuda_multi_sctb !!$ -!!$ subroutine i_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine i_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ integer(psb_ipk_) :: beta, x(:) -!!$ class(psb_i_multivect_gpu) :: y +!!$ class(psb_i_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1069,13 +1069,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine i_gpu_multi_sctb_x +!!$ end subroutine i_cuda_multi_sctb_x - subroutine i_gpu_multi_bld_x(x,this) + subroutine i_cuda_multi_bld_x(x,this) use psb_base_mod integer(psb_ipk_), intent(in) :: this(:,:) - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1085,101 +1085,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_multi_bld_x',& + call psb_errpush(info,'i_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine i_gpu_multi_bld_x + end subroutine i_cuda_multi_bld_x - subroutine i_gpu_multi_bld_n(x,m,n) + subroutine i_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'i_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'i_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine i_gpu_multi_bld_n + end subroutine i_cuda_multi_bld_n - subroutine i_gpu_multi_set_host(x) + subroutine i_cuda_multi_set_host(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine i_gpu_multi_set_host + end subroutine i_cuda_multi_set_host - subroutine i_gpu_multi_set_dev(x) + subroutine i_cuda_multi_set_dev(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine i_gpu_multi_set_dev + end subroutine i_cuda_multi_set_dev - subroutine i_gpu_multi_set_sync(x) + subroutine i_cuda_multi_set_sync(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine i_gpu_multi_set_sync + end subroutine i_cuda_multi_set_sync - function i_gpu_multi_is_dev(x) result(res) + function i_cuda_multi_is_dev(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function i_gpu_multi_is_dev + end function i_cuda_multi_is_dev - function i_gpu_multi_is_host(x) result(res) + function i_cuda_multi_is_host(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function i_gpu_multi_is_host + end function i_cuda_multi_is_host - function i_gpu_multi_is_sync(x) result(res) + function i_cuda_multi_is_sync(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function i_gpu_multi_is_sync + end function i_cuda_multi_is_sync - function i_gpu_multi_get_nrows(x) result(res) + function i_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function i_gpu_multi_get_nrows + end function i_cuda_multi_get_nrows - function i_gpu_multi_get_ncols(x) result(res) + function i_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_i_multivect_gpu), intent(in) :: x + class(psb_i_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function i_gpu_multi_get_ncols + end function i_cuda_multi_get_ncols - function i_gpu_multi_get_fmt() result(res) + function i_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'iGPU' - end function i_gpu_multi_get_fmt + end function i_cuda_multi_get_fmt -!!$ function i_gpu_multi_dot_v(n,x,y) result(res) +!!$ function i_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ class(psb_i_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res @@ -1196,13 +1196,13 @@ contains !!$ type is (psb_i_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'i_gpu_multi_dot_v') +!!$ call psb_errpush(info,'i_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1211,11 +1211,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function i_gpu_multi_dot_v +!!$ end function i_cuda_multi_dot_v !!$ -!!$ function i_gpu_multi_dot_a(n,x,y) result(res) +!!$ function i_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res @@ -1224,14 +1224,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function i_gpu_multi_dot_a +!!$ end function i_cuda_multi_dot_a !!$ -!!$ subroutine i_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine i_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_i_base_multivect_type), intent(inout) :: x -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1244,7 +1244,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1263,27 +1263,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine i_gpu_multi_axpby_v +!!$ end subroutine i_cuda_multi_axpby_v !!$ -!!$ subroutine i_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine i_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ integer(psb_ipk_), intent(in) :: x(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine i_gpu_multi_axpby_a +!!$ end subroutine i_cuda_multi_axpby_a !!$ -!!$ subroutine i_gpu_multi_mlt_v(x, y, info) +!!$ subroutine i_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_i_base_multivect_type), intent(inout) :: x -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1297,7 +1297,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1309,13 +1309,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine i_gpu_multi_mlt_v +!!$ end subroutine i_cuda_multi_mlt_v !!$ -!!$ subroutine i_gpu_multi_mlt_a(x, y, info) +!!$ subroutine i_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: x(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1323,15 +1323,15 @@ contains !!$ call y%sync() !!$ call y%psb_i_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine i_gpu_multi_mlt_a +!!$ end subroutine i_cuda_multi_mlt_a !!$ -!!$ subroutine i_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine i_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: alpha,beta !!$ integer(psb_ipk_), intent(in) :: x(:) !!$ integer(psb_ipk_), intent(in) :: y(:) -!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ class(psb_i_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1339,16 +1339,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine i_gpu_multi_mlt_a_2 +!!$ end subroutine i_cuda_multi_mlt_a_2 !!$ -!!$ subroutine i_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine i_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: alpha,beta !!$ class(psb_i_base_multivect_type), intent(inout) :: x !!$ class(psb_i_base_multivect_type), intent(inout) :: y -!!$ class(psb_i_multivect_gpu), intent(inout) :: z +!!$ class(psb_i_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1371,9 +1371,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_i_multivect_gpu) +!!$ type is (psb_i_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1393,11 +1393,11 @@ contains !!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine i_gpu_multi_mlt_v_2 +!!$ end subroutine i_cuda_multi_mlt_v_2 - subroutine i_gpu_multi_set_scal(x,val) - class(psb_i_multivect_gpu), intent(inout) :: x + subroutine i_cuda_multi_set_scal(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1405,10 +1405,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_i_base_multivect_type%set_scal(val) call x%set_host() - end subroutine i_gpu_multi_set_scal + end subroutine i_cuda_multi_set_scal - subroutine i_gpu_multi_set_vect(x,val) - class(psb_i_multivect_gpu), intent(inout) :: x + subroutine i_cuda_multi_set_vect(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1417,24 +1417,24 @@ contains call x%psb_i_base_multivect_type%set_vect(val) call x%set_host() - end subroutine i_gpu_multi_set_vect + end subroutine i_cuda_multi_set_vect -!!$ subroutine i_gpu_multi_scal(alpha, x) +!!$ subroutine i_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_i_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine i_gpu_multi_scal +!!$ end subroutine i_cuda_multi_scal !!$ !!$ -!!$ function i_gpu_multi_nrm2(n,x) result(res) +!!$ function i_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ integer(psb_ipk_) :: info @@ -1442,36 +1442,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function i_gpu_multi_nrm2 +!!$ end function i_cuda_multi_nrm2 !!$ -!!$ function i_gpu_multi_amax(n,x) result(res) +!!$ function i_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function i_gpu_multi_amax +!!$ end function i_cuda_multi_amax !!$ -!!$ function i_gpu_multi_asum(n,x) result(res) +!!$ function i_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_i_multivect_gpu), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ integer(psb_ipk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function i_gpu_multi_asum +!!$ end function i_cuda_multi_asum - subroutine i_gpu_multi_all(m,n, x, info) + subroutine i_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(out) :: x + class(psb_i_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=izero) @@ -1481,26 +1481,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'i_gpu_multi_all',& + call psb_errpush(info,'i_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine i_gpu_multi_all + end subroutine i_cuda_multi_all - subroutine i_gpu_multi_zero(x) + subroutine i_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine i_gpu_multi_zero + end subroutine i_cuda_multi_zero - subroutine i_gpu_multi_asb(m,n, x, info) + subroutine i_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1520,12 +1520,12 @@ contains call x%set_host() end if end if - end subroutine i_gpu_multi_asb + end subroutine i_cuda_multi_asb - subroutine i_gpu_multi_sync_space(x,info) + subroutine i_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1578,11 +1578,11 @@ contains end if - end subroutine i_gpu_multi_sync_space + end subroutine i_cuda_multi_sync_space - subroutine i_gpu_multi_sync(x) + subroutine i_cuda_multi_sync(x) implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1598,16 +1598,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'i_gpu_multi_sync') + call psb_errpush(info,'i_cuda_multi_sync') end if - end subroutine i_gpu_multi_sync + end subroutine i_cuda_multi_sync - subroutine i_gpu_multi_free(x, info) + subroutine i_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1622,13 +1622,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine i_gpu_multi_free + end subroutine i_cuda_multi_free - subroutine i_gpu_multi_vect_finalize(x) + subroutine i_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_i_multivect_gpu), intent(inout) :: x + type(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1643,12 +1643,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine i_gpu_multi_vect_finalize + end subroutine i_cuda_multi_vect_finalize - subroutine i_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine i_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_i_multivect_gpu), intent(inout) :: x + class(psb_i_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) integer(psb_ipk_), intent(in) :: val(:,:) @@ -1661,11 +1661,11 @@ contains call x%psb_i_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine i_gpu_multi_ins + end subroutine i_cuda_multi_ins #endif -end module psb_i_gpu_multivect_mod +end module psb_i_cuda_multivect_mod diff --git a/cuda/psb_s_csrg_mat_mod.F90 b/cuda/psb_s_csrg_mat_mod.F90 deleted file mode 100644 index cface9f5..00000000 --- a/cuda/psb_s_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_s_csrg_mat_mod - - use iso_c_binding - use psb_s_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_s_csr_sparse_mat) :: psb_s_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(s_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => s_csrg_get_fmt - procedure, pass(a) :: sizeof => s_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_s_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_csrg_csmm - procedure, pass(a) :: csmv => psb_s_csrg_csmv - procedure, pass(a) :: scals => psb_s_csrg_scals - procedure, pass(a) :: scalv => psb_s_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_s_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_csrg_from_fmt - procedure, pass(a) :: free => s_csrg_free - procedure, pass(a) :: mold => psb_s_csrg_mold - procedure, pass(a) :: is_host => s_csrg_is_host - procedure, pass(a) :: is_dev => s_csrg_is_dev - procedure, pass(a) :: is_sync => s_csrg_is_sync - procedure, pass(a) :: set_host => s_csrg_set_host - procedure, pass(a) :: set_dev => s_csrg_set_dev - procedure, pass(a) :: set_sync => s_csrg_set_sync - procedure, pass(a) :: sync => s_csrg_sync - procedure, pass(a) :: to_gpu => psb_s_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_s_csrg_from_gpu - final :: s_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_csrg_mold -#endif - end type psb_s_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: s_csrg_get_nzeros, s_csrg_free, s_csrg_get_fmt, & - & s_csrg_get_size, s_csrg_sizeof, s_csrg_get_nz_row - - - interface - subroutine psb_s_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_s_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_vect_mv - end interface - - interface - subroutine psb_s_csrg_reallocate_nz(nz,a) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_s_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_s_csrg_reallocate_nz - end interface - - interface - subroutine psb_s_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_csrg_allocate_mnnz - end interface - - interface - subroutine psb_s_csrg_mold(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_mold - end interface - - interface - subroutine psb_s_csrg_to_gpu(a,info, nzrm) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_csrg_to_gpu - end interface - - interface - subroutine psb_s_csrg_from_gpu(a,info) - import :: psb_s_csrg_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_from_gpu - end interface - - interface - subroutine psb_s_cp_csrg_from_coo(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_csrg_from_coo - end interface - - interface - subroutine psb_s_cp_csrg_from_fmt(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_csrg_from_fmt - end interface - - interface - subroutine psb_s_mv_csrg_from_coo(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_csrg_from_coo - end interface - - interface - subroutine psb_s_mv_csrg_from_fmt(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - class(psb_s_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_csrg_from_fmt - end interface - - interface - subroutine psb_s_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_csmv - end interface - interface - subroutine psb_s_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_csrg_csmm - end interface - - interface - subroutine psb_s_csrg_scal(d,a,info,side) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - real(psb_spk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_s_csrg_scal - end interface - - interface - subroutine psb_s_csrg_scals(d,a,info) - import :: psb_s_csrg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(inout) :: a - real(psb_spk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function s_csrg_sizeof(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + psb_sizeof_sp * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function s_csrg_sizeof - - function s_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function s_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine s_csrg_set_host(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine s_csrg_set_host - - subroutine s_csrg_set_dev(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine s_csrg_set_dev - - subroutine s_csrg_set_sync(a) - implicit none - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine s_csrg_set_sync - - function s_csrg_is_dev(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function s_csrg_is_dev - - function s_csrg_is_host(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function s_csrg_is_host - - function s_csrg_is_sync(a) result(res) - implicit none - class(psb_s_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function s_csrg_is_sync - - - subroutine s_csrg_sync(a) - implicit none - class(psb_s_csrg_sparse_mat), target, intent(in) :: a - class(psb_s_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine s_csrg_sync - - subroutine s_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_s_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_s_csr_sparse_mat%free() - - return - - end subroutine s_csrg_free - - subroutine s_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_s_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine s_csrg_finalize - -#else - interface - subroutine psb_s_csrg_mold(a,b,info) - import :: psb_s_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_csrg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_csrg_mold - end interface - -#endif - -end module psb_s_csrg_mat_mod diff --git a/cuda/psb_s_cuda_csrg_mat_mod.F90 b/cuda/psb_s_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..fb13d034 --- /dev/null +++ b/cuda/psb_s_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_csrg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(s_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_s_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_s_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => s_cuda_csrg_free + procedure, pass(a) :: mold => psb_s_cuda_csrg_mold + procedure, pass(a) :: is_host => s_cuda_csrg_is_host + procedure, pass(a) :: is_dev => s_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => s_cuda_csrg_is_sync + procedure, pass(a) :: set_host => s_cuda_csrg_set_host + procedure, pass(a) :: set_dev => s_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => s_cuda_csrg_set_sync + procedure, pass(a) :: sync => s_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_s_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_s_cuda_csrg_from_gpu + final :: s_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_s_cuda_csrg_mold +#endif + end type psb_s_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: s_cuda_csrg_get_nzeros, s_cuda_csrg_free, s_cuda_csrg_get_fmt, & + & s_cuda_csrg_get_size, s_cuda_csrg_sizeof, s_cuda_csrg_get_nz_row + + + interface + subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_s_cuda_csrg_mold(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_mold + end interface + + interface + subroutine psb_s_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_s_cuda_csrg_from_gpu(a,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_csmv + end interface + interface + subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_csmm + end interface + + interface + subroutine psb_s_cuda_csrg_scal(d,a,info,side) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_cuda_csrg_scal + end interface + + interface + subroutine psb_s_cuda_csrg_scals(d,a,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_cuda_csrg_sizeof + + function s_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function s_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine s_cuda_csrg_set_host(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_cuda_csrg_set_host + + subroutine s_cuda_csrg_set_dev(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_cuda_csrg_set_dev + + subroutine s_cuda_csrg_set_sync(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_cuda_csrg_set_sync + + function s_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_cuda_csrg_is_dev + + function s_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_cuda_csrg_is_host + + function s_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_cuda_csrg_is_sync + + + subroutine s_cuda_csrg_sync(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_cuda_csrg_sync + + subroutine s_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_cuda_csrg_free + + subroutine s_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine s_cuda_csrg_finalize + +#else + interface + subroutine psb_s_cuda_csrg_mold(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_mold + end interface + +#endif + +end module psb_s_cuda_csrg_mat_mod diff --git a/cuda/psb_s_diag_mat_mod.F90 b/cuda/psb_s_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_s_diag_mat_mod.F90 rename to cuda/psb_s_cuda_diag_mat_mod.F90 index 1ed54f88..709cd728 100644 --- a/cuda/psb_s_diag_mat_mod.F90 +++ b/cuda/psb_s_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_s_diag_mat_mod +module psb_s_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_s_dia_mat_mod - type, extends(psb_s_dia_sparse_mat) :: psb_s_diag_sparse_mat + type, extends(psb_s_dia_sparse_mat) :: psb_s_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_s_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_diag_get_fmt - procedure, pass(a) :: sizeof => s_diag_sizeof - procedure, pass(a) :: vect_mv => psb_s_diag_vect_mv -! procedure, pass(a) :: csmm => psb_s_diag_csmm - procedure, pass(a) :: csmv => psb_s_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_s_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_s_diag_scals -! procedure, pass(a) :: scalv => psb_s_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_s_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_s_diag_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => s_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_s_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_s_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_s_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_s_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_s_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_s_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_s_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_s_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_s_mv_diag_from_fmt - procedure, pass(a) :: free => s_diag_free - procedure, pass(a) :: mold => psb_s_diag_mold - procedure, pass(a) :: to_gpu => psb_s_diag_to_gpu - final :: s_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => s_cuda_diag_free + procedure, pass(a) :: mold => psb_s_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_diag_to_gpu + final :: s_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_s_diag_mold + procedure, pass(a) :: mold => psb_s_cuda_diag_mold #endif - end type psb_s_diag_sparse_mat + end type psb_s_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: s_diag_get_nzeros, s_diag_free, s_diag_get_fmt, & - & s_diag_get_size, s_diag_sizeof, s_diag_get_nz_row + private :: s_cuda_diag_get_nzeros, s_cuda_diag_free, s_cuda_diag_get_fmt, & + & s_cuda_diag_get_size, s_cuda_diag_sizeof, s_cuda_diag_get_nz_row interface - subroutine psb_s_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_vect_mv + end subroutine psb_s_cuda_diag_vect_mv end interface interface - subroutine psb_s_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_diag_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_inner_vect_sv + end subroutine psb_s_cuda_diag_inner_vect_sv end interface interface - subroutine psb_s_diag_reallocate_nz(nz,a) - import :: psb_s_diag_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_diag_reallocate_nz(nz,a) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_diag_sparse_mat), intent(inout) :: a - end subroutine psb_s_diag_reallocate_nz + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_diag_reallocate_nz end interface interface - subroutine psb_s_diag_allocate_mnnz(m,n,a,nz) - import :: psb_s_diag_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_diag_allocate_mnnz + end subroutine psb_s_cuda_diag_allocate_mnnz end interface interface - subroutine psb_s_diag_mold(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_mold(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_mold + end subroutine psb_s_cuda_diag_mold end interface interface - subroutine psb_s_diag_to_gpu(a,info, nzrm) - import :: psb_s_diag_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_diag_to_gpu + end subroutine psb_s_cuda_diag_to_gpu end interface interface - subroutine psb_s_cp_diag_from_coo(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_diag_from_coo + end subroutine psb_s_cuda_cp_diag_from_coo end interface interface - subroutine psb_s_cp_diag_from_fmt(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_diag_from_fmt + end subroutine psb_s_cuda_cp_diag_from_fmt end interface interface - subroutine psb_s_mv_diag_from_coo(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_diag_from_coo + end subroutine psb_s_cuda_mv_diag_from_coo end interface interface - subroutine psb_s_mv_diag_from_fmt(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_diag_from_fmt + end subroutine psb_s_cuda_mv_diag_from_fmt end interface interface - subroutine psb_s_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_csmv + end subroutine psb_s_cuda_diag_csmv end interface interface - subroutine psb_s_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_diag_csmm + end subroutine psb_s_cuda_diag_csmm end interface interface - subroutine psb_s_diag_scal(d,a,info, side) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_scal(d,a,info, side) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_diag_scal + end subroutine psb_s_cuda_diag_scal end interface interface - subroutine psb_s_diag_scals(d,a,info) - import :: psb_s_diag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_diag_scals(d,a,info) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_scals + end subroutine psb_s_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function s_diag_sizeof(a) result(res) + function s_cuda_diag_sizeof(a) result(res) implicit none - class(psb_s_diag_sparse_mat), intent(in) :: a + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_diag_sizeof + end function s_cuda_diag_sizeof - function s_diag_get_fmt() result(res) + function s_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function s_diag_get_fmt + end function s_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine s_diag_free(a) + subroutine s_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_diag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine s_diag_free + end subroutine s_cuda_diag_free - subroutine s_diag_finalize(a) + subroutine s_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_s_diag_sparse_mat), intent(inout) :: a + type(psb_s_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_diag_finalize + end subroutine s_cuda_diag_finalize #else interface - subroutine psb_s_diag_mold(a,b,info) - import :: psb_s_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_diag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_diag_mold(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_diag_mold + end subroutine psb_s_cuda_diag_mold end interface #endif -end module psb_s_diag_mat_mod +end module psb_s_cuda_diag_mat_mod diff --git a/cuda/psb_s_dnsg_mat_mod.F90 b/cuda/psb_s_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_s_dnsg_mat_mod.F90 rename to cuda/psb_s_cuda_dnsg_mat_mod.F90 index 1c531463..b01c8365 100644 --- a/cuda/psb_s_dnsg_mat_mod.F90 +++ b/cuda/psb_s_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_s_dnsg_mat_mod +module psb_s_cuda_dnsg_mat_mod use iso_c_binding use psb_s_mat_mod use psb_s_dns_mat_mod use dnsdev_mod - type, extends(psb_s_dns_sparse_mat) :: psb_s_dnsg_sparse_mat + type, extends(psb_s_dns_sparse_mat) :: psb_s_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_s_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => s_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_s_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_s_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_s_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_s_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_s_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_s_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_s_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_s_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => s_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_s_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_s_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_s_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_s_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_s_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_s_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_s_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_s_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_dnsg_from_fmt - procedure, pass(a) :: free => s_dnsg_free - procedure, pass(a) :: mold => psb_s_dnsg_mold - procedure, pass(a) :: to_gpu => psb_s_dnsg_to_gpu - final :: s_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => s_cuda_dnsg_free + procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_dnsg_to_gpu + final :: s_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_s_dnsg_mold + procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold #endif - end type psb_s_dnsg_sparse_mat + end type psb_s_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: s_dnsg_get_nzeros, s_dnsg_free, s_dnsg_get_fmt, & - & s_dnsg_get_size, s_dnsg_get_nz_row + private :: s_cuda_dnsg_get_nzeros, s_cuda_dnsg_free, s_cuda_dnsg_get_fmt, & + & s_cuda_dnsg_get_size, s_cuda_dnsg_get_nz_row interface - subroutine psb_s_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_dnsg_vect_mv + end subroutine psb_s_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_inner_vect_sv +!!$ end subroutine psb_s_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_dnsg_reallocate_nz +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_dnsg_allocate_mnnz +!!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_s_dnsg_mold(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_mold(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_mold + end subroutine psb_s_cuda_dnsg_mold end interface interface - subroutine psb_s_dnsg_to_gpu(a,info) - import :: psb_s_dnsg_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_dnsg_to_gpu(a,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_to_gpu + end subroutine psb_s_cuda_dnsg_to_gpu end interface interface - subroutine psb_s_cp_dnsg_from_coo(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_dnsg_from_coo + end subroutine psb_s_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_s_cp_dnsg_from_fmt(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_dnsg_from_fmt + end subroutine psb_s_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_s_mv_dnsg_from_coo(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_dnsg_from_coo + end subroutine psb_s_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_s_mv_dnsg_from_fmt(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_dnsg_from_fmt + end subroutine psb_s_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_s_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) !!$ real(psb_spk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_csmv +!!$ end subroutine psb_s_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_s_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_dnsg_csmm +!!$ end subroutine psb_s_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_scal(d,a,info, side) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_s_dnsg_scal +!!$ end subroutine psb_s_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_dnsg_scals(d,a,info) -!!$ import :: psb_s_dnsg_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_dnsg_scals +!!$ end subroutine psb_s_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function s_dnsg_get_fmt() result(res) + function s_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function s_dnsg_get_fmt + end function s_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine s_dnsg_free(a) + subroutine s_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine s_dnsg_free + end subroutine s_cuda_dnsg_free - subroutine s_dnsg_finalize(a) + subroutine s_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_s_dnsg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_dnsg_finalize + end subroutine s_cuda_dnsg_finalize #else interface - subroutine psb_s_dnsg_mold(a,b,info) - import :: psb_s_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_dnsg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_dnsg_mold(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_dnsg_mold + end subroutine psb_s_cuda_dnsg_mold end interface #endif -end module psb_s_dnsg_mat_mod +end module psb_s_cuda_dnsg_mat_mod diff --git a/cuda/psb_s_elg_mat_mod.F90 b/cuda/psb_s_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_s_elg_mat_mod.F90 rename to cuda/psb_s_cuda_elg_mat_mod.F90 index 5c4eae9b..d6b003b5 100644 --- a/cuda/psb_s_elg_mat_mod.F90 +++ b/cuda/psb_s_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_s_elg_mat_mod +module psb_s_cuda_elg_mat_mod use iso_c_binding use psb_s_mat_mod use psb_s_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_ell_sparse_mat) :: psb_s_elg_sparse_mat + type, extends(psb_s_ell_sparse_mat) :: psb_s_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_s_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => s_elg_get_fmt - procedure, pass(a) :: sizeof => s_elg_sizeof - procedure, pass(a) :: vect_mv => psb_s_elg_vect_mv - procedure, pass(a) :: csmm => psb_s_elg_csmm - procedure, pass(a) :: csmv => psb_s_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_s_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_s_elg_scals - procedure, pass(a) :: scalv => psb_s_elg_scal - procedure, pass(a) :: reallocate_nz => psb_s_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_elg_allocate_mnnz - procedure, pass(a) :: reinit => s_elg_reinit + procedure, nopass :: get_fmt => s_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_s_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_cuda_elg_scals + procedure, pass(a) :: scalv => psb_s_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => s_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_elg_from_fmt - procedure, pass(a) :: free => s_elg_free - procedure, pass(a) :: mold => psb_s_elg_mold - procedure, pass(a) :: csput_a => psb_s_elg_csput_a - procedure, pass(a) :: csput_v => psb_s_elg_csput_v - procedure, pass(a) :: is_host => s_elg_is_host - procedure, pass(a) :: is_dev => s_elg_is_dev - procedure, pass(a) :: is_sync => s_elg_is_sync - procedure, pass(a) :: set_host => s_elg_set_host - procedure, pass(a) :: set_dev => s_elg_set_dev - procedure, pass(a) :: set_sync => s_elg_set_sync - procedure, pass(a) :: sync => s_elg_sync - procedure, pass(a) :: from_gpu => psb_s_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_s_elg_to_gpu - procedure, pass(a) :: asb => psb_s_elg_asb - final :: s_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => s_cuda_elg_free + procedure, pass(a) :: mold => psb_s_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_s_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_s_cuda_elg_csput_v + procedure, pass(a) :: is_host => s_cuda_elg_is_host + procedure, pass(a) :: is_dev => s_cuda_elg_is_dev + procedure, pass(a) :: is_sync => s_cuda_elg_is_sync + procedure, pass(a) :: set_host => s_cuda_elg_set_host + procedure, pass(a) :: set_dev => s_cuda_elg_set_dev + procedure, pass(a) :: set_sync => s_cuda_elg_set_sync + procedure, pass(a) :: sync => s_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_s_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_s_cuda_elg_asb + final :: s_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_s_elg_mold - procedure, pass(a) :: asb => psb_s_elg_asb + procedure, pass(a) :: mold => psb_s_cuda_elg_mold + procedure, pass(a) :: asb => psb_s_cuda_elg_asb #endif - end type psb_s_elg_sparse_mat + end type psb_s_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: s_elg_get_nzeros, s_elg_free, s_elg_get_fmt, & - & s_elg_get_size, s_elg_sizeof, s_elg_get_nz_row, s_elg_sync + private :: s_cuda_elg_get_nzeros, s_cuda_elg_free, s_cuda_elg_get_fmt, & + & s_cuda_elg_get_size, s_cuda_elg_sizeof, s_cuda_elg_get_nz_row, s_cuda_elg_sync interface - subroutine psb_s_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_vect_mv + end subroutine psb_s_cuda_elg_vect_mv end interface interface - subroutine psb_s_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_elg_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_inner_vect_sv + end subroutine psb_s_cuda_elg_inner_vect_sv end interface interface - subroutine psb_s_elg_reallocate_nz(nz,a) - import :: psb_s_elg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_elg_reallocate_nz(nz,a) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_reallocate_nz + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_reallocate_nz end interface interface - subroutine psb_s_elg_allocate_mnnz(m,n,a,nz) - import :: psb_s_elg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_elg_allocate_mnnz + end subroutine psb_s_cuda_elg_allocate_mnnz end interface interface - subroutine psb_s_elg_mold(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_mold(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_mold + end subroutine psb_s_cuda_elg_mold end interface interface - subroutine psb_s_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_csput_a + end subroutine psb_s_cuda_elg_csput_a end interface interface - subroutine psb_s_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_s_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& + subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& & psb_i_base_vect_type - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_csput_v + end subroutine psb_s_cuda_elg_csput_v end interface interface - subroutine psb_s_elg_from_gpu(a,info) - import :: psb_s_elg_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_from_gpu(a,info) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_from_gpu + end subroutine psb_s_cuda_elg_from_gpu end interface interface - subroutine psb_s_elg_to_gpu(a,info, nzrm) - import :: psb_s_elg_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_elg_to_gpu + end subroutine psb_s_cuda_elg_to_gpu end interface interface - subroutine psb_s_cp_elg_from_coo(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_elg_from_coo + end subroutine psb_s_cuda_cp_elg_from_coo end interface interface - subroutine psb_s_cp_elg_from_fmt(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_elg_from_fmt + end subroutine psb_s_cuda_cp_elg_from_fmt end interface interface - subroutine psb_s_mv_elg_from_coo(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_elg_from_coo + end subroutine psb_s_cuda_mv_elg_from_coo end interface interface - subroutine psb_s_mv_elg_from_fmt(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_elg_from_fmt + end subroutine psb_s_cuda_mv_elg_from_fmt end interface interface - subroutine psb_s_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_csmv + end subroutine psb_s_cuda_elg_csmv end interface interface - subroutine psb_s_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_elg_csmm + end subroutine psb_s_cuda_elg_csmm end interface interface - subroutine psb_s_elg_scal(d,a,info, side) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_scal(d,a,info, side) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_elg_scal + end subroutine psb_s_cuda_elg_scal end interface interface - subroutine psb_s_elg_scals(d,a,info) - import :: psb_s_elg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_elg_scals(d,a,info) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_scals + end subroutine psb_s_cuda_elg_scals end interface interface - subroutine psb_s_elg_asb(a) - import :: psb_s_elg_sparse_mat - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_asb + subroutine psb_s_cuda_elg_asb(a) + import :: psb_s_cuda_elg_sparse_mat + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function s_elg_sizeof(a) result(res) + function s_cuda_elg_sizeof(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_elg_sizeof + end function s_cuda_elg_sizeof - function s_elg_get_fmt() result(res) + function s_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function s_elg_get_fmt + end function s_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine s_elg_reinit(a,clear) + subroutine s_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine s_elg_reinit + end subroutine s_cuda_elg_reinit - subroutine s_elg_free(a) + subroutine s_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine s_elg_free + end subroutine s_cuda_elg_free - subroutine s_elg_sync(a) + subroutine s_cuda_elg_sync(a) implicit none - class(psb_s_elg_sparse_mat), target, intent(in) :: a - class(psb_s_elg_sparse_mat), pointer :: tmpa + class(psb_s_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine s_elg_sync + end subroutine s_cuda_elg_sync - subroutine s_elg_set_host(a) + subroutine s_cuda_elg_set_host(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine s_elg_set_host + end subroutine s_cuda_elg_set_host - subroutine s_elg_set_dev(a) + subroutine s_cuda_elg_set_dev(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine s_elg_set_dev + end subroutine s_cuda_elg_set_dev - subroutine s_elg_set_sync(a) + subroutine s_cuda_elg_set_sync(a) implicit none - class(psb_s_elg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine s_elg_set_sync + end subroutine s_cuda_elg_set_sync - function s_elg_is_dev(a) result(res) + function s_cuda_elg_is_dev(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function s_elg_is_dev + end function s_cuda_elg_is_dev - function s_elg_is_host(a) result(res) + function s_cuda_elg_is_host(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function s_elg_is_host + end function s_cuda_elg_is_host - function s_elg_is_sync(a) result(res) + function s_cuda_elg_is_sync(a) result(res) implicit none - class(psb_s_elg_sparse_mat), intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function s_elg_is_sync + end function s_cuda_elg_is_sync - subroutine s_elg_finalize(a) + subroutine s_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_s_elg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_elg_finalize + end subroutine s_cuda_elg_finalize #else interface - subroutine psb_s_elg_asb(a) - import :: psb_s_elg_sparse_mat - class(psb_s_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_elg_asb + subroutine psb_s_cuda_elg_asb(a) + import :: psb_s_cuda_elg_sparse_mat + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_asb end interface interface - subroutine psb_s_elg_mold(a,b,info) - import :: psb_s_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_elg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_elg_mold(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_elg_mold + end subroutine psb_s_cuda_elg_mold end interface #endif -end module psb_s_elg_mat_mod +end module psb_s_cuda_elg_mat_mod diff --git a/cuda/psb_s_hdiag_mat_mod.F90 b/cuda/psb_s_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_s_hdiag_mat_mod.F90 rename to cuda/psb_s_cuda_hdiag_mat_mod.F90 index be0ef2b2..0a66ff09 100644 --- a/cuda/psb_s_hdiag_mat_mod.F90 +++ b/cuda/psb_s_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_s_hdiag_mat_mod +module psb_s_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_s_hdia_mat_mod - type, extends(psb_s_hdia_sparse_mat) :: psb_s_hdiag_sparse_mat + type, extends(psb_s_hdia_sparse_mat) :: psb_s_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => s_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => s_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_s_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_s_hdiag_csmm - procedure, pass(a) :: csmv => psb_s_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_s_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_s_hdiag_scals - ! procedure, pass(a) :: scalv => psb_s_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_s_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_s_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => s_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_s_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_s_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_s_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_s_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_s_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdiag_from_fmt - procedure, pass(a) :: free => s_hdiag_free - procedure, pass(a) :: mold => psb_s_hdiag_mold - procedure, pass(a) :: to_gpu => psb_s_hdiag_to_gpu - final :: s_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => s_cuda_hdiag_free + procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_hdiag_to_gpu + final :: s_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_s_hdiag_mold + procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold #endif - end type psb_s_hdiag_sparse_mat + end type psb_s_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: s_hdiag_get_nzeros, s_hdiag_free, s_hdiag_get_fmt, & - & s_hdiag_get_size, s_hdiag_sizeof, s_hdiag_get_nz_row + private :: s_cuda_hdiag_get_nzeros, s_cuda_hdiag_free, s_cuda_hdiag_get_fmt, & + & s_cuda_hdiag_get_size, s_cuda_hdiag_sizeof, s_cuda_hdiag_get_nz_row interface - subroutine psb_s_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hdiag_vect_mv + end subroutine psb_s_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_s_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_s_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type -!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta !!$ class(psb_s_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_hdiag_inner_vect_sv +!!$ end subroutine psb_s_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_s_hdiag_reallocate_nz +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_s_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_s_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_s_hdiag_allocate_mnnz +!!$ end subroutine psb_s_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_s_hdiag_mold(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_mold(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_mold + end subroutine psb_s_cuda_hdiag_mold end interface interface - subroutine psb_s_hdiag_to_gpu(a,info) - import :: psb_s_hdiag_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hdiag_to_gpu(a,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_to_gpu + end subroutine psb_s_cuda_hdiag_to_gpu end interface interface - subroutine psb_s_cp_hdiag_from_coo(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hdiag_from_coo + end subroutine psb_s_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_s_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_s_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_cp_hdiag_from_fmt +!!$ end subroutine psb_s_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_s_mv_hdiag_from_coo(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hdiag_from_coo + end subroutine psb_s_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_s_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_s_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_mv_hdiag_from_fmt +!!$ end subroutine psb_s_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_s_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hdiag_csmv + end subroutine psb_s_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_s_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_s_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a !!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) !!$ real(psb_spk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_s_hdiag_csmm +!!$ end subroutine psb_s_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_scal(d,a,info, side) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_s_hdiag_scal +!!$ end subroutine psb_s_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_s_hdiag_scals(d,a,info) -!!$ import :: psb_s_hdiag_sparse_mat, psb_spk_, psb_ipk_ -!!$ class(psb_s_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_s_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ real(psb_spk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_s_hdiag_scals +!!$ end subroutine psb_s_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function s_hdiag_get_fmt() result(res) + function s_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function s_hdiag_get_fmt + end function s_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine s_hdiag_free(a) + subroutine s_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine s_hdiag_free + end subroutine s_cuda_hdiag_free - subroutine s_hdiag_finalize(a) + subroutine s_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_s_hdiag_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_s_hdia_sparse_mat%free() return - end subroutine s_hdiag_finalize + end subroutine s_cuda_hdiag_finalize #else interface - subroutine psb_s_hdiag_mold(a,b,info) - import :: psb_s_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hdiag_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hdiag_mold(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hdiag_mold + end subroutine psb_s_cuda_hdiag_mold end interface #endif -end module psb_s_hdiag_mat_mod +end module psb_s_cuda_hdiag_mat_mod diff --git a/cuda/psb_s_hlg_mat_mod.F90 b/cuda/psb_s_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_s_hlg_mat_mod.F90 rename to cuda/psb_s_cuda_hlg_mat_mod.F90 index 8f896e4b..81b94e5d 100644 --- a/cuda/psb_s_hlg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_s_hlg_mat_mod +module psb_s_cuda_hlg_mat_mod use iso_c_binding use psb_s_mat_mod @@ -41,7 +41,7 @@ module psb_s_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_hll_sparse_mat) :: psb_s_hlg_sparse_mat + type, extends(psb_s_hll_sparse_mat) :: psb_s_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_s_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => s_hlg_get_fmt - procedure, pass(a) :: sizeof => s_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_s_hlg_vect_mv - procedure, pass(a) :: csmm => psb_s_hlg_csmm - procedure, pass(a) :: csmv => psb_s_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_s_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_s_hlg_scals - procedure, pass(a) :: scalv => psb_s_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_s_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_hlg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_s_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_s_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_hlg_from_fmt - procedure, pass(a) :: free => s_hlg_free - procedure, pass(a) :: mold => psb_s_hlg_mold - procedure, pass(a) :: is_host => s_hlg_is_host - procedure, pass(a) :: is_dev => s_hlg_is_dev - procedure, pass(a) :: is_sync => s_hlg_is_sync - procedure, pass(a) :: set_host => s_hlg_set_host - procedure, pass(a) :: set_dev => s_hlg_set_dev - procedure, pass(a) :: set_sync => s_hlg_set_sync - procedure, pass(a) :: sync => s_hlg_sync - procedure, pass(a) :: from_gpu => psb_s_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_s_hlg_to_gpu - final :: s_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => s_cuda_hlg_free + procedure, pass(a) :: mold => psb_s_cuda_hlg_mold + procedure, pass(a) :: is_host => s_cuda_hlg_is_host + procedure, pass(a) :: is_dev => s_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => s_cuda_hlg_is_sync + procedure, pass(a) :: set_host => s_cuda_hlg_set_host + procedure, pass(a) :: set_dev => s_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => s_cuda_hlg_set_sync + procedure, pass(a) :: sync => s_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_s_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_cuda_hlg_to_gpu + final :: s_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_s_hlg_mold + procedure, pass(a) :: mold => psb_s_cuda_hlg_mold #endif - end type psb_s_hlg_sparse_mat + end type psb_s_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: s_hlg_get_nzeros, s_hlg_free, s_hlg_get_fmt, & - & s_hlg_get_size, s_hlg_sizeof, s_hlg_get_nz_row + private :: s_cuda_hlg_get_nzeros, s_cuda_hlg_free, s_cuda_hlg_get_fmt, & + & s_cuda_hlg_get_size, s_cuda_hlg_sizeof, s_cuda_hlg_get_nz_row interface - subroutine psb_s_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_vect_mv + end subroutine psb_s_cuda_hlg_vect_mv end interface interface - subroutine psb_s_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_s_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_inner_vect_sv + end subroutine psb_s_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_s_hlg_reallocate_nz(nz,a) - import :: psb_s_hlg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_s_hlg_reallocate_nz + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_hlg_reallocate_nz end interface interface - subroutine psb_s_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_s_hlg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_hlg_allocate_mnnz + end subroutine psb_s_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_s_hlg_mold(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_mold(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_mold + end subroutine psb_s_cuda_hlg_mold end interface interface - subroutine psb_s_hlg_from_gpu(a,info) - import :: psb_s_hlg_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_from_gpu(a,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_from_gpu + end subroutine psb_s_cuda_hlg_from_gpu end interface interface - subroutine psb_s_hlg_to_gpu(a,info, nzrm) - import :: psb_s_hlg_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_hlg_to_gpu + end subroutine psb_s_cuda_hlg_to_gpu end interface interface - subroutine psb_s_cp_hlg_from_coo(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hlg_from_coo + end subroutine psb_s_cuda_cp_hlg_from_coo end interface interface - subroutine psb_s_cp_hlg_from_fmt(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hlg_from_fmt + end subroutine psb_s_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_s_mv_hlg_from_coo(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hlg_from_coo + end subroutine psb_s_cuda_mv_hlg_from_coo end interface interface - subroutine psb_s_mv_hlg_from_fmt(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hlg_from_fmt + end subroutine psb_s_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_s_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_csmv + end subroutine psb_s_cuda_hlg_csmv end interface interface - subroutine psb_s_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hlg_csmm + end subroutine psb_s_cuda_hlg_csmm end interface interface - subroutine psb_s_hlg_scal(d,a,info, side) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_scal(d,a,info, side) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_hlg_scal + end subroutine psb_s_cuda_hlg_scal end interface interface - subroutine psb_s_hlg_scals(d,a,info) - import :: psb_s_hlg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hlg_scals(d,a,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_scals + end subroutine psb_s_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function s_hlg_sizeof(a) result(res) + function s_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_hlg_sizeof + end function s_cuda_hlg_sizeof - function s_hlg_get_fmt() result(res) + function s_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function s_hlg_get_fmt + end function s_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine s_hlg_free(a) + subroutine s_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine s_hlg_free + end subroutine s_cuda_hlg_free - subroutine s_hlg_sync(a) + subroutine s_cuda_hlg_sync(a) implicit none - class(psb_s_hlg_sparse_mat), target, intent(in) :: a - class(psb_s_hlg_sparse_mat), pointer :: tmpa + class(psb_s_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine s_hlg_sync + end subroutine s_cuda_hlg_sync - subroutine s_hlg_set_host(a) + subroutine s_cuda_hlg_set_host(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine s_hlg_set_host + end subroutine s_cuda_hlg_set_host - subroutine s_hlg_set_dev(a) + subroutine s_cuda_hlg_set_dev(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine s_hlg_set_dev + end subroutine s_cuda_hlg_set_dev - subroutine s_hlg_set_sync(a) + subroutine s_cuda_hlg_set_sync(a) implicit none - class(psb_s_hlg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine s_hlg_set_sync + end subroutine s_cuda_hlg_set_sync - function s_hlg_is_dev(a) result(res) + function s_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function s_hlg_is_dev + end function s_cuda_hlg_is_dev - function s_hlg_is_host(a) result(res) + function s_cuda_hlg_is_host(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function s_hlg_is_host + end function s_cuda_hlg_is_host - function s_hlg_is_sync(a) result(res) + function s_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_s_hlg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function s_hlg_is_sync + end function s_cuda_hlg_is_sync - subroutine s_hlg_finalize(a) + subroutine s_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_s_hlg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine s_hlg_finalize + end subroutine s_cuda_hlg_finalize #else interface - subroutine psb_s_hlg_mold(a,b,info) - import :: psb_s_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hlg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hlg_mold(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hlg_mold + end subroutine psb_s_cuda_hlg_mold end interface #endif -end module psb_s_hlg_mat_mod +end module psb_s_cuda_hlg_mat_mod diff --git a/cuda/psb_s_hybg_mat_mod.F90 b/cuda/psb_s_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_s_hybg_mat_mod.F90 rename to cuda/psb_s_cuda_hybg_mat_mod.F90 index 5a8e0e5d..ae76aac1 100644 --- a/cuda/psb_s_hybg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_s_hybg_mat_mod +module psb_s_cuda_hybg_mat_mod use iso_c_binding use psb_s_mat_mod use cusparse_mod - type, extends(psb_s_csr_sparse_mat) :: psb_s_hybg_sparse_mat + type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_s_hybg_mat_mod type(s_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => s_hybg_get_fmt - procedure, pass(a) :: sizeof => s_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_s_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_hybg_csmm - procedure, pass(a) :: csmv => psb_s_hybg_csmv - procedure, pass(a) :: scals => psb_s_hybg_scals - procedure, pass(a) :: scalv => psb_s_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_s_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_hybg_allocate_mnnz + procedure, nopass :: get_fmt => s_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_s_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_s_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_s_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_mv_hybg_from_fmt - procedure, pass(a) :: free => s_hybg_free - procedure, pass(a) :: mold => psb_s_hybg_mold - procedure, pass(a) :: to_gpu => psb_s_hybg_to_gpu - final :: s_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => s_cuda_hybg_free + procedure, pass(a) :: mold => psb_s_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_hybg_to_gpu + final :: s_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_s_hybg_mold + procedure, pass(a) :: mold => psb_s_cuda_hybg_mold #endif - end type psb_s_hybg_sparse_mat + end type psb_s_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: s_hybg_get_nzeros, s_hybg_free, s_hybg_get_fmt, & - & s_hybg_get_size, s_hybg_sizeof, s_hybg_get_nz_row + private :: s_cuda_hybg_get_nzeros, s_cuda_hybg_free, s_cuda_hybg_get_fmt, & + & s_cuda_hybg_get_size, s_cuda_hybg_sizeof, s_cuda_hybg_get_nz_row interface - subroutine psb_s_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_inner_vect_sv + end subroutine psb_s_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_s_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_vect_mv + end subroutine psb_s_cuda_hybg_vect_mv end interface interface - subroutine psb_s_hybg_reallocate_nz(nz,a) - import :: psb_s_hybg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_s_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_s_hybg_reallocate_nz + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_hybg_reallocate_nz end interface interface - subroutine psb_s_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_s_hybg_sparse_mat, psb_ipk_ + subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_s_hybg_allocate_mnnz + end subroutine psb_s_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_s_hybg_mold(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_mold(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_mold + end subroutine psb_s_cuda_hybg_mold end interface interface - subroutine psb_s_hybg_to_gpu(a,info, nzrm) - import :: psb_s_hybg_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_s_hybg_to_gpu + end subroutine psb_s_cuda_hybg_to_gpu end interface interface - subroutine psb_s_cp_hybg_from_coo(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hybg_from_coo + end subroutine psb_s_cuda_cp_hybg_from_coo end interface interface - subroutine psb_s_cp_hybg_from_fmt(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cp_hybg_from_fmt + end subroutine psb_s_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_s_mv_hybg_from_coo(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hybg_from_coo + end subroutine psb_s_cuda_mv_hybg_from_coo end interface interface - subroutine psb_s_mv_hybg_from_fmt(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_mv_hybg_from_fmt + end subroutine psb_s_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_s_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) real(psb_spk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_csmv + end subroutine psb_s_cuda_hybg_csmv end interface interface - subroutine psb_s_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) real(psb_spk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_s_hybg_csmm + end subroutine psb_s_cuda_hybg_csmm end interface interface - subroutine psb_s_hybg_scal(d,a,info,side) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_scal(d,a,info,side) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_s_hybg_scal + end subroutine psb_s_cuda_hybg_scal end interface interface - subroutine psb_s_hybg_scals(d,a,info) - import :: psb_s_hybg_sparse_mat, psb_spk_, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(inout) :: a + subroutine psb_s_cuda_hybg_scals(d,a,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_scals + end subroutine psb_s_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function s_hybg_sizeof(a) result(res) + function s_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_s_hybg_sparse_mat), intent(in) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + psb_sizeof_sp * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function s_hybg_sizeof + end function s_cuda_hybg_sizeof - function s_hybg_get_fmt() result(res) + function s_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function s_hybg_get_fmt + end function s_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine s_hybg_free(a) + subroutine s_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_s_hybg_sparse_mat), intent(inout) :: a + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_s_csr_sparse_mat%free() return - end subroutine s_hybg_free + end subroutine s_cuda_hybg_free - subroutine s_hybg_finalize(a) + subroutine s_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_s_hybg_sparse_mat), intent(inout) :: a + type(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine s_hybg_finalize + end subroutine s_cuda_hybg_finalize #else interface - subroutine psb_s_hybg_mold(a,b,info) - import :: psb_s_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_hybg_sparse_mat), intent(in) :: a + subroutine psb_s_cuda_hybg_mold(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_hybg_mold + end subroutine psb_s_cuda_hybg_mold end interface #endif -end module psb_s_hybg_mat_mod +end module psb_s_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_s_gpu_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_s_gpu_vect_mod.F90 rename to cuda/psb_s_cuda_vect_mod.F90 index 1371db53..e19c980a 100644 --- a/cuda/psb_s_gpu_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_s_gpu_vect_mod +module psb_s_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_s_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_s_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_s_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_base_vect_type) :: psb_s_vect_gpu + type, extends(psb_s_base_vect_type) :: psb_s_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_s_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => s_gpu_get_nrows - procedure, nopass :: get_fmt => s_gpu_get_fmt - - procedure, pass(x) :: all => s_gpu_all - procedure, pass(x) :: zero => s_gpu_zero - procedure, pass(x) :: asb_m => s_gpu_asb_m - procedure, pass(x) :: sync => s_gpu_sync - procedure, pass(x) :: sync_space => s_gpu_sync_space - procedure, pass(x) :: bld_x => s_gpu_bld_x - procedure, pass(x) :: bld_mn => s_gpu_bld_mn - procedure, pass(x) :: free => s_gpu_free - procedure, pass(x) :: ins_a => s_gpu_ins_a - procedure, pass(x) :: ins_v => s_gpu_ins_v - procedure, pass(x) :: is_host => s_gpu_is_host - procedure, pass(x) :: is_dev => s_gpu_is_dev - procedure, pass(x) :: is_sync => s_gpu_is_sync - procedure, pass(x) :: set_host => s_gpu_set_host - procedure, pass(x) :: set_dev => s_gpu_set_dev - procedure, pass(x) :: set_sync => s_gpu_set_sync - procedure, pass(x) :: set_scal => s_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => s_gpu_set_vect - procedure, pass(x) :: gthzv_x => s_gpu_gthzv_x - procedure, pass(y) :: sctb => s_gpu_sctb - procedure, pass(y) :: sctb_x => s_gpu_sctb_x - procedure, pass(x) :: gthzbuf => s_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => s_gpu_sctb_buf - procedure, pass(x) :: new_buffer => s_gpu_new_buffer - procedure, nopass :: device_wait => s_gpu_device_wait - procedure, pass(x) :: free_buffer => s_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => s_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => s_gpu_dot_v - procedure, pass(x) :: dot_a => s_gpu_dot_a - procedure, pass(y) :: axpby_v => s_gpu_axpby_v - procedure, pass(y) :: axpby_a => s_gpu_axpby_a - procedure, pass(y) :: mlt_v => s_gpu_mlt_v - procedure, pass(y) :: mlt_a => s_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => s_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => s_gpu_mlt_v_2 - procedure, pass(x) :: scal => s_gpu_scal - procedure, pass(x) :: nrm2 => s_gpu_nrm2 - procedure, pass(x) :: amax => s_gpu_amax - procedure, pass(x) :: asum => s_gpu_asum - procedure, pass(x) :: absval1 => s_gpu_absval1 - procedure, pass(x) :: absval2 => s_gpu_absval2 - - final :: s_gpu_vect_finalize + procedure, pass(x) :: get_nrows => s_cuda_get_nrows + procedure, nopass :: get_fmt => s_cuda_get_fmt + + procedure, pass(x) :: all => s_cuda_all + procedure, pass(x) :: zero => s_cuda_zero + procedure, pass(x) :: asb_m => s_cuda_asb_m + procedure, pass(x) :: sync => s_cuda_sync + procedure, pass(x) :: sync_space => s_cuda_sync_space + procedure, pass(x) :: bld_x => s_cuda_bld_x + procedure, pass(x) :: bld_mn => s_cuda_bld_mn + procedure, pass(x) :: free => s_cuda_free + procedure, pass(x) :: ins_a => s_cuda_ins_a + procedure, pass(x) :: ins_v => s_cuda_ins_v + procedure, pass(x) :: is_host => s_cuda_is_host + procedure, pass(x) :: is_dev => s_cuda_is_dev + procedure, pass(x) :: is_sync => s_cuda_is_sync + procedure, pass(x) :: set_host => s_cuda_set_host + procedure, pass(x) :: set_dev => s_cuda_set_dev + procedure, pass(x) :: set_sync => s_cuda_set_sync + procedure, pass(x) :: set_scal => s_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => s_cuda_set_vect + procedure, pass(x) :: gthzv_x => s_cuda_gthzv_x + procedure, pass(y) :: sctb => s_cuda_sctb + procedure, pass(y) :: sctb_x => s_cuda_sctb_x + procedure, pass(x) :: gthzbuf => s_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => s_cuda_sctb_buf + procedure, pass(x) :: new_buffer => s_cuda_new_buffer + procedure, nopass :: device_wait => s_cuda_device_wait + procedure, pass(x) :: free_buffer => s_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => s_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => s_cuda_dot_v + procedure, pass(x) :: dot_a => s_cuda_dot_a + procedure, pass(y) :: axpby_v => s_cuda_axpby_v + procedure, pass(y) :: axpby_a => s_cuda_axpby_a + procedure, pass(y) :: mlt_v => s_cuda_mlt_v + procedure, pass(y) :: mlt_a => s_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => s_cuda_mlt_v_2 + procedure, pass(x) :: scal => s_cuda_scal + procedure, pass(x) :: nrm2 => s_cuda_nrm2 + procedure, pass(x) :: amax => s_cuda_amax + procedure, pass(x) :: asum => s_cuda_asum + procedure, pass(x) :: absval1 => s_cuda_absval1 + procedure, pass(x) :: absval2 => s_cuda_absval2 + + final :: s_cuda_vect_finalize #endif - end type psb_s_vect_gpu + end type psb_s_vect_cuda - public :: psb_s_vect_gpu_ + public :: psb_s_vect_cuda_ private :: constructor - interface psb_s_vect_gpu_ + interface psb_s_vect_cuda_ module procedure constructor - end interface psb_s_vect_gpu_ + end interface psb_s_vect_cuda_ contains function constructor(x) result(this) real(psb_spk_) :: x(:) - type(psb_s_vect_gpu) :: this + type(psb_s_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine s_gpu_device_wait() + subroutine s_cuda_device_wait() call psb_cudaSync() - end subroutine s_gpu_device_wait + end subroutine s_cuda_device_wait - subroutine s_gpu_new_buffer(n,x,info) + subroutine s_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine s_gpu_gthzv_x + end subroutine s_cuda_gthzv_x - subroutine s_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine s_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_s_vect_gpu) :: x + class(psb_s_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine s_gpu_gthzbuf + end subroutine s_cuda_gthzbuf - subroutine s_gpu_sctb(n,idx,x,beta,y) + subroutine s_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta, x(:) - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_s_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine s_gpu_sctb + end subroutine s_cuda_sctb - subroutine s_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine s_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta, x(:) - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine s_gpu_sctb_x + end subroutine s_cuda_sctb_x - subroutine s_gpu_sctb_buf(i,n,idx,beta,y) + subroutine s_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta - class(psb_s_vect_gpu) :: y + class(psb_s_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine s_gpu_sctb_buf + end subroutine s_cuda_sctb_buf - subroutine s_gpu_bld_x(x,this) + subroutine s_cuda_bld_x(x,this) use psb_base_mod real(psb_spk_), intent(in) :: this(:) - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_bld_x',& + call psb_errpush(info,'s_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine s_gpu_bld_x + end subroutine s_cuda_bld_x - subroutine s_gpu_bld_mn(x,n) + subroutine s_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'s_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'s_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine s_gpu_bld_mn + end subroutine s_cuda_bld_mn - subroutine s_gpu_set_host(x) + subroutine s_cuda_set_host(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine s_gpu_set_host + end subroutine s_cuda_set_host - subroutine s_gpu_set_dev(x) + subroutine s_cuda_set_dev(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine s_gpu_set_dev + end subroutine s_cuda_set_dev - subroutine s_gpu_set_sync(x) + subroutine s_cuda_set_sync(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine s_gpu_set_sync + end subroutine s_cuda_set_sync - function s_gpu_is_dev(x) result(res) + function s_cuda_is_dev(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function s_gpu_is_dev + end function s_cuda_is_dev - function s_gpu_is_host(x) result(res) + function s_cuda_is_host(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function s_gpu_is_host + end function s_cuda_is_host - function s_gpu_is_sync(x) result(res) + function s_cuda_is_sync(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function s_gpu_is_sync + end function s_cuda_is_sync - function s_gpu_get_nrows(x) result(res) + function s_cuda_get_nrows(x) result(res) implicit none - class(psb_s_vect_gpu), intent(in) :: x + class(psb_s_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function s_gpu_get_nrows + end function s_cuda_get_nrows - function s_gpu_get_fmt() result(res) + function s_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'sGPU' - end function s_gpu_get_fmt + end function s_cuda_get_fmt - subroutine s_gpu_all(n, x, info) + subroutine s_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_s_vect_gpu), intent(out) :: x + class(psb_s_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_all',& + call psb_errpush(info,'s_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine s_gpu_all + end subroutine s_cuda_all - subroutine s_gpu_zero(x) + subroutine s_cuda_zero(x) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=szero call x%set_host() - end subroutine s_gpu_zero + end subroutine s_cuda_zero - subroutine s_gpu_asb_m(n, x, info) + subroutine s_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine s_gpu_asb_m + end subroutine s_cuda_asb_m - subroutine s_gpu_sync_space(x,info) + subroutine s_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine s_gpu_sync_space + end subroutine s_cuda_sync_space - subroutine s_gpu_sync(x) + subroutine s_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'s_gpu_sync') + call psb_errpush(info,'s_cuda_sync') end if - end subroutine s_gpu_sync + end subroutine s_cuda_sync - subroutine s_gpu_free(x, info) + subroutine s_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine s_gpu_free + end subroutine s_cuda_free - subroutine s_gpu_set_scal(x,val,first,last) - class(psb_s_vect_gpu), intent(inout) :: x + subroutine s_cuda_set_scal(x,val,first,last) + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine s_gpu_set_scal + end subroutine s_cuda_set_scal !!$ -!!$ subroutine s_gpu_set_vect(x,val) -!!$ class(psb_s_vect_gpu), intent(inout) :: x +!!$ subroutine s_cuda_set_vect(x,val) +!!$ class(psb_s_vect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_s_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine s_gpu_set_vect +!!$ end subroutine s_cuda_set_vect - function s_gpu_dot_v(n,x,y) result(res) + function s_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_s_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'s_gpu_dot_v') + call psb_errpush(info,'s_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function s_gpu_dot_v + end function s_cuda_dot_v - function s_gpu_dot_a(n,x,y) result(res) + function s_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function s_gpu_dot_a + end function s_cuda_dot_a - subroutine s_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine s_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y real(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) ! Do something different here if ((beta /= szero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine s_gpu_axpby_v + end subroutine s_cuda_axpby_v - subroutine s_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine s_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m real(psb_spk_), intent(in) :: x(:) - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y real(psb_spk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine s_gpu_axpby_a + end subroutine s_cuda_axpby_a - subroutine s_gpu_mlt_v(x, y, info) + subroutine s_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_s_base_vect_type), intent(inout) :: x - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine s_gpu_mlt_v + end subroutine s_cuda_mlt_v - subroutine s_gpu_mlt_a(x, y, info) + subroutine s_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none real(psb_spk_), intent(in) :: x(:) - class(psb_s_vect_gpu), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_s_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine s_gpu_mlt_a + end subroutine s_cuda_mlt_a - subroutine s_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine s_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none real(psb_spk_), intent(in) :: alpha,beta real(psb_spk_), intent(in) :: x(:) real(psb_spk_), intent(in) :: y(:) - class(psb_s_vect_gpu), intent(inout) :: z + class(psb_s_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine s_gpu_mlt_a_2 + end subroutine s_cuda_mlt_a_2 - subroutine s_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine s_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none real(psb_spk_), intent(in) :: alpha,beta class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y - class(psb_s_vect_gpu), intent(inout) :: z + class(psb_s_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) select type (yy => y) - type is (psb_s_vect_gpu) + type is (psb_s_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= szero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine s_gpu_mlt_v_2 + end subroutine s_cuda_mlt_v_2 - subroutine s_gpu_scal(alpha, x) + subroutine s_cuda_scal(alpha, x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x real(psb_spk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine s_gpu_scal + end subroutine s_cuda_scal - function s_gpu_nrm2(n,x) result(res) + function s_cuda_nrm2(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDevice(res,n,x%deviceVect) - end function s_gpu_nrm2 + end function s_cuda_nrm2 - function s_gpu_amax(n,x) result(res) + function s_cuda_amax(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDevice(res,n,x%deviceVect) - end function s_gpu_amax + end function s_cuda_amax - function s_gpu_asum(n,x) result(res) + function s_cuda_asum(n,x) result(res) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDevice(res,n,x%deviceVect) - end function s_gpu_asum + end function s_cuda_asum - subroutine s_gpu_absval1(x) + subroutine s_cuda_absval1(x) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,sone,x%deviceVect) - end subroutine s_gpu_absval1 + end subroutine s_cuda_absval1 - subroutine s_gpu_absval2(x,y) + subroutine s_cuda_absval2(x,y) implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,sone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_s_base_vect_type%absval(y) end select - end subroutine s_gpu_absval2 + end subroutine s_cuda_absval2 - subroutine s_gpu_vect_finalize(x) + subroutine s_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_s_vect_gpu), intent(inout) :: x + type(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine s_gpu_vect_finalize + end subroutine s_cuda_vect_finalize - subroutine s_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine s_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_s_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_s_vect_gpu) + class is (psb_s_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceFloat(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine s_gpu_ins_v + end subroutine s_cuda_ins_v - subroutine s_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine s_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_vect_gpu), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_spk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_s_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine s_gpu_ins_a + end subroutine s_cuda_ins_a #endif -end module psb_s_gpu_vect_mod +end module psb_s_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_s_gpu_vect_mod -module psb_s_gpu_multivect_mod +module psb_s_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_s_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_s_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_s_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_s_base_multivect_type) :: psb_s_multivect_gpu + type, extends(psb_s_base_multivect_type) :: psb_s_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_s_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => s_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => s_gpu_multi_get_ncols - procedure, nopass :: get_fmt => s_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => s_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => s_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => s_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => s_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => s_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => s_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => s_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => s_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => s_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => s_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => s_gpu_multi_amax -!!$ procedure, pass(x) :: asum => s_gpu_multi_asum - procedure, pass(x) :: all => s_gpu_multi_all - procedure, pass(x) :: zero => s_gpu_multi_zero - procedure, pass(x) :: asb => s_gpu_multi_asb - procedure, pass(x) :: sync => s_gpu_multi_sync - procedure, pass(x) :: sync_space => s_gpu_multi_sync_space - procedure, pass(x) :: bld_x => s_gpu_multi_bld_x - procedure, pass(x) :: bld_n => s_gpu_multi_bld_n - procedure, pass(x) :: free => s_gpu_multi_free - procedure, pass(x) :: ins => s_gpu_multi_ins - procedure, pass(x) :: is_host => s_gpu_multi_is_host - procedure, pass(x) :: is_dev => s_gpu_multi_is_dev - procedure, pass(x) :: is_sync => s_gpu_multi_is_sync - procedure, pass(x) :: set_host => s_gpu_multi_set_host - procedure, pass(x) :: set_dev => s_gpu_multi_set_dev - procedure, pass(x) :: set_sync => s_gpu_multi_set_sync - procedure, pass(x) :: set_scal => s_gpu_multi_set_scal - procedure, pass(x) :: set_vect => s_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => s_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => s_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => s_gpu_multi_sctb_x - final :: s_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => s_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => s_cuda_multi_get_ncols + procedure, nopass :: get_fmt => s_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => s_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => s_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => s_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => s_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => s_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => s_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => s_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => s_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => s_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => s_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => s_cuda_multi_amax +!!$ procedure, pass(x) :: asum => s_cuda_multi_asum + procedure, pass(x) :: all => s_cuda_multi_all + procedure, pass(x) :: zero => s_cuda_multi_zero + procedure, pass(x) :: asb => s_cuda_multi_asb + procedure, pass(x) :: sync => s_cuda_multi_sync + procedure, pass(x) :: sync_space => s_cuda_multi_sync_space + procedure, pass(x) :: bld_x => s_cuda_multi_bld_x + procedure, pass(x) :: bld_n => s_cuda_multi_bld_n + procedure, pass(x) :: free => s_cuda_multi_free + procedure, pass(x) :: ins => s_cuda_multi_ins + procedure, pass(x) :: is_host => s_cuda_multi_is_host + procedure, pass(x) :: is_dev => s_cuda_multi_is_dev + procedure, pass(x) :: is_sync => s_cuda_multi_is_sync + procedure, pass(x) :: set_host => s_cuda_multi_set_host + procedure, pass(x) :: set_dev => s_cuda_multi_set_dev + procedure, pass(x) :: set_sync => s_cuda_multi_set_sync + procedure, pass(x) :: set_scal => s_cuda_multi_set_scal + procedure, pass(x) :: set_vect => s_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => s_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => s_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => s_cuda_multi_sctb_x + final :: s_cuda_multi_vect_finalize #endif - end type psb_s_multivect_gpu + end type psb_s_multivect_cuda - public :: psb_s_multivect_gpu + public :: psb_s_multivect_cuda private :: constructor - interface psb_s_multivect_gpu + interface psb_s_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) real(psb_spk_) :: x(:,:) - type(psb_s_multivect_gpu) :: this + type(psb_s_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine s_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_spk_) :: y(:) -!!$ class(psb_s_multivect_gpu) :: x +!!$ class(psb_s_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine s_gpu_multi_gthzv_x +!!$ end subroutine s_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine s_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine s_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ real(psb_spk_) :: beta, x(:) -!!$ class(psb_s_multivect_gpu) :: y +!!$ class(psb_s_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_s_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine s_gpu_multi_sctb +!!$ end subroutine s_cuda_multi_sctb !!$ -!!$ subroutine s_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine s_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ real(psb_spk_) :: beta, x(:) -!!$ class(psb_s_multivect_gpu) :: y +!!$ class(psb_s_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine s_gpu_multi_sctb_x +!!$ end subroutine s_cuda_multi_sctb_x - subroutine s_gpu_multi_bld_x(x,this) + subroutine s_cuda_multi_bld_x(x,this) use psb_base_mod real(psb_spk_), intent(in) :: this(:,:) - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_multi_bld_x',& + call psb_errpush(info,'s_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine s_gpu_multi_bld_x + end subroutine s_cuda_multi_bld_x - subroutine s_gpu_multi_bld_n(x,m,n) + subroutine s_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'s_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'s_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine s_gpu_multi_bld_n + end subroutine s_cuda_multi_bld_n - subroutine s_gpu_multi_set_host(x) + subroutine s_cuda_multi_set_host(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine s_gpu_multi_set_host + end subroutine s_cuda_multi_set_host - subroutine s_gpu_multi_set_dev(x) + subroutine s_cuda_multi_set_dev(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine s_gpu_multi_set_dev + end subroutine s_cuda_multi_set_dev - subroutine s_gpu_multi_set_sync(x) + subroutine s_cuda_multi_set_sync(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine s_gpu_multi_set_sync + end subroutine s_cuda_multi_set_sync - function s_gpu_multi_is_dev(x) result(res) + function s_cuda_multi_is_dev(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function s_gpu_multi_is_dev + end function s_cuda_multi_is_dev - function s_gpu_multi_is_host(x) result(res) + function s_cuda_multi_is_host(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function s_gpu_multi_is_host + end function s_cuda_multi_is_host - function s_gpu_multi_is_sync(x) result(res) + function s_cuda_multi_is_sync(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function s_gpu_multi_is_sync + end function s_cuda_multi_is_sync - function s_gpu_multi_get_nrows(x) result(res) + function s_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function s_gpu_multi_get_nrows + end function s_cuda_multi_get_nrows - function s_gpu_multi_get_ncols(x) result(res) + function s_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_s_multivect_gpu), intent(in) :: x + class(psb_s_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function s_gpu_multi_get_ncols + end function s_cuda_multi_get_ncols - function s_gpu_multi_get_fmt() result(res) + function s_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'sGPU' - end function s_gpu_multi_get_fmt + end function s_cuda_multi_get_fmt -!!$ function s_gpu_multi_dot_v(n,x,y) result(res) +!!$ function s_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ class(psb_s_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_s_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'s_gpu_multi_dot_v') +!!$ call psb_errpush(info,'s_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function s_gpu_multi_dot_v +!!$ end function s_cuda_multi_dot_v !!$ -!!$ function s_gpu_multi_dot_a(n,x,y) result(res) +!!$ function s_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function s_gpu_multi_dot_a +!!$ end function s_cuda_multi_dot_a !!$ -!!$ subroutine s_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine s_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_s_base_multivect_type), intent(inout) :: x -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ real(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine s_gpu_multi_axpby_v +!!$ end subroutine s_cuda_multi_axpby_v !!$ -!!$ subroutine s_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine s_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ real(psb_spk_), intent(in) :: x(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ real(psb_spk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine s_gpu_multi_axpby_a +!!$ end subroutine s_cuda_multi_axpby_a !!$ -!!$ subroutine s_gpu_multi_mlt_v(x, y, info) +!!$ subroutine s_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_s_base_multivect_type), intent(inout) :: x -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine s_gpu_multi_mlt_v +!!$ end subroutine s_cuda_multi_mlt_v !!$ -!!$ subroutine s_gpu_multi_mlt_a(x, y, info) +!!$ subroutine s_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: x(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_s_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine s_gpu_multi_mlt_a +!!$ end subroutine s_cuda_multi_mlt_a !!$ -!!$ subroutine s_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine s_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta !!$ real(psb_spk_), intent(in) :: x(:) !!$ real(psb_spk_), intent(in) :: y(:) -!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ class(psb_s_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine s_gpu_multi_mlt_a_2 +!!$ end subroutine s_cuda_multi_mlt_a_2 !!$ -!!$ subroutine s_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine s_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ real(psb_spk_), intent(in) :: alpha,beta !!$ class(psb_s_base_multivect_type), intent(inout) :: x !!$ class(psb_s_base_multivect_type), intent(inout) :: y -!!$ class(psb_s_multivect_gpu), intent(inout) :: z +!!$ class(psb_s_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_s_multivect_gpu) +!!$ type is (psb_s_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine s_gpu_multi_mlt_v_2 +!!$ end subroutine s_cuda_multi_mlt_v_2 - subroutine s_gpu_multi_set_scal(x,val) - class(psb_s_multivect_gpu), intent(inout) :: x + subroutine s_cuda_multi_set_scal(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_s_base_multivect_type%set_scal(val) call x%set_host() - end subroutine s_gpu_multi_set_scal + end subroutine s_cuda_multi_set_scal - subroutine s_gpu_multi_set_vect(x,val) - class(psb_s_multivect_gpu), intent(inout) :: x + subroutine s_cuda_multi_set_vect(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x real(psb_spk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_s_base_multivect_type%set_vect(val) call x%set_host() - end subroutine s_gpu_multi_set_vect + end subroutine s_cuda_multi_set_vect -!!$ subroutine s_gpu_multi_scal(alpha, x) +!!$ subroutine s_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ real(psb_spk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_s_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine s_gpu_multi_scal +!!$ end subroutine s_cuda_multi_scal !!$ !!$ -!!$ function s_gpu_multi_nrm2(n,x) result(res) +!!$ function s_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function s_gpu_multi_nrm2 +!!$ end function s_cuda_multi_nrm2 !!$ -!!$ function s_gpu_multi_amax(n,x) result(res) +!!$ function s_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function s_gpu_multi_amax +!!$ end function s_cuda_multi_amax !!$ -!!$ function s_gpu_multi_asum(n,x) result(res) +!!$ function s_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_s_multivect_gpu), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_spk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function s_gpu_multi_asum +!!$ end function s_cuda_multi_asum - subroutine s_gpu_multi_all(m,n, x, info) + subroutine s_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(out) :: x + class(psb_s_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=szero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'s_gpu_multi_all',& + call psb_errpush(info,'s_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine s_gpu_multi_all + end subroutine s_cuda_multi_all - subroutine s_gpu_multi_zero(x) + subroutine s_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine s_gpu_multi_zero + end subroutine s_cuda_multi_zero - subroutine s_gpu_multi_asb(m,n, x, info) + subroutine s_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine s_gpu_multi_asb + end subroutine s_cuda_multi_asb - subroutine s_gpu_multi_sync_space(x,info) + subroutine s_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine s_gpu_multi_sync_space + end subroutine s_cuda_multi_sync_space - subroutine s_gpu_multi_sync(x) + subroutine s_cuda_multi_sync(x) implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'s_gpu_multi_sync') + call psb_errpush(info,'s_cuda_multi_sync') end if - end subroutine s_gpu_multi_sync + end subroutine s_cuda_multi_sync - subroutine s_gpu_multi_free(x, info) + subroutine s_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine s_gpu_multi_free + end subroutine s_cuda_multi_free - subroutine s_gpu_multi_vect_finalize(x) + subroutine s_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_s_multivect_gpu), intent(inout) :: x + type(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine s_gpu_multi_vect_finalize + end subroutine s_cuda_multi_vect_finalize - subroutine s_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine s_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_s_multivect_gpu), intent(inout) :: x + class(psb_s_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) real(psb_spk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_s_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine s_gpu_multi_ins + end subroutine s_cuda_multi_ins #endif -end module psb_s_gpu_multivect_mod +end module psb_s_cuda_multivect_mod diff --git a/cuda/psb_z_csrg_mat_mod.F90 b/cuda/psb_z_csrg_mat_mod.F90 deleted file mode 100644 index 14df1124..00000000 --- a/cuda/psb_z_csrg_mat_mod.F90 +++ /dev/null @@ -1,393 +0,0 @@ -! Parallel Sparse BLAS GPU plugin -! (C) Copyright 2013 -! -! Salvatore Filippone -! Alessandro Fanfarillo -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! - - -module psb_z_csrg_mat_mod - - use iso_c_binding - use psb_z_mat_mod - use cusparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_z_csr_sparse_mat) :: psb_z_csrg_sparse_mat - ! - ! cuSPARSE 4.0 CSR format. - ! - ! - ! - ! - ! -#ifdef HAVE_SPGPU - type(z_Cmat) :: deviceMat - integer(psb_ipk_) :: devstate = is_host - - contains - procedure, nopass :: get_fmt => z_csrg_get_fmt - procedure, pass(a) :: sizeof => z_csrg_sizeof - procedure, pass(a) :: vect_mv => psb_z_csrg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_csrg_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_csrg_csmm - procedure, pass(a) :: csmv => psb_z_csrg_csmv - procedure, pass(a) :: scals => psb_z_csrg_scals - procedure, pass(a) :: scalv => psb_z_csrg_scal - procedure, pass(a) :: reallocate_nz => psb_z_csrg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_csrg_allocate_mnnz - ! Note: we do *not* need the TO methods, because the parent type - ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_csrg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_csrg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_csrg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_csrg_from_fmt - procedure, pass(a) :: free => z_csrg_free - procedure, pass(a) :: mold => psb_z_csrg_mold - procedure, pass(a) :: is_host => z_csrg_is_host - procedure, pass(a) :: is_dev => z_csrg_is_dev - procedure, pass(a) :: is_sync => z_csrg_is_sync - procedure, pass(a) :: set_host => z_csrg_set_host - procedure, pass(a) :: set_dev => z_csrg_set_dev - procedure, pass(a) :: set_sync => z_csrg_set_sync - procedure, pass(a) :: sync => z_csrg_sync - procedure, pass(a) :: to_gpu => psb_z_csrg_to_gpu - procedure, pass(a) :: from_gpu => psb_z_csrg_from_gpu - final :: z_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_csrg_mold -#endif - end type psb_z_csrg_sparse_mat - -#ifdef HAVE_SPGPU - private :: z_csrg_get_nzeros, z_csrg_free, z_csrg_get_fmt, & - & z_csrg_get_size, z_csrg_sizeof, z_csrg_get_nz_row - - - interface - subroutine psb_z_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_inner_vect_sv - end interface - - - interface - subroutine psb_z_csrg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_vect_mv - end interface - - interface - subroutine psb_z_csrg_reallocate_nz(nz,a) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: nz - class(psb_z_csrg_sparse_mat), intent(inout) :: a - end subroutine psb_z_csrg_reallocate_nz - end interface - - interface - subroutine psb_z_csrg_allocate_mnnz(m,n,a,nz) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - integer(psb_ipk_), intent(in) :: m,n - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_csrg_allocate_mnnz - end interface - - interface - subroutine psb_z_csrg_mold(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_mold - end interface - - interface - subroutine psb_z_csrg_to_gpu(a,info, nzrm) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_csrg_to_gpu - end interface - - interface - subroutine psb_z_csrg_from_gpu(a,info) - import :: psb_z_csrg_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_from_gpu - end interface - - interface - subroutine psb_z_cp_csrg_from_coo(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_csrg_from_coo - end interface - - interface - subroutine psb_z_cp_csrg_from_fmt(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_csrg_from_fmt - end interface - - interface - subroutine psb_z_mv_csrg_from_coo(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_csrg_from_coo - end interface - - interface - subroutine psb_z_mv_csrg_from_fmt(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - class(psb_z_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_csrg_from_fmt - end interface - - interface - subroutine psb_z_csrg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_csmv - end interface - interface - subroutine psb_z_csrg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_csrg_csmm - end interface - - interface - subroutine psb_z_csrg_scal(d,a,info,side) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - complex(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, intent(in), optional :: side - end subroutine psb_z_csrg_scal - end interface - - interface - subroutine psb_z_csrg_scals(d,a,info) - import :: psb_z_csrg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(inout) :: a - complex(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_scals - end interface - - -contains - - ! == =================================== - ! - ! - ! - ! Getters - ! - ! - ! - ! - ! - ! == =================================== - - - function z_csrg_sizeof(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - res = 8 - res = res + (2*psb_sizeof_dp) * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - ! Should we account for the shadow data structure - ! on the GPU device side? - ! res = 2*res - - end function z_csrg_sizeof - - function z_csrg_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSRG' - end function z_csrg_get_fmt - - - - ! == =================================== - ! - ! - ! - ! Data management - ! - ! - ! - ! - ! - ! == =================================== - - - subroutine z_csrg_set_host(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine z_csrg_set_host - - subroutine z_csrg_set_dev(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine z_csrg_set_dev - - subroutine z_csrg_set_sync(a) - implicit none - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine z_csrg_set_sync - - function z_csrg_is_dev(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function z_csrg_is_dev - - function z_csrg_is_host(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function z_csrg_is_host - - function z_csrg_is_sync(a) result(res) - implicit none - class(psb_z_csrg_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function z_csrg_is_sync - - - subroutine z_csrg_sync(a) - implicit none - class(psb_z_csrg_sparse_mat), target, intent(in) :: a - class(psb_z_csrg_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (tmpa%is_host()) then - call tmpa%to_gpu(info) - else if (tmpa%is_dev()) then - call tmpa%from_gpu(info) - end if - call tmpa%set_sync() - return - - end subroutine z_csrg_sync - - subroutine z_csrg_free(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - class(psb_z_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - call a%psb_z_csr_sparse_mat%free() - - return - - end subroutine z_csrg_free - - subroutine z_csrg_finalize(a) - use cusparse_mod - implicit none - integer(psb_ipk_) :: info - - type(psb_z_csrg_sparse_mat), intent(inout) :: a - - info = CSRGDeviceFree(a%deviceMat) - - return - - end subroutine z_csrg_finalize - -#else - interface - subroutine psb_z_csrg_mold(a,b,info) - import :: psb_z_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_csrg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_csrg_mold - end interface - -#endif - -end module psb_z_csrg_mat_mod diff --git a/cuda/psb_z_cuda_csrg_mat_mod.F90 b/cuda/psb_z_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..75170185 --- /dev/null +++ b/cuda/psb_z_cuda_csrg_mat_mod.F90 @@ -0,0 +1,393 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_csrg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! +#ifdef HAVE_SPGPU + type(z_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_z_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_z_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => z_cuda_csrg_free + procedure, pass(a) :: mold => psb_z_cuda_csrg_mold + procedure, pass(a) :: is_host => z_cuda_csrg_is_host + procedure, pass(a) :: is_dev => z_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => z_cuda_csrg_is_sync + procedure, pass(a) :: set_host => z_cuda_csrg_set_host + procedure, pass(a) :: set_dev => z_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => z_cuda_csrg_set_sync + procedure, pass(a) :: sync => z_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_z_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_z_cuda_csrg_from_gpu + final :: z_cuda_csrg_finalize +#else + contains + procedure, pass(a) :: mold => psb_z_cuda_csrg_mold +#endif + end type psb_z_cuda_csrg_sparse_mat + +#ifdef HAVE_SPGPU + private :: z_cuda_csrg_get_nzeros, z_cuda_csrg_free, z_cuda_csrg_get_fmt, & + & z_cuda_csrg_get_size, z_cuda_csrg_sizeof, z_cuda_csrg_get_nz_row + + + interface + subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_csrg_mold(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_mold + end interface + + interface + subroutine psb_z_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_z_cuda_csrg_from_gpu(a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmv + end interface + interface + subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmm + end interface + + interface + subroutine psb_z_cuda_csrg_scal(d,a,info,side) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_csrg_scal + end interface + + interface + subroutine psb_z_cuda_csrg_scals(d,a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_csrg_sizeof + + function z_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function z_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine z_cuda_csrg_set_host(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_cuda_csrg_set_host + + subroutine z_cuda_csrg_set_dev(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_cuda_csrg_set_dev + + subroutine z_cuda_csrg_set_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_cuda_csrg_set_sync + + function z_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_cuda_csrg_is_dev + + function z_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_cuda_csrg_is_host + + function z_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_cuda_csrg_is_sync + + + subroutine z_cuda_csrg_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_cuda_csrg_sync + + subroutine z_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_cuda_csrg_free + + subroutine z_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine z_cuda_csrg_finalize + +#else + interface + subroutine psb_z_cuda_csrg_mold(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_mold + end interface + +#endif + +end module psb_z_cuda_csrg_mat_mod diff --git a/cuda/psb_z_diag_mat_mod.F90 b/cuda/psb_z_cuda_diag_mat_mod.F90 similarity index 52% rename from cuda/psb_z_diag_mat_mod.F90 rename to cuda/psb_z_cuda_diag_mat_mod.F90 index 986d75d9..80906778 100644 --- a/cuda/psb_z_diag_mat_mod.F90 +++ b/cuda/psb_z_cuda_diag_mat_mod.F90 @@ -30,13 +30,13 @@ ! -module psb_z_diag_mat_mod +module psb_z_cuda_diag_mat_mod use iso_c_binding use psb_base_mod use psb_z_dia_mat_mod - type, extends(psb_z_dia_sparse_mat) :: psb_z_diag_sparse_mat + type, extends(psb_z_dia_sparse_mat) :: psb_z_cuda_diag_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -48,170 +48,170 @@ module psb_z_diag_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_diag_get_fmt - procedure, pass(a) :: sizeof => z_diag_sizeof - procedure, pass(a) :: vect_mv => psb_z_diag_vect_mv -! procedure, pass(a) :: csmm => psb_z_diag_csmm - procedure, pass(a) :: csmv => psb_z_diag_csmv -! procedure, pass(a) :: in_vect_sv => psb_z_diag_inner_vect_sv -! procedure, pass(a) :: scals => psb_z_diag_scals -! procedure, pass(a) :: scalv => psb_z_diag_scal -! procedure, pass(a) :: reallocate_nz => psb_z_diag_reallocate_nz -! procedure, pass(a) :: allocate_mnnz => psb_z_diag_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => z_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_z_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_z_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_z_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_z_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_z_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_diag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_diag_from_coo -! procedure, pass(a) :: cp_from_fmt => psb_z_cp_diag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_diag_from_coo -! procedure, pass(a) :: mv_from_fmt => psb_z_mv_diag_from_fmt - procedure, pass(a) :: free => z_diag_free - procedure, pass(a) :: mold => psb_z_diag_mold - procedure, pass(a) :: to_gpu => psb_z_diag_to_gpu - final :: z_diag_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => z_cuda_diag_free + procedure, pass(a) :: mold => psb_z_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_diag_to_gpu + final :: z_cuda_diag_finalize #else contains - procedure, pass(a) :: mold => psb_z_diag_mold + procedure, pass(a) :: mold => psb_z_cuda_diag_mold #endif - end type psb_z_diag_sparse_mat + end type psb_z_cuda_diag_sparse_mat #ifdef HAVE_SPGPU - private :: z_diag_get_nzeros, z_diag_free, z_diag_get_fmt, & - & z_diag_get_size, z_diag_sizeof, z_diag_get_nz_row + private :: z_cuda_diag_get_nzeros, z_cuda_diag_free, z_cuda_diag_get_fmt, & + & z_cuda_diag_get_size, z_cuda_diag_sizeof, z_cuda_diag_get_nz_row interface - subroutine psb_z_diag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_vect_mv + end subroutine psb_z_cuda_diag_vect_mv end interface interface - subroutine psb_z_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_inner_vect_sv + end subroutine psb_z_cuda_diag_inner_vect_sv end interface interface - subroutine psb_z_diag_reallocate_nz(nz,a) - import :: psb_z_diag_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_diag_reallocate_nz(nz,a) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_diag_sparse_mat), intent(inout) :: a - end subroutine psb_z_diag_reallocate_nz + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_diag_reallocate_nz end interface interface - subroutine psb_z_diag_allocate_mnnz(m,n,a,nz) - import :: psb_z_diag_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_diag_allocate_mnnz + end subroutine psb_z_cuda_diag_allocate_mnnz end interface interface - subroutine psb_z_diag_mold(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_mold(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_mold + end subroutine psb_z_cuda_diag_mold end interface interface - subroutine psb_z_diag_to_gpu(a,info, nzrm) - import :: psb_z_diag_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_diag_to_gpu + end subroutine psb_z_cuda_diag_to_gpu end interface interface - subroutine psb_z_cp_diag_from_coo(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_diag_from_coo + end subroutine psb_z_cuda_cp_diag_from_coo end interface interface - subroutine psb_z_cp_diag_from_fmt(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_diag_from_fmt + end subroutine psb_z_cuda_cp_diag_from_fmt end interface interface - subroutine psb_z_mv_diag_from_coo(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_diag_from_coo + end subroutine psb_z_cuda_mv_diag_from_coo end interface interface - subroutine psb_z_mv_diag_from_fmt(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_diag_from_fmt + end subroutine psb_z_cuda_mv_diag_from_fmt end interface interface - subroutine psb_z_diag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_csmv + end subroutine psb_z_cuda_diag_csmv end interface interface - subroutine psb_z_diag_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_diag_csmm + end subroutine psb_z_cuda_diag_csmm end interface interface - subroutine psb_z_diag_scal(d,a,info, side) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_scal(d,a,info, side) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_diag_scal + end subroutine psb_z_cuda_diag_scal end interface interface - subroutine psb_z_diag_scals(d,a,info) - import :: psb_z_diag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_diag_scals(d,a,info) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_scals + end subroutine psb_z_cuda_diag_scals end interface @@ -230,9 +230,9 @@ contains ! == =================================== - function z_diag_sizeof(a) result(res) + function z_cuda_diag_sizeof(a) result(res) implicit none - class(psb_z_diag_sparse_mat), intent(in) :: a + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_diag_sizeof + end function z_cuda_diag_sizeof - function z_diag_get_fmt() result(res) + function z_cuda_diag_get_fmt() result(res) implicit none character(len=5) :: res res = 'DIAG' - end function z_diag_get_fmt + end function z_cuda_diag_get_fmt @@ -265,11 +265,11 @@ contains ! ! == =================================== - subroutine z_diag_free(a) + subroutine z_cuda_diag_free(a) use diagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_diag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) @@ -278,31 +278,31 @@ contains return - end subroutine z_diag_free + end subroutine z_cuda_diag_free - subroutine z_diag_finalize(a) + subroutine z_cuda_diag_finalize(a) use diagdev_mod implicit none - type(psb_z_diag_sparse_mat), intent(inout) :: a + type(psb_z_cuda_diag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDiagDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_diag_finalize + end subroutine z_cuda_diag_finalize #else interface - subroutine psb_z_diag_mold(a,b,info) - import :: psb_z_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_diag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_diag_mold(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_diag_mold + end subroutine psb_z_cuda_diag_mold end interface #endif -end module psb_z_diag_mat_mod +end module psb_z_cuda_diag_mat_mod diff --git a/cuda/psb_z_dnsg_mat_mod.F90 b/cuda/psb_z_cuda_dnsg_mat_mod.F90 similarity index 51% rename from cuda/psb_z_dnsg_mat_mod.F90 rename to cuda/psb_z_cuda_dnsg_mat_mod.F90 index 6a3d4369..3fb2488b 100644 --- a/cuda/psb_z_dnsg_mat_mod.F90 +++ b/cuda/psb_z_cuda_dnsg_mat_mod.F90 @@ -30,14 +30,14 @@ ! -module psb_z_dnsg_mat_mod +module psb_z_cuda_dnsg_mat_mod use iso_c_binding use psb_z_mat_mod use psb_z_dns_mat_mod use dnsdev_mod - type, extends(psb_z_dns_sparse_mat) :: psb_z_dnsg_sparse_mat + type, extends(psb_z_dns_sparse_mat) :: psb_z_cuda_dnsg_sparse_mat ! ! ITPACK/DNS format, extended. ! We are adding here the routines to create a copy of the data @@ -49,169 +49,169 @@ module psb_z_dnsg_mat_mod type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_dnsg_get_fmt - ! procedure, pass(a) :: sizeof => z_dnsg_sizeof - procedure, pass(a) :: vect_mv => psb_z_dnsg_vect_mv -!!$ procedure, pass(a) :: csmm => psb_z_dnsg_csmm -!!$ procedure, pass(a) :: csmv => psb_z_dnsg_csmv -!!$ procedure, pass(a) :: in_vect_sv => psb_z_dnsg_inner_vect_sv -!!$ procedure, pass(a) :: scals => psb_z_dnsg_scals -!!$ procedure, pass(a) :: scalv => psb_z_dnsg_scal -!!$ procedure, pass(a) :: reallocate_nz => psb_z_dnsg_reallocate_nz -!!$ procedure, pass(a) :: allocate_mnnz => psb_z_dnsg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_z_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_z_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_z_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_z_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_z_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_z_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_z_cuda_dnsg_allocate_mnnz ! Note: we *do* need the TO methods, because of the need to invoke SYNC ! - procedure, pass(a) :: cp_from_coo => psb_z_cp_dnsg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_dnsg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_dnsg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_dnsg_from_fmt - procedure, pass(a) :: free => z_dnsg_free - procedure, pass(a) :: mold => psb_z_dnsg_mold - procedure, pass(a) :: to_gpu => psb_z_dnsg_to_gpu - final :: z_dnsg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => z_cuda_dnsg_free + procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_dnsg_to_gpu + final :: z_cuda_dnsg_finalize #else contains - procedure, pass(a) :: mold => psb_z_dnsg_mold + procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold #endif - end type psb_z_dnsg_sparse_mat + end type psb_z_cuda_dnsg_sparse_mat #ifdef HAVE_SPGPU - private :: z_dnsg_get_nzeros, z_dnsg_free, z_dnsg_get_fmt, & - & z_dnsg_get_size, z_dnsg_get_nz_row + private :: z_cuda_dnsg_get_nzeros, z_cuda_dnsg_free, z_cuda_dnsg_get_fmt, & + & z_cuda_dnsg_get_size, z_cuda_dnsg_get_nz_row interface - subroutine psb_z_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_dnsg_vect_mv + end subroutine psb_z_cuda_dnsg_vect_mv end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_inner_vect_sv +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_reallocate_nz(nz,a) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_dnsg_reallocate_nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_dnsg_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_dnsg_allocate_mnnz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz !!$ end interface interface - subroutine psb_z_dnsg_mold(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_mold(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_mold + end subroutine psb_z_cuda_dnsg_mold end interface interface - subroutine psb_z_dnsg_to_gpu(a,info) - import :: psb_z_dnsg_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_dnsg_to_gpu(a,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_to_gpu + end subroutine psb_z_cuda_dnsg_to_gpu end interface interface - subroutine psb_z_cp_dnsg_from_coo(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_dnsg_from_coo + end subroutine psb_z_cuda_cp_dnsg_from_coo end interface interface - subroutine psb_z_cp_dnsg_from_fmt(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_dnsg_from_fmt + end subroutine psb_z_cuda_cp_dnsg_from_fmt end interface interface - subroutine psb_z_mv_dnsg_from_coo(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_dnsg_from_coo + end subroutine psb_z_cuda_mv_dnsg_from_coo end interface interface - subroutine psb_z_mv_dnsg_from_fmt(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_dnsg_from_fmt + end subroutine psb_z_cuda_mv_dnsg_from_fmt end interface !!$ interface -!!$ subroutine psb_z_dnsg_csmv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) !!$ complex(psb_dpk_), intent(inout) :: y(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_csmv +!!$ end subroutine psb_z_cuda_dnsg_csmv !!$ end interface !!$ interface -!!$ subroutine psb_z_dnsg_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_dnsg_csmm +!!$ end subroutine psb_z_cuda_dnsg_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_scal(d,a,info, side) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_z_dnsg_scal +!!$ end subroutine psb_z_cuda_dnsg_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_dnsg_scals(d,a,info) -!!$ import :: psb_z_dnsg_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_dnsg_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_dnsg_scals +!!$ end subroutine psb_z_cuda_dnsg_scals !!$ end interface !!$ @@ -231,11 +231,11 @@ contains - function z_dnsg_get_fmt() result(res) + function z_cuda_dnsg_get_fmt() result(res) implicit none character(len=5) :: res res = 'DNSG' - end function z_dnsg_get_fmt + end function z_cuda_dnsg_get_fmt @@ -251,11 +251,11 @@ contains ! ! == =================================== - subroutine z_dnsg_free(a) + subroutine z_cuda_dnsg_free(a) use dnsdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) @@ -264,31 +264,31 @@ contains return - end subroutine z_dnsg_free + end subroutine z_cuda_dnsg_free - subroutine z_dnsg_finalize(a) + subroutine z_cuda_dnsg_finalize(a) use dnsdev_mod implicit none - type(psb_z_dnsg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeDnsDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_dnsg_finalize + end subroutine z_cuda_dnsg_finalize #else interface - subroutine psb_z_dnsg_mold(a,b,info) - import :: psb_z_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_dnsg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_dnsg_mold(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_dnsg_mold + end subroutine psb_z_cuda_dnsg_mold end interface #endif -end module psb_z_dnsg_mat_mod +end module psb_z_cuda_dnsg_mat_mod diff --git a/cuda/psb_z_elg_mat_mod.F90 b/cuda/psb_z_cuda_elg_mat_mod.F90 similarity index 50% rename from cuda/psb_z_elg_mat_mod.F90 rename to cuda/psb_z_cuda_elg_mat_mod.F90 index cf9e479c..9090b0a2 100644 --- a/cuda/psb_z_elg_mat_mod.F90 +++ b/cuda/psb_z_cuda_elg_mat_mod.F90 @@ -30,18 +30,18 @@ ! -module psb_z_elg_mat_mod +module psb_z_cuda_elg_mat_mod use iso_c_binding use psb_z_mat_mod use psb_z_ell_mat_mod - use psb_i_gpu_vect_mod + use psb_i_cuda_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_ell_sparse_mat) :: psb_z_elg_sparse_mat + type, extends(psb_z_ell_sparse_mat) :: psb_z_cuda_elg_sparse_mat ! ! ITPACK/ELL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,221 +54,221 @@ module psb_z_elg_mat_mod integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => z_elg_get_fmt - procedure, pass(a) :: sizeof => z_elg_sizeof - procedure, pass(a) :: vect_mv => psb_z_elg_vect_mv - procedure, pass(a) :: csmm => psb_z_elg_csmm - procedure, pass(a) :: csmv => psb_z_elg_csmv - procedure, pass(a) :: in_vect_sv => psb_z_elg_inner_vect_sv - procedure, pass(a) :: scals => psb_z_elg_scals - procedure, pass(a) :: scalv => psb_z_elg_scal - procedure, pass(a) :: reallocate_nz => psb_z_elg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_elg_allocate_mnnz - procedure, pass(a) :: reinit => z_elg_reinit + procedure, nopass :: get_fmt => z_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_z_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_cuda_elg_scals + procedure, pass(a) :: scalv => psb_z_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => z_cuda_elg_reinit ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_elg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_elg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_elg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_elg_from_fmt - procedure, pass(a) :: free => z_elg_free - procedure, pass(a) :: mold => psb_z_elg_mold - procedure, pass(a) :: csput_a => psb_z_elg_csput_a - procedure, pass(a) :: csput_v => psb_z_elg_csput_v - procedure, pass(a) :: is_host => z_elg_is_host - procedure, pass(a) :: is_dev => z_elg_is_dev - procedure, pass(a) :: is_sync => z_elg_is_sync - procedure, pass(a) :: set_host => z_elg_set_host - procedure, pass(a) :: set_dev => z_elg_set_dev - procedure, pass(a) :: set_sync => z_elg_set_sync - procedure, pass(a) :: sync => z_elg_sync - procedure, pass(a) :: from_gpu => psb_z_elg_from_gpu - procedure, pass(a) :: to_gpu => psb_z_elg_to_gpu - procedure, pass(a) :: asb => psb_z_elg_asb - final :: z_elg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => z_cuda_elg_free + procedure, pass(a) :: mold => psb_z_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_z_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_z_cuda_elg_csput_v + procedure, pass(a) :: is_host => z_cuda_elg_is_host + procedure, pass(a) :: is_dev => z_cuda_elg_is_dev + procedure, pass(a) :: is_sync => z_cuda_elg_is_sync + procedure, pass(a) :: set_host => z_cuda_elg_set_host + procedure, pass(a) :: set_dev => z_cuda_elg_set_dev + procedure, pass(a) :: set_sync => z_cuda_elg_set_sync + procedure, pass(a) :: sync => z_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_z_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_z_cuda_elg_asb + final :: z_cuda_elg_finalize #else contains - procedure, pass(a) :: mold => psb_z_elg_mold - procedure, pass(a) :: asb => psb_z_elg_asb + procedure, pass(a) :: mold => psb_z_cuda_elg_mold + procedure, pass(a) :: asb => psb_z_cuda_elg_asb #endif - end type psb_z_elg_sparse_mat + end type psb_z_cuda_elg_sparse_mat #ifdef HAVE_SPGPU - private :: z_elg_get_nzeros, z_elg_free, z_elg_get_fmt, & - & z_elg_get_size, z_elg_sizeof, z_elg_get_nz_row, z_elg_sync + private :: z_cuda_elg_get_nzeros, z_cuda_elg_free, z_cuda_elg_get_fmt, & + & z_cuda_elg_get_size, z_cuda_elg_sizeof, z_cuda_elg_get_nz_row, z_cuda_elg_sync interface - subroutine psb_z_elg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_vect_mv + end subroutine psb_z_cuda_elg_vect_mv end interface interface - subroutine psb_z_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_inner_vect_sv + end subroutine psb_z_cuda_elg_inner_vect_sv end interface interface - subroutine psb_z_elg_reallocate_nz(nz,a) - import :: psb_z_elg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_elg_reallocate_nz(nz,a) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_reallocate_nz + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_reallocate_nz end interface interface - subroutine psb_z_elg_allocate_mnnz(m,n,a,nz) - import :: psb_z_elg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_elg_allocate_mnnz + end subroutine psb_z_cuda_elg_allocate_mnnz end interface interface - subroutine psb_z_elg_mold(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_mold(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_mold + end subroutine psb_z_cuda_elg_mold end interface interface - subroutine psb_z_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: val(:) integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& & imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_csput_a + end subroutine psb_z_cuda_elg_csput_a end interface interface - subroutine psb_z_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& + subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& & psb_i_base_vect_type - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_vect_type), intent(inout) :: val class(psb_i_base_vect_type), intent(inout) :: ia, ja integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_csput_v + end subroutine psb_z_cuda_elg_csput_v end interface interface - subroutine psb_z_elg_from_gpu(a,info) - import :: psb_z_elg_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_from_gpu(a,info) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_from_gpu + end subroutine psb_z_cuda_elg_from_gpu end interface interface - subroutine psb_z_elg_to_gpu(a,info, nzrm) - import :: psb_z_elg_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_elg_to_gpu + end subroutine psb_z_cuda_elg_to_gpu end interface interface - subroutine psb_z_cp_elg_from_coo(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_elg_from_coo + end subroutine psb_z_cuda_cp_elg_from_coo end interface interface - subroutine psb_z_cp_elg_from_fmt(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_elg_from_fmt + end subroutine psb_z_cuda_cp_elg_from_fmt end interface interface - subroutine psb_z_mv_elg_from_coo(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_elg_from_coo + end subroutine psb_z_cuda_mv_elg_from_coo end interface interface - subroutine psb_z_mv_elg_from_fmt(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_elg_from_fmt + end subroutine psb_z_cuda_mv_elg_from_fmt end interface interface - subroutine psb_z_elg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_csmv + end subroutine psb_z_cuda_elg_csmv end interface interface - subroutine psb_z_elg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_elg_csmm + end subroutine psb_z_cuda_elg_csmm end interface interface - subroutine psb_z_elg_scal(d,a,info, side) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_scal(d,a,info, side) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_elg_scal + end subroutine psb_z_cuda_elg_scal end interface interface - subroutine psb_z_elg_scals(d,a,info) - import :: psb_z_elg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_elg_scals(d,a,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_scals + end subroutine psb_z_cuda_elg_scals end interface interface - subroutine psb_z_elg_asb(a) - import :: psb_z_elg_sparse_mat - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_asb + subroutine psb_z_cuda_elg_asb(a) + import :: psb_z_cuda_elg_sparse_mat + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_asb end interface @@ -287,9 +287,9 @@ contains ! == =================================== - function z_elg_sizeof(a) result(res) + function z_cuda_elg_sizeof(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res if (a%is_dev()) call a%sync() @@ -302,13 +302,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_elg_sizeof + end function z_cuda_elg_sizeof - function z_elg_get_fmt() result(res) + function z_cuda_elg_get_fmt() result(res) implicit none character(len=5) :: res res = 'ELG' - end function z_elg_get_fmt + end function z_cuda_elg_get_fmt @@ -323,12 +323,12 @@ contains ! ! ! == =================================== - subroutine z_elg_reinit(a,clear) + subroutine z_cuda_elg_reinit(a,clear) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a logical, intent(in), optional :: clear integer(psb_ipk_) :: isz, err_act character(len=20) :: name='reinit' @@ -367,14 +367,14 @@ contains 9999 call psb_error_handler(err_act) return - end subroutine z_elg_reinit + end subroutine z_cuda_elg_reinit - subroutine z_elg_free(a) + subroutine z_cuda_elg_free(a) use elldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) @@ -384,12 +384,12 @@ contains return - end subroutine z_elg_free + end subroutine z_cuda_elg_free - subroutine z_elg_sync(a) + subroutine z_cuda_elg_sync(a) implicit none - class(psb_z_elg_sparse_mat), target, intent(in) :: a - class(psb_z_elg_sparse_mat), pointer :: tmpa + class(psb_z_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -401,83 +401,83 @@ contains call tmpa%set_sync() return - end subroutine z_elg_sync + end subroutine z_cuda_elg_sync - subroutine z_elg_set_host(a) + subroutine z_cuda_elg_set_host(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine z_elg_set_host + end subroutine z_cuda_elg_set_host - subroutine z_elg_set_dev(a) + subroutine z_cuda_elg_set_dev(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine z_elg_set_dev + end subroutine z_cuda_elg_set_dev - subroutine z_elg_set_sync(a) + subroutine z_cuda_elg_set_sync(a) implicit none - class(psb_z_elg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine z_elg_set_sync + end subroutine z_cuda_elg_set_sync - function z_elg_is_dev(a) result(res) + function z_cuda_elg_is_dev(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function z_elg_is_dev + end function z_cuda_elg_is_dev - function z_elg_is_host(a) result(res) + function z_cuda_elg_is_host(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function z_elg_is_host + end function z_cuda_elg_is_host - function z_elg_is_sync(a) result(res) + function z_cuda_elg_is_sync(a) result(res) implicit none - class(psb_z_elg_sparse_mat), intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function z_elg_is_sync + end function z_cuda_elg_is_sync - subroutine z_elg_finalize(a) + subroutine z_cuda_elg_finalize(a) use elldev_mod implicit none - type(psb_z_elg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_elg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeEllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_elg_finalize + end subroutine z_cuda_elg_finalize #else interface - subroutine psb_z_elg_asb(a) - import :: psb_z_elg_sparse_mat - class(psb_z_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_elg_asb + subroutine psb_z_cuda_elg_asb(a) + import :: psb_z_cuda_elg_sparse_mat + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_asb end interface interface - subroutine psb_z_elg_mold(a,b,info) - import :: psb_z_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_elg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_elg_mold(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_elg_mold + end subroutine psb_z_cuda_elg_mold end interface #endif -end module psb_z_elg_mat_mod +end module psb_z_cuda_elg_mat_mod diff --git a/cuda/psb_z_hdiag_mat_mod.F90 b/cuda/psb_z_cuda_hdiag_mat_mod.F90 similarity index 50% rename from cuda/psb_z_hdiag_mat_mod.F90 rename to cuda/psb_z_cuda_hdiag_mat_mod.F90 index 0b61cb47..b64498f6 100644 --- a/cuda/psb_z_hdiag_mat_mod.F90 +++ b/cuda/psb_z_cuda_hdiag_mat_mod.F90 @@ -30,182 +30,182 @@ ! -module psb_z_hdiag_mat_mod +module psb_z_cuda_hdiag_mat_mod use iso_c_binding use psb_base_mod use psb_z_hdia_mat_mod - type, extends(psb_z_hdia_sparse_mat) :: psb_z_hdiag_sparse_mat + type, extends(psb_z_hdia_sparse_mat) :: psb_z_cuda_hdiag_sparse_mat ! #ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains - procedure, nopass :: get_fmt => z_hdiag_get_fmt - ! procedure, pass(a) :: sizeof => z_hdiag_sizeof - procedure, pass(a) :: vect_mv => psb_z_hdiag_vect_mv - ! procedure, pass(a) :: csmm => psb_z_hdiag_csmm - procedure, pass(a) :: csmv => psb_z_hdiag_csmv - ! procedure, pass(a) :: in_vect_sv => psb_z_hdiag_inner_vect_sv - ! procedure, pass(a) :: scals => psb_z_hdiag_scals - ! procedure, pass(a) :: scalv => psb_z_hdiag_scal - ! procedure, pass(a) :: reallocate_nz => psb_z_hdiag_reallocate_nz - ! procedure, pass(a) :: allocate_mnnz => psb_z_hdiag_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_z_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_z_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_z_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_z_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_z_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hdiag_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hdiag_from_coo - ! procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdiag_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hdiag_from_coo - ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdiag_from_fmt - procedure, pass(a) :: free => z_hdiag_free - procedure, pass(a) :: mold => psb_z_hdiag_mold - procedure, pass(a) :: to_gpu => psb_z_hdiag_to_gpu - final :: z_hdiag_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => z_cuda_hdiag_free + procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hdiag_to_gpu + final :: z_cuda_hdiag_finalize #else contains - procedure, pass(a) :: mold => psb_z_hdiag_mold + procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold #endif - end type psb_z_hdiag_sparse_mat + end type psb_z_cuda_hdiag_sparse_mat #ifdef HAVE_SPGPU - private :: z_hdiag_get_nzeros, z_hdiag_free, z_hdiag_get_fmt, & - & z_hdiag_get_size, z_hdiag_sizeof, z_hdiag_get_nz_row + private :: z_cuda_hdiag_get_nzeros, z_cuda_hdiag_free, z_cuda_hdiag_get_fmt, & + & z_cuda_hdiag_get_size, z_cuda_hdiag_sizeof, z_cuda_hdiag_get_nz_row interface - subroutine psb_z_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hdiag_vect_mv + end subroutine psb_z_cuda_hdiag_vect_mv end interface !!$ interface -!!$ subroutine psb_z_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_ipk_, psb_z_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type -!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta !!$ class(psb_z_base_vect_type), intent(inout) :: x, y !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_hdiag_inner_vect_sv +!!$ end subroutine psb_z_cuda_hdiag_inner_vect_sv !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_reallocate_nz(nz,a) -!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: nz -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a -!!$ end subroutine psb_z_hdiag_reallocate_nz +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_hdiag_reallocate_nz !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_allocate_mnnz(m,n,a,nz) -!!$ import :: psb_z_hdiag_sparse_mat, psb_ipk_ +!!$ subroutine psb_z_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ !!$ integer(psb_ipk_), intent(in) :: m,n -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ integer(psb_ipk_), intent(in), optional :: nz -!!$ end subroutine psb_z_hdiag_allocate_mnnz +!!$ end subroutine psb_z_cuda_hdiag_allocate_mnnz !!$ end interface interface - subroutine psb_z_hdiag_mold(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_mold(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_mold + end subroutine psb_z_cuda_hdiag_mold end interface interface - subroutine psb_z_hdiag_to_gpu(a,info) - import :: psb_z_hdiag_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hdiag_to_gpu(a,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_to_gpu + end subroutine psb_z_cuda_hdiag_to_gpu end interface interface - subroutine psb_z_cp_hdiag_from_coo(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hdiag_from_coo + end subroutine psb_z_cuda_cp_hdiag_from_coo end interface !!$ interface -!!$ subroutine psb_z_cp_hdiag_from_fmt(a,b,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_z_base_sparse_mat), intent(in) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_cp_hdiag_from_fmt +!!$ end subroutine psb_z_cuda_cp_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_z_mv_hdiag_from_coo(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hdiag_from_coo + end subroutine psb_z_cuda_mv_hdiag_from_coo end interface !!$ !!$ interface -!!$ subroutine psb_z_mv_hdiag_from_fmt(a,b,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ class(psb_z_base_sparse_mat), intent(inout) :: b !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_mv_hdiag_from_fmt +!!$ end subroutine psb_z_cuda_mv_hdiag_from_fmt !!$ end interface !!$ interface - subroutine psb_z_hdiag_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hdiag_csmv + end subroutine psb_z_cuda_hdiag_csmv end interface !!$ interface -!!$ subroutine psb_z_hdiag_csmm(alpha,a,x,beta,y,info,trans) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(in) :: a +!!$ subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a !!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) !!$ complex(psb_dpk_), intent(inout) :: y(:,:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, optional, intent(in) :: trans -!!$ end subroutine psb_z_hdiag_csmm +!!$ end subroutine psb_z_cuda_hdiag_csmm !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_scal(d,a,info, side) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d(:) !!$ integer(psb_ipk_), intent(out) :: info !!$ character, intent(in), optional :: side -!!$ end subroutine psb_z_hdiag_scal +!!$ end subroutine psb_z_cuda_hdiag_scal !!$ end interface !!$ !!$ interface -!!$ subroutine psb_z_hdiag_scals(d,a,info) -!!$ import :: psb_z_hdiag_sparse_mat, psb_dpk_, psb_ipk_ -!!$ class(psb_z_hdiag_sparse_mat), intent(inout) :: a +!!$ subroutine psb_z_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a !!$ complex(psb_dpk_), intent(in) :: d !!$ integer(psb_ipk_), intent(out) :: info -!!$ end subroutine psb_z_hdiag_scals +!!$ end subroutine psb_z_cuda_hdiag_scals !!$ end interface !!$ @@ -223,11 +223,11 @@ contains ! ! == =================================== - function z_hdiag_get_fmt() result(res) + function z_cuda_hdiag_get_fmt() result(res) implicit none character(len=5) :: res res = 'HDIAG' - end function z_hdiag_get_fmt + end function z_cuda_hdiag_get_fmt @@ -243,11 +243,11 @@ contains ! ! == =================================== - subroutine z_hdiag_free(a) + subroutine z_cuda_hdiag_free(a) use hdiagdev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -256,12 +256,12 @@ contains return - end subroutine z_hdiag_free + end subroutine z_cuda_hdiag_free - subroutine z_hdiag_finalize(a) + subroutine z_cuda_hdiag_finalize(a) use hdiagdev_mod implicit none - type(psb_z_hdiag_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHdiagDevice(a%deviceMat) @@ -269,19 +269,19 @@ contains call a%psb_z_hdia_sparse_mat%free() return - end subroutine z_hdiag_finalize + end subroutine z_cuda_hdiag_finalize #else interface - subroutine psb_z_hdiag_mold(a,b,info) - import :: psb_z_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hdiag_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hdiag_mold(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hdiag_mold + end subroutine psb_z_cuda_hdiag_mold end interface #endif -end module psb_z_hdiag_mat_mod +end module psb_z_cuda_hdiag_mat_mod diff --git a/cuda/psb_z_hlg_mat_mod.F90 b/cuda/psb_z_cuda_hlg_mat_mod.F90 similarity index 50% rename from cuda/psb_z_hlg_mat_mod.F90 rename to cuda/psb_z_cuda_hlg_mat_mod.F90 index 09d490b3..29ed68fa 100644 --- a/cuda/psb_z_hlg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hlg_mat_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_z_hlg_mat_mod +module psb_z_cuda_hlg_mat_mod use iso_c_binding use psb_z_mat_mod @@ -41,7 +41,7 @@ module psb_z_hlg_mat_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_hll_sparse_mat) :: psb_z_hlg_sparse_mat + type, extends(psb_z_hll_sparse_mat) :: psb_z_cuda_hlg_sparse_mat ! ! ITPACK/HLL format, extended. ! We are adding here the routines to create a copy of the data @@ -54,186 +54,186 @@ module psb_z_hlg_mat_mod integer :: devstate = is_host contains - procedure, nopass :: get_fmt => z_hlg_get_fmt - procedure, pass(a) :: sizeof => z_hlg_sizeof - procedure, pass(a) :: vect_mv => psb_z_hlg_vect_mv - procedure, pass(a) :: csmm => psb_z_hlg_csmm - procedure, pass(a) :: csmv => psb_z_hlg_csmv - procedure, pass(a) :: in_vect_sv => psb_z_hlg_inner_vect_sv - procedure, pass(a) :: scals => psb_z_hlg_scals - procedure, pass(a) :: scalv => psb_z_hlg_scal - procedure, pass(a) :: reallocate_nz => psb_z_hlg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_hlg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_z_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hlg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hlg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_hlg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hlg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_hlg_from_fmt - procedure, pass(a) :: free => z_hlg_free - procedure, pass(a) :: mold => psb_z_hlg_mold - procedure, pass(a) :: is_host => z_hlg_is_host - procedure, pass(a) :: is_dev => z_hlg_is_dev - procedure, pass(a) :: is_sync => z_hlg_is_sync - procedure, pass(a) :: set_host => z_hlg_set_host - procedure, pass(a) :: set_dev => z_hlg_set_dev - procedure, pass(a) :: set_sync => z_hlg_set_sync - procedure, pass(a) :: sync => z_hlg_sync - procedure, pass(a) :: from_gpu => psb_z_hlg_from_gpu - procedure, pass(a) :: to_gpu => psb_z_hlg_to_gpu - final :: z_hlg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => z_cuda_hlg_free + procedure, pass(a) :: mold => psb_z_cuda_hlg_mold + procedure, pass(a) :: is_host => z_cuda_hlg_is_host + procedure, pass(a) :: is_dev => z_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => z_cuda_hlg_is_sync + procedure, pass(a) :: set_host => z_cuda_hlg_set_host + procedure, pass(a) :: set_dev => z_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => z_cuda_hlg_set_sync + procedure, pass(a) :: sync => z_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_z_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_cuda_hlg_to_gpu + final :: z_cuda_hlg_finalize #else contains - procedure, pass(a) :: mold => psb_z_hlg_mold + procedure, pass(a) :: mold => psb_z_cuda_hlg_mold #endif - end type psb_z_hlg_sparse_mat + end type psb_z_cuda_hlg_sparse_mat #ifdef HAVE_SPGPU - private :: z_hlg_get_nzeros, z_hlg_free, z_hlg_get_fmt, & - & z_hlg_get_size, z_hlg_sizeof, z_hlg_get_nz_row + private :: z_cuda_hlg_get_nzeros, z_cuda_hlg_free, z_cuda_hlg_get_fmt, & + & z_cuda_hlg_get_size, z_cuda_hlg_sizeof, z_cuda_hlg_get_nz_row interface - subroutine psb_z_hlg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_vect_mv + end subroutine psb_z_cuda_hlg_vect_mv end interface interface - subroutine psb_z_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_ipk_, psb_z_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_inner_vect_sv + end subroutine psb_z_cuda_hlg_inner_vect_sv end interface interface - subroutine psb_z_hlg_reallocate_nz(nz,a) - import :: psb_z_hlg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_hlg_sparse_mat), intent(inout) :: a - end subroutine psb_z_hlg_reallocate_nz + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hlg_reallocate_nz end interface interface - subroutine psb_z_hlg_allocate_mnnz(m,n,a,nz) - import :: psb_z_hlg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_hlg_allocate_mnnz + end subroutine psb_z_cuda_hlg_allocate_mnnz end interface interface - subroutine psb_z_hlg_mold(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_mold(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_mold + end subroutine psb_z_cuda_hlg_mold end interface interface - subroutine psb_z_hlg_from_gpu(a,info) - import :: psb_z_hlg_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_from_gpu(a,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_from_gpu + end subroutine psb_z_cuda_hlg_from_gpu end interface interface - subroutine psb_z_hlg_to_gpu(a,info, nzrm) - import :: psb_z_hlg_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_hlg_to_gpu + end subroutine psb_z_cuda_hlg_to_gpu end interface interface - subroutine psb_z_cp_hlg_from_coo(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hlg_from_coo + end subroutine psb_z_cuda_cp_hlg_from_coo end interface interface - subroutine psb_z_cp_hlg_from_fmt(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hlg_from_fmt + end subroutine psb_z_cuda_cp_hlg_from_fmt end interface interface - subroutine psb_z_mv_hlg_from_coo(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hlg_from_coo + end subroutine psb_z_cuda_mv_hlg_from_coo end interface interface - subroutine psb_z_mv_hlg_from_fmt(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hlg_from_fmt + end subroutine psb_z_cuda_mv_hlg_from_fmt end interface interface - subroutine psb_z_hlg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_csmv + end subroutine psb_z_cuda_hlg_csmv end interface interface - subroutine psb_z_hlg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hlg_csmm + end subroutine psb_z_cuda_hlg_csmm end interface interface - subroutine psb_z_hlg_scal(d,a,info, side) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_scal(d,a,info, side) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_hlg_scal + end subroutine psb_z_cuda_hlg_scal end interface interface - subroutine psb_z_hlg_scals(d,a,info) - import :: psb_z_hlg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hlg_scals(d,a,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_scals + end subroutine psb_z_cuda_hlg_scals end interface @@ -252,9 +252,9 @@ contains ! == =================================== - function z_hlg_sizeof(a) result(res) + function z_cuda_hlg_sizeof(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res @@ -269,13 +269,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_hlg_sizeof + end function z_cuda_hlg_sizeof - function z_hlg_get_fmt() result(res) + function z_cuda_hlg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HLG' - end function z_hlg_get_fmt + end function z_cuda_hlg_get_fmt @@ -291,11 +291,11 @@ contains ! ! == =================================== - subroutine z_hlg_free(a) + subroutine z_cuda_hlg_free(a) use hlldev_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) @@ -304,13 +304,13 @@ contains return - end subroutine z_hlg_free + end subroutine z_cuda_hlg_free - subroutine z_hlg_sync(a) + subroutine z_cuda_hlg_sync(a) implicit none - class(psb_z_hlg_sparse_mat), target, intent(in) :: a - class(psb_z_hlg_sparse_mat), pointer :: tmpa + class(psb_z_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info tmpa => a @@ -322,77 +322,77 @@ contains call tmpa%set_sync() return - end subroutine z_hlg_sync + end subroutine z_cuda_hlg_sync - subroutine z_hlg_set_host(a) + subroutine z_cuda_hlg_set_host(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_host - end subroutine z_hlg_set_host + end subroutine z_cuda_hlg_set_host - subroutine z_hlg_set_dev(a) + subroutine z_cuda_hlg_set_dev(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_dev - end subroutine z_hlg_set_dev + end subroutine z_cuda_hlg_set_dev - subroutine z_hlg_set_sync(a) + subroutine z_cuda_hlg_set_sync(a) implicit none - class(psb_z_hlg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a a%devstate = is_sync - end subroutine z_hlg_set_sync + end subroutine z_cuda_hlg_set_sync - function z_hlg_is_dev(a) result(res) + function z_cuda_hlg_is_dev(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_dev) - end function z_hlg_is_dev + end function z_cuda_hlg_is_dev - function z_hlg_is_host(a) result(res) + function z_cuda_hlg_is_host(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_host) - end function z_hlg_is_host + end function z_cuda_hlg_is_host - function z_hlg_is_sync(a) result(res) + function z_cuda_hlg_is_sync(a) result(res) implicit none - class(psb_z_hlg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a logical :: res res = (a%devstate == is_sync) - end function z_hlg_is_sync + end function z_cuda_hlg_is_sync - subroutine z_hlg_finalize(a) + subroutine z_cuda_hlg_finalize(a) use hlldev_mod implicit none - type(psb_z_hlg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a if (c_associated(a%deviceMat)) & & call freeHllDevice(a%deviceMat) a%deviceMat = c_null_ptr return - end subroutine z_hlg_finalize + end subroutine z_cuda_hlg_finalize #else interface - subroutine psb_z_hlg_mold(a,b,info) - import :: psb_z_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hlg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hlg_mold(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hlg_mold + end subroutine psb_z_cuda_hlg_mold end interface #endif -end module psb_z_hlg_mat_mod +end module psb_z_cuda_hlg_mat_mod diff --git a/cuda/psb_z_hybg_mat_mod.F90 b/cuda/psb_z_cuda_hybg_mat_mod.F90 similarity index 52% rename from cuda/psb_z_hybg_mat_mod.F90 rename to cuda/psb_z_cuda_hybg_mat_mod.F90 index 465677e3..1bbc11b2 100644 --- a/cuda/psb_z_hybg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hybg_mat_mod.F90 @@ -31,13 +31,13 @@ #if CUDA_SHORT_VERSION <= 10 -module psb_z_hybg_mat_mod +module psb_z_cuda_hybg_mat_mod use iso_c_binding use psb_z_mat_mod use cusparse_mod - type, extends(psb_z_csr_sparse_mat) :: psb_z_hybg_sparse_mat + type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_hybg_sparse_mat ! ! HYBG. An interface to the cuSPARSE HYB ! On the CPU side we keep a CSR storage. @@ -49,170 +49,170 @@ module psb_z_hybg_mat_mod type(z_Hmat) :: deviceMat contains - procedure, nopass :: get_fmt => z_hybg_get_fmt - procedure, pass(a) :: sizeof => z_hybg_sizeof - procedure, pass(a) :: vect_mv => psb_z_hybg_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_hybg_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_hybg_csmm - procedure, pass(a) :: csmv => psb_z_hybg_csmv - procedure, pass(a) :: scals => psb_z_hybg_scals - procedure, pass(a) :: scalv => psb_z_hybg_scal - procedure, pass(a) :: reallocate_nz => psb_z_hybg_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_hybg_allocate_mnnz + procedure, nopass :: get_fmt => z_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_z_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hybg_allocate_mnnz ! Note: we do *not* need the TO methods, because the parent type ! methods will work. - procedure, pass(a) :: cp_from_coo => psb_z_cp_hybg_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_cp_hybg_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_mv_hybg_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_mv_hybg_from_fmt - procedure, pass(a) :: free => z_hybg_free - procedure, pass(a) :: mold => psb_z_hybg_mold - procedure, pass(a) :: to_gpu => psb_z_hybg_to_gpu - final :: z_hybg_finalize + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => z_cuda_hybg_free + procedure, pass(a) :: mold => psb_z_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hybg_to_gpu + final :: z_cuda_hybg_finalize #else contains - procedure, pass(a) :: mold => psb_z_hybg_mold + procedure, pass(a) :: mold => psb_z_cuda_hybg_mold #endif - end type psb_z_hybg_sparse_mat + end type psb_z_cuda_hybg_sparse_mat #ifdef HAVE_SPGPU - private :: z_hybg_get_nzeros, z_hybg_free, z_hybg_get_fmt, & - & z_hybg_get_size, z_hybg_sizeof, z_hybg_get_nz_row + private :: z_cuda_hybg_get_nzeros, z_cuda_hybg_free, z_cuda_hybg_get_fmt, & + & z_cuda_hybg_get_size, z_cuda_hybg_sizeof, z_cuda_hybg_get_nz_row interface - subroutine psb_z_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_inner_vect_sv + end subroutine psb_z_cuda_hybg_inner_vect_sv end interface interface - subroutine psb_z_hybg_vect_mv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_vect_mv + end subroutine psb_z_cuda_hybg_vect_mv end interface interface - subroutine psb_z_hybg_reallocate_nz(nz,a) - import :: psb_z_hybg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: nz - class(psb_z_hybg_sparse_mat), intent(inout) :: a - end subroutine psb_z_hybg_reallocate_nz + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hybg_reallocate_nz end interface interface - subroutine psb_z_hybg_allocate_mnnz(m,n,a,nz) - import :: psb_z_hybg_sparse_mat, psb_ipk_ + subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ integer(psb_ipk_), intent(in) :: m,n - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_z_hybg_allocate_mnnz + end subroutine psb_z_cuda_hybg_allocate_mnnz end interface interface - subroutine psb_z_hybg_mold(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_mold(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_mold + end subroutine psb_z_cuda_hybg_mold end interface interface - subroutine psb_z_hybg_to_gpu(a,info, nzrm) - import :: psb_z_hybg_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm - end subroutine psb_z_hybg_to_gpu + end subroutine psb_z_cuda_hybg_to_gpu end interface interface - subroutine psb_z_cp_hybg_from_coo(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hybg_from_coo + end subroutine psb_z_cuda_cp_hybg_from_coo end interface interface - subroutine psb_z_cp_hybg_from_fmt(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cp_hybg_from_fmt + end subroutine psb_z_cuda_cp_hybg_from_fmt end interface interface - subroutine psb_z_mv_hybg_from_coo(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hybg_from_coo + end subroutine psb_z_cuda_mv_hybg_from_coo end interface interface - subroutine psb_z_mv_hybg_from_fmt(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_mv_hybg_from_fmt + end subroutine psb_z_cuda_mv_hybg_from_fmt end interface interface - subroutine psb_z_hybg_csmv(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) complex(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_csmv + end subroutine psb_z_cuda_hybg_csmv end interface interface - subroutine psb_z_hybg_csmm(alpha,a,x,beta,y,info,trans) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) complex(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - end subroutine psb_z_hybg_csmm + end subroutine psb_z_cuda_hybg_csmm end interface interface - subroutine psb_z_hybg_scal(d,a,info,side) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_scal(d,a,info,side) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - end subroutine psb_z_hybg_scal + end subroutine psb_z_cuda_hybg_scal end interface interface - subroutine psb_z_hybg_scals(d,a,info) - import :: psb_z_hybg_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(inout) :: a + subroutine psb_z_cuda_hybg_scals(d,a,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_scals + end subroutine psb_z_cuda_hybg_scals end interface @@ -231,9 +231,9 @@ contains ! == =================================== - function z_hybg_sizeof(a) result(res) + function z_cuda_hybg_sizeof(a) result(res) implicit none - class(psb_z_hybg_sparse_mat), intent(in) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a integer(psb_epk_) :: res res = 8 res = res + (2*psb_sizeof_dp) * size(a%val) @@ -243,13 +243,13 @@ contains ! on the GPU device side? ! res = 2*res - end function z_hybg_sizeof + end function z_cuda_hybg_sizeof - function z_hybg_get_fmt() result(res) + function z_cuda_hybg_get_fmt() result(res) implicit none character(len=5) :: res res = 'HYBG' - end function z_hybg_get_fmt + end function z_cuda_hybg_get_fmt @@ -265,42 +265,42 @@ contains ! ! == =================================== - subroutine z_hybg_free(a) + subroutine z_cuda_hybg_free(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - class(psb_z_hybg_sparse_mat), intent(inout) :: a + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) call a%psb_z_csr_sparse_mat%free() return - end subroutine z_hybg_free + end subroutine z_cuda_hybg_free - subroutine z_hybg_finalize(a) + subroutine z_cuda_hybg_finalize(a) use cusparse_mod implicit none integer(psb_ipk_) :: info - type(psb_z_hybg_sparse_mat), intent(inout) :: a + type(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a info = HYBGDeviceFree(a%deviceMat) return - end subroutine z_hybg_finalize + end subroutine z_cuda_hybg_finalize #else interface - subroutine psb_z_hybg_mold(a,b,info) - import :: psb_z_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_hybg_sparse_mat), intent(in) :: a + subroutine psb_z_cuda_hybg_mold(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_hybg_mold + end subroutine psb_z_cuda_hybg_mold end interface #endif -end module psb_z_hybg_mat_mod +end module psb_z_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_z_gpu_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 similarity index 72% rename from cuda/psb_z_gpu_vect_mod.F90 rename to cuda/psb_z_cuda_vect_mod.F90 index ca5ac922..35bfb4b5 100644 --- a/cuda/psb_z_gpu_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -30,15 +30,15 @@ ! -module psb_z_gpu_vect_mod +module psb_z_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod use psb_z_vect_mod use psb_i_vect_mod #ifdef HAVE_SPGPU - use psb_gpu_env_mod - use psb_i_gpu_vect_mod + use psb_cuda_env_mod + use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_z_vectordev_mod #endif @@ -47,7 +47,7 @@ module psb_z_gpu_vect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_base_vect_type) :: psb_z_vect_gpu + type, extends(psb_z_base_vect_type) :: psb_z_vect_cuda #ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr @@ -59,66 +59,66 @@ module psb_z_gpu_vect_mod type(c_ptr) :: i_buf = c_null_ptr integer :: i_buf_sz = 0 contains - procedure, pass(x) :: get_nrows => z_gpu_get_nrows - procedure, nopass :: get_fmt => z_gpu_get_fmt - - procedure, pass(x) :: all => z_gpu_all - procedure, pass(x) :: zero => z_gpu_zero - procedure, pass(x) :: asb_m => z_gpu_asb_m - procedure, pass(x) :: sync => z_gpu_sync - procedure, pass(x) :: sync_space => z_gpu_sync_space - procedure, pass(x) :: bld_x => z_gpu_bld_x - procedure, pass(x) :: bld_mn => z_gpu_bld_mn - procedure, pass(x) :: free => z_gpu_free - procedure, pass(x) :: ins_a => z_gpu_ins_a - procedure, pass(x) :: ins_v => z_gpu_ins_v - procedure, pass(x) :: is_host => z_gpu_is_host - procedure, pass(x) :: is_dev => z_gpu_is_dev - procedure, pass(x) :: is_sync => z_gpu_is_sync - procedure, pass(x) :: set_host => z_gpu_set_host - procedure, pass(x) :: set_dev => z_gpu_set_dev - procedure, pass(x) :: set_sync => z_gpu_set_sync - procedure, pass(x) :: set_scal => z_gpu_set_scal -!!$ procedure, pass(x) :: set_vect => z_gpu_set_vect - procedure, pass(x) :: gthzv_x => z_gpu_gthzv_x - procedure, pass(y) :: sctb => z_gpu_sctb - procedure, pass(y) :: sctb_x => z_gpu_sctb_x - procedure, pass(x) :: gthzbuf => z_gpu_gthzbuf - procedure, pass(y) :: sctb_buf => z_gpu_sctb_buf - procedure, pass(x) :: new_buffer => z_gpu_new_buffer - procedure, nopass :: device_wait => z_gpu_device_wait - procedure, pass(x) :: free_buffer => z_gpu_free_buffer - procedure, pass(x) :: maybe_free_buffer => z_gpu_maybe_free_buffer - procedure, pass(x) :: dot_v => z_gpu_dot_v - procedure, pass(x) :: dot_a => z_gpu_dot_a - procedure, pass(y) :: axpby_v => z_gpu_axpby_v - procedure, pass(y) :: axpby_a => z_gpu_axpby_a - procedure, pass(y) :: mlt_v => z_gpu_mlt_v - procedure, pass(y) :: mlt_a => z_gpu_mlt_a - procedure, pass(z) :: mlt_a_2 => z_gpu_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => z_gpu_mlt_v_2 - procedure, pass(x) :: scal => z_gpu_scal - procedure, pass(x) :: nrm2 => z_gpu_nrm2 - procedure, pass(x) :: amax => z_gpu_amax - procedure, pass(x) :: asum => z_gpu_asum - procedure, pass(x) :: absval1 => z_gpu_absval1 - procedure, pass(x) :: absval2 => z_gpu_absval2 - - final :: z_gpu_vect_finalize + procedure, pass(x) :: get_nrows => z_cuda_get_nrows + procedure, nopass :: get_fmt => z_cuda_get_fmt + + procedure, pass(x) :: all => z_cuda_all + procedure, pass(x) :: zero => z_cuda_zero + procedure, pass(x) :: asb_m => z_cuda_asb_m + procedure, pass(x) :: sync => z_cuda_sync + procedure, pass(x) :: sync_space => z_cuda_sync_space + procedure, pass(x) :: bld_x => z_cuda_bld_x + procedure, pass(x) :: bld_mn => z_cuda_bld_mn + procedure, pass(x) :: free => z_cuda_free + procedure, pass(x) :: ins_a => z_cuda_ins_a + procedure, pass(x) :: ins_v => z_cuda_ins_v + procedure, pass(x) :: is_host => z_cuda_is_host + procedure, pass(x) :: is_dev => z_cuda_is_dev + procedure, pass(x) :: is_sync => z_cuda_is_sync + procedure, pass(x) :: set_host => z_cuda_set_host + procedure, pass(x) :: set_dev => z_cuda_set_dev + procedure, pass(x) :: set_sync => z_cuda_set_sync + procedure, pass(x) :: set_scal => z_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => z_cuda_set_vect + procedure, pass(x) :: gthzv_x => z_cuda_gthzv_x + procedure, pass(y) :: sctb => z_cuda_sctb + procedure, pass(y) :: sctb_x => z_cuda_sctb_x + procedure, pass(x) :: gthzbuf => z_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => z_cuda_sctb_buf + procedure, pass(x) :: new_buffer => z_cuda_new_buffer + procedure, nopass :: device_wait => z_cuda_device_wait + procedure, pass(x) :: free_buffer => z_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => z_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => z_cuda_dot_v + procedure, pass(x) :: dot_a => z_cuda_dot_a + procedure, pass(y) :: axpby_v => z_cuda_axpby_v + procedure, pass(y) :: axpby_a => z_cuda_axpby_a + procedure, pass(y) :: mlt_v => z_cuda_mlt_v + procedure, pass(y) :: mlt_a => z_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => z_cuda_mlt_v_2 + procedure, pass(x) :: scal => z_cuda_scal + procedure, pass(x) :: nrm2 => z_cuda_nrm2 + procedure, pass(x) :: amax => z_cuda_amax + procedure, pass(x) :: asum => z_cuda_asum + procedure, pass(x) :: absval1 => z_cuda_absval1 + procedure, pass(x) :: absval2 => z_cuda_absval2 + + final :: z_cuda_vect_finalize #endif - end type psb_z_vect_gpu + end type psb_z_vect_cuda - public :: psb_z_vect_gpu_ + public :: psb_z_vect_cuda_ private :: constructor - interface psb_z_vect_gpu_ + interface psb_z_vect_cuda_ module procedure constructor - end interface psb_z_vect_gpu_ + end interface psb_z_vect_cuda_ contains function constructor(x) result(this) complex(psb_dpk_) :: x(:) - type(psb_z_vect_gpu) :: this + type(psb_z_vect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -128,20 +128,20 @@ contains #ifdef HAVE_SPGPU - subroutine z_gpu_device_wait() + subroutine z_cuda_device_wait() call psb_cudaSync() - end subroutine z_gpu_device_wait + end subroutine z_cuda_device_wait - subroutine z_gpu_new_buffer(n,x,info) + subroutine z_cuda_new_buffer(n,x,info) use psb_realloc_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(out) :: info - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(x%combuf)) then if (size(x%combuf) idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then ! ! Only need a sync in this branch; in the others ! cudamemCpy acts as a sync point. @@ -331,14 +331,14 @@ contains end select - end subroutine z_gpu_gthzv_x + end subroutine z_cuda_gthzv_x - subroutine z_gpu_gthzbuf(i,n,idx,x) - use psb_gpu_env_mod + subroutine z_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i,n class(psb_i_base_vect_type) :: idx - class(psb_z_vect_gpu) :: x + class(psb_z_vect_cuda) :: x integer :: info, ni info = 0 @@ -349,11 +349,11 @@ contains end if select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (x%is_host()) call x%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) @@ -384,14 +384,14 @@ contains end select - end subroutine z_gpu_gthzbuf + end subroutine z_cuda_gthzbuf - subroutine z_gpu_sctb(n,idx,x,beta,y) + subroutine z_cuda_sctb(n,idx,x,beta,y) implicit none !use psb_const_mod integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta, x(:) - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer(psb_ipk_) :: info if (n == 0) return @@ -401,24 +401,24 @@ contains call y%psb_z_base_vect_type%sctb(n,idx,x,beta) call y%set_host() - end subroutine z_gpu_sctb + end subroutine z_cuda_sctb - subroutine z_gpu_sctb_x(i,n,idx,x,beta,y) - use psb_gpu_env_mod + subroutine z_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod use psi_serial_mod integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta, x(:) - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer :: info, ni select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() ! - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then if (allocated(y%pinned_buffer)) then if (size(y%pinned_buffer) < n) then call inner_unregister(y%pinned_buffer) @@ -506,16 +506,16 @@ contains call psb_cudaSync() call y%set_dev() - end subroutine z_gpu_sctb_x + end subroutine z_cuda_sctb_x - subroutine z_gpu_sctb_buf(i,n,idx,beta,y) + subroutine z_cuda_sctb_buf(i,n,idx,beta,y) use psi_serial_mod - use psb_gpu_env_mod + use psb_cuda_env_mod implicit none integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta - class(psb_z_vect_gpu) :: y + class(psb_z_vect_cuda) :: y integer(psb_ipk_) :: info, ni !!$ write(0,*) 'Starting sctb_buf' @@ -526,11 +526,11 @@ contains select type(ii=> idx) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - if (psb_gpu_DeviceHasUVA()) then + if (psb_cuda_DeviceHasUVA()) then info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) else @@ -557,106 +557,106 @@ contains end select !!$ write(0,*) 'Done sctb_buf' - end subroutine z_gpu_sctb_buf + end subroutine z_cuda_sctb_buf - subroutine z_gpu_bld_x(x,this) + subroutine z_cuda_bld_x(x,this) use psb_base_mod complex(psb_dpk_), intent(in) :: this(:) - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call psb_realloc(size(this),x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_bld_x',& + call psb_errpush(info,'z_cuda_bld_x',& & i_err=(/size(this),izero,izero,izero,izero/)) end if x%v(:) = this(:) call x%set_host() call x%sync() - end subroutine z_gpu_bld_x + end subroutine z_cuda_bld_x - subroutine z_gpu_bld_mn(x,n) + subroutine z_cuda_bld_mn(x,n) integer(psb_mpk_), intent(in) :: n - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(n,info) if (info /= 0) then - call psb_errpush(info,'z_gpu_bld_n',i_err=(/n,n,n,n,n/)) + call psb_errpush(info,'z_cuda_bld_n',i_err=(/n,n,n,n,n/)) end if - end subroutine z_gpu_bld_mn + end subroutine z_cuda_bld_mn - subroutine z_gpu_set_host(x) + subroutine z_cuda_set_host(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_host - end subroutine z_gpu_set_host + end subroutine z_cuda_set_host - subroutine z_gpu_set_dev(x) + subroutine z_cuda_set_dev(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_dev - end subroutine z_gpu_set_dev + end subroutine z_cuda_set_dev - subroutine z_gpu_set_sync(x) + subroutine z_cuda_set_sync(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x x%state = is_sync - end subroutine z_gpu_set_sync + end subroutine z_cuda_set_sync - function z_gpu_is_dev(x) result(res) + function z_cuda_is_dev(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function z_gpu_is_dev + end function z_cuda_is_dev - function z_gpu_is_host(x) result(res) + function z_cuda_is_host(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function z_gpu_is_host + end function z_cuda_is_host - function z_gpu_is_sync(x) result(res) + function z_cuda_is_sync(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function z_gpu_is_sync + end function z_cuda_is_sync - function z_gpu_get_nrows(x) result(res) + function z_cuda_get_nrows(x) result(res) implicit none - class(psb_z_vect_gpu), intent(in) :: x + class(psb_z_vect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = 0 if (allocated(x%v)) res = size(x%v) - end function z_gpu_get_nrows + end function z_cuda_get_nrows - function z_gpu_get_fmt() result(res) + function z_cuda_get_fmt() result(res) implicit none character(len=5) :: res res = 'zGPU' - end function z_gpu_get_fmt + end function z_cuda_get_fmt - subroutine z_gpu_all(n, x, info) + subroutine z_cuda_all(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: n - class(psb_z_vect_gpu), intent(out) :: x + class(psb_z_vect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(n,x%v,info) @@ -664,26 +664,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_all',& + call psb_errpush(info,'z_cuda_all',& & i_err=(/n,n,n,n,n/)) end if - end subroutine z_gpu_all + end subroutine z_cuda_all - subroutine z_gpu_zero(x) + subroutine z_cuda_zero(x) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=zzero call x%set_host() - end subroutine z_gpu_zero + end subroutine z_cuda_zero - subroutine z_gpu_asb_m(n, x, info) + subroutine z_cuda_asb_m(n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_mpk_), intent(in) :: n - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_mpk_) :: nd @@ -703,12 +703,12 @@ contains end if end if - end subroutine z_gpu_asb_m + end subroutine z_cuda_asb_m - subroutine z_gpu_sync_space(x,info) + subroutine z_cuda_sync_space(x,info) use psb_base_mod, only : psb_realloc implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nh, nd @@ -747,12 +747,12 @@ contains end if end if - end subroutine z_gpu_sync_space + end subroutine z_cuda_sync_space - subroutine z_gpu_sync(x) + subroutine z_cuda_sync(x) use psb_base_mod, only : psb_realloc implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -778,31 +778,31 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'z_gpu_sync') + call psb_errpush(info,'z_cuda_sync') end if - end subroutine z_gpu_sync + end subroutine z_cuda_sync - subroutine z_gpu_free(x, info) + subroutine z_cuda_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) deallocate(x%v, stat=info) if (c_associated(x%deviceVect)) then -!!$ write(0,*)'d_gpu_free Calling freeMultiVecDevice' +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' call freeMultiVecDevice(x%deviceVect) x%deviceVect=c_null_ptr end if call x%free_buffer(info) call x%set_sync() - end subroutine z_gpu_free + end subroutine z_cuda_free - subroutine z_gpu_set_scal(x,val,first,last) - class(psb_z_vect_gpu), intent(inout) :: x + subroutine z_cuda_set_scal(x,val,first,last) + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val integer(psb_ipk_), optional :: first, last @@ -817,10 +817,10 @@ contains info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() - end subroutine z_gpu_set_scal + end subroutine z_cuda_set_scal !!$ -!!$ subroutine z_gpu_set_vect(x,val) -!!$ class(psb_z_vect_gpu), intent(inout) :: x +!!$ subroutine z_cuda_set_vect(x,val) +!!$ class(psb_z_vect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent(in) :: val(:) !!$ integer(psb_ipk_) :: nr !!$ integer(psb_ipk_) :: info @@ -829,13 +829,13 @@ contains !!$ call x%psb_z_base_vect_type%set_vect(val) !!$ call x%set_host() !!$ -!!$ end subroutine z_gpu_set_vect +!!$ end subroutine z_cuda_set_vect - function z_gpu_dot_v(n,x,y) result(res) + function z_cuda_dot_v(n,x,y) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_), intent(in) :: n complex(psb_dpk_) :: res @@ -852,13 +852,13 @@ contains type is (psb_z_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n,x%v,1,yy%v,1) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) if (info /= 0) then info = psb_err_internal_error_ - call psb_errpush(info,'z_gpu_dot_v') + call psb_errpush(info,'z_cuda_dot_v') end if class default @@ -867,11 +867,11 @@ contains res = y%dot(n,x%v) end select - end function z_gpu_dot_v + end function z_cuda_dot_v - function z_gpu_dot_a(n,x,y) result(res) + function z_cuda_dot_a(n,x,y) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: y(:) integer(psb_ipk_), intent(in) :: n complex(psb_dpk_) :: res @@ -880,14 +880,14 @@ contains if (x%is_dev()) call x%sync() res = ddot(n,y,1,x%v,1) - end function z_gpu_dot_a + end function z_cuda_dot_a - subroutine z_gpu_axpby_v(m,alpha, x, beta, y, info) + subroutine z_cuda_axpby_v(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y complex(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny @@ -895,7 +895,7 @@ contains info = psb_success_ select type(xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) ! Do something different here if ((beta /= zzero).and.y%is_host())& & call y%sync() @@ -915,14 +915,14 @@ contains call y%axpby(m,alpha,x%v,beta,info) end select - end subroutine z_gpu_axpby_v + end subroutine z_cuda_axpby_v - subroutine z_gpu_axpby_a(m,alpha, x, beta, y, info) + subroutine z_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m complex(psb_dpk_), intent(in) :: x(:) - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y complex(psb_dpk_), intent (in) :: alpha, beta integer(psb_ipk_), intent(out) :: info @@ -930,13 +930,13 @@ contains & call y%sync() call psb_geaxpby(m,alpha,x,beta,y%v,info) call y%set_host() - end subroutine z_gpu_axpby_a + end subroutine z_cuda_axpby_a - subroutine z_gpu_mlt_v(x, y, info) + subroutine z_cuda_mlt_v(x, y, info) use psi_serial_mod implicit none class(psb_z_base_vect_type), intent(inout) :: x - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -950,7 +950,7 @@ contains y%v(i) = y%v(i) * xx%v(i) end do call y%set_host() - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) ! Do something different here if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -963,13 +963,13 @@ contains call y%set_host() end select - end subroutine z_gpu_mlt_v + end subroutine z_cuda_mlt_v - subroutine z_gpu_mlt_a(x, y, info) + subroutine z_cuda_mlt_a(x, y, info) use psi_serial_mod implicit none complex(psb_dpk_), intent(in) :: x(:) - class(psb_z_vect_gpu), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: y integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -977,15 +977,15 @@ contains if (y%is_dev()) call y%sync() call y%psb_z_base_vect_type%mlt(x,info) ! set_host() is invoked in the base method - end subroutine z_gpu_mlt_a + end subroutine z_cuda_mlt_a - subroutine z_gpu_mlt_a_2(alpha,x,y,beta,z,info) + subroutine z_cuda_mlt_a_2(alpha,x,y,beta,z,info) use psi_serial_mod implicit none complex(psb_dpk_), intent(in) :: alpha,beta complex(psb_dpk_), intent(in) :: x(:) complex(psb_dpk_), intent(in) :: y(:) - class(psb_z_vect_gpu), intent(inout) :: z + class(psb_z_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, n @@ -993,16 +993,16 @@ contains if (z%is_dev()) call z%sync() call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) ! set_host() is invoked in the base method - end subroutine z_gpu_mlt_a_2 + end subroutine z_cuda_mlt_a_2 - subroutine z_gpu_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + subroutine z_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) use psi_serial_mod use psb_string_mod implicit none complex(psb_dpk_), intent(in) :: alpha,beta class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y - class(psb_z_vect_gpu), intent(inout) :: z + class(psb_z_vect_cuda), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n @@ -1025,9 +1025,9 @@ contains ! info = 0 select type(xx => x) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) select type (yy => y) - type is (psb_z_vect_gpu) + type is (psb_z_vect_cuda) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= zzero).and.(z%is_host())) call z%sync() @@ -1049,23 +1049,23 @@ contains call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) call z%set_host() end select - end subroutine z_gpu_mlt_v_2 + end subroutine z_cuda_mlt_v_2 - subroutine z_gpu_scal(alpha, x) + subroutine z_cuda_scal(alpha, x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x complex(psb_dpk_), intent (in) :: alpha integer(psb_ipk_) :: info if (x%is_host()) call x%sync() info = scalMultiVecDevice(alpha,x%deviceVect) call x%set_dev() - end subroutine z_gpu_scal + end subroutine z_cuda_scal - function z_gpu_nrm2(n,x) result(res) + function z_cuda_nrm2(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1073,11 +1073,11 @@ contains if (x%is_host()) call x%sync() info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_nrm2 + end function z_cuda_nrm2 - function z_gpu_amax(n,x) result(res) + function z_cuda_amax(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1085,11 +1085,11 @@ contains if (x%is_host()) call x%sync() info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_amax + end function z_cuda_amax - function z_gpu_asum(n,x) result(res) + function z_cuda_asum(n,x) result(res) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info @@ -1097,11 +1097,11 @@ contains if (x%is_host()) call x%sync() info = asumMultiVecDeviceComplex(res,n,x%deviceVect) - end function z_gpu_asum + end function z_cuda_asum - subroutine z_gpu_absval1(x) + subroutine z_cuda_absval1(x) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: n integer(psb_ipk_) :: info @@ -1109,18 +1109,18 @@ contains n=x%get_nrows() info = absMultiVecDevice(n,zone,x%deviceVect) - end subroutine z_gpu_absval1 + end subroutine z_cuda_absval1 - subroutine z_gpu_absval2(x,y) + subroutine z_cuda_absval2(x,y) implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: y integer(psb_ipk_) :: n integer(psb_ipk_) :: info n=min(x%get_nrows(),y%get_nrows()) select type (yy=> y) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() info = absMultiVecDevice(n,zone,x%deviceVect,yy%deviceVect) @@ -1129,67 +1129,67 @@ contains if (y%is_dev()) call y%sync() call x%psb_z_base_vect_type%absval(y) end select - end subroutine z_gpu_absval2 + end subroutine z_cuda_absval2 - subroutine z_gpu_vect_finalize(x) + subroutine z_cuda_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_z_vect_gpu), intent(inout) :: x + type(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 call x%free(info) - end subroutine z_gpu_vect_finalize + end subroutine z_cuda_vect_finalize - subroutine z_gpu_ins_v(n,irl,val,dupl,x,info) + subroutine z_cuda_ins_v(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl class(psb_i_base_vect_type), intent(inout) :: irl class(psb_z_base_vect_type), intent(inout) :: val integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i, isz - logical :: done_gpu + logical :: done_cuda info = 0 if (psb_errstatus_fatal()) return - done_gpu = .false. + done_cuda = .false. select type(virl => irl) - class is (psb_i_vect_gpu) + class is (psb_i_vect_cuda) select type(vval => val) - class is (psb_z_vect_gpu) + class is (psb_z_vect_cuda) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() if (x%is_host()) call x%sync() info = geinsMultiVecDeviceDoubleComplex(n,virl%deviceVect,& & vval%deviceVect,dupl,1,x%deviceVect) call x%set_dev() - done_gpu=.true. + done_cuda=.true. end select end select - if (.not.done_gpu) then + if (.not.done_cuda) then if (irl%is_dev()) call irl%sync() if (val%is_dev()) call val%sync() call x%ins(n,irl%v,val%v,dupl,info) end if if (info /= 0) then - call psb_errpush(info,'gpu_vect_ins') + call psb_errpush(info,'cuda_vect_ins') return end if - end subroutine z_gpu_ins_v + end subroutine z_cuda_ins_v - subroutine z_gpu_ins_a(n,irl,val,dupl,x,info) + subroutine z_cuda_ins_a(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_vect_gpu), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_dpk_), intent(in) :: val(:) @@ -1202,11 +1202,11 @@ contains call x%psb_z_base_vect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine z_gpu_ins_a + end subroutine z_cuda_ins_a #endif -end module psb_z_gpu_vect_mod +end module psb_z_cuda_vect_mod ! @@ -1215,7 +1215,7 @@ end module psb_z_gpu_vect_mod -module psb_z_gpu_multivect_mod +module psb_z_cuda_multivect_mod use iso_c_binding use psb_const_mod use psb_error_mod @@ -1224,7 +1224,7 @@ module psb_z_gpu_multivect_mod use psb_i_multivect_mod #ifdef HAVE_SPGPU - use psb_i_gpu_multivect_mod + use psb_i_cuda_multivect_mod use psb_z_vectordev_mod #endif @@ -1232,7 +1232,7 @@ module psb_z_gpu_multivect_mod integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - type, extends(psb_z_base_multivect_type) :: psb_z_multivect_gpu + type, extends(psb_z_base_multivect_type) :: psb_z_multivect_cuda #ifdef HAVE_SPGPU integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 @@ -1240,48 +1240,48 @@ module psb_z_gpu_multivect_mod real(c_double), allocatable :: buffer(:,:) type(c_ptr) :: dt_buf = c_null_ptr contains - procedure, pass(x) :: get_nrows => z_gpu_multi_get_nrows - procedure, pass(x) :: get_ncols => z_gpu_multi_get_ncols - procedure, nopass :: get_fmt => z_gpu_multi_get_fmt -!!$ procedure, pass(x) :: dot_v => z_gpu_multi_dot_v -!!$ procedure, pass(x) :: dot_a => z_gpu_multi_dot_a -!!$ procedure, pass(y) :: axpby_v => z_gpu_multi_axpby_v -!!$ procedure, pass(y) :: axpby_a => z_gpu_multi_axpby_a -!!$ procedure, pass(y) :: mlt_v => z_gpu_multi_mlt_v -!!$ procedure, pass(y) :: mlt_a => z_gpu_multi_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => z_gpu_multi_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => z_gpu_multi_mlt_v_2 -!!$ procedure, pass(x) :: scal => z_gpu_multi_scal -!!$ procedure, pass(x) :: nrm2 => z_gpu_multi_nrm2 -!!$ procedure, pass(x) :: amax => z_gpu_multi_amax -!!$ procedure, pass(x) :: asum => z_gpu_multi_asum - procedure, pass(x) :: all => z_gpu_multi_all - procedure, pass(x) :: zero => z_gpu_multi_zero - procedure, pass(x) :: asb => z_gpu_multi_asb - procedure, pass(x) :: sync => z_gpu_multi_sync - procedure, pass(x) :: sync_space => z_gpu_multi_sync_space - procedure, pass(x) :: bld_x => z_gpu_multi_bld_x - procedure, pass(x) :: bld_n => z_gpu_multi_bld_n - procedure, pass(x) :: free => z_gpu_multi_free - procedure, pass(x) :: ins => z_gpu_multi_ins - procedure, pass(x) :: is_host => z_gpu_multi_is_host - procedure, pass(x) :: is_dev => z_gpu_multi_is_dev - procedure, pass(x) :: is_sync => z_gpu_multi_is_sync - procedure, pass(x) :: set_host => z_gpu_multi_set_host - procedure, pass(x) :: set_dev => z_gpu_multi_set_dev - procedure, pass(x) :: set_sync => z_gpu_multi_set_sync - procedure, pass(x) :: set_scal => z_gpu_multi_set_scal - procedure, pass(x) :: set_vect => z_gpu_multi_set_vect -!!$ procedure, pass(x) :: gthzv_x => z_gpu_multi_gthzv_x -!!$ procedure, pass(y) :: sctb => z_gpu_multi_sctb -!!$ procedure, pass(y) :: sctb_x => z_gpu_multi_sctb_x - final :: z_gpu_multi_vect_finalize + procedure, pass(x) :: get_nrows => z_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => z_cuda_multi_get_ncols + procedure, nopass :: get_fmt => z_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => z_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => z_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => z_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => z_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => z_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => z_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => z_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => z_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => z_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => z_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => z_cuda_multi_amax +!!$ procedure, pass(x) :: asum => z_cuda_multi_asum + procedure, pass(x) :: all => z_cuda_multi_all + procedure, pass(x) :: zero => z_cuda_multi_zero + procedure, pass(x) :: asb => z_cuda_multi_asb + procedure, pass(x) :: sync => z_cuda_multi_sync + procedure, pass(x) :: sync_space => z_cuda_multi_sync_space + procedure, pass(x) :: bld_x => z_cuda_multi_bld_x + procedure, pass(x) :: bld_n => z_cuda_multi_bld_n + procedure, pass(x) :: free => z_cuda_multi_free + procedure, pass(x) :: ins => z_cuda_multi_ins + procedure, pass(x) :: is_host => z_cuda_multi_is_host + procedure, pass(x) :: is_dev => z_cuda_multi_is_dev + procedure, pass(x) :: is_sync => z_cuda_multi_is_sync + procedure, pass(x) :: set_host => z_cuda_multi_set_host + procedure, pass(x) :: set_dev => z_cuda_multi_set_dev + procedure, pass(x) :: set_sync => z_cuda_multi_set_sync + procedure, pass(x) :: set_scal => z_cuda_multi_set_scal + procedure, pass(x) :: set_vect => z_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => z_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => z_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => z_cuda_multi_sctb_x + final :: z_cuda_multi_vect_finalize #endif - end type psb_z_multivect_gpu + end type psb_z_multivect_cuda - public :: psb_z_multivect_gpu + public :: psb_z_multivect_cuda private :: constructor - interface psb_z_multivect_gpu + interface psb_z_multivect_cuda module procedure constructor end interface @@ -1289,7 +1289,7 @@ contains function constructor(x) result(this) complex(psb_dpk_) :: x(:,:) - type(psb_z_multivect_gpu) :: this + type(psb_z_multivect_cuda) :: this integer(psb_ipk_) :: info this%v = x @@ -1299,15 +1299,15 @@ contains #ifdef HAVE_SPGPU -!!$ subroutine z_gpu_multi_gthzv_x(i,n,idx,x,y) +!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i,n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_dpk_) :: y(:) -!!$ class(psb_z_multivect_gpu) :: x +!!$ class(psb_z_multivect_cuda) :: x !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (x%is_host()) call x%sync() !!$ @@ -1332,16 +1332,16 @@ contains !!$ end select !!$ !!$ -!!$ end subroutine z_gpu_multi_gthzv_x +!!$ end subroutine z_cuda_multi_gthzv_x !!$ !!$ !!$ -!!$ subroutine z_gpu_multi_sctb(n,idx,x,beta,y) +!!$ subroutine z_cuda_multi_sctb(n,idx,x,beta,y) !!$ implicit none !!$ !use psb_const_mod !!$ integer(psb_ipk_) :: n, idx(:) !!$ complex(psb_dpk_) :: beta, x(:) -!!$ class(psb_z_multivect_gpu) :: y +!!$ class(psb_z_multivect_cuda) :: y !!$ integer(psb_ipk_) :: info !!$ !!$ if (n == 0) return @@ -1351,17 +1351,17 @@ contains !!$ call y%psb_z_base_multivect_type%sctb(n,idx,x,beta) !!$ call y%set_host() !!$ -!!$ end subroutine z_gpu_multi_sctb +!!$ end subroutine z_cuda_multi_sctb !!$ -!!$ subroutine z_gpu_multi_sctb_x(i,n,idx,x,beta,y) +!!$ subroutine z_cuda_multi_sctb_x(i,n,idx,x,beta,y) !!$ use psi_serial_mod !!$ integer(psb_ipk_) :: i, n !!$ class(psb_i_base_multivect_type) :: idx !!$ complex(psb_dpk_) :: beta, x(:) -!!$ class(psb_z_multivect_gpu) :: y +!!$ class(psb_z_multivect_cuda) :: y !!$ !!$ select type(ii=> idx) -!!$ class is (psb_i_vect_gpu) +!!$ class is (psb_i_vect_cuda) !!$ if (ii%is_host()) call ii%sync() !!$ if (y%is_host()) call y%sync() !!$ @@ -1387,13 +1387,13 @@ contains !!$ call y%sct(n,ii%v(i:),x,beta) !!$ end select !!$ -!!$ end subroutine z_gpu_multi_sctb_x +!!$ end subroutine z_cuda_multi_sctb_x - subroutine z_gpu_multi_bld_x(x,this) + subroutine z_cuda_multi_bld_x(x,this) use psb_base_mod complex(psb_dpk_), intent(in) :: this(:,:) - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info, m, n m=size(this,1) @@ -1403,101 +1403,101 @@ contains call psb_realloc(m,n,x%v,info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_multi_bld_x',& + call psb_errpush(info,'z_cuda_multi_bld_x',& & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) end if x%v(1:m,1:n) = this(1:m,1:n) call x%set_host() call x%sync() - end subroutine z_gpu_multi_bld_x + end subroutine z_cuda_multi_bld_x - subroutine z_gpu_multi_bld_n(x,m,n) + subroutine z_cuda_multi_bld_n(x,m,n) integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info call x%all(m,n,info) if (info /= 0) then - call psb_errpush(info,'z_gpu_multi_bld_n',i_err=(/m,n,n,n,n/)) + call psb_errpush(info,'z_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) end if - end subroutine z_gpu_multi_bld_n + end subroutine z_cuda_multi_bld_n - subroutine z_gpu_multi_set_host(x) + subroutine z_cuda_multi_set_host(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_host - end subroutine z_gpu_multi_set_host + end subroutine z_cuda_multi_set_host - subroutine z_gpu_multi_set_dev(x) + subroutine z_cuda_multi_set_dev(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_dev - end subroutine z_gpu_multi_set_dev + end subroutine z_cuda_multi_set_dev - subroutine z_gpu_multi_set_sync(x) + subroutine z_cuda_multi_set_sync(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x x%state = is_sync - end subroutine z_gpu_multi_set_sync + end subroutine z_cuda_multi_set_sync - function z_gpu_multi_is_dev(x) result(res) + function z_cuda_multi_is_dev(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_dev) - end function z_gpu_multi_is_dev + end function z_cuda_multi_is_dev - function z_gpu_multi_is_host(x) result(res) + function z_cuda_multi_is_host(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_host) - end function z_gpu_multi_is_host + end function z_cuda_multi_is_host - function z_gpu_multi_is_sync(x) result(res) + function z_cuda_multi_is_sync(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x logical :: res res = (x%state == is_sync) - end function z_gpu_multi_is_sync + end function z_cuda_multi_is_sync - function z_gpu_multi_get_nrows(x) result(res) + function z_cuda_multi_get_nrows(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_nrows - end function z_gpu_multi_get_nrows + end function z_cuda_multi_get_nrows - function z_gpu_multi_get_ncols(x) result(res) + function z_cuda_multi_get_ncols(x) result(res) implicit none - class(psb_z_multivect_gpu), intent(in) :: x + class(psb_z_multivect_cuda), intent(in) :: x integer(psb_ipk_) :: res res = x%m_ncols - end function z_gpu_multi_get_ncols + end function z_cuda_multi_get_ncols - function z_gpu_multi_get_fmt() result(res) + function z_cuda_multi_get_fmt() result(res) implicit none character(len=5) :: res res = 'zGPU' - end function z_gpu_multi_get_fmt + end function z_cuda_multi_get_fmt -!!$ function z_gpu_multi_dot_v(n,x,y) result(res) +!!$ function z_cuda_multi_dot_v(n,x,y) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ class(psb_z_base_multivect_type), intent(inout) :: y !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_dpk_) :: res @@ -1514,13 +1514,13 @@ contains !!$ type is (psb_z_base_multivect_type) !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,x%v,1,yy%v,1) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ if (x%is_host()) call x%sync() !!$ if (yy%is_host()) call yy%sync() !!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) !!$ if (info /= 0) then !!$ info = psb_err_internal_error_ -!!$ call psb_errpush(info,'z_gpu_multi_dot_v') +!!$ call psb_errpush(info,'z_cuda_multi_dot_v') !!$ end if !!$ !!$ class default @@ -1529,11 +1529,11 @@ contains !!$ res = y%dot(n,x%v) !!$ end select !!$ -!!$ end function z_gpu_multi_dot_v +!!$ end function z_cuda_multi_dot_v !!$ -!!$ function z_gpu_multi_dot_a(n,x,y) result(res) +!!$ function z_cuda_multi_dot_a(n,x,y) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent(in) :: y(:) !!$ integer(psb_ipk_), intent(in) :: n !!$ complex(psb_dpk_) :: res @@ -1542,14 +1542,14 @@ contains !!$ if (x%is_dev()) call x%sync() !!$ res = ddot(n,y,1,x%v,1) !!$ -!!$ end function z_gpu_multi_dot_a +!!$ end function z_cuda_multi_dot_a !!$ -!!$ subroutine z_gpu_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ subroutine z_cuda_multi_axpby_v(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ class(psb_z_base_multivect_type), intent(inout) :: x -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ complex(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: nx, ny @@ -1562,7 +1562,7 @@ contains !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here !!$ if ((beta /= dzero).and.y%is_host())& !!$ & call y%sync() @@ -1581,27 +1581,27 @@ contains !!$ call y%axpby(m,alpha,x%v,beta,info) !!$ end select !!$ -!!$ end subroutine z_gpu_multi_axpby_v +!!$ end subroutine z_cuda_multi_axpby_v !!$ -!!$ subroutine z_gpu_multi_axpby_a(m,alpha, x, beta, y, info) +!!$ subroutine z_cuda_multi_axpby_a(m,alpha, x, beta, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ integer(psb_ipk_), intent(in) :: m !!$ complex(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ complex(psb_dpk_), intent (in) :: alpha, beta !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ if (y%is_dev()) call y%sync() !!$ call psb_geaxpby(m,alpha,x,beta,y%v,info) !!$ call y%set_host() -!!$ end subroutine z_gpu_multi_axpby_a +!!$ end subroutine z_cuda_multi_axpby_a !!$ -!!$ subroutine z_gpu_multi_mlt_v(x, y, info) +!!$ subroutine z_cuda_multi_mlt_v(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ class(psb_z_base_multivect_type), intent(inout) :: x -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ !!$ integer(psb_ipk_) :: i, n @@ -1615,7 +1615,7 @@ contains !!$ y%v(i) = y%v(i) * xx%v(i) !!$ end do !!$ call y%set_host() -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here !!$ if (y%is_host()) call y%sync() !!$ if (xx%is_host()) call xx%sync() @@ -1627,13 +1627,13 @@ contains !!$ call y%set_host() !!$ end select !!$ -!!$ end subroutine z_gpu_multi_mlt_v +!!$ end subroutine z_cuda_multi_mlt_v !!$ -!!$ subroutine z_gpu_multi_mlt_a(x, y, info) +!!$ subroutine z_cuda_multi_mlt_a(x, y, info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: y !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1641,15 +1641,15 @@ contains !!$ call y%sync() !!$ call y%psb_z_base_multivect_type%mlt(x,info) !!$ call y%set_host() -!!$ end subroutine z_gpu_multi_mlt_a +!!$ end subroutine z_cuda_multi_mlt_a !!$ -!!$ subroutine z_gpu_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ subroutine z_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) !!$ use psi_serial_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta !!$ complex(psb_dpk_), intent(in) :: x(:) !!$ complex(psb_dpk_), intent(in) :: y(:) -!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ class(psb_z_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ integer(psb_ipk_) :: i, n !!$ @@ -1657,16 +1657,16 @@ contains !!$ if (z%is_dev()) call z%sync() !!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() -!!$ end subroutine z_gpu_multi_mlt_a_2 +!!$ end subroutine z_cuda_multi_mlt_a_2 !!$ -!!$ subroutine z_gpu_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ subroutine z_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod !!$ implicit none !!$ complex(psb_dpk_), intent(in) :: alpha,beta !!$ class(psb_z_base_multivect_type), intent(inout) :: x !!$ class(psb_z_base_multivect_type), intent(inout) :: y -!!$ class(psb_z_multivect_gpu), intent(inout) :: z +!!$ class(psb_z_multivect_cuda), intent(inout) :: z !!$ integer(psb_ipk_), intent(out) :: info !!$ character(len=1), intent(in), optional :: conjgx, conjgy !!$ integer(psb_ipk_) :: i, n @@ -1689,9 +1689,9 @@ contains !!$ ! !!$ info = 0 !!$ select type(xx => x) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ select type (yy => y) -!!$ type is (psb_z_multivect_gpu) +!!$ type is (psb_z_multivect_cuda) !!$ if (xx%is_host()) call xx%sync() !!$ if (yy%is_host()) call yy%sync() !!$ ! Z state is irrelevant: it will be done on the GPU. @@ -1711,11 +1711,11 @@ contains !!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) !!$ call z%set_host() !!$ end select -!!$ end subroutine z_gpu_multi_mlt_v_2 +!!$ end subroutine z_cuda_multi_mlt_v_2 - subroutine z_gpu_multi_set_scal(x,val) - class(psb_z_multivect_gpu), intent(inout) :: x + subroutine z_cuda_multi_set_scal(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val integer(psb_ipk_) :: info @@ -1723,10 +1723,10 @@ contains if (x%is_dev()) call x%sync() call x%psb_z_base_multivect_type%set_scal(val) call x%set_host() - end subroutine z_gpu_multi_set_scal + end subroutine z_cuda_multi_set_scal - subroutine z_gpu_multi_set_vect(x,val) - class(psb_z_multivect_gpu), intent(inout) :: x + subroutine z_cuda_multi_set_vect(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x complex(psb_dpk_), intent(in) :: val(:,:) integer(psb_ipk_) :: nr integer(psb_ipk_) :: info @@ -1735,24 +1735,24 @@ contains call x%psb_z_base_multivect_type%set_vect(val) call x%set_host() - end subroutine z_gpu_multi_set_vect + end subroutine z_cuda_multi_set_vect -!!$ subroutine z_gpu_multi_scal(alpha, x) +!!$ subroutine z_cuda_multi_scal(alpha, x) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ complex(psb_dpk_), intent (in) :: alpha !!$ !!$ if (x%is_dev()) call x%sync() !!$ call x%psb_z_base_multivect_type%scal(alpha) !!$ call x%set_host() -!!$ end subroutine z_gpu_multi_scal +!!$ end subroutine z_cuda_multi_scal !!$ !!$ -!!$ function z_gpu_multi_nrm2(n,x) result(res) +!!$ function z_cuda_multi_nrm2(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ integer(psb_ipk_) :: info @@ -1760,36 +1760,36 @@ contains !!$ if (x%is_host()) call x%sync() !!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) !!$ -!!$ end function z_gpu_multi_nrm2 +!!$ end function z_cuda_multi_nrm2 !!$ -!!$ function z_gpu_multi_amax(n,x) result(res) +!!$ function z_cuda_multi_amax(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = maxval(abs(x%v(1:n))) !!$ -!!$ end function z_gpu_multi_amax +!!$ end function z_cuda_multi_amax !!$ -!!$ function z_gpu_multi_asum(n,x) result(res) +!!$ function z_cuda_multi_asum(n,x) result(res) !!$ implicit none -!!$ class(psb_z_multivect_gpu), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: x !!$ integer(psb_ipk_), intent(in) :: n !!$ real(psb_dpk_) :: res !!$ !!$ if (x%is_dev()) call x%sync() !!$ res = sum(abs(x%v(1:n))) !!$ -!!$ end function z_gpu_multi_asum +!!$ end function z_cuda_multi_asum - subroutine z_gpu_multi_all(m,n, x, info) + subroutine z_cuda_multi_all(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(out) :: x + class(psb_z_multivect_cuda), intent(out) :: x integer(psb_ipk_), intent(out) :: info call psb_realloc(m,n,x%v,info,pad=zzero) @@ -1799,26 +1799,26 @@ contains if (info == 0) call x%sync_space(info) if (info /= 0) then info=psb_err_alloc_request_ - call psb_errpush(info,'z_gpu_multi_all',& + call psb_errpush(info,'z_cuda_multi_all',& & i_err=(/m,n,n,n,n/)) end if - end subroutine z_gpu_multi_all + end subroutine z_cuda_multi_all - subroutine z_gpu_multi_zero(x) + subroutine z_cuda_multi_zero(x) use psi_serial_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x if (allocated(x%v)) x%v=dzero call x%set_host() - end subroutine z_gpu_multi_zero + end subroutine z_cuda_multi_zero - subroutine z_gpu_multi_asb(m,n, x, info) + subroutine z_cuda_multi_asb(m,n, x, info) use psi_serial_mod use psb_realloc_mod implicit none integer(psb_ipk_), intent(in) :: m,n - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nd, nc @@ -1838,12 +1838,12 @@ contains call x%set_host() end if end if - end subroutine z_gpu_multi_asb + end subroutine z_cuda_multi_asb - subroutine z_gpu_multi_sync_space(x,info) + subroutine z_cuda_multi_sync_space(x,info) use psb_realloc_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: mh,nh,md,nd @@ -1896,11 +1896,11 @@ contains end if - end subroutine z_gpu_multi_sync_space + end subroutine z_cuda_multi_sync_space - subroutine z_gpu_multi_sync(x) + subroutine z_cuda_multi_sync(x) implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: n,info info = 0 @@ -1916,16 +1916,16 @@ contains if (info == 0) call x%set_sync() if (info /= 0) then info=psb_err_internal_error_ - call psb_errpush(info,'z_gpu_multi_sync') + call psb_errpush(info,'z_cuda_multi_sync') end if - end subroutine z_gpu_multi_sync + end subroutine z_cuda_multi_sync - subroutine z_gpu_multi_free(x, info) + subroutine z_cuda_multi_free(x, info) use psi_serial_mod use psb_realloc_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 @@ -1940,13 +1940,13 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine z_gpu_multi_free + end subroutine z_cuda_multi_free - subroutine z_gpu_multi_vect_finalize(x) + subroutine z_cuda_multi_vect_finalize(x) use psi_serial_mod use psb_realloc_mod implicit none - type(psb_z_multivect_gpu), intent(inout) :: x + type(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_) :: info info = 0 @@ -1961,12 +1961,12 @@ contains if (allocated(x%v)) deallocate(x%v, stat=info) call x%set_sync() - end subroutine z_gpu_multi_vect_finalize + end subroutine z_cuda_multi_vect_finalize - subroutine z_gpu_multi_ins(n,irl,val,dupl,x,info) + subroutine z_cuda_multi_ins(n,irl,val,dupl,x,info) use psi_serial_mod implicit none - class(psb_z_multivect_gpu), intent(inout) :: x + class(psb_z_multivect_cuda), intent(inout) :: x integer(psb_ipk_), intent(in) :: n, dupl integer(psb_ipk_), intent(in) :: irl(:) complex(psb_dpk_), intent(in) :: val(:,:) @@ -1979,11 +1979,11 @@ contains call x%psb_z_base_multivect_type%ins(n,irl,val,dupl,info) call x%set_host() - end subroutine z_gpu_multi_ins + end subroutine z_cuda_multi_ins #endif -end module psb_z_gpu_multivect_mod +end module psb_z_cuda_multivect_mod diff --git a/test/gpukern/Makefile b/test/cudakern/Makefile similarity index 100% rename from test/gpukern/Makefile rename to test/cudakern/Makefile diff --git a/test/gpukern/c_file_spmv.F90 b/test/cudakern/c_file_spmv.F90 similarity index 100% rename from test/gpukern/c_file_spmv.F90 rename to test/cudakern/c_file_spmv.F90 diff --git a/test/gpukern/d_file_spmv.F90 b/test/cudakern/d_file_spmv.F90 similarity index 100% rename from test/gpukern/d_file_spmv.F90 rename to test/cudakern/d_file_spmv.F90 diff --git a/test/gpukern/data_input.f90 b/test/cudakern/data_input.f90 similarity index 100% rename from test/gpukern/data_input.f90 rename to test/cudakern/data_input.f90 diff --git a/test/gpukern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 similarity index 99% rename from test/gpukern/dpdegenmv.F90 rename to test/cudakern/dpdegenmv.F90 index ab616471..b8a2ba2c 100644 --- a/test/gpukern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -548,7 +548,7 @@ program pdgenmv use psb_util_mod use psb_ext_mod #ifdef HAVE_GPU - use psb_gpu_mod + use psb_cuda_mod #endif #ifdef HAVE_RSB use psb_rsb_mod @@ -619,7 +619,7 @@ program pdgenmv call psb_info(ctxt,iam,np) #ifdef HAVE_GPU - call psb_gpu_init(ctxt) + call psb_cuda_init(ctxt) #endif #ifdef HAVE_RSB call psb_rsb_init() @@ -641,7 +641,7 @@ program pdgenmv end if #ifdef HAVE_GPU write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif ! ! get parameters @@ -761,7 +761,7 @@ program pdgenmv call psb_barrier(ctxt) t1 = psb_wtime() call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() t2 = psb_Wtime() -t1 call psb_amx(ctxt,t2) if (j==1) tcnvg1 = t2 @@ -798,7 +798,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) tt2 = psb_wtime() - tt1 call psb_amx(ctxt,tt2) @@ -825,7 +825,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) gt2 = psb_wtime() - gt1 call psb_amx(ctxt,gt2) @@ -928,7 +928,7 @@ program pdgenmv #ifdef HAVE_GPU bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth - bdwdth = psb_gpu_MemoryPeakBandwidth() + bdwdth = psb_cuda_MemoryPeakBandwidth() write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth #endif write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() @@ -950,7 +950,7 @@ program pdgenmv goto 9999 end if #ifdef HAVE_GPU - call psb_gpu_exit() + call psb_cuda_exit() #endif call psb_exit(ctxt) stop diff --git a/test/gpukern/s_file_spmv.F90 b/test/cudakern/s_file_spmv.F90 similarity index 100% rename from test/gpukern/s_file_spmv.F90 rename to test/cudakern/s_file_spmv.F90 diff --git a/test/gpukern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 similarity index 100% rename from test/gpukern/spdegenmv.F90 rename to test/cudakern/spdegenmv.F90 diff --git a/test/gpukern/z_file_spmv.F90 b/test/cudakern/z_file_spmv.F90 similarity index 100% rename from test/gpukern/z_file_spmv.F90 rename to test/cudakern/z_file_spmv.F90 From 9b713c177bf90424c634c7c29c5a773835629459 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 28 Nov 2023 19:46:20 +0100 Subject: [PATCH 013/116] Fix cuda interfaces for renaming --- cuda/cuda_util.c | 46 ++++++++++++++++++------------------- cuda/cuda_util.h | 18 +++++++-------- cuda/cvectordev.c | 28 +++++++++++----------- cuda/diagdev.c | 4 ++-- cuda/dnsdev.c | 8 +++---- cuda/dvectordev.c | 28 +++++++++++----------- cuda/elldev.c | 24 +++++++++---------- cuda/hdiagdev.c | 4 ++-- cuda/hlldev.c | 16 ++++++------- cuda/ivectordev.c | 8 +++---- cuda/svectordev.c | 28 +++++++++++----------- cuda/zvectordev.c | 28 +++++++++++----------- test/cudakern/dpdegenmv.F90 | 16 ++++++------- 13 files changed, 128 insertions(+), 128 deletions(-) diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 63c38b53..0fe4a8b7 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -37,7 +37,7 @@ static int hasUVA=-1; static struct cudaDeviceProp *prop=NULL; -static spgpuHandle_t psb_gpu_handle = NULL; +static spgpuHandle_t psb_cuda_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL; @@ -228,7 +228,7 @@ int gpuInit(int dev) return SPGPU_UNSPECIFIED; } if (!psb_cublas_handle) - psb_gpuCreateCublasHandle(); + psb_cudaCreateCublasHandle(); hasUVA=getDeviceHasUVA(); return err; @@ -238,14 +238,14 @@ int gpuInit(int dev) void gpuClose() { cudaStream_t st1, st2; - if (! psb_gpu_handle) - st1=spgpuGetStream(psb_gpu_handle); + if (! psb_cuda_handle) + st1=spgpuGetStream(psb_cuda_handle); if (! psb_cublas_handle) cublasGetStream(psb_cublas_handle,&st2); - psb_gpuDestroyHandle(); + psb_cudaDestroyHandle(); if (st1 != st2) - psb_gpuDestroyCublasHandle(); + psb_cudaDestroyCublasHandle(); free(prop); prop=NULL; hasUVA=-1; @@ -391,49 +391,49 @@ void cudaReset() } -spgpuHandle_t psb_gpuGetHandle() +spgpuHandle_t psb_cudaGetHandle() { - return psb_gpu_handle; + return psb_cuda_handle; } -void psb_gpuCreateHandle() +void psb_cudaCreateHandle() { - if (!psb_gpu_handle) - spgpuCreate(&psb_gpu_handle, getDevice()); + if (!psb_cuda_handle) + spgpuCreate(&psb_cuda_handle, getDevice()); } -void psb_gpuDestroyHandle() +void psb_cudaDestroyHandle() { - if (!psb_gpu_handle) - spgpuDestroy(psb_gpu_handle); - psb_gpu_handle = NULL; + if (!psb_cuda_handle) + spgpuDestroy(psb_cuda_handle); + psb_cuda_handle = NULL; } -cudaStream_t psb_gpuGetStream() +cudaStream_t psb_cudaGetStream() { - return spgpuGetStream(psb_gpu_handle); + return spgpuGetStream(psb_cuda_handle); } -void psb_gpuSetStream(cudaStream_t stream) +void psb_cudaSetStream(cudaStream_t stream) { - spgpuSetStream(psb_gpu_handle, stream); + spgpuSetStream(psb_cuda_handle, stream); return ; } -cublasHandle_t psb_gpuGetCublasHandle() +cublasHandle_t psb_cudaGetCublasHandle() { if (!psb_cublas_handle) - psb_gpuCreateCublasHandle(); + psb_cudaCreateCublasHandle(); return psb_cublas_handle; } -void psb_gpuCreateCublasHandle() +void psb_cudaCreateCublasHandle() { if (!psb_cublas_handle) cublasCreate(&psb_cublas_handle); } -void psb_gpuDestroyCublasHandle() +void psb_cudaDestroyCublasHandle() { if (!psb_cublas_handle) cublasDestroy(psb_cublas_handle); diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 03c7b488..789c08f4 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -71,15 +71,15 @@ void cudaReset(); void gpuClose(); -spgpuHandle_t psb_gpuGetHandle(); -void psb_gpuCreateHandle(); -void psb_gpuDestroyHandle(); -cudaStream_t psb_gpuGetStream(); -void psb_gpuSetStream(cudaStream_t stream); - -cublasHandle_t psb_gpuGetCublasHandle(); -void psb_gpuCreateCublasHandle(); -void psb_gpuDestroyCublasHandle(); +spgpuHandle_t psb_cudaGetHandle(); +void psb_cudaCreateHandle(); +void psb_cudaDestroyHandle(); +cudaStream_t psb_cudaGetStream(); +void psb_cudaSetStream(cudaStream_t stream); + +cublasHandle_t psb_cudaGetCublasHandle(); +void psb_cudaCreateCublasHandle(); +void psb_cudaDestroyCublasHandle(); int allocateInt(void **, int); diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index db55caef..1dc16667 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -89,7 +89,7 @@ int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiV struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; cuFloatComplex *hv = (cuFloatComplex *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -175,7 +175,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, cuFloatComplex *hv = (cuFloatComplex *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -187,7 +187,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -197,7 +197,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -207,7 +207,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, @@ -218,7 +218,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, @@ -231,7 +231,7 @@ int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); @@ -244,7 +244,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -261,7 +261,7 @@ int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -279,7 +279,7 @@ int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVec struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -297,7 +297,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -311,7 +311,7 @@ int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/diagdev.c b/cuda/diagdev.c index 64879455..a2acf1f4 100644 --- a/cuda/diagdev.c +++ b/cuda/diagdev.c @@ -186,7 +186,7 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); #ifdef HAVE_SPGPU #ifdef VERBOSE @@ -268,7 +268,7 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); #ifdef HAVE_SPGPU #ifdef VERBOSE diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c index fb4d339c..25cddc87 100644 --- a/cuda/dnsdev.c +++ b/cuda/dnsdev.c @@ -126,7 +126,7 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -157,7 +157,7 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -188,7 +188,7 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { @@ -219,7 +219,7 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex int status; #ifdef HAVE_SPGPU - cublasHandle_t handle=psb_gpuGetCublasHandle(); + cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ if (n == 1) { diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 8b020c16..eae82c1e 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -88,7 +88,7 @@ int setscalMultiVecDeviceDouble(double val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -143,7 +143,7 @@ int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; double *hv = (double *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -168,7 +168,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v double *hv = (double *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -179,7 +179,7 @@ int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, v int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, @@ -189,7 +189,7 @@ int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -198,7 +198,7 @@ int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -207,7 +207,7 @@ int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -219,7 +219,7 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_); return(i); @@ -231,7 +231,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -246,7 +246,7 @@ int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceV { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -262,7 +262,7 @@ int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -278,7 +278,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -291,7 +291,7 @@ int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/elldev.c b/cuda/elldev.c index 8fd7aeb5..eff89efa 100644 --- a/cuda/elldev.c +++ b/cuda/elldev.c @@ -158,7 +158,7 @@ void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, i int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); for (i=0; irows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -502,7 +502,7 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); //cudaSync(); //fprintf(stderr," hacksz: %d \n",hacksz); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -545,7 +545,7 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, @@ -588,7 +588,7 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); //cudaSync(); - handle = psb_gpuGetHandle(); + handle = psb_cudaGetHandle(); psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, (int *) devMat->rS, (int *) devMat->hackOffs, devIdisp,devJa,devVal, diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c index 93636465..71d5c472 100644 --- a/cuda/ivectordev.c +++ b/cuda/ivectordev.c @@ -90,7 +90,7 @@ int setscalMultiVecDeviceInt(int val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_); @@ -105,7 +105,7 @@ int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; int *hv = (int *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -169,7 +169,7 @@ int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void int *hv = (int *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); diff --git a/cuda/svectordev.c b/cuda/svectordev.c index d193a4d8..9a41ae1a 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -88,7 +88,7 @@ int setscalMultiVecDeviceFloat(float val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_); @@ -103,7 +103,7 @@ int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal, struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -142,7 +142,7 @@ int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; float *hv = (float *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -167,7 +167,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo float *hv = (float *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -179,7 +179,7 @@ int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, vo int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -188,7 +188,7 @@ int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -197,7 +197,7 @@ int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -207,7 +207,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, @@ -219,7 +219,7 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_); return(i); @@ -231,7 +231,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -246,7 +246,7 @@ int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVec { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -262,7 +262,7 @@ int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -278,7 +278,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -291,7 +291,7 @@ int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index c245719f..c3671a86 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -89,7 +89,7 @@ int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, { int i=0; int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_); @@ -104,7 +104,7 @@ int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMulti struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecIrl->pitch_; if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) return SPGPU_UNSUPPORTED; @@ -144,7 +144,7 @@ int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, int i, *idx =(int *) indexes;; cuDoubleComplex *hv = (cuDoubleComplex *) host_values;; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); i=0; hv = &(hv[hfirst-indexBase]); @@ -174,7 +174,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, cuDoubleComplex *hv = (cuDoubleComplex *) host_values; int *idx=(int *) indexes; struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); idx = &(idx[first-indexBase]); hv = &(hv[hfirst-indexBase]); @@ -186,7 +186,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_); @@ -195,7 +195,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, @@ -205,7 +205,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, @@ -216,7 +216,7 @@ int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA) { int i=0; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; // Note: inner kernel can handle aliased input/output spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, @@ -228,7 +228,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti {int i=0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_, (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); @@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMulti int pitch = 0; struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); pitch = devVecY->pitch_; if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; @@ -258,7 +258,7 @@ int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -275,7 +275,7 @@ int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceV struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) return SPGPU_UNSUPPORTED; @@ -293,7 +293,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if ((n > devVecA->size_) || (n>devVecB->size_ )) return SPGPU_UNSUPPORTED; @@ -307,7 +307,7 @@ int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVe int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; - spgpuHandle_t handle=psb_gpuGetHandle(); + spgpuHandle_t handle=psb_cudaGetHandle(); if (n > devVecA->size_) return SPGPU_UNSUPPORTED; diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index b8a2ba2c..d2cc2172 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -573,8 +573,8 @@ program pdgenmv ! dense matrices type(psb_d_vect_type), target :: xv, bv, xg, bg #ifdef HAVE_GPU - type(psb_d_vect_gpu) :: vmold - type(psb_i_vect_gpu) :: imold + type(psb_d_vect_cuda) :: vmold + type(psb_i_vect_cuda) :: imold #endif real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:) ! blacs parameters @@ -595,14 +595,14 @@ program pdgenmv type(psb_d_rsb_sparse_mat), target :: arsb #endif #ifdef HAVE_GPU - type(psb_d_elg_sparse_mat), target :: aelg - type(psb_d_csrg_sparse_mat), target :: acsrg + type(psb_d_cuda_elg_sparse_mat), target :: aelg + type(psb_d_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 - type(psb_d_hybg_sparse_mat), target :: ahybg + type(psb_d_cuda_hybg_sparse_mat), target :: ahybg #endif - type(psb_d_hlg_sparse_mat), target :: ahlg - type(psb_d_hdiag_sparse_mat), target :: ahdiag - type(psb_d_dnsg_sparse_mat), target :: adnsg + type(psb_d_cuda_hlg_sparse_mat), target :: ahlg + type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg #endif class(psb_d_base_sparse_mat), pointer :: agmold, acmold ! other variables From 655c86caeda10756c110adf8863280bfab85da63 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 10:20:38 +0100 Subject: [PATCH 014/116] Updated docs. --- README.md | 16 ++++++++++++++- cuda/License-spgpu.md | 21 ++++++++++++++++++++ test/cudakern/dpdegenmv.F90 | 4 ++-- test/cudakern/spdegenmv.F90 | 39 ++++++++++++++++++++----------------- 4 files changed, 59 insertions(+), 21 deletions(-) create mode 100644 cuda/License-spgpu.md diff --git a/README.md b/README.md index a9813f5e..afab1646 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -PSBLAS library, version 3.8 +PSBLAS library, version 3.9 =========================== The architecture of the Fortran 2003 sparse BLAS is described in: @@ -40,6 +40,15 @@ The main reference for the serial sparse BLAS is: >linear algebra subprograms for sparse matrices: a user level interface, >ACM Trans. Math. Softw., 23(3), 379-401, 1997. +CUDA and GPU support +-------------------- +This version of PSBLAS incorporates into a single package three +entities that were previouslty separated: +1. PSBLAS -- the base library +2. PSBLAS-EXT -- a library providing additional storage formats +3. SPGPU -- a package of kernels for NVIDIA GPUs originally + written by Davide Barbieri and Salvatore Filippone; + see the license file cuda/License-spgpu.md INSTALLING ---------- @@ -61,6 +70,11 @@ prerequisites (see also SERIAL below): specify `--with-amd` (see `./configure --help` for more details). We use the C interface to AMD. +5. If you have CUDA available, use + --with-cuda= to specify the CUDA toolkit location + --with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute + capabilities) to compile the CUDA code for. + The configure script will generate a Make.inc file suitable for building the library. The script is capable of recognizing the needed libraries with their default names; if they are in unusual places consider adding diff --git a/cuda/License-spgpu.md b/cuda/License-spgpu.md new file mode 100644 index 00000000..7f4b8ff4 --- /dev/null +++ b/cuda/License-spgpu.md @@ -0,0 +1,21 @@ +(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index d2cc2172..bde57f5f 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -631,7 +631,7 @@ program pdgenmv stop endif if(psb_get_errstatus() /= 0) goto 9999 - name='pdegenmv-gpu' + name='pdegenmv-cuda' ! ! Hello world ! @@ -974,7 +974,7 @@ contains if (iam == 0) then write(*,*) 'CPU side format?' read(psb_inp_unit,*) acfmt - write(*,*) 'GPU side format?' + write(*,*) 'CUDA side format?' read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim diff --git a/test/cudakern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 index 1c7d646f..9644d8c7 100644 --- a/test/cudakern/spdegenmv.F90 +++ b/test/cudakern/spdegenmv.F90 @@ -548,7 +548,7 @@ program pdgenmv use psb_util_mod use psb_ext_mod #ifdef HAVE_GPU - use psb_gpu_mod + use psb_cuda_mod #endif use psb_s_pde3d_mod implicit none @@ -570,8 +570,8 @@ program pdgenmv ! dense matrices type(psb_s_vect_type), target :: xv,bv, xg, bg #ifdef HAVE_GPU - type(psb_s_vect_gpu) :: vmold - type(psb_i_vect_gpu) :: imold + type(psb_s_vect_cuda) :: vmold + type(psb_i_vect_cuda) :: imold #endif real(psb_spk_), allocatable :: x1(:), x2(:), x0(:) ! blacs parameters @@ -589,14 +589,14 @@ program pdgenmv type(psb_s_dia_sparse_mat), target :: adia type(psb_s_hdia_sparse_mat), target :: ahdia #ifdef HAVE_GPU - type(psb_s_elg_sparse_mat), target :: aelg - type(psb_s_csrg_sparse_mat), target :: acsrg + type(psb_s_cuda_elg_sparse_mat), target :: aelg + type(psb_s_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 - type(psb_s_hybg_sparse_mat), target :: ahybg + type(psb_s_cuda_hybg_sparse_mat), target :: ahybg #endif - type(psb_s_hlg_sparse_mat), target :: ahlg - type(psb_s_dnsg_sparse_mat), target :: adnsg - type(psb_s_hdiag_sparse_mat), target :: ahdiag + type(psb_s_cuda_hlg_sparse_mat), target :: ahlg + type(psb_s_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_s_cuda_dnsg_sparse_mat), target :: adnsg #endif class(psb_s_base_sparse_mat), pointer :: agmold, acmold ! other variables @@ -613,7 +613,10 @@ program pdgenmv call psb_info(ctxt,iam,np) #ifdef HAVE_GPU - call psb_gpu_init(ctxt) + call psb_cuda_init(ctxt) +#endif +#ifdef HAVE_RSB + call psb_rsb_init() #endif if (iam < 0) then @@ -622,7 +625,7 @@ program pdgenmv stop endif if(psb_get_errstatus() /= 0) goto 9999 - name='pdegenmv-gpu' + name='pdegenmv-cuda' ! ! Hello world ! @@ -632,7 +635,7 @@ program pdgenmv end if #ifdef HAVE_GPU write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif ! ! get parameters @@ -752,7 +755,7 @@ program pdgenmv call psb_barrier(ctxt) t1 = psb_wtime() call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() t2 = psb_Wtime() -t1 call psb_amx(ctxt,t2) if (j==1) tcnvg1 = t2 @@ -789,7 +792,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) tt2 = psb_wtime() - tt1 call psb_amx(ctxt,tt2) @@ -817,7 +820,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) gt2 = psb_wtime() - gt1 call psb_amx(ctxt,gt2) @@ -919,7 +922,7 @@ program pdgenmv #ifdef HAVE_GPU bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth - bdwdth = psb_gpu_MemoryPeakBandwidth() + bdwdth = psb_cuda_MemoryPeakBandwidth() write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth #endif write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() @@ -941,7 +944,7 @@ program pdgenmv goto 9999 end if #ifdef HAVE_GPU - call psb_gpu_exit() + call psb_cuda_exit() #endif call psb_exit(ctxt) stop @@ -965,7 +968,7 @@ contains if (iam == 0) then write(*,*) 'CPU side format?' read(psb_inp_unit,*) acfmt - write(*,*) 'GPU side format?' + write(*,*) 'CUDA side format?' read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim From d3b2b7816da16a01f97c01f7d2bd1b4b07ab32a9 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 15:35:55 +0100 Subject: [PATCH 015/116] Fix coo insert OpenMP. Fix Make.inc.in --- Make.inc.in | 3 --- base/serial/impl/psb_c_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_d_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_s_coo_impl.F90 | 6 ++++++ base/serial/impl/psb_z_coo_impl.F90 | 6 ++++++ 5 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Make.inc.in b/Make.inc.in index ca0fa7f7..6a4b378d 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -71,11 +71,8 @@ CUDAD=@CUDAD@ CUDALD=@CUDALD@ LCUDA=@LCUDA@ -#SPGPUDIR=@SPGPU_DIR@ -#SPGPU_INCDIR=@SPGPU_INCDIR@ SPGPU_LIBS=@SPGPU_LIBS@ SPGPU_DEFINES=@SPGPU_DEFINES@ -#SPGPU_INCLUDES=@SPGPU_INCLUDES@ CUDA_DIR=@CUDA_DIR@ CUDA_DEFINES=@CUDA_DEFINES@ diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 46391dee..b1d71321 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index c2babf8e..350085bb 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 402c608a..51858efd 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 542f842e..0624dd21 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2878,14 +2878,20 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) if (isza < (nza+nz)) then info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else +#if defined(OPENMP) nzaold = nza nza = nza + nz +#endif call a%set_nzeros(nza) end if !$omp end critical if (info /= 0) goto 9999 call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,& & imin,imax,jmin,jmax,info) +#if !defined(OPENMP) + nza = nzaold + call a%set_nzeros(nza) +#endif call a%set_sorted(.false.) else if (a%is_upd()) then From 6c9ca58282ce019a0d0eb29d02dfcb98c1cd0ce7 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 16:14:30 +0100 Subject: [PATCH 016/116] Silly bug in coo insert --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index b1d71321..5c90e287 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 350085bb..f6a173d1 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 51858efd..4c12d8fc 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 0624dd21..44ee89b5 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -2869,6 +2869,7 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) ! Hence the call to set_nzeros done here. !$omp critical nza = a%get_nzeros() + nzaold = nza isza = a%get_size() ! Build phase. Must handle reallocations in a sensible way. if (isza < (nza+nz)) then @@ -2879,7 +2880,6 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) info = psb_err_alloc_dealloc_; call psb_errpush(info,name) else #if defined(OPENMP) - nzaold = nza nza = nza + nz #endif call a%set_nzeros(nza) From ab8631439f7f2e4f8afacfb42cbe53c5fe69eec2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 16:35:42 +0100 Subject: [PATCH 017/116] Update configure script --- config/pac.m4 | 2 +- configure | 36 +----------------------------------- configure.ac | 35 +++++++++++++++++------------------ 3 files changed, 19 insertions(+), 54 deletions(-) diff --git a/config/pac.m4 b/config/pac.m4 index 7a9ee07e..e4065402 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2160,7 +2160,7 @@ AC_DEFUN([PAC_ARG_WITH_CUDACC], [ AC_ARG_WITH(cudacc, AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example, - --with-cudacc=30,35,37,50,60]), + --with-cudacc=50,60,70,75]), [pac_cv_cudacc=$withval], [pac_cv_cudacc='']) ]) diff --git a/configure b/configure index 8b217231..b98c6ca0 100755 --- a/configure +++ b/configure @@ -830,7 +830,6 @@ enable_openmp with_blas with_blasdir with_lapack -with_rsb with_metis with_metisincfile with_metisdir @@ -1522,12 +1521,6 @@ Optional Packages: --with-blas= use BLAS library --with-blasdir= search for BLAS library in --with-lapack= use LAPACK library - --with-rsb Specify Recursive Sparse BLAS library linkage info - (that is, the output of librsb-config --static - --ldflags, or a directory where the usual - bin/include/lib subdirs with a regular RSB - installation resides, or nothing to make the - configure script invoke librsb-config) --with-metis=LIBNAME Specify the library name for METIS library. Default: "-lmetis" --with-metisincfile=DIR Specify the name for METIS include file. @@ -1542,7 +1535,7 @@ Optional Packages: --with-amdlibdir=DIR Specify the directory for AMD library. --with-cuda=DIR Specify the directory for CUDA library and includes. --with-cudacc A comma-separated list of CCs to compile to, for - example, --with-cudacc=30,35,37,50,60 + example, --with-cudacc=50,60,70,75 Some influential environment variables: FC Fortran compiler command @@ -3301,7 +3294,6 @@ psblas_cv_version="3.8.1" Be sure to specify the library paths of your interest. Examples: ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS - --with-spgpu=/path/to/spgpu FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -3316,7 +3308,6 @@ printf "%s\n" "$as_me: Be sure to specify the library paths of your interest. Examples: ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS - --with-spgpu=/path/to/spgpu FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -10021,31 +10012,6 @@ fi #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) -# Check whether --with-rsb was given. -if test ${with_rsb+y} -then : - withval=$with_rsb; if test x"$withval" = xno; then -want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi -else $as_nop - want_rsb_libs="" -fi - -if test x"$want_rsb_libs" != x ; then - if test x"$want_rsb_libs" = xyes ; then - want_rsb_libs="`librsb-config --static --ldflags`" - else - if test -d "$want_rsb_libs" ; then - want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" - else - true; - # we assume want_rsb_libs are linkage parameters - fi - fi - FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" -fi -RSB_LIBS="$want_rsb_libs" -LIBS="$RSB_LIBS ${LIBS}" - # Check whether --with-metis was given. if test ${with_metis+y} diff --git a/configure.ac b/configure.ac index bfc9b5f9..add09a79 100755 --- a/configure.ac +++ b/configure.ac @@ -57,7 +57,6 @@ AC_MSG_NOTICE([ Be sure to specify the library paths of your interest. Examples: ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS - [ --with-spgpu=/path/to/spgpu] FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -730,23 +729,23 @@ PAC_MAKE_IS_GNUMAKE # Note : also umfdi_local_search, ... #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) -AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then -want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) -if test x"$want_rsb_libs" != x ; then - if test x"$want_rsb_libs" = xyes ; then - want_rsb_libs="`librsb-config --static --ldflags`" - else - if test -d "$want_rsb_libs" ; then - want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" - else - true; - # we assume want_rsb_libs are linkage parameters - fi - fi - FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" -fi -RSB_LIBS="$want_rsb_libs" -LIBS="$RSB_LIBS ${LIBS}" +dnl AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then +dnl want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) +dnl if test x"$want_rsb_libs" != x ; then +dnl if test x"$want_rsb_libs" = xyes ; then +dnl want_rsb_libs="`librsb-config --static --ldflags`" +dnl else +dnl if test -d "$want_rsb_libs" ; then +dnl want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" +dnl else +dnl true; +dnl # we assume want_rsb_libs are linkage parameters +dnl fi +dnl fi +dnl FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" +dnl fi +dnl RSB_LIBS="$want_rsb_libs" +dnl LIBS="$RSB_LIBS ${LIBS}" dnl AC_CHECK_HEADERS([rsb.h], [ LIBS="${LIBS} $want_rsb_libs"], []) PAC_CHECK_METIS From a6016f00fa3f0d8c34deed848ab735e68ec3b35e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 1 Dec 2023 15:10:13 +0100 Subject: [PATCH 018/116] Bump PSBLAS version to 3.9 --- base/modules/psb_const_mod.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/modules/psb_const_mod.F90 b/base/modules/psb_const_mod.F90 index 56134474..70631932 100644 --- a/base/modules/psb_const_mod.F90 +++ b/base/modules/psb_const_mod.F90 @@ -136,9 +136,9 @@ module psb_const_mod ! ! Version ! - character(len=*), parameter :: psb_version_string_ = "3.8.0" + character(len=*), parameter :: psb_version_string_ = "3.9.0" integer(psb_ipk_), parameter :: psb_version_major_ = 3 - integer(psb_ipk_), parameter :: psb_version_minor_ = 8 + integer(psb_ipk_), parameter :: psb_version_minor_ = 9 integer(psb_ipk_), parameter :: psb_patchlevel_ = 0 ! From e373ed7e0bcdce1f75bcd1ea8d32acfa91dab7ab Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 1 Dec 2023 15:10:39 +0100 Subject: [PATCH 019/116] Modify configry to only use HAVE_CUDA, since SPGU is recompiled. --- Make.inc.in | 1 - config/pac.m4 | 2 +- configure | 59 +++++++++++++++++++++++++++------------------------ configure.ac | 59 ++++++++++++++++++++++++++------------------------- 4 files changed, 62 insertions(+), 59 deletions(-) diff --git a/Make.inc.in b/Make.inc.in index 6a4b378d..858ad336 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -72,7 +72,6 @@ CUDALD=@CUDALD@ LCUDA=@LCUDA@ SPGPU_LIBS=@SPGPU_LIBS@ -SPGPU_DEFINES=@SPGPU_DEFINES@ CUDA_DIR=@CUDA_DIR@ CUDA_DEFINES=@CUDA_DEFINES@ diff --git a/config/pac.m4 b/config/pac.m4 index e4065402..52185439 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2104,7 +2104,7 @@ dnl dnl @author Salvatore Filippone dnl AC_DEFUN(PAC_CHECK_CUDA, -[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the directory for CUDA library and includes.]), +[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]), [pac_cv_cuda_dir=$withval], [pac_cv_cuda_dir='']) diff --git a/configure b/configure index b98c6ca0..48487f98 100755 --- a/configure +++ b/configure @@ -665,7 +665,6 @@ CUDA_INCLUDES CUDA_DEFINES CUDA_DIR EXTRALDLIBS -SPGPU_DEFINES SPGPU_LIBS SPGPU_FLAGS METISINCFILE @@ -1533,7 +1532,7 @@ Optional Packages: --with-amddir=DIR Specify the directory for AMD library and includes. --with-amdincdir=DIR Specify the directory for AMD includes. --with-amdlibdir=DIR Specify the directory for AMD library. - --with-cuda=DIR Specify the directory for CUDA library and includes. + --with-cuda=DIR Specify the CUDA install directory. --with-cudacc A comma-separated list of CCs to compile to, for example, --with-cudacc=50,60,70,75 @@ -10678,7 +10677,7 @@ CPPFLAGS="$SAVE_CPPFLAGS" if test "x$pac_cv_have_cuda" == "xyes"; then -ac_ext=c + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' @@ -10809,17 +10808,15 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu LIBS="$SAVE_LIBS" CPPFLAGS="$SAVE_CPPFLAGS" -CUDA_VERSION="$pac_cv_cuda_version"; -CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); -if test "x$pac_cv_have_cuda" == "xyes" ; then - SPGPU_DEFINES="-DHAVE_SPGPU -DHAVE_GPU"; + CUDA_VERSION="$pac_cv_cuda_version"; + CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + HAVE_CUDA="yes"; SPGPU_LIBS="-lspgpu"; CUDAD=cudad; CUDALD=cudald; LCUDA="-lpsb_cuda"; EXTRALDLIBS="-lstdc++"; -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $CUDAD $CUDALD" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $CUDAD $CUDALD" >&5 printf "%s\n" "$as_me: At this point GPUTARGET is $CUDAD $CUDALD" >&6;} @@ -10833,26 +10830,27 @@ else $as_nop fi -if test "x$pac_cv_cudacc" == "x"; then - pac_cv_cudacc="30,35,37,50,60"; -fi -CUDEFINES="--dopt=on"; -for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` -do - CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; -done -if test "x$pac_cv_cuda_version" != "xunknown"; then - CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" -fi - + if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="50,60,70,75"; + CUDA_CC="$pac_cv_cudacc"; + fi + CUDEFINES="--dopt=on"; + for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` + do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; + done + if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + fi fi if test "x$pac_cv_ipk_size" != "x4"; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} - SPGPU_DEFINES=""; + HAVE_CUDA="no"; + CUDA_CC=""; SPGPU_LIBS=""; CUDAD=""; CUDALD=""; @@ -10921,9 +10919,9 @@ UTILLIBNAME=libpsb_util.a PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) -CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) -FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -10957,7 +10955,6 @@ FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) - ############################################################################### @@ -12290,6 +12287,9 @@ fi FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${CUDA_CC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -12320,6 +12320,9 @@ printf "%s\n" "$as_me: FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${CUDA_CC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} diff --git a/configure.ac b/configure.ac index add09a79..4b76485b 100755 --- a/configure.ac +++ b/configure.ac @@ -793,41 +793,40 @@ fi PAC_CHECK_CUDA() if test "x$pac_cv_have_cuda" == "xyes"; then - -PAC_CHECK_CUDA_VERSION() -CUDA_VERSION="$pac_cv_cuda_version"; -CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); -dnl PAC_CHECK_SPGPU() -if test "x$pac_cv_have_cuda" == "xyes" ; then - SPGPU_DEFINES="-DHAVE_SPGPU -DHAVE_GPU"; + + PAC_CHECK_CUDA_VERSION() + CUDA_VERSION="$pac_cv_cuda_version"; + CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + dnl PAC_CHECK_SPGPU() + HAVE_CUDA="yes"; SPGPU_LIBS="-lspgpu"; CUDAD=cudad; CUDALD=cudald; LCUDA="-lpsb_cuda"; EXTRALDLIBS="-lstdc++"; -fi -AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD]) - -PAC_ARG_WITH_CUDACC() -if test "x$pac_cv_cudacc" == "x"; then - pac_cv_cudacc="30,35,37,50,60"; -fi -CUDEFINES="--dopt=on"; -for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` -do - CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; -done -if test "x$pac_cv_cuda_version" != "xunknown"; then - CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" -fi + AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD]) + PAC_ARG_WITH_CUDACC() + if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="50,60,70,75"; + CUDA_CC="$pac_cv_cudacc"; + fi + CUDEFINES="--dopt=on"; + for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` + do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; + done + if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + fi fi if test "x$pac_cv_ipk_size" != "x4"; then AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) - SPGPU_DEFINES=""; + HAVE_CUDA="no"; + CUDA_CC=""; SPGPU_LIBS=""; CUDAD=""; CUDALD=""; @@ -896,9 +895,9 @@ AC_SUBST(FINCLUDES) PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) -CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) -FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -923,7 +922,6 @@ AC_SUBST(METISINCFILE) AC_SUBST(SPGPU_FLAGS) AC_SUBST(SPGPU_LIBS) dnl AC_SUBST(SPGPU_DIR) -AC_SUBST(SPGPU_DEFINES) dnl AC_SUBST(SPGPU_INCLUDES) dnl AC_SUBST(SPGPU_INCDIR) AC_SUBST(EXTRALDLIBS) @@ -962,6 +960,9 @@ AC_MSG_NOTICE([ FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${CUDA_CC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} From b2b7b074dff09c0b1df060f20dbcf53b52d5ff79 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 1 Dec 2023 15:11:17 +0100 Subject: [PATCH 020/116] Fix usage of HAVE_CUDA/HAVE_GPU (mostly disappeared) --- cuda/base_cusparse_mod.F90 | 4 - cuda/c_cusparse_mod.F90 | 5 -- cuda/ccusparse.c | 2 - cuda/cintrf.h | 4 - cuda/cuda_util.c | 9 --- cuda/cuda_util.h | 2 - cuda/cvectordev.c | 2 - cuda/cvectordev.h | 3 - cuda/d_cusparse_mod.F90 | 6 +- cuda/dcusparse.c | 2 - cuda/diagdev.c | 30 ------- cuda/diagdev.h | 5 -- cuda/diagdev_mod.F90 | 7 -- cuda/dnsdev.c | 62 --------------- cuda/dnsdev.h | 5 -- cuda/dnsdev_mod.F90 | 5 -- cuda/dvectordev.c | 4 - cuda/dvectordev.h | 3 - cuda/elldev.c | 87 --------------------- cuda/elldev.h | 6 -- cuda/elldev_mod.F90 | 5 -- cuda/fcusparse.c | 4 - cuda/fcusparse.h | 2 - cuda/hdiagdev.c | 39 --------- cuda/hdiagdev.h | 5 -- cuda/hdiagdev_mod.F90 | 4 - cuda/hlldev.c | 75 ------------------ cuda/hlldev.h | 5 -- cuda/hlldev_mod.F90 | 5 -- cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 | 6 -- cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 | 23 ------ cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 | 12 --- cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 | 8 -- cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_csmm.F90 | 8 -- cuda/impl/psb_c_cuda_csrg_csmv.F90 | 8 -- cuda/impl/psb_c_cuda_csrg_from_gpu.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_scal.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_scals.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_to_gpu.F90 | 6 -- cuda/impl/psb_c_cuda_csrg_vect_mv.F90 | 8 -- cuda/impl/psb_c_cuda_diag_csmv.F90 | 9 --- cuda/impl/psb_c_cuda_diag_to_gpu.F90 | 8 -- cuda/impl/psb_c_cuda_diag_vect_mv.F90 | 10 --- cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 | 35 +-------- cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 | 14 ---- cuda/impl/psb_c_cuda_elg_asb.f90 | 1 - cuda/impl/psb_c_cuda_elg_csmm.F90 | 10 --- cuda/impl/psb_c_cuda_elg_csmv.F90 | 9 --- cuda/impl/psb_c_cuda_elg_csput.F90 | 9 --- cuda/impl/psb_c_cuda_elg_from_gpu.F90 | 9 +-- cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 | 7 +- cuda/impl/psb_c_cuda_elg_mold.F90 | 2 - cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 | 7 -- cuda/impl/psb_c_cuda_elg_scal.F90 | 7 -- cuda/impl/psb_c_cuda_elg_scals.F90 | 7 -- cuda/impl/psb_c_cuda_elg_to_gpu.F90 | 9 --- cuda/impl/psb_c_cuda_elg_trim.f90 | 1 - cuda/impl/psb_c_cuda_elg_vect_mv.F90 | 10 --- cuda/impl/psb_c_cuda_hdiag_csmv.F90 | 10 --- cuda/impl/psb_c_cuda_hdiag_mold.F90 | 1 - cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 | 10 --- cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 | 9 --- cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 | 9 --- cuda/impl/psb_c_cuda_hlg_csmm.F90 | 10 --- cuda/impl/psb_c_cuda_hlg_csmv.F90 | 9 --- cuda/impl/psb_c_cuda_hlg_from_gpu.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_mold.F90 | 1 - cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_scal.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_scals.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_to_gpu.F90 | 7 -- cuda/impl/psb_c_cuda_hlg_vect_mv.F90 | 10 --- cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_csmm.F90 | 9 --- cuda/impl/psb_c_cuda_hybg_csmv.F90 | 8 -- cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_scal.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_scals.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_to_gpu.F90 | 6 -- cuda/impl/psb_c_cuda_hybg_vect_mv.F90 | 9 --- cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 | 7 -- cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 | 7 -- cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 | 5 -- cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 | 15 +--- cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 | 6 -- cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 | 23 ------ cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 | 12 --- cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 | 8 -- cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_csmm.F90 | 8 -- cuda/impl/psb_d_cuda_csrg_csmv.F90 | 8 -- cuda/impl/psb_d_cuda_csrg_from_gpu.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_scal.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_scals.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 6 -- cuda/impl/psb_d_cuda_csrg_vect_mv.F90 | 8 -- cuda/impl/psb_d_cuda_diag_csmv.F90 | 9 --- cuda/impl/psb_d_cuda_diag_to_gpu.F90 | 8 -- cuda/impl/psb_d_cuda_diag_vect_mv.F90 | 10 --- cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 | 35 +-------- cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 | 14 ---- cuda/impl/psb_d_cuda_elg_asb.f90 | 1 - cuda/impl/psb_d_cuda_elg_csmm.F90 | 10 --- cuda/impl/psb_d_cuda_elg_csmv.F90 | 9 --- cuda/impl/psb_d_cuda_elg_csput.F90 | 9 --- cuda/impl/psb_d_cuda_elg_from_gpu.F90 | 9 +-- cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 | 7 +- cuda/impl/psb_d_cuda_elg_mold.F90 | 2 - cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 | 7 -- cuda/impl/psb_d_cuda_elg_scal.F90 | 7 -- cuda/impl/psb_d_cuda_elg_scals.F90 | 7 -- cuda/impl/psb_d_cuda_elg_to_gpu.F90 | 9 --- cuda/impl/psb_d_cuda_elg_trim.f90 | 1 - cuda/impl/psb_d_cuda_elg_vect_mv.F90 | 10 --- cuda/impl/psb_d_cuda_hdiag_csmv.F90 | 10 --- cuda/impl/psb_d_cuda_hdiag_mold.F90 | 1 - cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 | 10 --- cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 | 9 --- cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 | 9 --- cuda/impl/psb_d_cuda_hlg_csmm.F90 | 10 --- cuda/impl/psb_d_cuda_hlg_csmv.F90 | 9 --- cuda/impl/psb_d_cuda_hlg_from_gpu.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_mold.F90 | 1 - cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_scal.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_scals.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_to_gpu.F90 | 7 -- cuda/impl/psb_d_cuda_hlg_vect_mv.F90 | 10 --- cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_csmm.F90 | 9 --- cuda/impl/psb_d_cuda_hybg_csmv.F90 | 8 -- cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_scal.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_scals.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_to_gpu.F90 | 6 -- cuda/impl/psb_d_cuda_hybg_vect_mv.F90 | 9 --- cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 | 7 -- cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 | 7 -- cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 | 5 -- cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 | 15 +--- cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 | 6 -- cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 | 23 ------ cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 | 12 --- cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 | 8 -- cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_csmm.F90 | 8 -- cuda/impl/psb_s_cuda_csrg_csmv.F90 | 8 -- cuda/impl/psb_s_cuda_csrg_from_gpu.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_scal.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_scals.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_to_gpu.F90 | 6 -- cuda/impl/psb_s_cuda_csrg_vect_mv.F90 | 8 -- cuda/impl/psb_s_cuda_diag_csmv.F90 | 9 --- cuda/impl/psb_s_cuda_diag_to_gpu.F90 | 8 -- cuda/impl/psb_s_cuda_diag_vect_mv.F90 | 10 --- cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 | 35 +-------- cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 | 14 ---- cuda/impl/psb_s_cuda_elg_asb.f90 | 1 - cuda/impl/psb_s_cuda_elg_csmm.F90 | 10 --- cuda/impl/psb_s_cuda_elg_csmv.F90 | 9 --- cuda/impl/psb_s_cuda_elg_csput.F90 | 9 --- cuda/impl/psb_s_cuda_elg_from_gpu.F90 | 9 +-- cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 | 7 +- cuda/impl/psb_s_cuda_elg_mold.F90 | 2 - cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 | 7 -- cuda/impl/psb_s_cuda_elg_scal.F90 | 7 -- cuda/impl/psb_s_cuda_elg_scals.F90 | 7 -- cuda/impl/psb_s_cuda_elg_to_gpu.F90 | 9 --- cuda/impl/psb_s_cuda_elg_trim.f90 | 1 - cuda/impl/psb_s_cuda_elg_vect_mv.F90 | 10 --- cuda/impl/psb_s_cuda_hdiag_csmv.F90 | 10 --- cuda/impl/psb_s_cuda_hdiag_mold.F90 | 1 - cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 | 10 --- cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 | 9 --- cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 | 9 --- cuda/impl/psb_s_cuda_hlg_csmm.F90 | 10 --- cuda/impl/psb_s_cuda_hlg_csmv.F90 | 9 --- cuda/impl/psb_s_cuda_hlg_from_gpu.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_mold.F90 | 1 - cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_scal.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_scals.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_to_gpu.F90 | 7 -- cuda/impl/psb_s_cuda_hlg_vect_mv.F90 | 10 --- cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_csmm.F90 | 9 --- cuda/impl/psb_s_cuda_hybg_csmv.F90 | 8 -- cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_scal.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_scals.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_to_gpu.F90 | 6 -- cuda/impl/psb_s_cuda_hybg_vect_mv.F90 | 9 --- cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 | 7 -- cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 | 7 -- cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 | 5 -- cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 | 15 +--- cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 | 6 -- cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 | 23 ------ cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 | 12 --- cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 | 8 -- cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_csmm.F90 | 8 -- cuda/impl/psb_z_cuda_csrg_csmv.F90 | 8 -- cuda/impl/psb_z_cuda_csrg_from_gpu.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_scal.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_scals.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_to_gpu.F90 | 6 -- cuda/impl/psb_z_cuda_csrg_vect_mv.F90 | 8 -- cuda/impl/psb_z_cuda_diag_csmv.F90 | 9 --- cuda/impl/psb_z_cuda_diag_to_gpu.F90 | 8 -- cuda/impl/psb_z_cuda_diag_vect_mv.F90 | 10 --- cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 | 35 +-------- cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 | 14 ---- cuda/impl/psb_z_cuda_elg_asb.f90 | 1 - cuda/impl/psb_z_cuda_elg_csmm.F90 | 10 --- cuda/impl/psb_z_cuda_elg_csmv.F90 | 9 --- cuda/impl/psb_z_cuda_elg_csput.F90 | 9 --- cuda/impl/psb_z_cuda_elg_from_gpu.F90 | 9 +-- cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 | 7 +- cuda/impl/psb_z_cuda_elg_mold.F90 | 2 - cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 | 7 -- cuda/impl/psb_z_cuda_elg_scal.F90 | 7 -- cuda/impl/psb_z_cuda_elg_scals.F90 | 7 -- cuda/impl/psb_z_cuda_elg_to_gpu.F90 | 9 --- cuda/impl/psb_z_cuda_elg_trim.f90 | 1 - cuda/impl/psb_z_cuda_elg_vect_mv.F90 | 10 --- cuda/impl/psb_z_cuda_hdiag_csmv.F90 | 10 --- cuda/impl/psb_z_cuda_hdiag_mold.F90 | 1 - cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 | 10 --- cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 | 9 --- cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 | 9 --- cuda/impl/psb_z_cuda_hlg_csmm.F90 | 10 --- cuda/impl/psb_z_cuda_hlg_csmv.F90 | 9 --- cuda/impl/psb_z_cuda_hlg_from_gpu.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_mold.F90 | 1 - cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_scal.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_scals.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_to_gpu.F90 | 7 -- cuda/impl/psb_z_cuda_hlg_vect_mv.F90 | 10 --- cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_csmm.F90 | 9 --- cuda/impl/psb_z_cuda_hybg_csmv.F90 | 8 -- cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 | 11 --- cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_scal.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_scals.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_to_gpu.F90 | 6 -- cuda/impl/psb_z_cuda_hybg_vect_mv.F90 | 9 --- cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 | 7 -- cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 | 7 -- cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 | 5 -- cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 | 15 +--- cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 | 10 --- cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 | 6 -- cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 | 6 -- cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 | 6 -- cuda/ivectordev.c | 4 - cuda/ivectordev.h | 3 - cuda/psb_base_vectordev_mod.F90 | 7 -- cuda/psb_c_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_c_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_c_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_c_cuda_vect_mod.F90 | 20 +---- cuda/psb_c_vectordev_mod.F90 | 4 - cuda/psb_cuda_env_mod.F90 | 10 --- cuda/psb_d_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_d_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_d_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_d_cuda_vect_mod.F90 | 20 +---- cuda/psb_d_vectordev_mod.F90 | 4 - cuda/psb_i_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_i_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_i_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_i_cuda_vect_mod.F90 | 18 +---- cuda/psb_i_vectordev_mod.F90 | 4 - cuda/psb_s_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_s_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_s_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_s_cuda_vect_mod.F90 | 20 +---- cuda/psb_s_vectordev_mod.F90 | 4 - cuda/psb_z_cuda_csrg_mat_mod.F90 | 18 ----- cuda/psb_z_cuda_diag_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_dnsg_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_elg_mat_mod.F90 | 27 ------- cuda/psb_z_cuda_hdiag_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_hlg_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_hybg_mat_mod.F90 | 19 ----- cuda/psb_z_cuda_vect_mod.F90 | 20 +---- cuda/psb_z_vectordev_mod.F90 | 4 - cuda/s_cusparse_mod.F90 | 5 -- cuda/scusparse.c | 2 - cuda/svectordev.c | 3 - cuda/svectordev.h | 2 - cuda/vectordev.c | 3 - cuda/vectordev.h | 3 - cuda/z_cusparse_mod.F90 | 4 - cuda/zcusparse.c | 2 - cuda/zvectordev.c | 3 - cuda/zvectordev.h | 3 - 376 files changed, 26 insertions(+), 3547 deletions(-) diff --git a/cuda/base_cusparse_mod.F90 b/cuda/base_cusparse_mod.F90 index 9f5628be..94a8255f 100644 --- a/cuda/base_cusparse_mod.F90 +++ b/cuda/base_cusparse_mod.F90 @@ -79,9 +79,6 @@ module base_cusparse_mod enumerator cusparse_direction_column end enum - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface function FcusparseCreate() & & bind(c,name="FcusparseCreate") result(res) @@ -113,5 +110,4 @@ contains res = FcusparseDestroy() end function closeFcusparse -#endif end module base_cusparse_mod diff --git a/cuda/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 index e7d37173..59f37732 100644 --- a/cuda/c_cusparse_mod.F90 +++ b/cuda/c_cusparse_mod.F90 @@ -43,9 +43,6 @@ module c_cusparse_mod end type c_Hmat #endif - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface CSRGDeviceFree function c_CSRGDeviceFree(Mat) & & bind(c,name="c_CSRGDeviceFree") result(res) @@ -300,6 +297,4 @@ module c_cusparse_mod end interface #endif -#endif - end module c_cusparse_mod diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index 6f1cfdb3..c5430306 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include #include "cintrf.h" @@ -94,4 +93,3 @@ #include "fcusparse_fct.h" -#endif diff --git a/cuda/cintrf.h b/cuda/cintrf.h index 1a3528aa..3a1f6476 100644 --- a/cuda/cintrf.h +++ b/cuda/cintrf.h @@ -36,7 +36,6 @@ #include #include -#if defined(HAVE_SPGPU) && defined(HAVE_CUDA) #include "core.h" #include "cuda_util.h" #include "vector.h" @@ -45,7 +44,4 @@ #define ELL_PITCH_ALIGN_S 32 #define ELL_PITCH_ALIGN_D 16 - -#endif - #endif diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 0fe4a8b7..09265410 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -32,8 +32,6 @@ #include "cuda_util.h" -#if defined(HAVE_CUDA) - static int hasUVA=-1; static struct cudaDeviceProp *prop=NULL; @@ -440,10 +438,6 @@ void psb_cudaDestroyCublasHandle() psb_cublas_handle=NULL; } - - - - /* Simple memory tools */ int allocateInt(void **d_int, int n) @@ -803,6 +797,3 @@ double etime() } - - -#endif diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 789c08f4..95c8d1dc 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -38,7 +38,6 @@ #include #include -#if defined(HAVE_CUDA) #include "cuda_runtime.h" #include "core.h" #include "cuComplex.h" @@ -134,6 +133,5 @@ void freeDoubleComplex(void *); double etime(); -#endif #endif diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index 1dc16667..518154d5 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "cvectordev.h" @@ -321,5 +320,4 @@ int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) return(i); } -#endif diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h index f58fcca7..27c8984a 100644 --- a/cuda/cvectordev.h +++ b/cuda/cvectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include #include "cuComplex.h" @@ -77,5 +76,3 @@ int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); - -#endif diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 index cd8bd52f..509253e6 100644 --- a/cuda/d_cusparse_mod.F90 +++ b/cuda/d_cusparse_mod.F90 @@ -43,9 +43,6 @@ module d_cusparse_mod end type d_Hmat #endif - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface CSRGDeviceFree function d_CSRGDeviceFree(Mat) & & bind(c,name="d_CSRGDeviceFree") result(res) @@ -298,8 +295,7 @@ module d_cusparse_mod integer(c_int) :: res end function d_HYBGHost2Device end interface -#endif - + #endif end module d_cusparse_mod diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index 9659c1f9..f14e787c 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include #include "cintrf.h" @@ -92,4 +91,3 @@ #include "fcusparse_fct.h" -#endif diff --git a/cuda/diagdev.c b/cuda/diagdev.c index a2acf1f4..0cf78a41 100644 --- a/cuda/diagdev.c +++ b/cuda/diagdev.c @@ -34,7 +34,6 @@ #include #include #include -#if defined(HAVE_SPGPU) //new DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType) { @@ -111,7 +110,6 @@ void freeDiagDevice(void* remoteMatrix) //new int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType) { int i; -#ifdef HAVE_SPGPU DiagDeviceParams p; p = getDiagDeviceParams(rows, columns, diags,elementType); @@ -120,15 +118,11 @@ int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,u fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n) { int i,fo,fa; char buf_a[255], buf_o[255],tmp[255]; -#ifdef HAVE_SPGPU struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; // Ex updateFromHost function /* memset(buf_a,'\0',255); */ @@ -159,14 +153,10 @@ int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n) return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDiagDeviceDouble(void* deviceMat, double* a, int* off) { int i; -#ifdef HAVE_SPGPU struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); @@ -174,9 +164,6 @@ int readDiagDeviceDouble(void* deviceMat, double* a, int* off) fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } //new @@ -188,7 +175,6 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -201,16 +187,12 @@ int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n) { int i,fo,fa; char buf_a[255], buf_o[255],tmp[255]; -#ifdef HAVE_SPGPU struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; // Ex updateFromHost function /* memset(buf_a,'\0',255); */ @@ -241,14 +223,10 @@ int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n) return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDiagDeviceFloat(void* deviceMat, float* a, int* off) { int i; -#ifdef HAVE_SPGPU struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); @@ -256,9 +234,6 @@ int readDiagDeviceFloat(void* deviceMat, float* a, int* off) fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } //new @@ -270,7 +245,6 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -283,9 +257,5 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } -#endif diff --git a/cuda/diagdev.h b/cuda/diagdev.h index 83f38289..2efbea92 100644 --- a/cuda/diagdev.h +++ b/cuda/diagdev.h @@ -32,7 +32,6 @@ #ifndef _DIAGDEV_H_ #define _DIAGDEV_H_ -#ifdef HAVE_SPGPU #include "cintrf.h" #include "dia.h" @@ -88,8 +87,4 @@ int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/diagdev_mod.F90 b/cuda/diagdev_mod.F90 index cbcc029e..70d58d4e 100644 --- a/cuda/diagdev_mod.F90 +++ b/cuda/diagdev_mod.F90 @@ -41,8 +41,6 @@ module diagdev_mod integer(c_int) :: firstIndex end type diagdev_parms -#ifdef HAVE_SPGPU - interface function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) & & result(res) bind(c,name='getDiagDeviceParams') @@ -65,7 +63,6 @@ module diagdev_mod end function FallocDiagDevice end interface - interface writeDiagDevice function writeDiagDeviceFloat(deviceMat,a,off,n) & @@ -174,7 +171,6 @@ module diagdev_mod end function getDiagTimer end interface - interface function getDiagDevicePitch(deviceMat) & & bind(c,name='getDiagDevicePitch') result(res) @@ -225,7 +221,4 @@ module diagdev_mod end function spmvDiagDeviceDoubleComplex end interface spmvDiagDevice -#endif - - end module diagdev_mod diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c index 25cddc87..0a991012 100644 --- a/cuda/dnsdev.c +++ b/cuda/dnsdev.c @@ -31,8 +31,6 @@ #include #include "dnsdev.h" -#if defined(HAVE_SPGPU) - #define PASS_RS 0 #define IMIN(a,b) ((a)<(b) ? (a) : (b)) @@ -102,7 +100,6 @@ int FallocDnsDevice(void** deviceMat, unsigned int rows, unsigned int columns, unsigned int elementType, unsigned int firstIndex) { int i; -#ifdef HAVE_SPGPU DnsDeviceParams p; p = getDnsDeviceParams(rows, columns, elementType, firstIndex); @@ -111,9 +108,6 @@ int FallocDnsDevice(void** deviceMat, unsigned int rows, fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -124,7 +118,6 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; int status; -#ifdef HAVE_SPGPU cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); @@ -143,9 +136,6 @@ int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, @@ -155,7 +145,6 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; int status; -#ifdef HAVE_SPGPU cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); @@ -174,9 +163,6 @@ int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha, @@ -186,7 +172,6 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; int status; -#ifdef HAVE_SPGPU cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); @@ -205,9 +190,6 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha, @@ -217,7 +199,6 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; int status; -#ifdef HAVE_SPGPU cublasHandle_t handle=psb_cudaGetCublasHandle(); cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); @@ -236,15 +217,11 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); @@ -252,14 +229,10 @@ int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); @@ -267,15 +240,11 @@ int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); @@ -283,14 +252,10 @@ int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); @@ -298,15 +263,11 @@ int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, i fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasGetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); @@ -314,14 +275,10 @@ int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasGetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); @@ -329,15 +286,11 @@ int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); @@ -345,14 +298,10 @@ int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) { int i; -#ifdef HAVE_SPGPU struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; int pitch=devMat->pitch; i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); @@ -360,24 +309,13 @@ int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, in fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int getDnsDevicePitch(void* deviceMat) { int i; struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; -#ifdef HAVE_SPGPU i = devMat->pitch; return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } - - -#endif - diff --git a/cuda/dnsdev.h b/cuda/dnsdev.h index 7c8b06c9..1c335bf9 100644 --- a/cuda/dnsdev.h +++ b/cuda/dnsdev.h @@ -33,7 +33,6 @@ #ifndef _DNSDEV_H_ #define _DNSDEV_H_ -#if defined(HAVE_SPGPU) #include "cintrf.h" #include "cuComplex.h" #include "cublas_v2.h" @@ -115,8 +114,4 @@ int getDnsDevicePitch(void* deviceMat); //int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); //int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/dnsdev_mod.F90 b/cuda/dnsdev_mod.F90 index 8b96b918..fd257e0e 100644 --- a/cuda/dnsdev_mod.F90 +++ b/cuda/dnsdev_mod.F90 @@ -44,8 +44,6 @@ module dnsdev_mod integer(c_int) :: firstIndex end type dnsdev_parms -#ifdef HAVE_SPGPU - interface function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) & & result(res) bind(c,name='getDnsDeviceParams') @@ -269,7 +267,4 @@ module dnsdev_mod end interface -#endif - - end module dnsdev_mod diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index eae82c1e..39aa5b2a 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "dvectordev.h" @@ -300,6 +299,3 @@ int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) return(i); } - -#endif - diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h index 960958c5..25905c60 100644 --- a/cuda/dvectordev.h +++ b/cuda/dvectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include "vectordev.h" #include "cuda_runtime.h" @@ -74,5 +73,3 @@ int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA); int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB); - -#endif diff --git a/cuda/elldev.c b/cuda/elldev.c index eff89efa..3b79a863 100644 --- a/cuda/elldev.c +++ b/cuda/elldev.c @@ -31,8 +31,6 @@ #include #include "elldev.h" -#if defined(HAVE_SPGPU) - #define PASS_RS 0 EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize, @@ -140,7 +138,6 @@ int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize, unsigned int columns, unsigned int elementType, unsigned int firstIndex) { int i; -#ifdef HAVE_SPGPU EllDeviceParams p; p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex); @@ -149,9 +146,6 @@ int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize, fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS, @@ -182,7 +176,6 @@ int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; -#ifdef HAVE_SPGPU #ifdef VERBOSE __assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors"); __assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)"); @@ -196,9 +189,6 @@ int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, (float *)x->v_, beta, devMat->baseIndex); return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -234,7 +224,6 @@ int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; -#ifdef HAVE_SPGPU /*spgpuDellspmv (handle, (double*) y->v_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/ /* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */ /* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */ @@ -246,9 +235,6 @@ int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX, (double *)x->v_, beta, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } void @@ -281,7 +267,6 @@ int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; -#ifdef HAVE_SPGPU cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta)); cspmdmm_gpu ((cuFloatComplex *)y->v_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, @@ -289,9 +274,6 @@ int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device (cuFloatComplex *)x->v_, b, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } void @@ -323,7 +305,6 @@ int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; -#ifdef HAVE_SPGPU cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta)); zspmdmm_gpu ((cuDoubleComplex *)y->v_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, @@ -331,14 +312,10 @@ int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); @@ -350,14 +327,10 @@ int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); @@ -370,14 +343,10 @@ int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* ir fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); @@ -390,14 +359,10 @@ int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); @@ -410,14 +375,10 @@ int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -428,14 +389,10 @@ int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -445,14 +402,10 @@ int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -462,14 +415,10 @@ int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -479,32 +428,21 @@ int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, in fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int getEllDevicePitch(void* deviceMat) { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; -#ifdef HAVE_SPGPU i = devMat->pitch; //old //i = getPitchEllDevice(deviceMat); return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } int getEllDeviceMaxRowSize(void* deviceMat) { int i; struct EllDevice *devMat = (struct EllDevice *) deviceMat; -#ifdef HAVE_SPGPU i = devMat->maxRowSize; return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -515,7 +453,6 @@ int getEllDeviceMaxRowSize(void* deviceMat) int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, int *idisp, int *ja, float *val, void *deviceMat) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; float *devVal; int *devIdisp, *devJa; @@ -548,9 +485,6 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -558,7 +492,6 @@ int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, int *idisp, int *ja, double *val, void *deviceMat) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; double *devVal; int *devIdisp, *devJa; @@ -591,16 +524,12 @@ int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, int *idisp, int *ja, float complex *val, void *deviceMat) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; float complex *devVal; int *devIdisp, *devJa; @@ -633,9 +562,6 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -643,7 +569,6 @@ int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, in int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, int *idisp, int *ja, double complex *val, void *deviceMat) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; double complex *devVal; int *devIdisp, *devJa; @@ -676,15 +601,11 @@ int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, i fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; @@ -699,13 +620,11 @@ int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void * devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1); -#endif return SPGPU_SUCCESS; } int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; @@ -720,7 +639,6 @@ int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1); -#endif return SPGPU_SUCCESS; } @@ -728,7 +646,6 @@ int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, void *ia, void *ja, void *val) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; @@ -743,14 +660,12 @@ int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1); -#endif return SPGPU_SUCCESS; } int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, void *ia, void *ja, void *val) { int i; -#ifdef HAVE_SPGPU struct EllDevice *devMat = (struct EllDevice *) deviceMat; struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; @@ -765,9 +680,7 @@ int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1); -#endif return SPGPU_SUCCESS; } -#endif diff --git a/cuda/elldev.h b/cuda/elldev.h index 6a0814e2..c1001439 100644 --- a/cuda/elldev.h +++ b/cuda/elldev.h @@ -33,12 +33,10 @@ #ifndef _ELLDEV_H_ #define _ELLDEV_H_ -#if defined(HAVE_SPGPU) #include "cintrf.h" #include "cuComplex.h" #include "ell.h" - struct EllDevice { // Compressed matrix @@ -176,8 +174,4 @@ int getEllDevicePitch(void* deviceMat); //int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); //int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/elldev_mod.F90 b/cuda/elldev_mod.F90 index 49656d19..40cf8e49 100644 --- a/cuda/elldev_mod.F90 +++ b/cuda/elldev_mod.F90 @@ -44,8 +44,6 @@ module elldev_mod integer(c_int) :: firstIndex end type elldev_parms -#ifdef HAVE_SPGPU - interface function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) & & result(res) bind(c,name='getEllDeviceParams') @@ -320,7 +318,4 @@ module elldev_mod end function spmvEllDeviceDoubleComplex end interface -#endif - - end module elldev_mod diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index 5f0c12d9..c1b661ab 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include "cintrf.h" #include "fcusparse.h" @@ -72,6 +71,3 @@ cusparseHandle_t *getHandle() return(cusparse_handle); } - - -#endif diff --git a/cuda/fcusparse.h b/cuda/fcusparse.h index 2bab2aca..7d2972f8 100644 --- a/cuda/fcusparse.h +++ b/cuda/fcusparse.h @@ -33,7 +33,6 @@ #ifndef FCUSPARSE_ #define FCUSPARSE_ -#ifdef HAVE_SPGPU #include #if CUDA_SHORT_VERSION <= 10 #include @@ -67,4 +66,3 @@ cusparseHandle_t *getHandle(); } #endif -#endif diff --git a/cuda/hdiagdev.c b/cuda/hdiagdev.c index dd5a23cd..6302eed1 100644 --- a/cuda/hdiagdev.c +++ b/cuda/hdiagdev.c @@ -34,10 +34,7 @@ #include #include #include -#if defined(HAVE_SPGPU) #define DEBUG 0 - - void freeHdiagDevice(void* remoteMatrix) { struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix; @@ -138,7 +135,6 @@ int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, unsigned int allocationHeight, unsigned int hackSize, unsigned int hackCount, unsigned int elementType) { int i=0; -#ifdef HAVE_SPGPU HdiagDeviceParams p; p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType); @@ -152,17 +148,12 @@ int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif - } int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets) { int i=0,fo,fa,j,k,p; char buf_a[255], buf_o[255],tmp[255]; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; i=SPGPU_SUCCESS; @@ -216,9 +207,6 @@ int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int * return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -227,15 +215,12 @@ long long int sizeofHdiagDeviceDouble(void* deviceMat) { int i=0,fo,fa; int *hoff=NULL,*hackoff=NULL; long long int memsize=0; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; memsize += (devMat->hackCount+1)*sizeof(int); memsize += devMat->allocationHeight*sizeof(int); memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double); - -#endif return(memsize); } @@ -243,7 +228,6 @@ long long int sizeofHdiagDeviceDouble(void* deviceMat) int readHdiagDeviceDouble(void* deviceMat, double* a, int* off) { int i; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */ /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ @@ -253,9 +237,6 @@ int readHdiagDeviceDouble(void* deviceMat, double* a, int* off) fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, @@ -266,7 +247,6 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -285,15 +265,11 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets) { int i=0,fo,fa,j,k,p; char buf_a[255], buf_o[255],tmp[255]; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; i=SPGPU_SUCCESS; @@ -347,9 +323,6 @@ int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *ha return SPGPU_SUCCESS; else return SPGPU_UNSUPPORTED; -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -358,7 +331,6 @@ long long int sizeofHdiagDeviceFloat(void* deviceMat) { int i=0,fo,fa; int *hoff=NULL,*hackoff=NULL; long long int memsize=0; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; @@ -366,7 +338,6 @@ long long int sizeofHdiagDeviceFloat(void* deviceMat) memsize += devMat->allocationHeight*sizeof(int); memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float); -#endif return(memsize); } @@ -374,7 +345,6 @@ long long int sizeofHdiagDeviceFloat(void* deviceMat) int readHdiagDeviceFloat(void* deviceMat, float* a, int* off) { int i; -#ifdef HAVE_SPGPU struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */ /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ @@ -384,9 +354,6 @@ int readHdiagDeviceFloat(void* deviceMat, float* a, int* off) fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, @@ -397,7 +364,6 @@ int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -416,10 +382,5 @@ int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } - -#endif diff --git a/cuda/hdiagdev.h b/cuda/hdiagdev.h index 4bce5066..5cd9f803 100644 --- a/cuda/hdiagdev.h +++ b/cuda/hdiagdev.h @@ -32,7 +32,6 @@ #ifndef _HDIAGDEV_H_ #define _HDIAGDEV_H_ -#ifdef HAVE_SPGPU #include "cintrf.h" #include "hdia.h" @@ -104,8 +103,4 @@ int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, double beta, void* deviceY); -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/hdiagdev_mod.F90 b/cuda/hdiagdev_mod.F90 index ad0f7cc5..9a3530e7 100644 --- a/cuda/hdiagdev_mod.F90 +++ b/cuda/hdiagdev_mod.F90 @@ -43,8 +43,6 @@ module hdiagdev_mod integer(c_int) :: allocationHeight end type hdiagdev_parms -#ifdef HAVE_SPGPU - ! interface computeHdiaHacksCount ! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, & ! & diaValues,diaValuesPitch,diags,rows)& @@ -197,7 +195,5 @@ module hdiagdev_mod !!$ complex(c_double_complex),value :: alpha, beta !!$ end function spmvHdiagDeviceDoubleComplex end interface spmvHdiagDevice - -#endif end module hdiagdev_mod diff --git a/cuda/hlldev.c b/cuda/hlldev.c index 42f396dd..9da6a48c 100644 --- a/cuda/hlldev.c +++ b/cuda/hlldev.c @@ -30,7 +30,6 @@ #include "hlldev.h" -#if defined(HAVE_SPGPU) //new HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, unsigned int allocsize, unsigned int elementType, unsigned int firstIndex) @@ -147,7 +146,6 @@ int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, un unsigned int allocsize, unsigned int elementType, unsigned int firstIndex) { int i; -#ifdef HAVE_SPGPU HllDeviceParams p; p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex); @@ -156,9 +154,6 @@ int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, un fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); } return(i); -#else - return SPGPU_UNSUPPORTED; -#endif } @@ -170,7 +165,6 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -185,9 +179,6 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } //new @@ -199,7 +190,6 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU #ifdef VERBOSE /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ @@ -214,9 +204,6 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex); //cudaSync(); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, @@ -227,7 +214,6 @@ int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta)); #ifdef VERBOSE @@ -244,9 +230,6 @@ int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* device devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, @@ -257,7 +240,6 @@ int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; spgpuHandle_t handle=psb_cudaGetHandle(); -#ifdef HAVE_SPGPU cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta)); #ifdef VERBOSE @@ -271,14 +253,10 @@ int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* devi devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); @@ -291,14 +269,10 @@ int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); @@ -311,14 +285,10 @@ int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); @@ -331,14 +301,10 @@ int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; // Ex updateFromHost function i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); @@ -351,14 +317,10 @@ int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, i fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -370,14 +332,10 @@ int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* i fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -388,14 +346,10 @@ int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -406,14 +360,10 @@ int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) { int i; -#ifdef HAVE_SPGPU HllDevice *devMat = (HllDevice *) deviceMat; i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); @@ -424,9 +374,6 @@ int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, in fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); }*/ return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } // New copy routines. @@ -435,7 +382,6 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz int *irn, int *hoffs, int *idisp, int *ja, float *val, void *deviceMat) { int i,j; -#ifdef HAVE_SPGPU spgpuHandle_t handle; HllDevice *devMat = (HllDevice *) deviceMat; float *devVal; @@ -469,16 +415,12 @@ int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, int *irn, int *hoffs, int *idisp, int *ja, double *val, void *deviceMat) { int i,j; -#ifdef HAVE_SPGPU spgpuHandle_t handle; HllDevice *devMat = (HllDevice *) deviceMat; double *devVal; @@ -517,16 +459,12 @@ int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int is fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, int *irn, int *hoffs, int *idisp, int *ja, float complex *val, void *deviceMat) { int i,j; -#ifdef HAVE_SPGPU spgpuHandle_t handle; HllDevice *devMat = (HllDevice *) deviceMat; float complex *devVal; @@ -560,16 +498,12 @@ int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, int *irn, int *hoffs, int *idisp, int *ja, double complex *val, void *deviceMat) { int i,j; -#ifdef HAVE_SPGPU spgpuHandle_t handle; HllDevice *devMat = (HllDevice *) deviceMat; double complex *devVal; @@ -603,13 +537,4 @@ int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i); } return SPGPU_SUCCESS; -#else - return SPGPU_UNSUPPORTED; -#endif } - - - - - -#endif diff --git a/cuda/hlldev.h b/cuda/hlldev.h index 478ad86e..e4f8259e 100644 --- a/cuda/hlldev.h +++ b/cuda/hlldev.h @@ -32,7 +32,6 @@ #ifndef _HLLDEV_H_ #define _HLLDEV_H_ -#ifdef HAVE_SPGPU #include "cintrf.h" #include "hell.h" @@ -154,8 +153,4 @@ int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, int *idiag, int *rP, double complex *cM); -#else -#define CINTRF_UNSUPPORTED -1 -#endif - #endif diff --git a/cuda/hlldev_mod.F90 b/cuda/hlldev_mod.F90 index 4eaa5ce0..90b8e13c 100644 --- a/cuda/hlldev_mod.F90 +++ b/cuda/hlldev_mod.F90 @@ -43,8 +43,6 @@ module hlldev_mod integer(c_int) :: firstIndex end type hlldev_parms -#ifdef HAVE_SPGPU - interface function bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex) & & result(res) bind(c,name='bldHllDeviceParams') @@ -267,7 +265,4 @@ module hlldev_mod end interface -#endif - - end module hlldev_mod diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 index af3301ff..aa6a3ba3 100644 --- a/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 @@ -32,12 +32,8 @@ subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -48,10 +44,8 @@ subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 index 47845e52..65b12a11 100644 --- a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 @@ -32,12 +32,8 @@ subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt -#else - use psb_c_cuda_csrg_mat_mod -#endif !use iso_c_binding implicit none @@ -53,9 +49,7 @@ subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) class default call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_c_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 index 5b1eb817..e70a044e 100644 --- a/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo -#else - use psb_c_cuda_diag_mat_mod -#endif implicit none class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) info = psb_success_ call a%psb_c_dia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 index fedffa22..c6105e88 100644 --- a/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 @@ -28,20 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo use psi_ext_util_mod use psb_cuda_env_mod -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,16 +51,11 @@ subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) integer(psb_ipk_), allocatable :: idisp(:) info = psb_success_ -#ifdef HAVE_SPGPU hacksize = max(1,psb_cuda_WarpSize()) -#else - hacksize = 1 -#endif if (b%is_dev()) call b%sync() if (b%is_by_rows()) then -#ifdef HAVE_SPGPU call psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) @@ -82,15 +71,8 @@ subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & & a%irn,idisp,b%ja,b%val, a%deviceMat) call a%set_dev() -#else - - call psi_c_convert_ell_from_coo(a,b,info,hacksize=hacksize) - call a%set_host() -#endif - else call b%cp_to_coo(tmp,info) -#ifdef HAVE_SPGPU call psi_c_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) @@ -107,11 +89,6 @@ subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) call a%set_dev() -#else - - call psi_c_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) - call a%set_host() -#endif end if if (info /= psb_success_) goto 9999 diff --git a/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 index 4c44d29a..f7e5351e 100644 --- a/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -51,9 +47,7 @@ subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ if (b%is_dev()) call b%sync() @@ -67,13 +61,9 @@ subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) @@ -88,9 +78,7 @@ subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) a%val(1:m,1:nzm) = b%val(1:m,1:nzm) end if a%nzt = nza -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default diff --git a/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 index 436eabaa..9be741c9 100644 --- a/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_c_cuda_hdiag_mat_mod -#endif implicit none class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a @@ -53,16 +47,12 @@ subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 index d30fccbf..8b0d9f2a 100644 --- a/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -61,11 +57,7 @@ subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() -#ifdef HAVE_SPGPU hksz = max(1,psb_cuda_WarpSize()) -#else - hksz = psi_get_hksz() -#endif if (b%is_by_rows()) then diff --git a/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 index 259364cd..96a5c5e8 100644 --- a/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) call a%cp_from_coo(b,info) class default call a%psb_c_hll_sparse_mat%cp_from_fmt(b,info) -#ifdef HAVE_SPGPU if (info == 0) call a%to_gpu(info) -#endif end select if (info /= 0) goto 9999 diff --git a/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 index 7ebb5240..d29da0b7 100644 --- a/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -49,10 +45,8 @@ subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 index 033ba966..9ed53040 100644 --- a/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) class default call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_c_cuda_cp_hybg_from_fmt diff --git a/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 index d9736d23..f1e002f3 100644 --- a/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) call a%psb_c_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() if (info == 0) call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_csmm.F90 b/cuda/impl/psb_c_cuda_csrg_csmm.F90 index 8f2f55b9..b3012952 100644 --- a/cuda/impl/psb_c_cuda_csrg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -94,7 +90,6 @@ subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -119,9 +114,6 @@ subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_csmv.F90 b/cuda/impl/psb_c_cuda_csrg_csmv.F90 index ba681401..ae90cb7e 100644 --- a/cuda/impl/psb_c_cuda_csrg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmv -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -96,7 +92,6 @@ subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -124,9 +119,6 @@ subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 index b1bed7e5..503bc57e 100644 --- a/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_from_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_csrg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_from_gpu -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +44,6 @@ subroutine psb_c_cuda_csrg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat%mat))) then call a%free() return @@ -68,6 +63,5 @@ subroutine psb_c_cuda_csrg_from_gpu(a,info) #endif call a%set_sync() -#endif end subroutine psb_c_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 index 32dec5ef..7e5bb614 100644 --- a/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 @@ -32,13 +32,9 @@ subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv -#else - use psb_c_cuda_csrg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a @@ -75,7 +71,6 @@ subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -112,12 +107,6 @@ subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='csrg_vect_sv') diff --git a/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 index 22f9f118..a757f477 100644 --- a/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) ! call a%psb_c_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_scal.F90 b/cuda/impl/psb_c_cuda_csrg_scal.F90 index 556a0ec5..13716339 100644 --- a/cuda/impl/psb_c_cuda_csrg_scal.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) @@ -58,10 +54,8 @@ subroutine psb_c_cuda_csrg_scal(d,a,info,side) call a%psb_c_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_scals.F90 b/cuda/impl/psb_c_cuda_csrg_scals.F90 index a67e91cd..5334be3d 100644 --- a/cuda/impl/psb_c_cuda_csrg_scals.F90 +++ b/cuda/impl/psb_c_cuda_csrg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d @@ -56,10 +52,8 @@ subroutine psb_c_cuda_csrg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index ea710cbc..aebb07e4 100644 --- a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -320,6 +315,5 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in CSRG_TO_GPU ',info end if -#endif end subroutine psb_c_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 index cb556d20..c58e7ec0 100644 --- a/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv -#else - use psb_c_cuda_csrg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a @@ -72,7 +68,6 @@ subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= czero) then @@ -112,9 +107,6 @@ subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_diag_csmv.F90 b/cuda/impl/psb_c_cuda_diag_csmv.F90 index 00ab742d..c0940903 100644 --- a/cuda/impl/psb_c_cuda_diag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_diag_csmv.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv -#else - use psb_c_cuda_diag_mat_mod -#endif implicit none class(psb_c_cuda_diag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +90,6 @@ subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +116,6 @@ subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +124,4 @@ subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_c_cuda_diag_csmv diff --git a/cuda/impl/psb_c_cuda_diag_to_gpu.F90 b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 index 4f2c21d9..88bbd8b5 100644 --- a/cuda/impl/psb_c_cuda_diag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu -#else - use psb_c_cuda_diag_mat_mod -#endif use iso_c_binding implicit none class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a @@ -47,13 +43,10 @@ subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d -#ifdef HAVE_SPGPU type(diagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return n = size(a%data,1) @@ -69,6 +62,5 @@ subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) if (info == 0) info = & & writeDiagDevice(a%deviceMat,a%data,a%offset,n) ! if (info /= 0) goto 9999 -#endif end subroutine psb_c_cuda_diag_to_gpu diff --git a/cuda/impl/psb_c_cuda_diag_vect_mv.F90 b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 index 02bb9587..fba22bc5 100644 --- a/cuda/impl/psb_c_cuda_diag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv -#else - use psb_c_cuda_diag_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_diag_sparse_mat), intent(in) :: a @@ -71,7 +65,6 @@ subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +105,6 @@ subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 index bb2ec97b..7aaa37f1 100644 --- a/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 @@ -32,13 +32,9 @@ subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta @@ -123,13 +119,9 @@ end subroutine psb_c_cuda_dnsg_vect_mv subroutine psb_c_cuda_dnsg_mold(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_c_base_sparse_mat), intent(inout), allocatable :: b @@ -190,17 +182,12 @@ end subroutine psb_c_cuda_dnsg_mold !!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz !!$ end interface - subroutine psb_c_cuda_dnsg_to_gpu(a,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu -#else - use psb_c_cuda_dnsg_mat_mod -#endif class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda @@ -209,15 +196,12 @@ subroutine psb_c_cuda_dnsg_to_gpu(a,info) call psb_erractionsave(err_act) info = psb_success_ -#ifdef HAVE_SPGPU if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& & spgpu_type_complex_float,1) if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info - -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return @@ -233,13 +217,9 @@ end subroutine psb_c_cuda_dnsg_to_gpu subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -272,13 +252,9 @@ end subroutine psb_c_cuda_cp_dnsg_from_coo subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -348,13 +324,9 @@ end subroutine psb_c_cuda_cp_dnsg_from_fmt subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -383,18 +355,13 @@ subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) return end subroutine psb_c_cuda_mv_dnsg_from_coo - - + subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_c_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_c_vectordev_mod use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt -#else - use psb_c_cuda_dnsg_mat_mod -#endif implicit none class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_c_base_sparse_mat), intent(inout) :: b diff --git a/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 index 01ca7189..7f50d547 100644 --- a/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -47,9 +41,7 @@ subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ @@ -74,13 +66,9 @@ subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) goto 9999 endif -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1) ld = gpu_parms%pitch nz_ = gpu_parms%maxRowSize -#else - ld = m -#endif if (info == psb_success_) call psb_realloc(m,a%irn,info) if (info == psb_success_) call psb_realloc(m,a%idiag,info) @@ -98,10 +86,8 @@ subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) call a%set_dupl(psb_dupl_def_) end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_asb.f90 b/cuda/impl/psb_c_cuda_elg_asb.f90 index 24af1cc9..16d70736 100644 --- a/cuda/impl/psb_c_cuda_elg_asb.f90 +++ b/cuda/impl/psb_c_cuda_elg_asb.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_asb(a) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_elg_csmm.F90 b/cuda/impl/psb_c_cuda_elg_csmm.F90 index a5f0e3d5..f7ae9892 100644 --- a/cuda/impl/psb_c_cuda_elg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +87,6 @@ subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -119,9 +112,6 @@ subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_csmv.F90 b/cuda/impl/psb_c_cuda_elg_csmv.F90 index 00f39e8c..a23d4a60 100644 --- a/cuda/impl/psb_c_cuda_elg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_elg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmv -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -122,9 +116,6 @@ subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_csput.F90 b/cuda/impl/psb_c_cuda_elg_csput.F90 index cc6fc024..3da928f0 100644 --- a/cuda/impl/psb_c_cuda_elg_csput.F90 +++ b/cuda/impl/psb_c_cuda_elg_csput.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_a -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -128,13 +123,9 @@ subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v use psb_c_cuda_vect_mod -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_c_cuda_elg_from_gpu.F90 b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 index 593b52be..34c6e4a6 100644 --- a/cuda/impl/psb_c_cuda_elg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_c_cuda_elg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_c_cuda_elg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat))) then call a%free() return @@ -69,6 +63,5 @@ subroutine psb_c_cuda_elg_from_gpu(a,info) if (info == 0) info = & & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_c_cuda_elg_from_gpu diff --git a/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 index 43843dc6..148d72d2 100644 --- a/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 @@ -27,19 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! - +! subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv -#else - use psb_c_cuda_elg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_elg_sparse_mat), intent(in) :: a diff --git a/cuda/impl/psb_c_cuda_elg_mold.F90 b/cuda/impl/psb_c_cuda_elg_mold.F90 index b428055c..bb94bf07 100644 --- a/cuda/impl/psb_c_cuda_elg_mold.F90 +++ b/cuda/impl/psb_c_cuda_elg_mold.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_elg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 index b97530e1..6a1f8763 100644 --- a/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -64,10 +59,8 @@ subroutine psb_c_cuda_elg_reallocate_nz(nz,a) goto 9999 end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nzrm) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_scal.F90 b/cuda/impl/psb_c_cuda_elg_scal.F90 index b169451b..65f84768 100644 --- a/cuda/impl/psb_c_cuda_elg_scal.F90 +++ b/cuda/impl/psb_c_cuda_elg_scal.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) @@ -63,10 +58,8 @@ subroutine psb_c_cuda_elg_scal(d,a,info,side) call a%psb_c_ell_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_elg_scals.F90 b/cuda/impl/psb_c_cuda_elg_scals.F90 index d20ee568..966f2e91 100644 --- a/cuda/impl/psb_c_cuda_elg_scals.F90 +++ b/cuda/impl/psb_c_cuda_elg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d @@ -59,10 +54,8 @@ subroutine psb_c_cuda_elg_scals(d,a,info) a%val(:,:) = a%val(:,:) * d -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_elg_to_gpu.F90 b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 index 5ea61a41..495207c7 100644 --- a/cuda/impl/psb_c_cuda_elg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 @@ -29,30 +29,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -88,6 +80,5 @@ subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm) if (info == 0) info = & & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_c_cuda_elg_to_gpu diff --git a/cuda/impl/psb_c_cuda_elg_trim.f90 b/cuda/impl/psb_c_cuda_elg_trim.f90 index 483e189d..78dbe193 100644 --- a/cuda/impl/psb_c_cuda_elg_trim.f90 +++ b/cuda/impl/psb_c_cuda_elg_trim.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_trim(a) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_elg_vect_mv.F90 b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 index b89ba5a2..9da6a34a 100644 --- a/cuda/impl/psb_c_cuda_elg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv -#else - use psb_c_cuda_elg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_elg_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() if (.not.x%is_host()) call x%sync() @@ -116,10 +110,6 @@ subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - if (a%is_dev()) call a%sync() - call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hdiag_csmv.F90 b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 index 4ea2c269..36928062 100644 --- a/cuda/impl/psb_c_cuda_hdiag_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv -#else - use psb_c_cuda_hdiag_mat_mod -#endif implicit none class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +123,4 @@ subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_c_cuda_hdiag_csmv diff --git a/cuda/impl/psb_c_cuda_hdiag_mold.F90 b/cuda/impl/psb_c_cuda_hdiag_mold.F90 index 67e0b92e..27402cfc 100644 --- a/cuda/impl/psb_c_cuda_hdiag_mold.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hdiag_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 index 63ab178a..8d1b61a1 100644 --- a/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 @@ -29,29 +29,21 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hdiag_to_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_to_gpu -#else - use psb_c_cuda_hdiag_mat_mod -#endif use iso_c_binding implicit none class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight -#ifdef HAVE_SPGPU type(hdiagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU nr = a%get_nrows() nc = a%get_ncols() hacksize = a%hackSize @@ -81,6 +73,4 @@ subroutine psb_c_cuda_hdiag_to_gpu(a,info) if (info == 0) info = & & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) -#endif - end subroutine psb_c_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 index fb80611f..0c7ce856 100644 --- a/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_vect_mv -#else - use psb_c_cuda_hdiag_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 index 277b974f..1b41f132 100644 --- a/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_allocate_mnnz -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -47,19 +42,15 @@ subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(hlldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ call a%psb_c_hll_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_csmm.F90 b/cuda/impl/psb_c_cuda_hlg_csmm.F90 index f351ffd0..88aa53a8 100644 --- a/cuda/impl/psb_c_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmm -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -93,7 +88,6 @@ subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -118,9 +112,6 @@ subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -128,5 +119,4 @@ subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) return - end subroutine psb_c_cuda_hlg_csmm diff --git a/cuda/impl/psb_c_cuda_hlg_csmv.F90 b/cuda/impl/psb_c_cuda_hlg_csmv.F90 index d39e5f51..18db6ad1 100644 --- a/cuda/impl/psb_c_cuda_hlg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmv -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 index f823153d..d06d5488 100644 --- a/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_from_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_from_gpu -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_c_cuda_hlg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (a%is_sync()) return if (a%is_host()) return if (.not.(c_associated(a%deviceMat))) then @@ -71,6 +65,5 @@ subroutine psb_c_cuda_hlg_from_gpu(a,info) if (info == 0) info = & & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_c_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 index 6202885c..87d7c662 100644 --- a/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_inner_vect_sv -#else - use psb_c_cuda_hlg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,11 +64,9 @@ subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) goto 9999 end if - call psb_erractionrestore(err_act) return - 9999 call psb_error_handler(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_mold.F90 b/cuda/impl/psb_c_cuda_hlg_mold.F90 index 85453422..a702e211 100644 --- a/cuda/impl/psb_c_cuda_hlg_mold.F90 +++ b/cuda/impl/psb_c_cuda_hlg_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 index 848b659d..2ec5fa2c 100644 --- a/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_reallocate_nz -#else - use psb_c_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz @@ -52,10 +47,8 @@ subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) call a%psb_c_hll_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_scal.F90 b/cuda/impl/psb_c_cuda_hlg_scal.F90 index d768048f..770d1734 100644 --- a/cuda/impl/psb_c_cuda_hlg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scal.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scal -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) @@ -60,10 +55,8 @@ subroutine psb_c_cuda_hlg_scal(d,a,info,side) call a%psb_c_hll_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_scals.F90 b/cuda/impl/psb_c_cuda_hlg_scals.F90 index 7574bf94..ef6bc1e3 100644 --- a/cuda/impl/psb_c_cuda_hlg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hlg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scals -#else - use psb_c_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -59,10 +54,8 @@ subroutine psb_c_cuda_hlg_scals(d,a,info) call a%psb_c_hll_sparse_mat%scal(d,info) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 index d7fc8fb2..d7d179e7 100644 --- a/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_to_gpu -#else - use psb_c_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a @@ -50,7 +45,6 @@ subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return n = a%get_nrows() @@ -63,6 +57,5 @@ subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) if (info == 0) info = & & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) ! if (info /= 0) goto 9999 -#endif end subroutine psb_c_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 index 2d7a679e..3789ef17 100644 --- a/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_vect_mv -#else - use psb_c_cuda_hlg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,9 +64,7 @@ subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) goto 9999 endif - tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= czero) then @@ -115,9 +108,6 @@ subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 index eced26e0..bc9f3889 100644 --- a/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_allocate_mnnz -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) call a%psb_c_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_csmm.F90 b/cuda/impl/psb_c_cuda_hybg_csmm.F90 index cc459f66..227b7d5c 100644 --- a/cuda/impl/psb_c_cuda_hybg_csmm.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmm -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +88,6 @@ subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_csmv.F90 b/cuda/impl/psb_c_cuda_hybg_csmv.F90 index ab07d756..e1084022 100644 --- a/cuda/impl/psb_c_cuda_hybg_csmv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmv -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -95,7 +91,6 @@ subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -123,9 +118,6 @@ subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 index fcaf49ff..e19aafeb 100644 --- a/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_inner_vect_sv.F90 @@ -33,13 +33,9 @@ subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_inner_vect_sv -#else - use psb_c_cuda_hybg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a @@ -76,7 +72,6 @@ subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=czero)) then call x%sync() call y%sync() @@ -113,12 +108,6 @@ subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='hybg_vect_sv') diff --git a/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 index 979eaad8..15f33077 100644 --- a/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_reallocate_nz -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) ! call a%psb_c_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_scal.F90 b/cuda/impl/psb_c_cuda_hybg_scal.F90 index ac4d788e..ff1c1515 100644 --- a/cuda/impl/psb_c_cuda_hybg_scal.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scal -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d(:) @@ -60,10 +56,8 @@ subroutine psb_c_cuda_hybg_scal(d,a,info,side) call a%psb_c_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_scals.F90 b/cuda/impl/psb_c_cuda_hybg_scals.F90 index 7def71d2..1b2ddf32 100644 --- a/cuda/impl/psb_c_cuda_hybg_scals.F90 +++ b/cuda/impl/psb_c_cuda_hybg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scals -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_spk_), intent(in) :: d @@ -60,10 +56,8 @@ subroutine psb_c_cuda_hybg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 index 1a77586e..15a65abc 100644 --- a/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_to_gpu -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -148,7 +143,6 @@ subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in HYBG_TO_GPU ',info end if -#endif end subroutine psb_c_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 index da20ca41..58ce0386 100644 --- a/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 +++ b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_vect_mv -#else - use psb_c_cuda_hybg_mat_mod -#endif use psb_c_cuda_vect_mod implicit none class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a @@ -71,8 +67,6 @@ subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') - -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= czero) then @@ -112,9 +106,6 @@ subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 index f80a8f87..f0a74c09 100644 --- a/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_coo -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -51,9 +46,7 @@ subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif if (info /= 0) goto 9999 return diff --git a/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 index 1f23a6c2..eb3698b0 100644 --- a/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_fmt -#else - use psb_c_cuda_csrg_mat_mod -#endif implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,9 +50,7 @@ subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) class default call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_c_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 index e20e0b0a..c1ee2ba9 100644 --- a/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_mv_diag_from_coo -#else - use psb_c_cuda_diag_mat_mod -#endif implicit none diff --git a/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 index 741058cd..f9555729 100644 --- a/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_coo -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,5 +52,4 @@ subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) return - end subroutine psb_c_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 index b375bc63..59615e25 100644 --- a/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_fmt -#else - use psb_c_cuda_elg_mat_mod -#endif implicit none class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a @@ -49,9 +44,7 @@ subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) !locals type(psb_c_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ @@ -65,13 +58,9 @@ subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat call move_alloc(b%irn, a%irn) call move_alloc(b%idiag, a%idiag) @@ -87,9 +76,7 @@ subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) end if a%nzt = nza call b%free() -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default call b%mv_to_coo(tmp,info) diff --git a/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 index 8826081f..21ee731d 100644 --- a/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_mv_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_c_cuda_hdiag_mat_mod -#endif implicit none @@ -54,16 +48,12 @@ subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 index 416bbaed..50c20fad 100644 --- a/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_coo -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 index aafe692d..3fba905a 100644 --- a/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_fmt -#else - use psb_c_cuda_hlg_mat_mod -#endif implicit none class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 index eb5ba685..f7cdae40 100644 --- a/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_coo -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 index d74e89bd..9be9f0b4 100644 --- a/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_fmt -#else - use psb_c_cuda_hybg_mat_mod -#endif implicit none class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a @@ -54,9 +50,7 @@ subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) class default call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_c_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 index e3383af1..ab3a7256 100644 --- a/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 @@ -32,12 +32,8 @@ subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_coo -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -48,10 +44,8 @@ subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 index 28b46c76..d030538e 100644 --- a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 @@ -32,12 +32,8 @@ subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_fmt -#else - use psb_d_cuda_csrg_mat_mod -#endif !use iso_c_binding implicit none @@ -53,9 +49,7 @@ subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) class default call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_d_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 index d21bb469..dc0401d5 100644 --- a/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_cp_diag_from_coo -#else - use psb_d_cuda_diag_mat_mod -#endif implicit none class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) info = psb_success_ call a%psb_d_dia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 index a4d58297..890bdc39 100644 --- a/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 @@ -28,20 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_coo use psi_ext_util_mod use psb_cuda_env_mod -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,16 +51,11 @@ subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) integer(psb_ipk_), allocatable :: idisp(:) info = psb_success_ -#ifdef HAVE_SPGPU hacksize = max(1,psb_cuda_WarpSize()) -#else - hacksize = 1 -#endif if (b%is_dev()) call b%sync() if (b%is_by_rows()) then -#ifdef HAVE_SPGPU call psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) @@ -82,15 +71,8 @@ subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & & a%irn,idisp,b%ja,b%val, a%deviceMat) call a%set_dev() -#else - - call psi_d_convert_ell_from_coo(a,b,info,hacksize=hacksize) - call a%set_host() -#endif - else call b%cp_to_coo(tmp,info) -#ifdef HAVE_SPGPU call psi_d_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) @@ -107,11 +89,6 @@ subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) call a%set_dev() -#else - - call psi_d_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) - call a%set_host() -#endif end if if (info /= psb_success_) goto 9999 diff --git a/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 index 31786c1b..7beea7f1 100644 --- a/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_fmt -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -51,9 +47,7 @@ subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ if (b%is_dev()) call b%sync() @@ -67,13 +61,9 @@ subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) @@ -88,9 +78,7 @@ subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) a%val(1:m,1:nzm) = b%val(1:m,1:nzm) end if a%nzt = nza -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default diff --git a/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 index efcf9d66..82ef4876 100644 --- a/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_cp_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_d_cuda_hdiag_mat_mod -#endif implicit none class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a @@ -53,16 +47,12 @@ subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 index 2fc898b2..34b999a9 100644 --- a/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_coo -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -61,11 +57,7 @@ subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() -#ifdef HAVE_SPGPU hksz = max(1,psb_cuda_WarpSize()) -#else - hksz = psi_get_hksz() -#endif if (b%is_by_rows()) then diff --git a/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 index 0796630c..ecb15157 100644 --- a/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_fmt -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) call a%cp_from_coo(b,info) class default call a%psb_d_hll_sparse_mat%cp_from_fmt(b,info) -#ifdef HAVE_SPGPU if (info == 0) call a%to_gpu(info) -#endif end select if (info /= 0) goto 9999 diff --git a/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 index f1f62a89..b236a91d 100644 --- a/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_coo -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -49,10 +45,8 @@ subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 index 37c9cc42..87d25252 100644 --- a/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_fmt -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) class default call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_d_cuda_cp_hybg_from_fmt diff --git a/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 index 3858672c..056f2deb 100644 --- a/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_allocate_mnnz -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) call a%psb_d_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() if (info == 0) call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_csmm.F90 b/cuda/impl/psb_d_cuda_csrg_csmm.F90 index 58251d9a..ddac1373 100644 --- a/cuda/impl/psb_d_cuda_csrg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmm -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -94,7 +90,6 @@ subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -119,9 +114,6 @@ subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_csmv.F90 b/cuda/impl/psb_d_cuda_csrg_csmv.F90 index 269760f0..c1c889f8 100644 --- a/cuda/impl/psb_d_cuda_csrg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmv -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -96,7 +92,6 @@ subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -124,9 +119,6 @@ subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 index c451a99f..9d1c2285 100644 --- a/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_from_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_csrg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_from_gpu -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +44,6 @@ subroutine psb_d_cuda_csrg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat%mat))) then call a%free() return @@ -68,6 +63,5 @@ subroutine psb_d_cuda_csrg_from_gpu(a,info) #endif call a%set_sync() -#endif end subroutine psb_d_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 index 60ee541f..9a45ee17 100644 --- a/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 @@ -32,13 +32,9 @@ subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_inner_vect_sv -#else - use psb_d_cuda_csrg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a @@ -75,7 +71,6 @@ subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -112,12 +107,6 @@ subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='csrg_vect_sv') diff --git a/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 index dbf34958..c27cb943 100644 --- a/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_reallocate_nz -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) ! call a%psb_d_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_scal.F90 b/cuda/impl/psb_d_cuda_csrg_scal.F90 index 73e1b9f3..860e9396 100644 --- a/cuda/impl/psb_d_cuda_csrg_scal.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scal -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) @@ -58,10 +54,8 @@ subroutine psb_d_cuda_csrg_scal(d,a,info,side) call a%psb_d_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_scals.F90 b/cuda/impl/psb_d_cuda_csrg_scals.F90 index cf8d6270..87ef588d 100644 --- a/cuda/impl/psb_d_cuda_csrg_scals.F90 +++ b/cuda/impl/psb_d_cuda_csrg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scals -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d @@ -56,10 +52,8 @@ subroutine psb_d_cuda_csrg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index a0e72cb4..d1949421 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_to_gpu -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -320,6 +315,5 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in CSRG_TO_GPU ',info end if -#endif end subroutine psb_d_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 index b828d878..03fefbdd 100644 --- a/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_vect_mv -#else - use psb_d_cuda_csrg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a @@ -72,7 +68,6 @@ subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +107,6 @@ subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_diag_csmv.F90 b/cuda/impl/psb_d_cuda_diag_csmv.F90 index 8b49769e..0317a369 100644 --- a/cuda/impl/psb_d_cuda_diag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_diag_csmv.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_csmv -#else - use psb_d_cuda_diag_mat_mod -#endif implicit none class(psb_d_cuda_diag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +90,6 @@ subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +116,6 @@ subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +124,4 @@ subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_d_cuda_diag_csmv diff --git a/cuda/impl/psb_d_cuda_diag_to_gpu.F90 b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 index 4903de8a..9b648962 100644 --- a/cuda/impl/psb_d_cuda_diag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_to_gpu -#else - use psb_d_cuda_diag_mat_mod -#endif use iso_c_binding implicit none class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a @@ -47,13 +43,10 @@ subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d -#ifdef HAVE_SPGPU type(diagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return n = size(a%data,1) @@ -69,6 +62,5 @@ subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) if (info == 0) info = & & writeDiagDevice(a%deviceMat,a%data,a%offset,n) ! if (info /= 0) goto 9999 -#endif end subroutine psb_d_cuda_diag_to_gpu diff --git a/cuda/impl/psb_d_cuda_diag_vect_mv.F90 b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 index 0f23d363..3bc2372d 100644 --- a/cuda/impl/psb_d_cuda_diag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_vect_mv -#else - use psb_d_cuda_diag_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_diag_sparse_mat), intent(in) :: a @@ -71,7 +65,6 @@ subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +105,6 @@ subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 index 8d922d82..e90e816a 100644 --- a/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 @@ -32,13 +32,9 @@ subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_vect_mv -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta @@ -123,13 +119,9 @@ end subroutine psb_d_cuda_dnsg_vect_mv subroutine psb_d_cuda_dnsg_mold(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_mold -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b @@ -190,17 +182,12 @@ end subroutine psb_d_cuda_dnsg_mold !!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz !!$ end interface - subroutine psb_d_cuda_dnsg_to_gpu(a,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_to_gpu -#else - use psb_d_cuda_dnsg_mat_mod -#endif class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda @@ -209,15 +196,12 @@ subroutine psb_d_cuda_dnsg_to_gpu(a,info) call psb_erractionsave(err_act) info = psb_success_ -#ifdef HAVE_SPGPU if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& & spgpu_type_double,1) if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info - -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return @@ -233,13 +217,9 @@ end subroutine psb_d_cuda_dnsg_to_gpu subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_coo -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -272,13 +252,9 @@ end subroutine psb_d_cuda_cp_dnsg_from_coo subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_fmt -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -348,13 +324,9 @@ end subroutine psb_d_cuda_cp_dnsg_from_fmt subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_coo -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -383,18 +355,13 @@ subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) return end subroutine psb_d_cuda_mv_dnsg_from_coo - - + subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_d_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_d_vectordev_mod use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_fmt -#else - use psb_d_cuda_dnsg_mat_mod -#endif implicit none class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b diff --git a/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 index b9308514..6db20c96 100644 --- a/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_allocate_mnnz -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -47,9 +41,7 @@ subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ @@ -74,13 +66,9 @@ subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) goto 9999 endif -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_double,1) ld = gpu_parms%pitch nz_ = gpu_parms%maxRowSize -#else - ld = m -#endif if (info == psb_success_) call psb_realloc(m,a%irn,info) if (info == psb_success_) call psb_realloc(m,a%idiag,info) @@ -98,10 +86,8 @@ subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) call a%set_dupl(psb_dupl_def_) end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_asb.f90 b/cuda/impl/psb_d_cuda_elg_asb.f90 index c158ccde..7d510ee2 100644 --- a/cuda/impl/psb_d_cuda_elg_asb.f90 +++ b/cuda/impl/psb_d_cuda_elg_asb.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_asb(a) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_elg_csmm.F90 b/cuda/impl/psb_d_cuda_elg_csmm.F90 index 2d9883fa..f77d72d8 100644 --- a/cuda/impl/psb_d_cuda_elg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmm -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +87,6 @@ subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -119,9 +112,6 @@ subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_csmv.F90 b/cuda/impl/psb_d_cuda_elg_csmv.F90 index 6420e28d..351ad99d 100644 --- a/cuda/impl/psb_d_cuda_elg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_elg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmv -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -122,9 +116,6 @@ subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_csput.F90 b/cuda/impl/psb_d_cuda_elg_csput.F90 index 19d26c43..0d16de1f 100644 --- a/cuda/impl/psb_d_cuda_elg_csput.F90 +++ b/cuda/impl/psb_d_cuda_elg_csput.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_a -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -128,13 +123,9 @@ subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_v use psb_d_cuda_vect_mod -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_d_cuda_elg_from_gpu.F90 b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 index b532a83c..720a6d73 100644 --- a/cuda/impl/psb_d_cuda_elg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_d_cuda_elg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_from_gpu -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_d_cuda_elg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat))) then call a%free() return @@ -69,6 +63,5 @@ subroutine psb_d_cuda_elg_from_gpu(a,info) if (info == 0) info = & & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_d_cuda_elg_from_gpu diff --git a/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 index c262969f..5e5d72ef 100644 --- a/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 @@ -27,19 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! - +! subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_inner_vect_sv -#else - use psb_d_cuda_elg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_elg_sparse_mat), intent(in) :: a diff --git a/cuda/impl/psb_d_cuda_elg_mold.F90 b/cuda/impl/psb_d_cuda_elg_mold.F90 index f887f96f..107f19af 100644 --- a/cuda/impl/psb_d_cuda_elg_mold.F90 +++ b/cuda/impl/psb_d_cuda_elg_mold.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_elg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 index 66c583e1..47464760 100644 --- a/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_reallocate_nz -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -64,10 +59,8 @@ subroutine psb_d_cuda_elg_reallocate_nz(nz,a) goto 9999 end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nzrm) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_scal.F90 b/cuda/impl/psb_d_cuda_elg_scal.F90 index 7aa21c93..420c710e 100644 --- a/cuda/impl/psb_d_cuda_elg_scal.F90 +++ b/cuda/impl/psb_d_cuda_elg_scal.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scal -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) @@ -63,10 +58,8 @@ subroutine psb_d_cuda_elg_scal(d,a,info,side) call a%psb_d_ell_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_elg_scals.F90 b/cuda/impl/psb_d_cuda_elg_scals.F90 index 1950b366..ff22002e 100644 --- a/cuda/impl/psb_d_cuda_elg_scals.F90 +++ b/cuda/impl/psb_d_cuda_elg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scals -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d @@ -59,10 +54,8 @@ subroutine psb_d_cuda_elg_scals(d,a,info) a%val(:,:) = a%val(:,:) * d -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_elg_to_gpu.F90 b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 index b589ec2d..9b88af69 100644 --- a/cuda/impl/psb_d_cuda_elg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 @@ -29,30 +29,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_to_gpu -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -88,6 +80,5 @@ subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm) if (info == 0) info = & & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_d_cuda_elg_to_gpu diff --git a/cuda/impl/psb_d_cuda_elg_trim.f90 b/cuda/impl/psb_d_cuda_elg_trim.f90 index be573c8c..a371c673 100644 --- a/cuda/impl/psb_d_cuda_elg_trim.f90 +++ b/cuda/impl/psb_d_cuda_elg_trim.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_trim(a) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_elg_vect_mv.F90 b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 index 1be57d22..f0b83c2b 100644 --- a/cuda/impl/psb_d_cuda_elg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_vect_mv -#else - use psb_d_cuda_elg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_elg_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() if (.not.x%is_host()) call x%sync() @@ -116,10 +110,6 @@ subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - if (a%is_dev()) call a%sync() - call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hdiag_csmv.F90 b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 index 4bcd6e7a..bf4dacc1 100644 --- a/cuda/impl/psb_d_cuda_hdiag_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmv -#else - use psb_d_cuda_hdiag_mat_mod -#endif implicit none class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +123,4 @@ subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_d_cuda_hdiag_csmv diff --git a/cuda/impl/psb_d_cuda_hdiag_mold.F90 b/cuda/impl/psb_d_cuda_hdiag_mold.F90 index c5028c07..b858b56d 100644 --- a/cuda/impl/psb_d_cuda_hdiag_mold.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hdiag_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 index ca79b9fa..73c4a47d 100644 --- a/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 @@ -29,29 +29,21 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hdiag_to_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_to_gpu -#else - use psb_d_cuda_hdiag_mat_mod -#endif use iso_c_binding implicit none class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight -#ifdef HAVE_SPGPU type(hdiagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU nr = a%get_nrows() nc = a%get_ncols() hacksize = a%hackSize @@ -81,6 +73,4 @@ subroutine psb_d_cuda_hdiag_to_gpu(a,info) if (info == 0) info = & & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) -#endif - end subroutine psb_d_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 index 74233f90..c18c80ac 100644 --- a/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_vect_mv -#else - use psb_d_cuda_hdiag_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 index 3382327f..68d9ab50 100644 --- a/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_allocate_mnnz -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -47,19 +42,15 @@ subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(hlldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ call a%psb_d_hll_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_csmm.F90 b/cuda/impl/psb_d_cuda_hlg_csmm.F90 index a223aace..ee8424e6 100644 --- a/cuda/impl/psb_d_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmm -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -93,7 +88,6 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -118,9 +112,6 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -128,5 +119,4 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) return - end subroutine psb_d_cuda_hlg_csmm diff --git a/cuda/impl/psb_d_cuda_hlg_csmv.F90 b/cuda/impl/psb_d_cuda_hlg_csmv.F90 index 04779296..58892c1f 100644 --- a/cuda/impl/psb_d_cuda_hlg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmv -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 index 7c1a2de8..752fd944 100644 --- a/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_from_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_from_gpu -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_d_cuda_hlg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (a%is_sync()) return if (a%is_host()) return if (.not.(c_associated(a%deviceMat))) then @@ -71,6 +65,5 @@ subroutine psb_d_cuda_hlg_from_gpu(a,info) if (info == 0) info = & & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_d_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 index c6bd68b5..f4a0424d 100644 --- a/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_inner_vect_sv -#else - use psb_d_cuda_hlg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,11 +64,9 @@ subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) goto 9999 end if - call psb_erractionrestore(err_act) return - 9999 call psb_error_handler(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_mold.F90 b/cuda/impl/psb_d_cuda_hlg_mold.F90 index dddce134..e41d56f2 100644 --- a/cuda/impl/psb_d_cuda_hlg_mold.F90 +++ b/cuda/impl/psb_d_cuda_hlg_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 index aa2954d6..e696d304 100644 --- a/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_reallocate_nz -#else - use psb_d_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz @@ -52,10 +47,8 @@ subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) call a%psb_d_hll_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_scal.F90 b/cuda/impl/psb_d_cuda_hlg_scal.F90 index 3cbfada0..042e5805 100644 --- a/cuda/impl/psb_d_cuda_hlg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scal.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scal -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) @@ -60,10 +55,8 @@ subroutine psb_d_cuda_hlg_scal(d,a,info,side) call a%psb_d_hll_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_scals.F90 b/cuda/impl/psb_d_cuda_hlg_scals.F90 index 1ddf764f..4c81faa9 100644 --- a/cuda/impl/psb_d_cuda_hlg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hlg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scals -#else - use psb_d_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -59,10 +54,8 @@ subroutine psb_d_cuda_hlg_scals(d,a,info) call a%psb_d_hll_sparse_mat%scal(d,info) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 index 82737315..566c94bd 100644 --- a/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_to_gpu -#else - use psb_d_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a @@ -50,7 +45,6 @@ subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return n = a%get_nrows() @@ -63,6 +57,5 @@ subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) if (info == 0) info = & & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) ! if (info /= 0) goto 9999 -#endif end subroutine psb_d_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 index 9d0741c4..cccba74b 100644 --- a/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_vect_mv -#else - use psb_d_cuda_hlg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,9 +64,7 @@ subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) goto 9999 endif - tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -115,9 +108,6 @@ subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 index b0bff6c0..50bef667 100644 --- a/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_allocate_mnnz -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) call a%psb_d_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_csmm.F90 b/cuda/impl/psb_d_cuda_hybg_csmm.F90 index 3fcfd17f..d5b49829 100644 --- a/cuda/impl/psb_d_cuda_hybg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmm -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +88,6 @@ subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_csmv.F90 b/cuda/impl/psb_d_cuda_hybg_csmv.F90 index 5e06f633..b29b889a 100644 --- a/cuda/impl/psb_d_cuda_hybg_csmv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmv -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -95,7 +91,6 @@ subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -123,9 +118,6 @@ subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 index a30c1abe..6f5eba3b 100644 --- a/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_inner_vect_sv.F90 @@ -33,13 +33,9 @@ subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_inner_vect_sv -#else - use psb_d_cuda_hybg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a @@ -76,7 +72,6 @@ subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -113,12 +108,6 @@ subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='hybg_vect_sv') diff --git a/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 index cadce8d3..f44c3f10 100644 --- a/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_reallocate_nz -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) ! call a%psb_d_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_scal.F90 b/cuda/impl/psb_d_cuda_hybg_scal.F90 index 126e25cb..5be8d801 100644 --- a/cuda/impl/psb_d_cuda_hybg_scal.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scal -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) @@ -60,10 +56,8 @@ subroutine psb_d_cuda_hybg_scal(d,a,info,side) call a%psb_d_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_scals.F90 b/cuda/impl/psb_d_cuda_hybg_scals.F90 index 88b7e05c..59e21dd2 100644 --- a/cuda/impl/psb_d_cuda_hybg_scals.F90 +++ b/cuda/impl/psb_d_cuda_hybg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scals -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d @@ -60,10 +56,8 @@ subroutine psb_d_cuda_hybg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 index d94a75c7..7b8e2e5f 100644 --- a/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_to_gpu -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -148,7 +143,6 @@ subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in HYBG_TO_GPU ',info end if -#endif end subroutine psb_d_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 index 9d0aedb7..f1119439 100644 --- a/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 +++ b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_vect_mv -#else - use psb_d_cuda_hybg_mat_mod -#endif use psb_d_cuda_vect_mod implicit none class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a @@ -71,8 +67,6 @@ subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') - -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 index 18e7c636..559bfb2c 100644 --- a/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_coo -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -51,9 +46,7 @@ subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif if (info /= 0) goto 9999 return diff --git a/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 index 837c78c1..c2411e90 100644 --- a/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_fmt -#else - use psb_d_cuda_csrg_mat_mod -#endif implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,9 +50,7 @@ subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) class default call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_d_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 index 8d33c459..a6a39a1c 100644 --- a/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_mv_diag_from_coo -#else - use psb_d_cuda_diag_mat_mod -#endif implicit none diff --git a/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 index ad9e7f10..9886e90c 100644 --- a/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_coo -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,5 +52,4 @@ subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) return - end subroutine psb_d_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 index 9cdf790e..da2d47a1 100644 --- a/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_fmt -#else - use psb_d_cuda_elg_mat_mod -#endif implicit none class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a @@ -49,9 +44,7 @@ subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) !locals type(psb_d_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ @@ -65,13 +58,9 @@ subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat call move_alloc(b%irn, a%irn) call move_alloc(b%idiag, a%idiag) @@ -87,9 +76,7 @@ subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) end if a%nzt = nza call b%free() -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default call b%mv_to_coo(tmp,info) diff --git a/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 index aff5e0c0..b3c4f650 100644 --- a/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_mv_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_d_cuda_hdiag_mat_mod -#endif implicit none @@ -54,16 +48,12 @@ subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 index a2b358c4..95e86293 100644 --- a/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_coo -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 index 130d88c2..02578b19 100644 --- a/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_fmt -#else - use psb_d_cuda_hlg_mat_mod -#endif implicit none class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 index 8b0ad032..3113fb13 100644 --- a/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_coo -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 index 71badfc5..520792e9 100644 --- a/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_fmt -#else - use psb_d_cuda_hybg_mat_mod -#endif implicit none class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a @@ -54,9 +50,7 @@ subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) class default call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_d_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 index b7bebc95..0e3f9113 100644 --- a/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 @@ -32,12 +32,8 @@ subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_coo -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -48,10 +44,8 @@ subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 index 7ab9283d..29bbea6e 100644 --- a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 @@ -32,12 +32,8 @@ subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_fmt -#else - use psb_s_cuda_csrg_mat_mod -#endif !use iso_c_binding implicit none @@ -53,9 +49,7 @@ subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) class default call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_s_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 index 9f038a09..07025d77 100644 --- a/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_cp_diag_from_coo -#else - use psb_s_cuda_diag_mat_mod -#endif implicit none class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) info = psb_success_ call a%psb_s_dia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 index f6e1ba42..66abf76a 100644 --- a/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 @@ -28,20 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_coo use psi_ext_util_mod use psb_cuda_env_mod -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,16 +51,11 @@ subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) integer(psb_ipk_), allocatable :: idisp(:) info = psb_success_ -#ifdef HAVE_SPGPU hacksize = max(1,psb_cuda_WarpSize()) -#else - hacksize = 1 -#endif if (b%is_dev()) call b%sync() if (b%is_by_rows()) then -#ifdef HAVE_SPGPU call psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) @@ -82,15 +71,8 @@ subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & & a%irn,idisp,b%ja,b%val, a%deviceMat) call a%set_dev() -#else - - call psi_s_convert_ell_from_coo(a,b,info,hacksize=hacksize) - call a%set_host() -#endif - else call b%cp_to_coo(tmp,info) -#ifdef HAVE_SPGPU call psi_s_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) @@ -107,11 +89,6 @@ subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) call a%set_dev() -#else - - call psi_s_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) - call a%set_host() -#endif end if if (info /= psb_success_) goto 9999 diff --git a/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 index 0c811426..77df12b3 100644 --- a/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_fmt -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -51,9 +47,7 @@ subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ if (b%is_dev()) call b%sync() @@ -67,13 +61,9 @@ subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) @@ -88,9 +78,7 @@ subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) a%val(1:m,1:nzm) = b%val(1:m,1:nzm) end if a%nzt = nza -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default diff --git a/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 index 07b56fa6..75210478 100644 --- a/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_cp_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_s_cuda_hdiag_mat_mod -#endif implicit none class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a @@ -53,16 +47,12 @@ subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 index 055fa046..c254b15a 100644 --- a/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_coo -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -61,11 +57,7 @@ subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() -#ifdef HAVE_SPGPU hksz = max(1,psb_cuda_WarpSize()) -#else - hksz = psi_get_hksz() -#endif if (b%is_by_rows()) then diff --git a/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 index b49be761..f04b65e5 100644 --- a/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_fmt -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) call a%cp_from_coo(b,info) class default call a%psb_s_hll_sparse_mat%cp_from_fmt(b,info) -#ifdef HAVE_SPGPU if (info == 0) call a%to_gpu(info) -#endif end select if (info /= 0) goto 9999 diff --git a/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 index ab135944..69aa615b 100644 --- a/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_coo -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -49,10 +45,8 @@ subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 index 62a54759..95e00df1 100644 --- a/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_fmt -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) class default call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_s_cuda_cp_hybg_from_fmt diff --git a/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 index 53ca8f12..7e6f0c86 100644 --- a/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_allocate_mnnz -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) call a%psb_s_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() if (info == 0) call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_csmm.F90 b/cuda/impl/psb_s_cuda_csrg_csmm.F90 index c8ff4a9e..453f5260 100644 --- a/cuda/impl/psb_s_cuda_csrg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmm -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -94,7 +90,6 @@ subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -119,9 +114,6 @@ subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_csmv.F90 b/cuda/impl/psb_s_cuda_csrg_csmv.F90 index 72658c28..a387fd34 100644 --- a/cuda/impl/psb_s_cuda_csrg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmv -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -96,7 +92,6 @@ subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -124,9 +119,6 @@ subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 index 7811f746..adcb2536 100644 --- a/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_from_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_csrg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_from_gpu -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +44,6 @@ subroutine psb_s_cuda_csrg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat%mat))) then call a%free() return @@ -68,6 +63,5 @@ subroutine psb_s_cuda_csrg_from_gpu(a,info) #endif call a%set_sync() -#endif end subroutine psb_s_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 index 7f9965d8..df11952c 100644 --- a/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 @@ -32,13 +32,9 @@ subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_inner_vect_sv -#else - use psb_s_cuda_csrg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a @@ -75,7 +71,6 @@ subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -112,12 +107,6 @@ subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='csrg_vect_sv') diff --git a/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 index fed3b0e7..dfd115e7 100644 --- a/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_reallocate_nz -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) ! call a%psb_s_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_scal.F90 b/cuda/impl/psb_s_cuda_csrg_scal.F90 index 826ab2dd..ea3406a1 100644 --- a/cuda/impl/psb_s_cuda_csrg_scal.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scal -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) @@ -58,10 +54,8 @@ subroutine psb_s_cuda_csrg_scal(d,a,info,side) call a%psb_s_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_scals.F90 b/cuda/impl/psb_s_cuda_csrg_scals.F90 index 04f4c29a..307d5849 100644 --- a/cuda/impl/psb_s_cuda_csrg_scals.F90 +++ b/cuda/impl/psb_s_cuda_csrg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scals -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d @@ -56,10 +52,8 @@ subroutine psb_s_cuda_csrg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index eadca5df..cf052e13 100644 --- a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_to_gpu -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -320,6 +315,5 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in CSRG_TO_GPU ',info end if -#endif end subroutine psb_s_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 index 38e2dfc0..52820436 100644 --- a/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_vect_mv -#else - use psb_s_cuda_csrg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a @@ -72,7 +68,6 @@ subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +107,6 @@ subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_diag_csmv.F90 b/cuda/impl/psb_s_cuda_diag_csmv.F90 index 214cf6f8..016b82bc 100644 --- a/cuda/impl/psb_s_cuda_diag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_diag_csmv.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_csmv -#else - use psb_s_cuda_diag_mat_mod -#endif implicit none class(psb_s_cuda_diag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +90,6 @@ subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +116,6 @@ subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +124,4 @@ subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_s_cuda_diag_csmv diff --git a/cuda/impl/psb_s_cuda_diag_to_gpu.F90 b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 index c1ee7401..c8578e75 100644 --- a/cuda/impl/psb_s_cuda_diag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_to_gpu -#else - use psb_s_cuda_diag_mat_mod -#endif use iso_c_binding implicit none class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a @@ -47,13 +43,10 @@ subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d -#ifdef HAVE_SPGPU type(diagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return n = size(a%data,1) @@ -69,6 +62,5 @@ subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) if (info == 0) info = & & writeDiagDevice(a%deviceMat,a%data,a%offset,n) ! if (info /= 0) goto 9999 -#endif end subroutine psb_s_cuda_diag_to_gpu diff --git a/cuda/impl/psb_s_cuda_diag_vect_mv.F90 b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 index ab655b7c..d68e5193 100644 --- a/cuda/impl/psb_s_cuda_diag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_vect_mv -#else - use psb_s_cuda_diag_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_diag_sparse_mat), intent(in) :: a @@ -71,7 +65,6 @@ subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +105,6 @@ subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 index 861724aa..86fec6a7 100644 --- a/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 @@ -32,13 +32,9 @@ subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_vect_mv -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta @@ -123,13 +119,9 @@ end subroutine psb_s_cuda_dnsg_vect_mv subroutine psb_s_cuda_dnsg_mold(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_mold -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_s_base_sparse_mat), intent(inout), allocatable :: b @@ -190,17 +182,12 @@ end subroutine psb_s_cuda_dnsg_mold !!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz !!$ end interface - subroutine psb_s_cuda_dnsg_to_gpu(a,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_to_gpu -#else - use psb_s_cuda_dnsg_mat_mod -#endif class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda @@ -209,15 +196,12 @@ subroutine psb_s_cuda_dnsg_to_gpu(a,info) call psb_erractionsave(err_act) info = psb_success_ -#ifdef HAVE_SPGPU if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& & spgpu_type_float,1) if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info - -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return @@ -233,13 +217,9 @@ end subroutine psb_s_cuda_dnsg_to_gpu subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_coo -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -272,13 +252,9 @@ end subroutine psb_s_cuda_cp_dnsg_from_coo subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_fmt -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -348,13 +324,9 @@ end subroutine psb_s_cuda_cp_dnsg_from_fmt subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_coo -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -383,18 +355,13 @@ subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) return end subroutine psb_s_cuda_mv_dnsg_from_coo - - + subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_s_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_s_vectordev_mod use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_fmt -#else - use psb_s_cuda_dnsg_mat_mod -#endif implicit none class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_s_base_sparse_mat), intent(inout) :: b diff --git a/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 index 63c41644..b771ca1b 100644 --- a/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_allocate_mnnz -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -47,9 +41,7 @@ subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ @@ -74,13 +66,9 @@ subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) goto 9999 endif -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_float,1) ld = gpu_parms%pitch nz_ = gpu_parms%maxRowSize -#else - ld = m -#endif if (info == psb_success_) call psb_realloc(m,a%irn,info) if (info == psb_success_) call psb_realloc(m,a%idiag,info) @@ -98,10 +86,8 @@ subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) call a%set_dupl(psb_dupl_def_) end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_asb.f90 b/cuda/impl/psb_s_cuda_elg_asb.f90 index 0d53c26a..53a17a32 100644 --- a/cuda/impl/psb_s_cuda_elg_asb.f90 +++ b/cuda/impl/psb_s_cuda_elg_asb.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_asb(a) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_elg_csmm.F90 b/cuda/impl/psb_s_cuda_elg_csmm.F90 index e7f88a2e..ff7b7848 100644 --- a/cuda/impl/psb_s_cuda_elg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmm -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +87,6 @@ subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -119,9 +112,6 @@ subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_csmv.F90 b/cuda/impl/psb_s_cuda_elg_csmv.F90 index 1844d338..caf106cb 100644 --- a/cuda/impl/psb_s_cuda_elg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_elg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmv -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -122,9 +116,6 @@ subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_csput.F90 b/cuda/impl/psb_s_cuda_elg_csput.F90 index 036eabb2..d43294bf 100644 --- a/cuda/impl/psb_s_cuda_elg_csput.F90 +++ b/cuda/impl/psb_s_cuda_elg_csput.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_a -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -128,13 +123,9 @@ subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_v use psb_s_cuda_vect_mod -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_s_cuda_elg_from_gpu.F90 b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 index bdc55790..d995157e 100644 --- a/cuda/impl/psb_s_cuda_elg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_s_cuda_elg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_from_gpu -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_s_cuda_elg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat))) then call a%free() return @@ -69,6 +63,5 @@ subroutine psb_s_cuda_elg_from_gpu(a,info) if (info == 0) info = & & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_s_cuda_elg_from_gpu diff --git a/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 index 79e546f5..537365a6 100644 --- a/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 @@ -27,19 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! - +! subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_inner_vect_sv -#else - use psb_s_cuda_elg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_elg_sparse_mat), intent(in) :: a diff --git a/cuda/impl/psb_s_cuda_elg_mold.F90 b/cuda/impl/psb_s_cuda_elg_mold.F90 index dc8730bb..7ff9c7ae 100644 --- a/cuda/impl/psb_s_cuda_elg_mold.F90 +++ b/cuda/impl/psb_s_cuda_elg_mold.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_elg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 index 3f34fcec..5b99b9ad 100644 --- a/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_reallocate_nz -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -64,10 +59,8 @@ subroutine psb_s_cuda_elg_reallocate_nz(nz,a) goto 9999 end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nzrm) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_scal.F90 b/cuda/impl/psb_s_cuda_elg_scal.F90 index cd6e1a5b..dfa99cf1 100644 --- a/cuda/impl/psb_s_cuda_elg_scal.F90 +++ b/cuda/impl/psb_s_cuda_elg_scal.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scal -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) @@ -63,10 +58,8 @@ subroutine psb_s_cuda_elg_scal(d,a,info,side) call a%psb_s_ell_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_elg_scals.F90 b/cuda/impl/psb_s_cuda_elg_scals.F90 index 4ee8a64d..f0aa2504 100644 --- a/cuda/impl/psb_s_cuda_elg_scals.F90 +++ b/cuda/impl/psb_s_cuda_elg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scals -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d @@ -59,10 +54,8 @@ subroutine psb_s_cuda_elg_scals(d,a,info) a%val(:,:) = a%val(:,:) * d -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_elg_to_gpu.F90 b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 index 7d04d2b0..9c16ea8d 100644 --- a/cuda/impl/psb_s_cuda_elg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 @@ -29,30 +29,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_to_gpu -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -88,6 +80,5 @@ subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm) if (info == 0) info = & & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_s_cuda_elg_to_gpu diff --git a/cuda/impl/psb_s_cuda_elg_trim.f90 b/cuda/impl/psb_s_cuda_elg_trim.f90 index 516aebc4..2d390343 100644 --- a/cuda/impl/psb_s_cuda_elg_trim.f90 +++ b/cuda/impl/psb_s_cuda_elg_trim.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_trim(a) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_elg_vect_mv.F90 b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 index dad62418..6c898fda 100644 --- a/cuda/impl/psb_s_cuda_elg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_vect_mv -#else - use psb_s_cuda_elg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_elg_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() if (.not.x%is_host()) call x%sync() @@ -116,10 +110,6 @@ subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - if (a%is_dev()) call a%sync() - call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hdiag_csmv.F90 b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 index 8e7e4931..3f34c2e7 100644 --- a/cuda/impl/psb_s_cuda_hdiag_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmv -#else - use psb_s_cuda_hdiag_mat_mod -#endif implicit none class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +123,4 @@ subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_s_cuda_hdiag_csmv diff --git a/cuda/impl/psb_s_cuda_hdiag_mold.F90 b/cuda/impl/psb_s_cuda_hdiag_mold.F90 index e662b07b..c11283dd 100644 --- a/cuda/impl/psb_s_cuda_hdiag_mold.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hdiag_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 index 5fe493aa..bc3fa325 100644 --- a/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 @@ -29,29 +29,21 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hdiag_to_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_to_gpu -#else - use psb_s_cuda_hdiag_mat_mod -#endif use iso_c_binding implicit none class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight -#ifdef HAVE_SPGPU type(hdiagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU nr = a%get_nrows() nc = a%get_ncols() hacksize = a%hackSize @@ -81,6 +73,4 @@ subroutine psb_s_cuda_hdiag_to_gpu(a,info) if (info == 0) info = & & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) -#endif - end subroutine psb_s_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 index 3496a637..03215047 100644 --- a/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_vect_mv -#else - use psb_s_cuda_hdiag_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 index 3f2765c4..480f6677 100644 --- a/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_allocate_mnnz -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -47,19 +42,15 @@ subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(hlldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ call a%psb_s_hll_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_csmm.F90 b/cuda/impl/psb_s_cuda_hlg_csmm.F90 index 2e274c22..0dc28c7f 100644 --- a/cuda/impl/psb_s_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmm -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -93,7 +88,6 @@ subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -118,9 +112,6 @@ subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -128,5 +119,4 @@ subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) return - end subroutine psb_s_cuda_hlg_csmm diff --git a/cuda/impl/psb_s_cuda_hlg_csmv.F90 b/cuda/impl/psb_s_cuda_hlg_csmv.F90 index 56ea8cdb..c029c908 100644 --- a/cuda/impl/psb_s_cuda_hlg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmv -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 index 14ab19b7..8d9a315b 100644 --- a/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_from_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_from_gpu -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_s_cuda_hlg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (a%is_sync()) return if (a%is_host()) return if (.not.(c_associated(a%deviceMat))) then @@ -71,6 +65,5 @@ subroutine psb_s_cuda_hlg_from_gpu(a,info) if (info == 0) info = & & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_s_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 index a9f4f743..2985a1ab 100644 --- a/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_inner_vect_sv -#else - use psb_s_cuda_hlg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,11 +64,9 @@ subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) goto 9999 end if - call psb_erractionrestore(err_act) return - 9999 call psb_error_handler(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_mold.F90 b/cuda/impl/psb_s_cuda_hlg_mold.F90 index 90e9cebf..89e329e7 100644 --- a/cuda/impl/psb_s_cuda_hlg_mold.F90 +++ b/cuda/impl/psb_s_cuda_hlg_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 index d5b9333c..03742958 100644 --- a/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_reallocate_nz -#else - use psb_s_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz @@ -52,10 +47,8 @@ subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) call a%psb_s_hll_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_scal.F90 b/cuda/impl/psb_s_cuda_hlg_scal.F90 index e803a63d..7074b8b6 100644 --- a/cuda/impl/psb_s_cuda_hlg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scal.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scal -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) @@ -60,10 +55,8 @@ subroutine psb_s_cuda_hlg_scal(d,a,info,side) call a%psb_s_hll_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_scals.F90 b/cuda/impl/psb_s_cuda_hlg_scals.F90 index eec592e1..2c9f5ae8 100644 --- a/cuda/impl/psb_s_cuda_hlg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hlg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scals -#else - use psb_s_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -59,10 +54,8 @@ subroutine psb_s_cuda_hlg_scals(d,a,info) call a%psb_s_hll_sparse_mat%scal(d,info) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 index 14a2a629..91cfd5ad 100644 --- a/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_to_gpu -#else - use psb_s_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a @@ -50,7 +45,6 @@ subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return n = a%get_nrows() @@ -63,6 +57,5 @@ subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) if (info == 0) info = & & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) ! if (info /= 0) goto 9999 -#endif end subroutine psb_s_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 index 2b964f91..94696949 100644 --- a/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_vect_mv -#else - use psb_s_cuda_hlg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,9 +64,7 @@ subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) goto 9999 endif - tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -115,9 +108,6 @@ subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 index 0cf1e2bc..d4649d4c 100644 --- a/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_allocate_mnnz -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) call a%psb_s_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_csmm.F90 b/cuda/impl/psb_s_cuda_hybg_csmm.F90 index f89df384..f321ce8b 100644 --- a/cuda/impl/psb_s_cuda_hybg_csmm.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmm -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +88,6 @@ subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_csmv.F90 b/cuda/impl/psb_s_cuda_hybg_csmv.F90 index 01642146..5f1a8c90 100644 --- a/cuda/impl/psb_s_cuda_hybg_csmv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmv -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a real(psb_spk_), intent(in) :: alpha, beta, x(:) @@ -95,7 +91,6 @@ subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -123,9 +118,6 @@ subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 index f0006f5c..722505ff 100644 --- a/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_inner_vect_sv.F90 @@ -33,13 +33,9 @@ subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_inner_vect_sv -#else - use psb_s_cuda_hybg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a @@ -76,7 +72,6 @@ subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=szero)) then call x%sync() call y%sync() @@ -113,12 +108,6 @@ subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='hybg_vect_sv') diff --git a/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 index 7ee15f52..7311ed64 100644 --- a/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_reallocate_nz -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) ! call a%psb_s_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_scal.F90 b/cuda/impl/psb_s_cuda_hybg_scal.F90 index 7a3978b7..d6f8eb8d 100644 --- a/cuda/impl/psb_s_cuda_hybg_scal.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scal -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d(:) @@ -60,10 +56,8 @@ subroutine psb_s_cuda_hybg_scal(d,a,info,side) call a%psb_s_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_scals.F90 b/cuda/impl/psb_s_cuda_hybg_scals.F90 index a19ae3f6..f80d8efe 100644 --- a/cuda/impl/psb_s_cuda_hybg_scals.F90 +++ b/cuda/impl/psb_s_cuda_hybg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scals -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a real(psb_spk_), intent(in) :: d @@ -60,10 +56,8 @@ subroutine psb_s_cuda_hybg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 index ec415176..168a0981 100644 --- a/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_to_gpu -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -148,7 +143,6 @@ subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in HYBG_TO_GPU ',info end if -#endif end subroutine psb_s_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 index a83c4561..6273f31a 100644 --- a/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 +++ b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_vect_mv -#else - use psb_s_cuda_hybg_mat_mod -#endif use psb_s_cuda_vect_mod implicit none class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a @@ -71,8 +67,6 @@ subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') - -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +106,6 @@ subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 index a9e297bd..b61e94d6 100644 --- a/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_coo -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -51,9 +46,7 @@ subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif if (info /= 0) goto 9999 return diff --git a/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 index 54bc0ae4..52643a10 100644 --- a/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_fmt -#else - use psb_s_cuda_csrg_mat_mod -#endif implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,9 +50,7 @@ subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) class default call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_s_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 index fda60d96..1b2fe8a1 100644 --- a/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_mv_diag_from_coo -#else - use psb_s_cuda_diag_mat_mod -#endif implicit none diff --git a/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 index 447e2971..e8dbbabf 100644 --- a/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_coo -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,5 +52,4 @@ subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) return - end subroutine psb_s_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 index e88080dd..21d9a339 100644 --- a/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_fmt -#else - use psb_s_cuda_elg_mat_mod -#endif implicit none class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a @@ -49,9 +44,7 @@ subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) !locals type(psb_s_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ @@ -65,13 +58,9 @@ subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat call move_alloc(b%irn, a%irn) call move_alloc(b%idiag, a%idiag) @@ -87,9 +76,7 @@ subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) end if a%nzt = nza call b%free() -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default call b%mv_to_coo(tmp,info) diff --git a/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 index f3252eb2..b0370ebb 100644 --- a/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_mv_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_s_cuda_hdiag_mat_mod -#endif implicit none @@ -54,16 +48,12 @@ subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 index 9810a85e..4c8aab71 100644 --- a/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_coo -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 index 700dc151..a162e2aa 100644 --- a/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_fmt -#else - use psb_s_cuda_hlg_mat_mod -#endif implicit none class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 index ca9f34c1..ddae4546 100644 --- a/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_coo -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 index 5ba606af..c6250104 100644 --- a/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_fmt -#else - use psb_s_cuda_hybg_mat_mod -#endif implicit none class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a @@ -54,9 +50,7 @@ subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) class default call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_s_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 index 186190ac..90f3fb4f 100644 --- a/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 @@ -32,12 +32,8 @@ subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_coo -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -48,10 +44,8 @@ subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 index d1e1a82d..26490a15 100644 --- a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 @@ -32,12 +32,8 @@ subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_fmt -#else - use psb_z_cuda_csrg_mat_mod -#endif !use iso_c_binding implicit none @@ -53,9 +49,7 @@ subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) class default call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_z_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 index c303b127..34706502 100644 --- a/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_cp_diag_from_coo -#else - use psb_z_cuda_diag_mat_mod -#endif implicit none class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) info = psb_success_ call a%psb_z_dia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 index 4b18b89b..e8553cbf 100644 --- a/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 @@ -28,20 +28,14 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_coo use psi_ext_util_mod use psb_cuda_env_mod -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,16 +51,11 @@ subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) integer(psb_ipk_), allocatable :: idisp(:) info = psb_success_ -#ifdef HAVE_SPGPU hacksize = max(1,psb_cuda_WarpSize()) -#else - hacksize = 1 -#endif if (b%is_dev()) call b%sync() if (b%is_by_rows()) then -#ifdef HAVE_SPGPU call psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) @@ -82,15 +71,8 @@ subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & & a%irn,idisp,b%ja,b%val, a%deviceMat) call a%set_dev() -#else - - call psi_z_convert_ell_from_coo(a,b,info,hacksize=hacksize) - call a%set_host() -#endif - else call b%cp_to_coo(tmp,info) -#ifdef HAVE_SPGPU call psi_z_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) @@ -107,11 +89,6 @@ subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) call a%set_dev() -#else - - call psi_z_convert_ell_from_coo(a,tmp,info,hacksize=hacksize) - call a%set_host() -#endif end if if (info /= psb_success_) goto 9999 diff --git a/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 index 6fa91de6..85066cef 100644 --- a/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_fmt -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -51,9 +47,7 @@ subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ if (b%is_dev()) call b%sync() @@ -67,13 +61,9 @@ subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) @@ -88,9 +78,7 @@ subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) a%val(1:m,1:nzm) = b%val(1:m,1:nzm) end if a%nzt = nza -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default diff --git a/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 index c94d8824..36013faa 100644 --- a/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_cp_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_z_cuda_hdiag_mat_mod -#endif implicit none class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a @@ -53,16 +47,12 @@ subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 index 1607f1b6..f7be0835 100644 --- a/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_coo -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -61,11 +57,7 @@ subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() -#ifdef HAVE_SPGPU hksz = max(1,psb_cuda_WarpSize()) -#else - hksz = psi_get_hksz() -#endif if (b%is_by_rows()) then diff --git a/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 index e8c1f95d..253a034f 100644 --- a/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_fmt -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) call a%cp_from_coo(b,info) class default call a%psb_z_hll_sparse_mat%cp_from_fmt(b,info) -#ifdef HAVE_SPGPU if (info == 0) call a%to_gpu(info) -#endif end select if (info /= 0) goto 9999 diff --git a/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 index 6031526a..a6491d68 100644 --- a/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_coo -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -49,10 +45,8 @@ subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 index 0202ef24..748fadca 100644 --- a/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_fmt -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -53,9 +49,7 @@ subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) class default call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_z_cuda_cp_hybg_from_fmt diff --git a/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 index a7533e59..a7988dd3 100644 --- a/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_allocate_mnnz -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) call a%psb_z_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() if (info == 0) call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_csmm.F90 b/cuda/impl/psb_z_cuda_csrg_csmm.F90 index 49fb9fcf..731b7417 100644 --- a/cuda/impl/psb_z_cuda_csrg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmm -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -94,7 +90,6 @@ subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -119,9 +114,6 @@ subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_csmv.F90 b/cuda/impl/psb_z_cuda_csrg_csmv.F90 index 54ad6f4f..f6b01e7e 100644 --- a/cuda/impl/psb_z_cuda_csrg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmv -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -96,7 +92,6 @@ subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -124,9 +119,6 @@ subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 index bb3b49d5..78db4435 100644 --- a/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_from_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_csrg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_from_gpu -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +44,6 @@ subroutine psb_z_cuda_csrg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat%mat))) then call a%free() return @@ -68,6 +63,5 @@ subroutine psb_z_cuda_csrg_from_gpu(a,info) #endif call a%set_sync() -#endif end subroutine psb_z_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 index 6c7b1fcb..9a3f8281 100644 --- a/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 @@ -32,13 +32,9 @@ subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_inner_vect_sv -#else - use psb_z_cuda_csrg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a @@ -75,7 +71,6 @@ subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=dzero)) then call x%sync() call y%sync() @@ -112,12 +107,6 @@ subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='csrg_vect_sv') diff --git a/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 index 61ae0f59..964cd84e 100644 --- a/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_reallocate_nz -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) ! call a%psb_z_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_scal.F90 b/cuda/impl/psb_z_cuda_csrg_scal.F90 index a2099933..9d97433e 100644 --- a/cuda/impl/psb_z_cuda_csrg_scal.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scal -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) @@ -58,10 +54,8 @@ subroutine psb_z_cuda_csrg_scal(d,a,info,side) call a%psb_z_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_scals.F90 b/cuda/impl/psb_z_cuda_csrg_scals.F90 index 72fee99b..1479ea3a 100644 --- a/cuda/impl/psb_z_cuda_csrg_scals.F90 +++ b/cuda/impl/psb_z_cuda_csrg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scals -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d @@ -56,10 +52,8 @@ subroutine psb_z_cuda_csrg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index c6f217ab..f7e65627 100644 --- a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_to_gpu -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -320,6 +315,5 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in CSRG_TO_GPU ',info end if -#endif end subroutine psb_z_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 index 964134fb..977d7ff9 100644 --- a/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_vect_mv -#else - use psb_z_cuda_csrg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a @@ -72,7 +68,6 @@ subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= zzero) then @@ -112,9 +107,6 @@ subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_diag_csmv.F90 b/cuda/impl/psb_z_cuda_diag_csmv.F90 index 2e86f0f8..fde7147e 100644 --- a/cuda/impl/psb_z_cuda_diag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_diag_csmv.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_csmv -#else - use psb_z_cuda_diag_mat_mod -#endif implicit none class(psb_z_cuda_diag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +90,6 @@ subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +116,6 @@ subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +124,4 @@ subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_z_cuda_diag_csmv diff --git a/cuda/impl/psb_z_cuda_diag_to_gpu.F90 b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 index a28858b5..672ce938 100644 --- a/cuda/impl/psb_z_cuda_diag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_to_gpu -#else - use psb_z_cuda_diag_mat_mod -#endif use iso_c_binding implicit none class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a @@ -47,13 +43,10 @@ subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d -#ifdef HAVE_SPGPU type(diagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return n = size(a%data,1) @@ -69,6 +62,5 @@ subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) if (info == 0) info = & & writeDiagDevice(a%deviceMat,a%data,a%offset,n) ! if (info /= 0) goto 9999 -#endif end subroutine psb_z_cuda_diag_to_gpu diff --git a/cuda/impl/psb_z_cuda_diag_vect_mv.F90 b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 index 12f3c3e7..c6d11f04 100644 --- a/cuda/impl/psb_z_cuda_diag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_vect_mv -#else - use psb_z_cuda_diag_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_diag_sparse_mat), intent(in) :: a @@ -71,7 +65,6 @@ subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= szero) then @@ -112,9 +105,6 @@ subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 index c2a641b6..c79a7e8c 100644 --- a/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 +++ b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 @@ -32,13 +32,9 @@ subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_vect_mv -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta @@ -123,13 +119,9 @@ end subroutine psb_z_cuda_dnsg_vect_mv subroutine psb_z_cuda_dnsg_mold(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_mold -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a class(psb_z_base_sparse_mat), intent(inout), allocatable :: b @@ -190,17 +182,12 @@ end subroutine psb_z_cuda_dnsg_mold !!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz !!$ end interface - subroutine psb_z_cuda_dnsg_to_gpu(a,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_to_gpu -#else - use psb_z_cuda_dnsg_mat_mod -#endif class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info Integer(Psb_ipk_) :: err_act, pitch, lda @@ -209,15 +196,12 @@ subroutine psb_z_cuda_dnsg_to_gpu(a,info) call psb_erractionsave(err_act) info = psb_success_ -#ifdef HAVE_SPGPU if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& & spgpu_type_complex_double,1) if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info - -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return @@ -233,13 +217,9 @@ end subroutine psb_z_cuda_dnsg_to_gpu subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_coo -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -272,13 +252,9 @@ end subroutine psb_z_cuda_cp_dnsg_from_coo subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_fmt -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -348,13 +324,9 @@ end subroutine psb_z_cuda_cp_dnsg_from_fmt subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_coo -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a @@ -383,18 +355,13 @@ subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) return end subroutine psb_z_cuda_mv_dnsg_from_coo - - + subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) use psb_base_mod use psb_z_cuda_vect_mod -#ifdef HAVE_SPGPU use dnsdev_mod use psb_z_vectordev_mod use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_fmt -#else - use psb_z_cuda_dnsg_mat_mod -#endif implicit none class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a class(psb_z_base_sparse_mat), intent(inout) :: b diff --git a/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 index a2e36feb..5c54d00b 100644 --- a/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_allocate_mnnz -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -47,9 +41,7 @@ subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) Integer(Psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ @@ -74,13 +66,9 @@ subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) goto 9999 endif -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_double,1) ld = gpu_parms%pitch nz_ = gpu_parms%maxRowSize -#else - ld = m -#endif if (info == psb_success_) call psb_realloc(m,a%irn,info) if (info == psb_success_) call psb_realloc(m,a%idiag,info) @@ -98,10 +86,8 @@ subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) call a%set_dupl(psb_dupl_def_) end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_asb.f90 b/cuda/impl/psb_z_cuda_elg_asb.f90 index 511183f5..65b58425 100644 --- a/cuda/impl/psb_z_cuda_elg_asb.f90 +++ b/cuda/impl/psb_z_cuda_elg_asb.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_asb(a) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_elg_csmm.F90 b/cuda/impl/psb_z_cuda_elg_csmm.F90 index d4034b65..4414f0e6 100644 --- a/cuda/impl/psb_z_cuda_elg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmm -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +87,6 @@ subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -119,9 +112,6 @@ subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_csmv.F90 b/cuda/impl/psb_z_cuda_elg_csmv.F90 index eba12d16..060147b9 100644 --- a/cuda/impl/psb_z_cuda_elg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_elg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmv -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) @@ -122,9 +116,6 @@ subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_csput.F90 b/cuda/impl/psb_z_cuda_elg_csput.F90 index 5a52f4f8..a9eb7c43 100644 --- a/cuda/impl/psb_z_cuda_elg_csput.F90 +++ b/cuda/impl/psb_z_cuda_elg_csput.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_a -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -128,13 +123,9 @@ subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) use psb_base_mod use iso_c_binding -#ifdef HAVE_SPGPU use elldev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_v use psb_z_cuda_vect_mod -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_z_cuda_elg_from_gpu.F90 b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 index ffed4349..b1291ab2 100644 --- a/cuda/impl/psb_z_cuda_elg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_z_cuda_elg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_from_gpu -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_z_cuda_elg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (.not.(c_associated(a%deviceMat))) then call a%free() return @@ -69,6 +63,5 @@ subroutine psb_z_cuda_elg_from_gpu(a,info) if (info == 0) info = & & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_z_cuda_elg_from_gpu diff --git a/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 index 7564d5dd..443e7cbb 100644 --- a/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 @@ -27,19 +27,14 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! - +! subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_inner_vect_sv -#else - use psb_z_cuda_elg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_elg_sparse_mat), intent(in) :: a diff --git a/cuda/impl/psb_z_cuda_elg_mold.F90 b/cuda/impl/psb_z_cuda_elg_mold.F90 index e027c9f2..e9a3891d 100644 --- a/cuda/impl/psb_z_cuda_elg_mold.F90 +++ b/cuda/impl/psb_z_cuda_elg_mold.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_elg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 index 16cebe70..3a8c2760 100644 --- a/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_reallocate_nz -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -64,10 +59,8 @@ subroutine psb_z_cuda_elg_reallocate_nz(nz,a) goto 9999 end if -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nzrm) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_scal.F90 b/cuda/impl/psb_z_cuda_elg_scal.F90 index 4802aaaa..a9846362 100644 --- a/cuda/impl/psb_z_cuda_elg_scal.F90 +++ b/cuda/impl/psb_z_cuda_elg_scal.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scal -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) @@ -63,10 +58,8 @@ subroutine psb_z_cuda_elg_scal(d,a,info,side) call a%psb_z_ell_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_elg_scals.F90 b/cuda/impl/psb_z_cuda_elg_scals.F90 index 5db823da..b4462589 100644 --- a/cuda/impl/psb_z_cuda_elg_scals.F90 +++ b/cuda/impl/psb_z_cuda_elg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scals -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d @@ -59,10 +54,8 @@ subroutine psb_z_cuda_elg_scals(d,a,info) a%val(:,:) = a%val(:,:) * d -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_elg_to_gpu.F90 b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 index 6d86bdd9..3a0ecd14 100644 --- a/cuda/impl/psb_z_cuda_elg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 @@ -29,30 +29,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_to_gpu -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(in), optional :: nzrm integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -88,6 +80,5 @@ subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm) if (info == 0) info = & & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_z_cuda_elg_to_gpu diff --git a/cuda/impl/psb_z_cuda_elg_trim.f90 b/cuda/impl/psb_z_cuda_elg_trim.f90 index 3d261150..98f92efe 100644 --- a/cuda/impl/psb_z_cuda_elg_trim.f90 +++ b/cuda/impl/psb_z_cuda_elg_trim.f90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_trim(a) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_elg_vect_mv.F90 b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 index 4bd1b3ed..1b1a0720 100644 --- a/cuda/impl/psb_z_cuda_elg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_vect_mv -#else - use psb_z_cuda_elg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_elg_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (a%is_dev()) call a%sync() if (.not.x%is_host()) call x%sync() @@ -116,10 +110,6 @@ subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - if (a%is_dev()) call a%sync() - call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hdiag_csmv.F90 b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 index 8be14704..187655a8 100644 --- a/cuda/impl/psb_z_cuda_hdiag_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmv -#else - use psb_z_cuda_hdiag_mat_mod -#endif implicit none class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -132,5 +123,4 @@ subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) return - end subroutine psb_z_cuda_hdiag_csmv diff --git a/cuda/impl/psb_z_cuda_hdiag_mold.F90 b/cuda/impl/psb_z_cuda_hdiag_mold.F90 index 33fdd8eb..7b86b3a3 100644 --- a/cuda/impl/psb_z_cuda_hdiag_mold.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hdiag_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 index 47126aca..a1140961 100644 --- a/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 @@ -29,29 +29,21 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hdiag_to_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_to_gpu -#else - use psb_z_cuda_hdiag_mat_mod -#endif use iso_c_binding implicit none class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight -#ifdef HAVE_SPGPU type(hdiagdev_parms) :: gpu_parms -#endif info = 0 -#ifdef HAVE_SPGPU nr = a%get_nrows() nc = a%get_ncols() hacksize = a%hackSize @@ -81,6 +73,4 @@ subroutine psb_z_cuda_hdiag_to_gpu(a,info) if (info == 0) info = & & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) -#endif - end subroutine psb_z_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 index cf0b3457..aef5628c 100644 --- a/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_vect_mv -#else - use psb_z_cuda_hdiag_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a @@ -71,7 +66,6 @@ subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= dzero) then @@ -112,9 +106,6 @@ subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 index 228244f1..f8566661 100644 --- a/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_allocate_mnnz -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -47,19 +42,15 @@ subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) Integer(psb_ipk_) :: err_act, info, nz_,ld character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. -#ifdef HAVE_SPGPU type(hlldev_parms) :: gpu_parms -#endif call psb_erractionsave(err_act) info = psb_success_ call a%psb_z_hll_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz_) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_csmm.F90 b/cuda/impl/psb_z_cuda_hlg_csmm.F90 index 325ab0d0..8eb30ef9 100644 --- a/cuda/impl/psb_z_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmm.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmm -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -93,7 +88,6 @@ subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -118,9 +112,6 @@ subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return @@ -128,5 +119,4 @@ subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) return - end subroutine psb_z_cuda_hlg_csmm diff --git a/cuda/impl/psb_z_cuda_hlg_csmv.F90 b/cuda/impl/psb_z_cuda_hlg_csmv.F90 index ac84190e..1a807016 100644 --- a/cuda/impl/psb_z_cuda_hlg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_csmv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmv -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -94,7 +89,6 @@ subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 index 4db6c3ce..8260ecd7 100644 --- a/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_from_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_from_gpu(a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_from_gpu -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -48,7 +43,6 @@ subroutine psb_z_cuda_hlg_from_gpu(a,info) info = 0 -#ifdef HAVE_SPGPU if (a%is_sync()) return if (a%is_host()) return if (.not.(c_associated(a%deviceMat))) then @@ -71,6 +65,5 @@ subroutine psb_z_cuda_hlg_from_gpu(a,info) if (info == 0) info = & & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) call a%set_sync() -#endif end subroutine psb_z_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 index f99a5a9e..6a914fc0 100644 --- a/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_inner_vect_sv -#else - use psb_z_cuda_hlg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,11 +64,9 @@ subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) goto 9999 end if - call psb_erractionrestore(err_act) return - 9999 call psb_error_handler(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_mold.F90 b/cuda/impl/psb_z_cuda_hlg_mold.F90 index cc9ad510..bc631ece 100644 --- a/cuda/impl/psb_z_cuda_hlg_mold.F90 +++ b/cuda/impl/psb_z_cuda_hlg_mold.F90 @@ -29,7 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_mold(a,b,info) use psb_base_mod diff --git a/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 index aaba9be5..ac3fbbd1 100644 --- a/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_reallocate_nz -#else - use psb_z_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none integer(psb_ipk_), intent(in) :: nz @@ -52,10 +47,8 @@ subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) call a%psb_z_hll_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_scal.F90 b/cuda/impl/psb_z_cuda_hlg_scal.F90 index 3ffda36a..7b9df998 100644 --- a/cuda/impl/psb_z_cuda_hlg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scal.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scal -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) @@ -60,10 +55,8 @@ subroutine psb_z_cuda_hlg_scal(d,a,info,side) call a%psb_z_hll_sparse_mat%scal(d,info,side) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_scals.F90 b/cuda/impl/psb_z_cuda_hlg_scals.F90 index bae50c7c..b867e3b8 100644 --- a/cuda/impl/psb_z_cuda_hlg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hlg_scals.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scals -#else - use psb_z_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -59,10 +54,8 @@ subroutine psb_z_cuda_hlg_scals(d,a,info) call a%psb_z_hll_sparse_mat%scal(d,info) if (info /= psb_success_) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 index 93c9f043..8f81842a 100644 --- a/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_to_gpu -#else - use psb_z_cuda_hlg_mat_mod -#endif use iso_c_binding implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a @@ -50,7 +45,6 @@ subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return n = a%get_nrows() @@ -63,6 +57,5 @@ subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) if (info == 0) info = & & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) ! if (info /= 0) goto 9999 -#endif end subroutine psb_z_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 index f377efec..e2e93b85 100644 --- a/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_vect_mv -#else - use psb_z_cuda_hlg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a @@ -69,9 +64,7 @@ subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) goto 9999 endif - tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= zzero) then @@ -115,9 +108,6 @@ subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) end select end if -#else - call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 index 0c6f9aa9..39e10134 100644 --- a/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_allocate_mnnz -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: m,n class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -52,11 +48,9 @@ subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) call a%psb_z_csr_sparse_mat%allocate(m,n,nz) -#ifdef HAVE_SPGPU info = initFcusparse() call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_csmm.F90 b/cuda/impl/psb_z_cuda_hybg_csmm.F90 index d4a32420..bcd1874a 100644 --- a/cuda/impl/psb_z_cuda_hybg_csmm.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmm.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmm -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) @@ -92,8 +88,6 @@ subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) goto 9999 end if - -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -121,9 +115,6 @@ subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_csmv.F90 b/cuda/impl/psb_z_cuda_hybg_csmv.F90 index 180a8ae1..d7c5cab6 100644 --- a/cuda/impl/psb_z_cuda_hybg_csmv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_csmv.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmv -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a complex(psb_dpk_), intent(in) :: alpha, beta, x(:) @@ -95,7 +91,6 @@ subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) goto 9999 end if -#ifdef HAVE_SPGPU if (tra) then call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) else @@ -123,9 +118,6 @@ subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) endif -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 index 1df47788..36fa3890 100644 --- a/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_inner_vect_sv.F90 @@ -33,13 +33,9 @@ subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_inner_vect_sv -#else - use psb_z_cuda_hybg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a @@ -76,7 +72,6 @@ subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') -#ifdef HAVE_SPGPU if (tra.or.(beta/=zzero)) then call x%sync() call y%sync() @@ -113,12 +108,6 @@ subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call x%sync() - call y%sync() - call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) - call y%set_host() -#endif if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name, a_err='hybg_vect_sv') diff --git a/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 index 5278ba35..f295b332 100644 --- a/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 +++ b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_reallocate_nz -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none integer(psb_ipk_), intent(in) :: nz class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -55,10 +51,8 @@ subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) ! call a%psb_z_csr_sparse_mat%reallocate(nz) -#ifdef HAVE_SPGPU call a%to_gpu(info,nzrm=nz) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_scal.F90 b/cuda/impl/psb_z_cuda_hybg_scal.F90 index cd436e76..40b9d6b0 100644 --- a/cuda/impl/psb_z_cuda_hybg_scal.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scal.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_scal(d,a,info,side) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scal -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d(:) @@ -60,10 +56,8 @@ subroutine psb_z_cuda_hybg_scal(d,a,info,side) call a%psb_z_csr_sparse_mat%scal(d,info,side=side) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_scals.F90 b/cuda/impl/psb_z_cuda_hybg_scals.F90 index 0a9ee79d..bd7b9224 100644 --- a/cuda/impl/psb_z_cuda_hybg_scals.F90 +++ b/cuda/impl/psb_z_cuda_hybg_scals.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_scals(d,a,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scals -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a complex(psb_dpk_), intent(in) :: d @@ -60,10 +56,8 @@ subroutine psb_z_cuda_hybg_scals(d,a,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 index 107b5049..f3a32c81 100644 --- a/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_to_gpu -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a integer(psb_ipk_), intent(out) :: info @@ -51,7 +47,6 @@ subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) info = 0 -#ifdef HAVE_SPGPU if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return m = a%get_nrows() @@ -148,7 +143,6 @@ subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) if (info /= 0) then write(0,*) 'Error in HYBG_TO_GPU ',info end if -#endif end subroutine psb_z_cuda_hybg_to_gpu #endif diff --git a/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 index 22751f2d..0cb1db22 100644 --- a/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 +++ b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 @@ -33,14 +33,10 @@ subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use elldev_mod use psb_vectordev_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_vect_mv -#else - use psb_z_cuda_hybg_mat_mod -#endif use psb_z_cuda_vect_mod implicit none class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a @@ -71,8 +67,6 @@ subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') - -#ifdef HAVE_SPGPU if (tra) then if (.not.x%is_host()) call x%sync() if (beta /= zzero) then @@ -112,9 +106,6 @@ subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) call y%bld(ry) end select end if -#else - call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) -#endif if (info /= 0) goto 9999 call psb_erractionrestore(err_act) return diff --git a/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 index d5390ee3..7cfd4f19 100644 --- a/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_coo -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -51,9 +46,7 @@ subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif if (info /= 0) goto 9999 return diff --git a/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 index e2bfdb73..f03294c8 100644 --- a/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 @@ -29,16 +29,11 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_fmt -#else - use psb_z_cuda_csrg_mat_mod -#endif implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a @@ -55,9 +50,7 @@ subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) class default call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_z_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 index b61813bf..8e702e7e 100644 --- a/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use diagdev_mod use psb_vectordev_mod use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_mv_diag_from_coo -#else - use psb_z_cuda_diag_mat_mod -#endif implicit none diff --git a/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 index e3ff4036..f0cb23f3 100644 --- a/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 @@ -29,17 +29,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! - subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_coo -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -57,5 +52,4 @@ subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) return - end subroutine psb_z_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 index 07a80173..29f63423 100644 --- a/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 @@ -28,18 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - + subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use elldev_mod use psb_vectordev_mod use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_fmt -#else - use psb_z_cuda_elg_mat_mod -#endif implicit none class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a @@ -49,9 +44,7 @@ subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) !locals type(psb_z_coo_sparse_mat) :: tmp Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m -#ifdef HAVE_SPGPU type(elldev_parms) :: gpu_parms -#endif info = psb_success_ @@ -65,13 +58,9 @@ subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) m = b%get_nrows() nc = b%get_ncols() nza = b%get_nzeros() -#ifdef HAVE_SPGPU gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) ld = gpu_parms%pitch nzm = gpu_parms%maxRowSize -#else - ld = m -#endif a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat call move_alloc(b%irn, a%irn) call move_alloc(b%idiag, a%idiag) @@ -87,9 +76,7 @@ subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) end if a%nzt = nza call b%free() -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif class default call b%mv_to_coo(tmp,info) diff --git a/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 index f25e6370..dd6dae1e 100644 --- a/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hdiagdev_mod use psb_vectordev_mod use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_mv_hdiag_from_coo use psb_cuda_env_mod -#else - use psb_z_cuda_hdiag_mat_mod -#endif implicit none @@ -54,16 +48,12 @@ subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) info = psb_success_ -#ifdef HAVE_SPGPU a%hacksize = psb_cuda_WarpSize() -#endif call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info) -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 index 3bc630de..609680b9 100644 --- a/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 @@ -28,19 +28,13 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_cuda_env_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_coo -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 index d746a341..e67c8d83 100644 --- a/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 @@ -28,18 +28,12 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use hlldev_mod use psb_vectordev_mod use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_fmt -#else - use psb_z_cuda_hlg_mat_mod -#endif implicit none class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a diff --git a/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 index 7d0d9eec..69fda3d6 100644 --- a/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_coo -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -50,10 +46,8 @@ subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) if (info /= 0) goto 9999 -#ifdef HAVE_SPGPU call a%to_gpu(info) if (info /= 0) goto 9999 -#endif return diff --git a/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 index 7bfc27e3..55c33a09 100644 --- a/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 @@ -33,12 +33,8 @@ subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) use psb_base_mod -#ifdef HAVE_SPGPU use cusparse_mod use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_fmt -#else - use psb_z_cuda_hybg_mat_mod -#endif implicit none class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a @@ -54,9 +50,7 @@ subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) class default call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) if (info /= 0) return -#ifdef HAVE_SPGPU call a%to_gpu(info) -#endif end select end subroutine psb_z_cuda_mv_hybg_from_fmt #endif diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c index 71d5c472..241f1115 100644 --- a/cuda/ivectordev.c +++ b/cuda/ivectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "ivectordev.h" @@ -177,6 +176,3 @@ int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void return SPGPU_SUCCESS; } - -#endif - diff --git a/cuda/ivectordev.h b/cuda/ivectordev.h index 5f7ca974..2db54be4 100644 --- a/cuda/ivectordev.h +++ b/cuda/ivectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include "vectordev.h" #include "cuda_runtime.h" @@ -60,5 +59,3 @@ int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first int indexBase, int beta); int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, int hfirst, void* host_values, int indexBase, int beta); - -#endif diff --git a/cuda/psb_base_vectordev_mod.F90 b/cuda/psb_base_vectordev_mod.F90 index f8c303d0..da02b2f3 100644 --- a/cuda/psb_base_vectordev_mod.F90 +++ b/cuda/psb_base_vectordev_mod.F90 @@ -40,9 +40,6 @@ module psb_base_vectordev_mod integer(c_int) :: pitch integer(c_int) :: size end type multivec_dev_parms - -#ifdef HAVE_SPGPU - interface function FallocMultiVecDevice(deviceVec,count,Size,elementType) & @@ -54,7 +51,6 @@ module psb_base_vectordev_mod end function FallocMultiVecDevice end interface - interface subroutine unregisterMapped(buf) & & bind(c,name='unregisterMapped') @@ -98,7 +94,4 @@ module psb_base_vectordev_mod end function getMultiVecDevicePitch end interface -#endif - - end module psb_base_vectordev_mod diff --git a/cuda/psb_c_cuda_csrg_mat_mod.F90 b/cuda/psb_c_cuda_csrg_mat_mod.F90 index a98d7e99..1fdeec4a 100644 --- a/cuda/psb_c_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_c_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_c_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(c_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_c_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_c_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_c_cuda_csrg_from_gpu final :: c_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_csrg_mold -#endif end type psb_c_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_csrg_get_nzeros, c_cuda_csrg_free, c_cuda_csrg_get_fmt, & & c_cuda_csrg_get_size, c_cuda_csrg_sizeof, c_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine c_cuda_csrg_finalize -#else - interface - subroutine psb_c_cuda_csrg_mold(a,b,info) - import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_csrg_mold - end interface - -#endif - end module psb_c_cuda_csrg_mat_mod diff --git a/cuda/psb_c_cuda_diag_mat_mod.F90 b/cuda/psb_c_cuda_diag_mat_mod.F90 index 1d5db05b..93ffe498 100644 --- a/cuda/psb_c_cuda_diag_mat_mod.F90 +++ b/cuda/psb_c_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_c_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_c_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_c_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_c_cuda_diag_to_gpu final :: c_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_diag_mold -#endif end type psb_c_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_diag_get_nzeros, c_cuda_diag_free, c_cuda_diag_get_fmt, & & c_cuda_diag_get_size, c_cuda_diag_sizeof, c_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine c_cuda_diag_finalize -#else - - interface - subroutine psb_c_cuda_diag_mold(a,b,info) - import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_diag_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_diag_mold - end interface - -#endif - end module psb_c_cuda_diag_mat_mod diff --git a/cuda/psb_c_cuda_dnsg_mat_mod.F90 b/cuda/psb_c_cuda_dnsg_mat_mod.F90 index e89e117b..b0ca8c46 100644 --- a/cuda/psb_c_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_c_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_c_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_c_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_c_cuda_dnsg_to_gpu final :: c_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold -#endif end type psb_c_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_dnsg_get_nzeros, c_cuda_dnsg_free, c_cuda_dnsg_get_fmt, & & c_cuda_dnsg_get_size, c_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine c_cuda_dnsg_finalize -#else - - interface - subroutine psb_c_cuda_dnsg_mold(a,b,info) - import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_dnsg_mold - end interface - -#endif - end module psb_c_cuda_dnsg_mat_mod diff --git a/cuda/psb_c_cuda_elg_mat_mod.F90 b/cuda/psb_c_cuda_elg_mat_mod.F90 index 43250ce3..c9b48005 100644 --- a/cuda/psb_c_cuda_elg_mat_mod.F90 +++ b/cuda/psb_c_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_c_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_c_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_c_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_c_cuda_elg_asb final :: c_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_elg_mold - procedure, pass(a) :: asb => psb_c_cuda_elg_asb -#endif end type psb_c_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_elg_get_nzeros, c_cuda_elg_free, c_cuda_elg_get_fmt, & & c_cuda_elg_get_size, c_cuda_elg_sizeof, c_cuda_elg_get_nz_row, c_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine c_cuda_elg_finalize -#else - - interface - subroutine psb_c_cuda_elg_asb(a) - import :: psb_c_cuda_elg_sparse_mat - class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_c_cuda_elg_asb - end interface - - interface - subroutine psb_c_cuda_elg_mold(a,b,info) - import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_elg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_elg_mold - end interface - -#endif - end module psb_c_cuda_elg_mat_mod diff --git a/cuda/psb_c_cuda_hdiag_mat_mod.F90 b/cuda/psb_c_cuda_hdiag_mat_mod.F90 index 54f47684..f06e501e 100644 --- a/cuda/psb_c_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_c_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_c_cuda_hdiag_mat_mod type, extends(psb_c_hdia_sparse_mat) :: psb_c_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_c_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_c_cuda_hdiag_to_gpu final :: c_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold -#endif end type psb_c_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_hdiag_get_nzeros, c_cuda_hdiag_free, c_cuda_hdiag_get_fmt, & & c_cuda_hdiag_get_size, c_cuda_hdiag_sizeof, c_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine c_cuda_hdiag_finalize -#else - - interface - subroutine psb_c_cuda_hdiag_mold(a,b,info) - import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_hdiag_mold - end interface - -#endif - end module psb_c_cuda_hdiag_mat_mod diff --git a/cuda/psb_c_cuda_hlg_mat_mod.F90 b/cuda/psb_c_cuda_hlg_mat_mod.F90 index 74284f30..e98f2474 100644 --- a/cuda/psb_c_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_c_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_c_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_c_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_c_cuda_hlg_to_gpu final :: c_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_hlg_mold -#endif end type psb_c_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_hlg_get_nzeros, c_cuda_hlg_free, c_cuda_hlg_get_fmt, & & c_cuda_hlg_get_size, c_cuda_hlg_sizeof, c_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine c_cuda_hlg_finalize -#else - - interface - subroutine psb_c_cuda_hlg_mold(a,b,info) - import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_hlg_mold - end interface - -#endif - end module psb_c_cuda_hlg_mat_mod diff --git a/cuda/psb_c_cuda_hybg_mat_mod.F90 b/cuda/psb_c_cuda_hybg_mat_mod.F90 index d16988ba..1c94bc0f 100644 --- a/cuda/psb_c_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_c_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_c_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(c_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_c_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_c_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_c_cuda_hybg_to_gpu final :: c_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_c_cuda_hybg_mold -#endif end type psb_c_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: c_cuda_hybg_get_nzeros, c_cuda_hybg_free, c_cuda_hybg_get_fmt, & & c_cuda_hybg_get_size, c_cuda_hybg_sizeof, c_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine c_cuda_hybg_finalize -#else - - interface - subroutine psb_c_cuda_hybg_mold(a,b,info) - import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ - class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_c_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_cuda_hybg_mold - end interface - -#endif - end module psb_c_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index be06167e..c140dadb 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -35,20 +35,17 @@ module psb_c_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_c_vect_mod - use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod + use psb_i_vect_mod use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_c_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_c_base_vect_type) :: psb_c_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr complex(c_float_complex), allocatable :: pinned_buffer(:) @@ -105,7 +102,6 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: absval2 => c_cuda_absval2 final :: c_cuda_vect_finalize -#endif end type psb_c_vect_cuda public :: psb_c_vect_cuda_ @@ -126,8 +122,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine c_cuda_device_wait() call psb_cudaSync() end subroutine c_cuda_device_wait @@ -1204,8 +1198,6 @@ contains end subroutine c_cuda_ins_a -#endif - end module psb_c_cuda_vect_mod @@ -1221,20 +1213,16 @@ module psb_c_cuda_multivect_mod use psb_error_mod use psb_c_multivect_mod use psb_c_base_multivect_mod - + use psb_cuda_env_mod use psb_i_multivect_mod -#ifdef HAVE_SPGPU use psb_i_cuda_multivect_mod use psb_c_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_c_base_multivect_type) :: psb_c_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -1276,7 +1264,6 @@ module psb_c_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => c_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => c_cuda_multi_sctb_x final :: c_cuda_multi_vect_finalize -#endif end type psb_c_multivect_cuda public :: psb_c_multivect_cuda @@ -1297,7 +1284,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1981,8 +1967,6 @@ contains end subroutine c_cuda_multi_ins -#endif - end module psb_c_cuda_multivect_mod diff --git a/cuda/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90 index f3c243a6..b15b2371 100644 --- a/cuda/psb_c_vectordev_mod.F90 +++ b/cuda/psb_c_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_c_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedFloatComplex(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedFloatComplex') @@ -385,6 +383,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterFloatComplex -#endif - end module psb_c_vectordev_mod diff --git a/cuda/psb_cuda_env_mod.F90 b/cuda/psb_cuda_env_mod.F90 index 0d1d4ced..4778d229 100644 --- a/cuda/psb_cuda_env_mod.F90 +++ b/cuda/psb_cuda_env_mod.F90 @@ -37,7 +37,6 @@ module psb_cuda_env_mod ! interface psb_cuda_init ! module procedure psb_cuda_init ! end interface -#if defined(HAVE_CUDA) use core_mod interface @@ -126,7 +125,6 @@ module psb_cuda_env_mod use iso_c_binding end subroutine psb_cuda_innerClose end interface -#endif interface function psb_C_DeviceHasUVA() & @@ -209,7 +207,6 @@ Contains info = psb_success_ call psb_erractionsave(err_act) -#if defined (HAVE_CUDA) #if defined(SERIAL_MPI) iam = 0 #else @@ -234,7 +231,6 @@ Contains goto 9999 end if call psb_cudaCreateHandle() -#endif call psb_erractionrestore(err_act) return 9999 call psb_error_handler(ctxt,err_act) @@ -245,18 +241,12 @@ Contains subroutine psb_cuda_DeviceSync() -#if defined(HAVE_CUDA) call psb_cudaSync() -#endif end subroutine psb_cuda_DeviceSync function psb_cuda_getDeviceCount() result(res) integer :: res -#if defined(HAVE_CUDA) res = psb_cuda_inner_getDeviceCount() -#else - res = 0 -#endif end function psb_cuda_getDeviceCount subroutine psb_cuda_exit() diff --git a/cuda/psb_d_cuda_csrg_mat_mod.F90 b/cuda/psb_d_cuda_csrg_mat_mod.F90 index 465c16a7..101959bd 100644 --- a/cuda/psb_d_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_d_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_d_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(d_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_d_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_d_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_d_cuda_csrg_from_gpu final :: d_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_csrg_mold -#endif end type psb_d_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_csrg_get_nzeros, d_cuda_csrg_free, d_cuda_csrg_get_fmt, & & d_cuda_csrg_get_size, d_cuda_csrg_sizeof, d_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine d_cuda_csrg_finalize -#else - interface - subroutine psb_d_cuda_csrg_mold(a,b,info) - import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_csrg_mold - end interface - -#endif - end module psb_d_cuda_csrg_mat_mod diff --git a/cuda/psb_d_cuda_diag_mat_mod.F90 b/cuda/psb_d_cuda_diag_mat_mod.F90 index 1d55faa0..11e4e349 100644 --- a/cuda/psb_d_cuda_diag_mat_mod.F90 +++ b/cuda/psb_d_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_d_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_d_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_d_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_d_cuda_diag_to_gpu final :: d_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_diag_mold -#endif end type psb_d_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_diag_get_nzeros, d_cuda_diag_free, d_cuda_diag_get_fmt, & & d_cuda_diag_get_size, d_cuda_diag_sizeof, d_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine d_cuda_diag_finalize -#else - - interface - subroutine psb_d_cuda_diag_mold(a,b,info) - import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_diag_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_diag_mold - end interface - -#endif - end module psb_d_cuda_diag_mat_mod diff --git a/cuda/psb_d_cuda_dnsg_mat_mod.F90 b/cuda/psb_d_cuda_dnsg_mat_mod.F90 index bb24eb1a..f11e7823 100644 --- a/cuda/psb_d_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_d_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_d_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_d_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_d_cuda_dnsg_to_gpu final :: d_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold -#endif end type psb_d_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_dnsg_get_nzeros, d_cuda_dnsg_free, d_cuda_dnsg_get_fmt, & & d_cuda_dnsg_get_size, d_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine d_cuda_dnsg_finalize -#else - - interface - subroutine psb_d_cuda_dnsg_mold(a,b,info) - import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_dnsg_mold - end interface - -#endif - end module psb_d_cuda_dnsg_mat_mod diff --git a/cuda/psb_d_cuda_elg_mat_mod.F90 b/cuda/psb_d_cuda_elg_mat_mod.F90 index 1ac47664..1af80f2a 100644 --- a/cuda/psb_d_cuda_elg_mat_mod.F90 +++ b/cuda/psb_d_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_d_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_d_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_d_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_d_cuda_elg_asb final :: d_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_elg_mold - procedure, pass(a) :: asb => psb_d_cuda_elg_asb -#endif end type psb_d_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_elg_get_nzeros, d_cuda_elg_free, d_cuda_elg_get_fmt, & & d_cuda_elg_get_size, d_cuda_elg_sizeof, d_cuda_elg_get_nz_row, d_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine d_cuda_elg_finalize -#else - - interface - subroutine psb_d_cuda_elg_asb(a) - import :: psb_d_cuda_elg_sparse_mat - class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_d_cuda_elg_asb - end interface - - interface - subroutine psb_d_cuda_elg_mold(a,b,info) - import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_elg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_elg_mold - end interface - -#endif - end module psb_d_cuda_elg_mat_mod diff --git a/cuda/psb_d_cuda_hdiag_mat_mod.F90 b/cuda/psb_d_cuda_hdiag_mat_mod.F90 index 17bacffe..46b63b43 100644 --- a/cuda/psb_d_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_d_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_d_cuda_hdiag_mat_mod type, extends(psb_d_hdia_sparse_mat) :: psb_d_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_d_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_d_cuda_hdiag_to_gpu final :: d_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold -#endif end type psb_d_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_hdiag_get_nzeros, d_cuda_hdiag_free, d_cuda_hdiag_get_fmt, & & d_cuda_hdiag_get_size, d_cuda_hdiag_sizeof, d_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine d_cuda_hdiag_finalize -#else - - interface - subroutine psb_d_cuda_hdiag_mold(a,b,info) - import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_hdiag_mold - end interface - -#endif - end module psb_d_cuda_hdiag_mat_mod diff --git a/cuda/psb_d_cuda_hlg_mat_mod.F90 b/cuda/psb_d_cuda_hlg_mat_mod.F90 index 19ecb62b..6627f824 100644 --- a/cuda/psb_d_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_d_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_d_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_d_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_d_cuda_hlg_to_gpu final :: d_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_hlg_mold -#endif end type psb_d_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_hlg_get_nzeros, d_cuda_hlg_free, d_cuda_hlg_get_fmt, & & d_cuda_hlg_get_size, d_cuda_hlg_sizeof, d_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine d_cuda_hlg_finalize -#else - - interface - subroutine psb_d_cuda_hlg_mold(a,b,info) - import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_hlg_mold - end interface - -#endif - end module psb_d_cuda_hlg_mat_mod diff --git a/cuda/psb_d_cuda_hybg_mat_mod.F90 b/cuda/psb_d_cuda_hybg_mat_mod.F90 index be4c8392..065c0db3 100644 --- a/cuda/psb_d_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_d_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_d_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(d_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_d_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_d_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_d_cuda_hybg_to_gpu final :: d_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_d_cuda_hybg_mold -#endif end type psb_d_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: d_cuda_hybg_get_nzeros, d_cuda_hybg_free, d_cuda_hybg_get_fmt, & & d_cuda_hybg_get_size, d_cuda_hybg_sizeof, d_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine d_cuda_hybg_finalize -#else - - interface - subroutine psb_d_cuda_hybg_mold(a,b,info) - import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_cuda_hybg_mold - end interface - -#endif - end module psb_d_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 83ec108b..44381c99 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -35,20 +35,17 @@ module psb_d_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_d_vect_mod - use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod + use psb_i_vect_mod use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_d_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_d_base_vect_type) :: psb_d_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: pinned_buffer(:) @@ -105,7 +102,6 @@ module psb_d_cuda_vect_mod procedure, pass(x) :: absval2 => d_cuda_absval2 final :: d_cuda_vect_finalize -#endif end type psb_d_vect_cuda public :: psb_d_vect_cuda_ @@ -126,8 +122,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine d_cuda_device_wait() call psb_cudaSync() end subroutine d_cuda_device_wait @@ -1204,8 +1198,6 @@ contains end subroutine d_cuda_ins_a -#endif - end module psb_d_cuda_vect_mod @@ -1221,20 +1213,16 @@ module psb_d_cuda_multivect_mod use psb_error_mod use psb_d_multivect_mod use psb_d_base_multivect_mod - + use psb_cuda_env_mod use psb_i_multivect_mod -#ifdef HAVE_SPGPU use psb_i_cuda_multivect_mod use psb_d_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_d_base_multivect_type) :: psb_d_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -1276,7 +1264,6 @@ module psb_d_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => d_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => d_cuda_multi_sctb_x final :: d_cuda_multi_vect_finalize -#endif end type psb_d_multivect_cuda public :: psb_d_multivect_cuda @@ -1297,7 +1284,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1981,8 +1967,6 @@ contains end subroutine d_cuda_multi_ins -#endif - end module psb_d_cuda_multivect_mod diff --git a/cuda/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90 index cda0d9d7..802add96 100644 --- a/cuda/psb_d_vectordev_mod.F90 +++ b/cuda/psb_d_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_d_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedDouble(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedDouble') @@ -385,6 +383,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterDouble -#endif - end module psb_d_vectordev_mod diff --git a/cuda/psb_i_cuda_csrg_mat_mod.F90 b/cuda/psb_i_cuda_csrg_mat_mod.F90 index de0eac09..0867a8a5 100644 --- a/cuda/psb_i_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_i_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_i_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(i_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_i_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_i_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_i_cuda_csrg_from_gpu final :: i_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_csrg_mold -#endif end type psb_i_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_csrg_get_nzeros, i_cuda_csrg_free, i_cuda_csrg_get_fmt, & & i_cuda_csrg_get_size, i_cuda_csrg_sizeof, i_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine i_cuda_csrg_finalize -#else - interface - subroutine psb_i_cuda_csrg_mold(a,b,info) - import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_csrg_mold - end interface - -#endif - end module psb_i_cuda_csrg_mat_mod diff --git a/cuda/psb_i_cuda_diag_mat_mod.F90 b/cuda/psb_i_cuda_diag_mat_mod.F90 index 94a3cc3e..98090f06 100644 --- a/cuda/psb_i_cuda_diag_mat_mod.F90 +++ b/cuda/psb_i_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_i_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_i_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_i_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_i_cuda_diag_to_gpu final :: i_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_diag_mold -#endif end type psb_i_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_diag_get_nzeros, i_cuda_diag_free, i_cuda_diag_get_fmt, & & i_cuda_diag_get_size, i_cuda_diag_sizeof, i_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine i_cuda_diag_finalize -#else - - interface - subroutine psb_i_cuda_diag_mold(a,b,info) - import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_diag_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_diag_mold - end interface - -#endif - end module psb_i_cuda_diag_mat_mod diff --git a/cuda/psb_i_cuda_dnsg_mat_mod.F90 b/cuda/psb_i_cuda_dnsg_mat_mod.F90 index f357977e..6586d115 100644 --- a/cuda/psb_i_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_i_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_i_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_i_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_i_cuda_dnsg_to_gpu final :: i_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold -#endif end type psb_i_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_dnsg_get_nzeros, i_cuda_dnsg_free, i_cuda_dnsg_get_fmt, & & i_cuda_dnsg_get_size, i_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine i_cuda_dnsg_finalize -#else - - interface - subroutine psb_i_cuda_dnsg_mold(a,b,info) - import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_dnsg_mold - end interface - -#endif - end module psb_i_cuda_dnsg_mat_mod diff --git a/cuda/psb_i_cuda_elg_mat_mod.F90 b/cuda/psb_i_cuda_elg_mat_mod.F90 index aa3e2d4d..48311eea 100644 --- a/cuda/psb_i_cuda_elg_mat_mod.F90 +++ b/cuda/psb_i_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_i_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_i_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_i_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_i_cuda_elg_asb final :: i_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_elg_mold - procedure, pass(a) :: asb => psb_i_cuda_elg_asb -#endif end type psb_i_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_elg_get_nzeros, i_cuda_elg_free, i_cuda_elg_get_fmt, & & i_cuda_elg_get_size, i_cuda_elg_sizeof, i_cuda_elg_get_nz_row, i_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine i_cuda_elg_finalize -#else - - interface - subroutine psb_i_cuda_elg_asb(a) - import :: psb_i_cuda_elg_sparse_mat - class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_i_cuda_elg_asb - end interface - - interface - subroutine psb_i_cuda_elg_mold(a,b,info) - import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_elg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_elg_mold - end interface - -#endif - end module psb_i_cuda_elg_mat_mod diff --git a/cuda/psb_i_cuda_hdiag_mat_mod.F90 b/cuda/psb_i_cuda_hdiag_mat_mod.F90 index 03ff573b..22277f3a 100644 --- a/cuda/psb_i_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_i_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_i_cuda_hdiag_mat_mod type, extends(psb_i_hdia_sparse_mat) :: psb_i_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_i_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_i_cuda_hdiag_to_gpu final :: i_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold -#endif end type psb_i_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_hdiag_get_nzeros, i_cuda_hdiag_free, i_cuda_hdiag_get_fmt, & & i_cuda_hdiag_get_size, i_cuda_hdiag_sizeof, i_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine i_cuda_hdiag_finalize -#else - - interface - subroutine psb_i_cuda_hdiag_mold(a,b,info) - import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_hdiag_mold - end interface - -#endif - end module psb_i_cuda_hdiag_mat_mod diff --git a/cuda/psb_i_cuda_hlg_mat_mod.F90 b/cuda/psb_i_cuda_hlg_mat_mod.F90 index f97470d2..b4e3046e 100644 --- a/cuda/psb_i_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_i_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_i_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_i_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_i_cuda_hlg_to_gpu final :: i_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_hlg_mold -#endif end type psb_i_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_hlg_get_nzeros, i_cuda_hlg_free, i_cuda_hlg_get_fmt, & & i_cuda_hlg_get_size, i_cuda_hlg_sizeof, i_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine i_cuda_hlg_finalize -#else - - interface - subroutine psb_i_cuda_hlg_mold(a,b,info) - import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_hlg_mold - end interface - -#endif - end module psb_i_cuda_hlg_mat_mod diff --git a/cuda/psb_i_cuda_hybg_mat_mod.F90 b/cuda/psb_i_cuda_hybg_mat_mod.F90 index 10333c24..f6fc3110 100644 --- a/cuda/psb_i_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_i_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_i_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(i_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_i_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_i_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_i_cuda_hybg_to_gpu final :: i_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_i_cuda_hybg_mold -#endif end type psb_i_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: i_cuda_hybg_get_nzeros, i_cuda_hybg_free, i_cuda_hybg_get_fmt, & & i_cuda_hybg_get_size, i_cuda_hybg_sizeof, i_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine i_cuda_hybg_finalize -#else - - interface - subroutine psb_i_cuda_hybg_mold(a,b,info) - import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ - class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_i_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_i_cuda_hybg_mold - end interface - -#endif - end module psb_i_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 index 8d940513..df8f4113 100644 --- a/cuda/psb_i_cuda_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -35,17 +35,14 @@ module psb_i_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod use psb_i_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_i_base_vect_type) :: psb_i_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr integer(c_int), allocatable :: pinned_buffer(:) @@ -88,7 +85,6 @@ module psb_i_cuda_vect_mod procedure, pass(x) :: maybe_free_buffer => i_cuda_maybe_free_buffer final :: i_cuda_vect_finalize -#endif end type psb_i_vect_cuda public :: psb_i_vect_cuda_ @@ -109,8 +105,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine i_cuda_device_wait() call psb_cudaSync() end subroutine i_cuda_device_wait @@ -888,8 +882,6 @@ contains end subroutine i_cuda_ins_a -#endif - end module psb_i_cuda_vect_mod @@ -905,18 +897,14 @@ module psb_i_cuda_multivect_mod use psb_error_mod use psb_i_multivect_mod use psb_i_base_multivect_mod - -#ifdef HAVE_SPGPU + use psb_cuda_env_mod use psb_i_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_i_base_multivect_type) :: psb_i_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -958,7 +946,6 @@ module psb_i_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => i_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => i_cuda_multi_sctb_x final :: i_cuda_multi_vect_finalize -#endif end type psb_i_multivect_cuda public :: psb_i_multivect_cuda @@ -979,7 +966,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1663,8 +1649,6 @@ contains end subroutine i_cuda_multi_ins -#endif - end module psb_i_cuda_multivect_mod diff --git a/cuda/psb_i_vectordev_mod.F90 b/cuda/psb_i_vectordev_mod.F90 index 9998d355..84037aaf 100644 --- a/cuda/psb_i_vectordev_mod.F90 +++ b/cuda/psb_i_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_i_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedInt(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedInt') @@ -278,6 +276,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterInt -#endif - end module psb_i_vectordev_mod diff --git a/cuda/psb_s_cuda_csrg_mat_mod.F90 b/cuda/psb_s_cuda_csrg_mat_mod.F90 index fb13d034..bd654dbb 100644 --- a/cuda/psb_s_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_s_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_s_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(s_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_s_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_s_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_s_cuda_csrg_from_gpu final :: s_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_csrg_mold -#endif end type psb_s_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_csrg_get_nzeros, s_cuda_csrg_free, s_cuda_csrg_get_fmt, & & s_cuda_csrg_get_size, s_cuda_csrg_sizeof, s_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine s_cuda_csrg_finalize -#else - interface - subroutine psb_s_cuda_csrg_mold(a,b,info) - import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_csrg_mold - end interface - -#endif - end module psb_s_cuda_csrg_mat_mod diff --git a/cuda/psb_s_cuda_diag_mat_mod.F90 b/cuda/psb_s_cuda_diag_mat_mod.F90 index 709cd728..b13a8502 100644 --- a/cuda/psb_s_cuda_diag_mat_mod.F90 +++ b/cuda/psb_s_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_s_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_s_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_s_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_s_cuda_diag_to_gpu final :: s_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_diag_mold -#endif end type psb_s_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_diag_get_nzeros, s_cuda_diag_free, s_cuda_diag_get_fmt, & & s_cuda_diag_get_size, s_cuda_diag_sizeof, s_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine s_cuda_diag_finalize -#else - - interface - subroutine psb_s_cuda_diag_mold(a,b,info) - import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_diag_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_diag_mold - end interface - -#endif - end module psb_s_cuda_diag_mat_mod diff --git a/cuda/psb_s_cuda_dnsg_mat_mod.F90 b/cuda/psb_s_cuda_dnsg_mat_mod.F90 index b01c8365..8193655e 100644 --- a/cuda/psb_s_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_s_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_s_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_s_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_s_cuda_dnsg_to_gpu final :: s_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold -#endif end type psb_s_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_dnsg_get_nzeros, s_cuda_dnsg_free, s_cuda_dnsg_get_fmt, & & s_cuda_dnsg_get_size, s_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine s_cuda_dnsg_finalize -#else - - interface - subroutine psb_s_cuda_dnsg_mold(a,b,info) - import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_dnsg_mold - end interface - -#endif - end module psb_s_cuda_dnsg_mat_mod diff --git a/cuda/psb_s_cuda_elg_mat_mod.F90 b/cuda/psb_s_cuda_elg_mat_mod.F90 index d6b003b5..1aa9d36d 100644 --- a/cuda/psb_s_cuda_elg_mat_mod.F90 +++ b/cuda/psb_s_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_s_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_s_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_s_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_s_cuda_elg_asb final :: s_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_elg_mold - procedure, pass(a) :: asb => psb_s_cuda_elg_asb -#endif end type psb_s_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_elg_get_nzeros, s_cuda_elg_free, s_cuda_elg_get_fmt, & & s_cuda_elg_get_size, s_cuda_elg_sizeof, s_cuda_elg_get_nz_row, s_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine s_cuda_elg_finalize -#else - - interface - subroutine psb_s_cuda_elg_asb(a) - import :: psb_s_cuda_elg_sparse_mat - class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_s_cuda_elg_asb - end interface - - interface - subroutine psb_s_cuda_elg_mold(a,b,info) - import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_elg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_elg_mold - end interface - -#endif - end module psb_s_cuda_elg_mat_mod diff --git a/cuda/psb_s_cuda_hdiag_mat_mod.F90 b/cuda/psb_s_cuda_hdiag_mat_mod.F90 index 0a66ff09..cac72c86 100644 --- a/cuda/psb_s_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_s_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_s_cuda_hdiag_mat_mod type, extends(psb_s_hdia_sparse_mat) :: psb_s_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_s_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_s_cuda_hdiag_to_gpu final :: s_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold -#endif end type psb_s_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_hdiag_get_nzeros, s_cuda_hdiag_free, s_cuda_hdiag_get_fmt, & & s_cuda_hdiag_get_size, s_cuda_hdiag_sizeof, s_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine s_cuda_hdiag_finalize -#else - - interface - subroutine psb_s_cuda_hdiag_mold(a,b,info) - import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_hdiag_mold - end interface - -#endif - end module psb_s_cuda_hdiag_mat_mod diff --git a/cuda/psb_s_cuda_hlg_mat_mod.F90 b/cuda/psb_s_cuda_hlg_mat_mod.F90 index 81b94e5d..4f7b4b6f 100644 --- a/cuda/psb_s_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_s_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_s_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_s_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_s_cuda_hlg_to_gpu final :: s_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_hlg_mold -#endif end type psb_s_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_hlg_get_nzeros, s_cuda_hlg_free, s_cuda_hlg_get_fmt, & & s_cuda_hlg_get_size, s_cuda_hlg_sizeof, s_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine s_cuda_hlg_finalize -#else - - interface - subroutine psb_s_cuda_hlg_mold(a,b,info) - import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_hlg_mold - end interface - -#endif - end module psb_s_cuda_hlg_mat_mod diff --git a/cuda/psb_s_cuda_hybg_mat_mod.F90 b/cuda/psb_s_cuda_hybg_mat_mod.F90 index ae76aac1..f1446b15 100644 --- a/cuda/psb_s_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_s_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_s_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(s_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_s_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_s_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_s_cuda_hybg_to_gpu final :: s_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_s_cuda_hybg_mold -#endif end type psb_s_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: s_cuda_hybg_get_nzeros, s_cuda_hybg_free, s_cuda_hybg_get_fmt, & & s_cuda_hybg_get_size, s_cuda_hybg_sizeof, s_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine s_cuda_hybg_finalize -#else - - interface - subroutine psb_s_cuda_hybg_mold(a,b,info) - import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ - class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_s_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_cuda_hybg_mold - end interface - -#endif - end module psb_s_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index e19c980a..7778eb50 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -35,20 +35,17 @@ module psb_s_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_s_vect_mod - use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod + use psb_i_vect_mod use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_s_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_s_base_vect_type) :: psb_s_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr real(c_float), allocatable :: pinned_buffer(:) @@ -105,7 +102,6 @@ module psb_s_cuda_vect_mod procedure, pass(x) :: absval2 => s_cuda_absval2 final :: s_cuda_vect_finalize -#endif end type psb_s_vect_cuda public :: psb_s_vect_cuda_ @@ -126,8 +122,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine s_cuda_device_wait() call psb_cudaSync() end subroutine s_cuda_device_wait @@ -1204,8 +1198,6 @@ contains end subroutine s_cuda_ins_a -#endif - end module psb_s_cuda_vect_mod @@ -1221,20 +1213,16 @@ module psb_s_cuda_multivect_mod use psb_error_mod use psb_s_multivect_mod use psb_s_base_multivect_mod - + use psb_cuda_env_mod use psb_i_multivect_mod -#ifdef HAVE_SPGPU use psb_i_cuda_multivect_mod use psb_s_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_s_base_multivect_type) :: psb_s_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -1276,7 +1264,6 @@ module psb_s_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => s_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => s_cuda_multi_sctb_x final :: s_cuda_multi_vect_finalize -#endif end type psb_s_multivect_cuda public :: psb_s_multivect_cuda @@ -1297,7 +1284,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1981,8 +1967,6 @@ contains end subroutine s_cuda_multi_ins -#endif - end module psb_s_cuda_multivect_mod diff --git a/cuda/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90 index a7319b95..3ecabe70 100644 --- a/cuda/psb_s_vectordev_mod.F90 +++ b/cuda/psb_s_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_s_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedFloat(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedFloat') @@ -385,6 +383,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterFloat -#endif - end module psb_s_vectordev_mod diff --git a/cuda/psb_z_cuda_csrg_mat_mod.F90 b/cuda/psb_z_cuda_csrg_mat_mod.F90 index 75170185..389149aa 100644 --- a/cuda/psb_z_cuda_csrg_mat_mod.F90 +++ b/cuda/psb_z_cuda_csrg_mat_mod.F90 @@ -48,7 +48,6 @@ module psb_z_cuda_csrg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(z_Cmat) :: deviceMat integer(psb_ipk_) :: devstate = is_host @@ -81,13 +80,8 @@ module psb_z_cuda_csrg_mat_mod procedure, pass(a) :: to_gpu => psb_z_cuda_csrg_to_gpu procedure, pass(a) :: from_gpu => psb_z_cuda_csrg_from_gpu final :: z_cuda_csrg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_csrg_mold -#endif end type psb_z_cuda_csrg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_csrg_get_nzeros, z_cuda_csrg_free, z_cuda_csrg_get_fmt, & & z_cuda_csrg_get_size, z_cuda_csrg_sizeof, z_cuda_csrg_get_nz_row @@ -378,16 +372,4 @@ contains end subroutine z_cuda_csrg_finalize -#else - interface - subroutine psb_z_cuda_csrg_mold(a,b,info) - import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_csrg_mold - end interface - -#endif - end module psb_z_cuda_csrg_mat_mod diff --git a/cuda/psb_z_cuda_diag_mat_mod.F90 b/cuda/psb_z_cuda_diag_mat_mod.F90 index 80906778..592d50b2 100644 --- a/cuda/psb_z_cuda_diag_mat_mod.F90 +++ b/cuda/psb_z_cuda_diag_mat_mod.F90 @@ -44,7 +44,6 @@ module psb_z_cuda_diag_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -68,13 +67,8 @@ module psb_z_cuda_diag_mat_mod procedure, pass(a) :: mold => psb_z_cuda_diag_mold procedure, pass(a) :: to_gpu => psb_z_cuda_diag_to_gpu final :: z_cuda_diag_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_diag_mold -#endif end type psb_z_cuda_diag_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_diag_get_nzeros, z_cuda_diag_free, z_cuda_diag_get_fmt, & & z_cuda_diag_get_size, z_cuda_diag_sizeof, z_cuda_diag_get_nz_row @@ -292,17 +286,4 @@ contains return end subroutine z_cuda_diag_finalize -#else - - interface - subroutine psb_z_cuda_diag_mold(a,b,info) - import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_diag_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_diag_mold - end interface - -#endif - end module psb_z_cuda_diag_mat_mod diff --git a/cuda/psb_z_cuda_dnsg_mat_mod.F90 b/cuda/psb_z_cuda_dnsg_mat_mod.F90 index 3fb2488b..3669d6ac 100644 --- a/cuda/psb_z_cuda_dnsg_mat_mod.F90 +++ b/cuda/psb_z_cuda_dnsg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_z_cuda_dnsg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of DNS, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -69,13 +68,8 @@ module psb_z_cuda_dnsg_mat_mod procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold procedure, pass(a) :: to_gpu => psb_z_cuda_dnsg_to_gpu final :: z_cuda_dnsg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold -#endif end type psb_z_cuda_dnsg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_dnsg_get_nzeros, z_cuda_dnsg_free, z_cuda_dnsg_get_fmt, & & z_cuda_dnsg_get_size, z_cuda_dnsg_get_nz_row @@ -278,17 +272,4 @@ contains return end subroutine z_cuda_dnsg_finalize -#else - - interface - subroutine psb_z_cuda_dnsg_mold(a,b,info) - import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_dnsg_mold - end interface - -#endif - end module psb_z_cuda_dnsg_mat_mod diff --git a/cuda/psb_z_cuda_elg_mat_mod.F90 b/cuda/psb_z_cuda_elg_mat_mod.F90 index 9090b0a2..bd36e4b6 100644 --- a/cuda/psb_z_cuda_elg_mat_mod.F90 +++ b/cuda/psb_z_cuda_elg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_z_cuda_elg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of ELL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer(psb_ipk_) :: devstate = is_host @@ -86,14 +85,8 @@ module psb_z_cuda_elg_mat_mod procedure, pass(a) :: to_gpu => psb_z_cuda_elg_to_gpu procedure, pass(a) :: asb => psb_z_cuda_elg_asb final :: z_cuda_elg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_elg_mold - procedure, pass(a) :: asb => psb_z_cuda_elg_asb -#endif end type psb_z_cuda_elg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_elg_get_nzeros, z_cuda_elg_free, z_cuda_elg_get_fmt, & & z_cuda_elg_get_size, z_cuda_elg_sizeof, z_cuda_elg_get_nz_row, z_cuda_elg_sync @@ -460,24 +453,4 @@ contains end subroutine z_cuda_elg_finalize -#else - - interface - subroutine psb_z_cuda_elg_asb(a) - import :: psb_z_cuda_elg_sparse_mat - class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a - end subroutine psb_z_cuda_elg_asb - end interface - - interface - subroutine psb_z_cuda_elg_mold(a,b,info) - import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_elg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_elg_mold - end interface - -#endif - end module psb_z_cuda_elg_mat_mod diff --git a/cuda/psb_z_cuda_hdiag_mat_mod.F90 b/cuda/psb_z_cuda_hdiag_mat_mod.F90 index b64498f6..70897664 100644 --- a/cuda/psb_z_cuda_hdiag_mat_mod.F90 +++ b/cuda/psb_z_cuda_hdiag_mat_mod.F90 @@ -38,7 +38,6 @@ module psb_z_cuda_hdiag_mat_mod type, extends(psb_z_hdia_sparse_mat) :: psb_z_cuda_hdiag_sparse_mat ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr contains @@ -62,13 +61,8 @@ module psb_z_cuda_hdiag_mat_mod procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold procedure, pass(a) :: to_gpu => psb_z_cuda_hdiag_to_gpu final :: z_cuda_hdiag_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold -#endif end type psb_z_cuda_hdiag_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_hdiag_get_nzeros, z_cuda_hdiag_free, z_cuda_hdiag_get_fmt, & & z_cuda_hdiag_get_size, z_cuda_hdiag_sizeof, z_cuda_hdiag_get_nz_row @@ -271,17 +265,4 @@ contains return end subroutine z_cuda_hdiag_finalize -#else - - interface - subroutine psb_z_cuda_hdiag_mold(a,b,info) - import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_hdiag_mold - end interface - -#endif - end module psb_z_cuda_hdiag_mat_mod diff --git a/cuda/psb_z_cuda_hlg_mat_mod.F90 b/cuda/psb_z_cuda_hlg_mat_mod.F90 index 29ed68fa..2acf43f1 100644 --- a/cuda/psb_z_cuda_hlg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hlg_mat_mod.F90 @@ -49,7 +49,6 @@ module psb_z_cuda_hlg_mat_mod ! If HAVE_SPGPU is undefined this is just ! a copy of HLL, indistinguishable. ! -#ifdef HAVE_SPGPU type(c_ptr) :: deviceMat = c_null_ptr integer :: devstate = is_host @@ -82,13 +81,8 @@ module psb_z_cuda_hlg_mat_mod procedure, pass(a) :: from_gpu => psb_z_cuda_hlg_from_gpu procedure, pass(a) :: to_gpu => psb_z_cuda_hlg_to_gpu final :: z_cuda_hlg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_hlg_mold -#endif end type psb_z_cuda_hlg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_hlg_get_nzeros, z_cuda_hlg_free, z_cuda_hlg_get_fmt, & & z_cuda_hlg_get_size, z_cuda_hlg_sizeof, z_cuda_hlg_get_nz_row @@ -382,17 +376,4 @@ contains return end subroutine z_cuda_hlg_finalize -#else - - interface - subroutine psb_z_cuda_hlg_mold(a,b,info) - import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_hlg_mold - end interface - -#endif - end module psb_z_cuda_hlg_mat_mod diff --git a/cuda/psb_z_cuda_hybg_mat_mod.F90 b/cuda/psb_z_cuda_hybg_mat_mod.F90 index 1bbc11b2..96620300 100644 --- a/cuda/psb_z_cuda_hybg_mat_mod.F90 +++ b/cuda/psb_z_cuda_hybg_mat_mod.F90 @@ -45,7 +45,6 @@ module psb_z_cuda_hybg_mat_mod ! ! ! -#ifdef HAVE_SPGPU type(z_Hmat) :: deviceMat contains @@ -69,13 +68,8 @@ module psb_z_cuda_hybg_mat_mod procedure, pass(a) :: mold => psb_z_cuda_hybg_mold procedure, pass(a) :: to_gpu => psb_z_cuda_hybg_to_gpu final :: z_cuda_hybg_finalize -#else - contains - procedure, pass(a) :: mold => psb_z_cuda_hybg_mold -#endif end type psb_z_cuda_hybg_sparse_mat -#ifdef HAVE_SPGPU private :: z_cuda_hybg_get_nzeros, z_cuda_hybg_free, z_cuda_hybg_get_fmt, & & z_cuda_hybg_get_size, z_cuda_hybg_sizeof, z_cuda_hybg_get_nz_row @@ -289,18 +283,5 @@ contains return end subroutine z_cuda_hybg_finalize -#else - - interface - subroutine psb_z_cuda_hybg_mold(a,b,info) - import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ - class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a - class(psb_z_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_cuda_hybg_mold - end interface - -#endif - end module psb_z_cuda_hybg_mat_mod #endif diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 35bfb4b5..53484911 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -35,20 +35,17 @@ module psb_z_cuda_vect_mod use psb_const_mod use psb_error_mod use psb_z_vect_mod - use psb_i_vect_mod -#ifdef HAVE_SPGPU use psb_cuda_env_mod + use psb_i_vect_mod use psb_i_cuda_vect_mod use psb_i_vectordev_mod use psb_z_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_z_base_vect_type) :: psb_z_vect_cuda -#ifdef HAVE_SPGPU integer :: state = is_host type(c_ptr) :: deviceVect = c_null_ptr complex(c_double_complex), allocatable :: pinned_buffer(:) @@ -105,7 +102,6 @@ module psb_z_cuda_vect_mod procedure, pass(x) :: absval2 => z_cuda_absval2 final :: z_cuda_vect_finalize -#endif end type psb_z_vect_cuda public :: psb_z_vect_cuda_ @@ -126,8 +122,6 @@ contains end function constructor -#ifdef HAVE_SPGPU - subroutine z_cuda_device_wait() call psb_cudaSync() end subroutine z_cuda_device_wait @@ -1204,8 +1198,6 @@ contains end subroutine z_cuda_ins_a -#endif - end module psb_z_cuda_vect_mod @@ -1221,20 +1213,16 @@ module psb_z_cuda_multivect_mod use psb_error_mod use psb_z_multivect_mod use psb_z_base_multivect_mod - + use psb_cuda_env_mod use psb_i_multivect_mod -#ifdef HAVE_SPGPU use psb_i_cuda_multivect_mod use psb_z_vectordev_mod -#endif integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 type, extends(psb_z_base_multivect_type) :: psb_z_multivect_cuda -#ifdef HAVE_SPGPU - integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 type(c_ptr) :: deviceVect = c_null_ptr real(c_double), allocatable :: buffer(:,:) @@ -1276,7 +1264,6 @@ module psb_z_cuda_multivect_mod !!$ procedure, pass(y) :: sctb => z_cuda_multi_sctb !!$ procedure, pass(y) :: sctb_x => z_cuda_multi_sctb_x final :: z_cuda_multi_vect_finalize -#endif end type psb_z_multivect_cuda public :: psb_z_multivect_cuda @@ -1297,7 +1284,6 @@ contains end function constructor -#ifdef HAVE_SPGPU !!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ use psi_serial_mod @@ -1981,8 +1967,6 @@ contains end subroutine z_cuda_multi_ins -#endif - end module psb_z_cuda_multivect_mod diff --git a/cuda/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90 index 58c43a43..8f07cd56 100644 --- a/cuda/psb_z_vectordev_mod.F90 +++ b/cuda/psb_z_vectordev_mod.F90 @@ -34,8 +34,6 @@ module psb_z_vectordev_mod use psb_base_vectordev_mod -#ifdef HAVE_SPGPU - interface registerMapped function registerMappedDoubleComplex(buf,d_p,n,dummy) & & result(res) bind(c,name='registerMappedDoubleComplex') @@ -385,6 +383,4 @@ contains call unregisterMapped(c_loc(buffer)) end subroutine inner_unregisterDoubleComplex -#endif - end module psb_z_vectordev_mod diff --git a/cuda/s_cusparse_mod.F90 b/cuda/s_cusparse_mod.F90 index 6e628fa1..ab322129 100644 --- a/cuda/s_cusparse_mod.F90 +++ b/cuda/s_cusparse_mod.F90 @@ -43,9 +43,6 @@ module s_cusparse_mod end type s_Hmat #endif - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface CSRGDeviceFree function s_CSRGDeviceFree(Mat) & & bind(c,name="s_CSRGDeviceFree") result(res) @@ -300,6 +297,4 @@ module s_cusparse_mod end interface #endif -#endif - end module s_cusparse_mod diff --git a/cuda/scusparse.c b/cuda/scusparse.c index 70a0cbd7..2ad2e2dc 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include #include "cintrf.h" @@ -92,4 +91,3 @@ #include "fcusparse_fct.h" -#endif diff --git a/cuda/svectordev.c b/cuda/svectordev.c index 9a41ae1a..bfa4061a 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "svectordev.h" @@ -300,5 +299,3 @@ int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) return(i); } -#endif - diff --git a/cuda/svectordev.h b/cuda/svectordev.h index 1fd4fd11..bf25fcb1 100644 --- a/cuda/svectordev.h +++ b/cuda/svectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include "vectordev.h" #include "cuda_runtime.h" @@ -75,4 +74,3 @@ int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA); int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, void *deviceVecB); -#endif diff --git a/cuda/vectordev.c b/cuda/vectordev.c index 2b22a8a6..65b4d533 100644 --- a/cuda/vectordev.c +++ b/cuda/vectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) #include "cuComplex.h" #include "vectordev.h" #include "cuda_runtime.h" @@ -194,5 +193,3 @@ int getMultiVecDevicePitch(void* deviceVec) return(i); } -#endif - diff --git a/cuda/vectordev.h b/cuda/vectordev.h index 9739c01b..8eca7063 100644 --- a/cuda/vectordev.h +++ b/cuda/vectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include "cuda_runtime.h" //#include "common.h" @@ -86,5 +85,3 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * int getMultiVecDeviceSize(void* deviceVec); int getMultiVecDeviceCount(void* deviceVec); int getMultiVecDevicePitch(void* deviceVec); - -#endif diff --git a/cuda/z_cusparse_mod.F90 b/cuda/z_cusparse_mod.F90 index 020f1de5..c3f21c0c 100644 --- a/cuda/z_cusparse_mod.F90 +++ b/cuda/z_cusparse_mod.F90 @@ -43,9 +43,6 @@ module z_cusparse_mod end type z_Hmat #endif - -#if defined(HAVE_CUDA) && defined(HAVE_SPGPU) - interface CSRGDeviceFree function z_CSRGDeviceFree(Mat) & & bind(c,name="z_CSRGDeviceFree") result(res) @@ -298,7 +295,6 @@ module z_cusparse_mod integer(c_int) :: res end function z_HYBGHost2Device end interface -#endif #endif diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 3991359a..050c0ccd 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -33,7 +33,6 @@ #include #include -#ifdef HAVE_SPGPU #include #include #include "cintrf.h" @@ -91,4 +90,3 @@ #include "fcusparse_fct.h" -#endif diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index c3671a86..0fb1c67e 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -32,7 +32,6 @@ #include #include -#if defined(HAVE_SPGPU) //#include "utils.h" //#include "common.h" #include "zvectordev.h" @@ -317,5 +316,3 @@ int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVec return(i); } -#endif - diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h index ca3c966e..96330a7a 100644 --- a/cuda/zvectordev.h +++ b/cuda/zvectordev.h @@ -31,7 +31,6 @@ #pragma once -#if defined(HAVE_SPGPU) //#include "utils.h" #include #include "cuComplex.h" @@ -87,5 +86,3 @@ int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVec int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVecA, void *deviceVecB); - -#endif From 41491f7b9c3dee27c2200e9178a10911e0f7edc2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 1 Dec 2023 15:14:50 +0100 Subject: [PATCH 021/116] Fix HAVE_CUDA in test programs --- test/cudakern/dpdegenmv.F90 | 24 ++++++++++++------------ test/cudakern/spdegenmv.F90 | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index bde57f5f..db845d71 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -547,7 +547,7 @@ program pdgenmv use psb_base_mod use psb_util_mod use psb_ext_mod -#ifdef HAVE_GPU +#ifdef HAVE_CUDA use psb_cuda_mod #endif #ifdef HAVE_RSB @@ -572,7 +572,7 @@ program pdgenmv type(psb_desc_type) :: desc_a ! dense matrices type(psb_d_vect_type), target :: xv, bv, xg, bg -#ifdef HAVE_GPU +#ifdef HAVE_CUDA type(psb_d_vect_cuda) :: vmold type(psb_i_vect_cuda) :: imold #endif @@ -594,7 +594,7 @@ program pdgenmv #ifdef HAVE_RSB type(psb_d_rsb_sparse_mat), target :: arsb #endif -#ifdef HAVE_GPU +#ifdef HAVE_CUDA type(psb_d_cuda_elg_sparse_mat), target :: aelg type(psb_d_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 @@ -618,7 +618,7 @@ program pdgenmv call psb_init(ctxt) call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call psb_cuda_init(ctxt) #endif #ifdef HAVE_RSB @@ -639,7 +639,7 @@ program pdgenmv write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif @@ -698,7 +698,7 @@ program pdgenmv stop end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA select case(psb_toupper(agfmt)) case('ELG') agmold => aelg @@ -753,7 +753,7 @@ program pdgenmv call xv%bld(x0) call psb_geasb(bv,desc_a,info,scratch=.true.) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call aux_a%cscnv(agpu,info,mold=acoo) call xg%bld(x0,mold=vmold) @@ -780,7 +780,7 @@ program pdgenmv t2 = psb_wtime() - t1 call psb_amx(ctxt,t2) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call xg%set(x0) ! FIXME: cache flush needed here @@ -870,7 +870,7 @@ program pdgenmv tflops = flops gflops = flops * ngpu write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& & tcnvc1 @@ -900,7 +900,7 @@ program pdgenmv & t2*1.d3/(1.d0*ntests) write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& & flops/1.d6 -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& & ntests, tt2 write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& @@ -925,7 +925,7 @@ program pdgenmv bdwdth = ntests*nbytes/(t2*1.d6) write(psb_out_unit,*) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU +#ifdef HAVE_CUDA bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth bdwdth = psb_cuda_MemoryPeakBandwidth() @@ -949,7 +949,7 @@ program pdgenmv call psb_errpush(info,name,a_err=ch_err) goto 9999 end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call psb_cuda_exit() #endif call psb_exit(ctxt) diff --git a/test/cudakern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 index 9644d8c7..f953e163 100644 --- a/test/cudakern/spdegenmv.F90 +++ b/test/cudakern/spdegenmv.F90 @@ -547,7 +547,7 @@ program pdgenmv use psb_base_mod use psb_util_mod use psb_ext_mod -#ifdef HAVE_GPU +#ifdef HAVE_CUDA use psb_cuda_mod #endif use psb_s_pde3d_mod @@ -569,7 +569,7 @@ program pdgenmv type(psb_desc_type) :: desc_a ! dense matrices type(psb_s_vect_type), target :: xv,bv, xg, bg -#ifdef HAVE_GPU +#ifdef HAVE_CUDA type(psb_s_vect_cuda) :: vmold type(psb_i_vect_cuda) :: imold #endif @@ -588,7 +588,7 @@ program pdgenmv type(psb_s_hll_sparse_mat), target :: ahll type(psb_s_dia_sparse_mat), target :: adia type(psb_s_hdia_sparse_mat), target :: ahdia -#ifdef HAVE_GPU +#ifdef HAVE_CUDA type(psb_s_cuda_elg_sparse_mat), target :: aelg type(psb_s_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 @@ -612,7 +612,7 @@ program pdgenmv call psb_init(ctxt) call psb_info(ctxt,iam,np) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call psb_cuda_init(ctxt) #endif #ifdef HAVE_RSB @@ -633,7 +633,7 @@ program pdgenmv write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ write(*,*) 'This is the ',trim(name),' sample program' end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif @@ -692,7 +692,7 @@ program pdgenmv stop end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA select case(psb_toupper(agfmt)) case('ELG') agmold => aelg @@ -747,7 +747,7 @@ program pdgenmv call xv%bld(x0) call psb_geasb(bv,desc_a,info,scratch=.true.) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call aux_a%cscnv(agpu,info,mold=acoo) call xg%bld(x0,mold=vmold) @@ -774,7 +774,7 @@ program pdgenmv t2 = psb_wtime() - t1 call psb_amx(ctxt,t2) -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call xg%set(x0) ! FIXME: cache flush needed here @@ -864,7 +864,7 @@ program pdgenmv tflops = flops gflops = flops * ngpu write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& & tcnvc1 @@ -894,7 +894,7 @@ program pdgenmv & t2*1.d3/(1.d0*ntests) write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& & flops/1.d6 -#ifdef HAVE_GPU +#ifdef HAVE_CUDA write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& & ntests, tt2 write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& @@ -919,7 +919,7 @@ program pdgenmv bdwdth = ntests*nbytes/(t2*1.d6) write(psb_out_unit,*) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth -#ifdef HAVE_GPU +#ifdef HAVE_CUDA bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth bdwdth = psb_cuda_MemoryPeakBandwidth() @@ -943,7 +943,7 @@ program pdgenmv call psb_errpush(info,name,a_err=ch_err) goto 9999 end if -#ifdef HAVE_GPU +#ifdef HAVE_CUDA call psb_cuda_exit() #endif call psb_exit(ctxt) From 0230fbb7afb9663287e2a94aa62b61e0511221a1 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 7 Dec 2023 12:09:00 +0100 Subject: [PATCH 022/116] Identufied problems with CSRG. Will fix in a branch --- cuda/CUDA/psi_cuda_CopyCooToElg.cuh | 2 +- cuda/d_cusparse_mod.F90 | 2 + cuda/fcusparse_fct.h | 8 ++- cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 | 2 +- cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 | 2 +- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 82 ++++++++++++++++++++++- cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 | 2 +- cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 | 2 +- 8 files changed, 93 insertions(+), 9 deletions(-) diff --git a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh index 10a81a36..98aac050 100644 --- a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh +++ b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh @@ -89,7 +89,7 @@ GEN_PSI_FUNC_NAME(TYPE_SYMBOL) (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, int *idiag, int *rP, VALUE_TYPE *cM) -{ int i,j,k, nrws; +{ int i,j, nrws; //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 index 509253e6..ae9bcceb 100644 --- a/cuda/d_cusparse_mod.F90 +++ b/cuda/d_cusparse_mod.F90 @@ -97,6 +97,7 @@ module d_cusparse_mod end function d_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function d_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,6 +107,7 @@ module d_cusparse_mod integer(c_int) :: res end function d_CSRGDeviceCsrsmAnalysis end interface +#endif interface CSRGDeviceAlloc function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 5a3b1ac6..5afe410d 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -187,7 +187,7 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (void *) vY, CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); -#else +#elif CUDA_VERSION <= 12030 cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; @@ -212,6 +212,8 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cMat->mvbuffer)); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); +#else + fprintf(stderr,"Unsupported CUSPARSE version\n"); #endif } @@ -244,7 +246,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (const TYPE *) vX, (TYPE *) vY, CUSPARSE_SOLVE_POLICY_USE_LEVEL, (void *) cMat->svbuffer)); -#else +#elif CUDA_VERSION <= 12030 cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; @@ -285,6 +287,8 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, *(cMat->spsvDescr))); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); +#else + fprintf(stderr,"Unsupported CUSPARSE version\n"); #endif } diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 index 65b12a11..e9d42139 100644 --- a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 @@ -38,7 +38,7 @@ subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) implicit none class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a - class(psb_c_base_sparse_mat), intent(inout) :: b + class(psb_c_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 index d030538e..8f8e8cbe 100644 --- a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 @@ -38,7 +38,7 @@ subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) implicit none class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b + class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index d1949421..16cb541d 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,85 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (a%is_triangle()) then +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if +!!$ if ((info == 0) )then +!!$ if ((info == 0).and.a%is_upper()) then +!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) +!!$ else +!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) +!!$ end if +!!$ end if +!!$ end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 index 29bbea6e..76871b59 100644 --- a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 @@ -38,7 +38,7 @@ subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) implicit none class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a - class(psb_s_base_sparse_mat), intent(inout) :: b + class(psb_s_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info diff --git a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 index 26490a15..e086c8a4 100644 --- a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 @@ -38,7 +38,7 @@ subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) implicit none class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a - class(psb_z_base_sparse_mat), intent(inout) :: b + class(psb_z_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info From 6b65199afba2bd6ef5d6838db53ed77daa352fed Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 11 Dec 2023 13:21:23 +0100 Subject: [PATCH 023/116] Check CUDA version for -dopt=on only from 11.7 --- configure | 5 ++++- configure.ac | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 48487f98..3a774ee1 100755 --- a/configure +++ b/configure @@ -10834,7 +10834,10 @@ fi pac_cv_cudacc="50,60,70,75"; CUDA_CC="$pac_cv_cudacc"; fi - CUDEFINES="--dopt=on"; + if (( $pac_cv_cuda_version >= 11070 )) + then + CUDEFINES="--dopt=on"; + fi for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` do CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; diff --git a/configure.ac b/configure.ac index 4b76485b..62aad4f8 100755 --- a/configure.ac +++ b/configure.ac @@ -811,7 +811,10 @@ if test "x$pac_cv_have_cuda" == "xyes"; then pac_cv_cudacc="50,60,70,75"; CUDA_CC="$pac_cv_cudacc"; fi - CUDEFINES="--dopt=on"; + if (( $pac_cv_cuda_version >= 11070 )) + then + CUDEFINES="--dopt=on"; + fi for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` do CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; From d28ea462d98d23e2a70c55f8d37bb4fd9d9fc570 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 15 Dec 2023 03:41:19 -0500 Subject: [PATCH 024/116] Modified CSRG to work with latest versions; cusparse docs are unclear --- cuda/c_cusparse_mod.F90 | 14 +++- cuda/ccusparse.c | 34 ++++---- cuda/cuda_util.c | 4 +- cuda/d_cusparse_mod.F90 | 12 ++- cuda/dcusparse.c | 6 ++ cuda/fcusparse.c | 7 +- cuda/fcusparse_fct.h | 118 +++++++++++++++++++-------- cuda/impl/psb_c_cuda_csrg_to_gpu.F90 | 63 +++++++++++++- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 21 +---- cuda/impl/psb_s_cuda_csrg_to_gpu.F90 | 63 +++++++++++++- cuda/impl/psb_z_cuda_csrg_to_gpu.F90 | 63 +++++++++++++- cuda/s_cusparse_mod.F90 | 14 +++- cuda/scusparse.c | 14 +++- cuda/z_cusparse_mod.F90 | 14 +++- cuda/zcusparse.c | 13 ++- 15 files changed, 368 insertions(+), 92 deletions(-) diff --git a/cuda/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 index 59f37732..07e57a55 100644 --- a/cuda/c_cusparse_mod.F90 +++ b/cuda/c_cusparse_mod.F90 @@ -97,6 +97,7 @@ module c_cusparse_mod end function c_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function c_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module c_cusparse_mod integer(c_int) :: res end function c_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function c_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="c_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function c_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="c_CSRGDeviceAlloc") result(res) diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index c5430306..bab6ede0 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Single precision complex */ -#define TYPE float complex + +/* Double precision real */ +#define TYPE float complex #define CUSPARSE_BASE_TYPE CUDA_C_32F #define T_CSRGDeviceMat c_CSRGDeviceMat #define T_Cmat c_Cmat @@ -54,25 +55,12 @@ #define T_CSRGDeviceGetParms c_CSRGDeviceGetParms #if CUDA_SHORT_VERSION <= 10 - #define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType #define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase #define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis #define cusparseTcsrmv cusparseCcsrmv #define cusparseTcsrsv_solve cusparseCcsrsv_solve #define cusparseTcsrsv_analysis cusparseCcsrsv_analysis - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve - -#else - #define T_HYBGDeviceMat c_HYBGDeviceMat #define T_Hmat c_Hmat #define T_HYBGDeviceFree c_HYBGDeviceFree @@ -89,6 +77,22 @@ #define cusparseThybsv_solve cusparseChybsv_solve #define cusparseThybsv_analysis cusparseChybsv_analysis #define cusparseTcsr2hyb cusparseCcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 09265410..c0e5c6e5 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -228,7 +228,7 @@ int gpuInit(int dev) if (!psb_cublas_handle) psb_cudaCreateCublasHandle(); hasUVA=getDeviceHasUVA(); - + FcusparseCreate(); return err; } @@ -240,7 +240,7 @@ void gpuClose() st1=spgpuGetStream(psb_cuda_handle); if (! psb_cublas_handle) cublasGetStream(psb_cublas_handle,&st2); - + FcusparseDestroy(); psb_cudaDestroyHandle(); if (st1 != st2) psb_cudaDestroyCublasHandle(); diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 index ae9bcceb..399ac085 100644 --- a/cuda/d_cusparse_mod.F90 +++ b/cuda/d_cusparse_mod.F90 @@ -107,8 +107,18 @@ module d_cusparse_mod integer(c_int) :: res end function d_CSRGDeviceCsrsmAnalysis end interface +#else + interface CSRGIsNullSvBuffer + function d_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="d_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGIsNullSvBuffer + end interface #endif - + interface CSRGDeviceAlloc function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="d_CSRGDeviceAlloc") result(res) diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index f14e787c..657ca5be 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -86,6 +86,12 @@ #define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis #define cusparseTcsrsv2_solve cusparseDcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr #endif diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index c1b661ab..1b37272c 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -53,14 +53,17 @@ int FcusparseCreate() if (ret == CUSPARSE_STATUS_SUCCESS) cusparse_handle = handle; } + fprintf(stderr,"Created cusparses_handle\n"); return (ret); } int FcusparseDestroy() { int val; - val = (int) cusparseDestroy(*cusparse_handle); - free(cusparse_handle); + if (cusparse_handle!=NULL){ + val = (int) cusparseDestroy(*cusparse_handle); + free(cusparse_handle); + } cusparse_handle=NULL; return(val); } diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 5afe410d..578c8b51 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -39,7 +39,7 @@ typedef struct T_CSRGDeviceMat size_t mvbsize, svbsize; void *mvbuffer, *svbuffer; #else - cusparseSpMatDescr_t descr; + cusparseSpMatDescr_t *spmvDescr; cusparseSpSVDescr_t *spsvDescr; size_t mvbsize, svbsize; void *mvbuffer, *svbuffer; @@ -102,6 +102,12 @@ int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#else + +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); #endif @@ -187,15 +193,20 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (void *) vY, CUSPARSE_BASE_TYPE, CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); -#elif CUDA_VERSION <= 12030 +#else cusparseDnVecDescr_t vecX, vecY; size_t bfsz; + + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); vX=x->v_; vY=y->v_; CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,&beta,vecY, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, &bfsz)); if (bfsz > cMat->mvbsize) { @@ -207,13 +218,12 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,&beta,vecY, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, cMat->mvbuffer)); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); -#else - fprintf(stderr,"Unsupported CUSPARSE version\n"); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); #endif } @@ -246,16 +256,24 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, (const TYPE *) vX, (TYPE *) vY, CUSPARSE_SOLVE_POLICY_USE_LEVEL, (void *) cMat->svbuffer)); -#elif CUDA_VERSION <= 12030 +#else cusparseDnVecDescr_t vecX, vecY; size_t bfsz; vX=x->v_; vY=y->v_; - cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + // fprintf(stderr,"Entry to SpSVDevice: %d %p\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr); + if (T_CSRGIsNullSvDescr(cMat)) { + cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); + cMat->svbsize=0; + CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) ); + } + //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,vecY, + &alpha,*(cMat->spmvDescr),vecX,vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr), @@ -267,31 +285,49 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, } CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); cMat->svbsize=bfsz; - } - if (cMat->spsvDescr==NULL) { CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, - cMat->descr, + *(cMat->spmvDescr), vecX, vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr), cMat->svbuffer)); } - + if (T_CSRGIsNullSvBuffer(cMat)) { + fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); + } + T_CSRGCreateSpMVDescr(cMat); CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,cMat->descr,vecX,vecY, + &alpha,*(cMat->spmvDescr),vecX,vecY, CUSPARSE_BASE_TYPE, CUSPARSE_SPSV_ALG_DEFAULT, *(cMat->spsvDescr))); CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); -#else - fprintf(stderr,"Unsupported CUSPARSE version\n"); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); #endif } + +T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) +{ + int64_t tr,tc,tz; + tr = cMat->m; + tc = cMat->n; + tz = cMat->nz; + CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr, + tr,tc,tz, + (void *) cMat->irp, + (void *) cMat->ja, + (void *) cMat->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ONE, + CUSPARSE_BASE_TYPE) ); +} + int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) { T_CSRGDeviceMat *cMat; @@ -353,17 +389,8 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) #else - int64_t rows=nr, cols=nc, nnz=nz; - CHECK_CUSPARSE(cusparseCreateCsr(&(cMat->descr), - rows, cols, nnz, - (void *) cMat->irp, - (void *) cMat->ja, - (void *) cMat->val, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ONE, - CUSPARSE_BASE_TYPE) ); + cMat->spmvDescr=NULL; cMat->spsvDescr=NULL; cMat->mvbuffer=NULL; cMat->svbuffer=NULL; @@ -389,20 +416,23 @@ int T_CSRGDeviceFree(T_Cmat *Matrix) cusparseDestroyMatDescr(cMat->descr); cusparseDestroyCsrsv2Info(cMat->triang); #else - cusparseDestroySpMat(cMat->descr); - if (cMat->spsvDescr!=NULL) { - CHECK_CUSPARSE( cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); - free(cMat->spsvDescr); - cMat->spsvDescr=NULL; + if (!T_CSRGIsNullMvDescr(cMat)) { + // already destroyed spmvDescr, just free the pointer + free(cMat->spmvDescr); + cMat->spmvDescr=NULL; } if (cMat->mvbuffer!=NULL) CHECK_CUDA( cudaFree(cMat->mvbuffer)); + cMat->mvbuffer=NULL; + cMat->mvbsize=0; + if (!T_CSRGIsNullSvDescr(cMat)) { + CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); + free(cMat->spsvDescr); + cMat->spsvDescr=NULL; + } if (cMat->svbuffer!=NULL) CHECK_CUDA( cudaFree(cMat->svbuffer)); - cMat->spsvDescr=NULL; - cMat->mvbuffer=NULL; cMat->svbuffer=NULL; - cMat->mvbsize=0; cMat->svbsize=0; #endif free(cMat); @@ -500,7 +530,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) T_CSRGDeviceMat *cMat= Matrix->mat; cusparseFillMode_t mode=type; - CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CUSPARSE_SPMAT_FILL_MODE, (const void*) &mode, sizeof(cusparseFillMode_t))); @@ -511,13 +541,27 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) { T_CSRGDeviceMat *cMat= Matrix->mat; cusparseDiagType_t cutype=type; - CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->descr, + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CUSPARSE_SPMAT_DIAG_TYPE, (const void*) &cutype, sizeof(cusparseDiagType_t))); return(0); } +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spmvDescr == NULL); +} + +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat) +{ + return(cMat->svbuffer == NULL); +} +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spsvDescr == NULL); +} + #endif int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, @@ -550,6 +594,8 @@ int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, cMat->svbuffer)); } +#else + //cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL); #endif return(CUSPARSE_STATUS_SUCCESS); } diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index aebb07e4..8e7d25a9 100644 --- a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index 16cb541d..4ecb0bbc 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -308,6 +308,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if + #else if (a%is_unit()) then @@ -325,9 +326,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) end if end if !!! We are explicitly adding the diagonal - !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then -!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -366,24 +365,6 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) else if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) - !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) -!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) -!!$ if (a%is_triangle()) then -!!$ if (info == 0) then -!!$ if (a%is_unit()) then -!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) -!!$ else -!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) -!!$ end if -!!$ end if -!!$ if ((info == 0) )then -!!$ if ((info == 0).and.a%is_upper()) then -!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) -!!$ else -!!$ info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) -!!$ end if -!!$ end if -!!$ end if if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) endif diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index cf052e13..246e780d 100644 --- a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index f7e65627..41c96f68 100644 --- a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -227,7 +227,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) endif -#else +#elif 0 if (a%is_unit()) then ! @@ -308,7 +308,66 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) !!$ if ((info == 0) .and. a%is_triangle()) then !!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) !!$ end if - + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + #endif call a%set_sync() diff --git a/cuda/s_cusparse_mod.F90 b/cuda/s_cusparse_mod.F90 index ab322129..a0214110 100644 --- a/cuda/s_cusparse_mod.F90 +++ b/cuda/s_cusparse_mod.F90 @@ -97,6 +97,7 @@ module s_cusparse_mod end function s_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function s_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="s_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module s_cusparse_mod integer(c_int) :: res end function s_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function s_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="s_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import s_Cmat + type(s_Cmat) :: Mat + integer(c_int) :: res + end function s_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function s_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="s_CSRGDeviceAlloc") result(res) diff --git a/cuda/scusparse.c b/cuda/scusparse.c index 2ad2e2dc..d4db9b7c 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Single precision real */ -#define TYPE float + +/* Double precision real */ +#define TYPE float #define CUSPARSE_BASE_TYPE CUDA_R_32F #define T_CSRGDeviceMat s_CSRGDeviceMat #define T_Cmat s_Cmat @@ -60,7 +61,6 @@ #define cusparseTcsrmv cusparseScsrmv #define cusparseTcsrsv_solve cusparseScsrsv_solve #define cusparseTcsrsv_analysis cusparseScsrsv_analysis - #define T_HYBGDeviceMat s_HYBGDeviceMat #define T_Hmat s_Hmat #define T_HYBGDeviceFree s_HYBGDeviceFree @@ -78,7 +78,6 @@ #define cusparseThybsv_analysis cusparseShybsv_analysis #define cusparseTcsr2hyb cusparseScsr2hyb - #elif CUDA_VERSION < 11030 #define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType @@ -87,6 +86,13 @@ #define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis #define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" diff --git a/cuda/z_cusparse_mod.F90 b/cuda/z_cusparse_mod.F90 index c3f21c0c..91a6fcbd 100644 --- a/cuda/z_cusparse_mod.F90 +++ b/cuda/z_cusparse_mod.F90 @@ -97,6 +97,7 @@ module z_cusparse_mod end function z_CSRGDeviceSetMatIndexBase end interface +#if CUDA_SHORT_VERSION <= 10 interface CSRGDeviceCsrsmAnalysis function z_CSRGDeviceCsrsmAnalysis(Mat) & & bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res) @@ -106,7 +107,18 @@ module z_cusparse_mod integer(c_int) :: res end function z_CSRGDeviceCsrsmAnalysis end interface - +#else + interface CSRGIsNullSvBuffer + function z_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="z_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGIsNullSvBuffer + end interface +#endif + interface CSRGDeviceAlloc function z_CSRGDeviceAlloc(Mat,nr,nc,nz) & & bind(c,name="z_CSRGDeviceAlloc") result(res) diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 050c0ccd..a70a6573 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -38,8 +38,9 @@ #include "cintrf.h" #include "fcusparse.h" -/* Double precision complex */ -#define TYPE double complex + +/* Double precision real */ +#define TYPE double complex #define CUSPARSE_BASE_TYPE CUDA_C_64F #define T_CSRGDeviceMat z_CSRGDeviceMat #define T_Cmat z_Cmat @@ -85,8 +86,14 @@ #define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize #define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis #define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr + #endif #include "fcusparse_fct.h" - From 62db7c0449f2046fc9a7cd890f123a8a71ef7336 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 15 Dec 2023 04:11:01 -0500 Subject: [PATCH 025/116] Fix spsv with CSRG handling of descriptors. --- cuda/fcusparse_fct.h | 59 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 578c8b51..85473c99 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -263,42 +263,45 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, vY=y->v_; CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); // fprintf(stderr,"Entry to SpSVDevice: %d %p\n", // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr); if (T_CSRGIsNullSvDescr(cMat)) { cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); cMat->svbsize=0; CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) ); - } - //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", - // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); - CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha,*(cMat->spmvDescr),vecX,vecY, - CUSPARSE_BASE_TYPE, - CUSPARSE_SPSV_ALG_DEFAULT, - *(cMat->spsvDescr), - &bfsz)); - if (bfsz > cMat->svbsize) { - if (cMat->svbuffer != NULL) { - CHECK_CUDA(cudaFree(cMat->svbuffer)); - cMat->svbuffer = NULL; + //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); + CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,*(cMat->spmvDescr),vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + &bfsz)); + if (bfsz > cMat->svbsize) { + if (cMat->svbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->svbuffer)); + cMat->svbuffer = NULL; + } + CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + cMat->svbsize=bfsz; + CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + *(cMat->spmvDescr), + vecX, vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + cMat->svbuffer)); + } + if (T_CSRGIsNullSvBuffer(cMat)) { + fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); } - CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); - cMat->svbsize=bfsz; - CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, - *(cMat->spmvDescr), - vecX, vecY, - CUSPARSE_BASE_TYPE, - CUSPARSE_SPSV_ALG_DEFAULT, - *(cMat->spsvDescr), - cMat->svbuffer)); - } - if (T_CSRGIsNullSvBuffer(cMat)) { - fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); } - T_CSRGCreateSpMVDescr(cMat); CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha,*(cMat->spmvDescr),vecX,vecY, CUSPARSE_BASE_TYPE, From 1bc2a884e21c2c5a8d96fd5192181fb07a38a518 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 15 Dec 2023 10:52:48 +0100 Subject: [PATCH 026/116] Adjust conditional compilation on CUDA version --- cuda/fcusparse_fct.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 85473c99..52df7b78 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -313,7 +313,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, #endif } - +#if CUDA_VERSION >= 11030 T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) { int64_t tr,tc,tz; @@ -330,7 +330,7 @@ T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) CUSPARSE_INDEX_BASE_ONE, CUSPARSE_BASE_TYPE) ); } - +#endif int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) { T_CSRGDeviceMat *cMat; From 20a01d4d71df474faea3e5fed5b518c1de8bef9e Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 19 Dec 2023 11:54:42 -0500 Subject: [PATCH 027/116] Attempt at fixing CSRG in CUDA 10.2. Not complete yet. --- cuda/fcusparse_fct.h | 13 +++++++------ cuda/impl/psb_c_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_d_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_s_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- cuda/impl/psb_z_cuda_csrg_to_gpu.F90 | 20 ++++++++++---------- 5 files changed, 47 insertions(+), 46 deletions(-) diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 52df7b78..06facdc0 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -141,15 +141,16 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, cusparseHandle_t *my_handle=getHandle(); TYPE ealpha=alpha, ebeta=beta; #if CUDA_SHORT_VERSION <= 10 - /*getAddrMultiVecDevice(deviceX, &vX); - getAddrMultiVecDevice(deviceY, &vY); */ + /* getAddrMultiVecDevice(deviceX, &vX); */ + /* getAddrMultiVecDevice(deviceY, &vY); */ vX=x->v_; vY=y->v_; - return cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, - cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, - cMat->val, cMat->irp, cMat->ja, - (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY); + CHECK_CUSPARSE(cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY)); + #elif CUDA_VERSION < 11030 size_t bfsz; vX=x->v_; diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 index 8e7d25a9..cc3fbaaf 100644 --- a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 index 4ecb0bbc..d7a1b1e7 100644 --- a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 index 246e780d..cc5b9c8d 100644 --- a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 index 41c96f68..56943f37 100644 --- a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -55,7 +55,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) if (c_associated(a%deviceMat%Mat)) then info = CSRGDeviceFree(a%deviceMat) end if -#if CUDA_SHORT_VERSION <= 10 +#if (CUDA_SHORT_VERSION <= 10 ) if (a%is_unit()) then ! ! CUSPARSE has the habit of storing the diagonal and then ignoring, @@ -74,7 +74,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) !!! We are explicitly adding the diagonal !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else @@ -114,15 +114,15 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) - if (info == 0) then - if (a%is_unit()) then - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) - else - info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) - end if - end if +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if if ((info == 0) .and. a%is_triangle()) then - info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) if ((info == 0).and.a%is_upper()) then info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) else From e9d1238b43c3f1c6b9689920701ead8d65ce8d0f Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 20 Dec 2023 13:30:09 +0100 Subject: [PATCH 028/116] Add detailed measurements. --- base/psblas/psb_dspmm.f90 | 39 ++++++++++++++++++++++++++++--------- test/kernel/pdgenspmv.f90 | 27 +++++++++++++++---------- test/pargen/psb_d_pde3d.F90 | 4 ++-- test/pargen/runs/ppde.inp | 2 +- 4 files changed, 50 insertions(+), 22 deletions(-) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index 7888188a..780b4d24 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_dspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -184,18 +200,22 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& logical, parameter :: do_timings=.true. real(psb_dpk_) :: t1, t2, t3, t4, t5 if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() + if (do_timings) call psb_tic(mv_phase1) if (doswap_) call psi_swapdata(psb_swap_send_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t2= psb_wtime() + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) t3= psb_wtime() + if (do_timings) call psb_toc(mv_phase2) + if (do_timings) call psb_tic(mv_phase3) if (doswap_) call psi_swapdata(psb_swap_recv_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) if (do_timings) t4= psb_wtime() call a%and%spmm(alpha,x%v,done,y%v,info) - if (do_timings) t5= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + if (do_timings) call psb_toc(mv_phase4) + end block else @@ -203,15 +223,16 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& logical, parameter :: do_timings=.true. real(psb_dpk_) :: t1, t2, t3, t4, t5 if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() + + if (do_timings) call psb_tic(mv_phase11) if (doswap_) then call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) end if - if (do_timings) t2= psb_wtime() + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) call psb_csmm(alpha,a,x,beta,y,info) - if (do_timings) t3= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2 + if (do_timings) call psb_toc(mv_phase12) end block end if diff --git a/test/kernel/pdgenspmv.f90 b/test/kernel/pdgenspmv.f90 index d5fd9ba4..b7204edd 100644 --- a/test/kernel/pdgenspmv.f90 +++ b/test/kernel/pdgenspmv.f90 @@ -142,7 +142,7 @@ contains ! the rhs. ! subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& - & f,amold,vmold,imold,partition,nrl,iv) + & f,amold,vmold,imold,partition,nrl,iv,tnd) use psb_base_mod use psb_util_mod ! @@ -173,7 +173,7 @@ contains class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) - + logical, optional :: tnd ! Local variables. integer(psb_ipk_), parameter :: nb=20 @@ -202,6 +202,7 @@ contains real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb integer(psb_ipk_) :: err_act procedure(d_func_3d), pointer :: f_ + logical :: tnd_ character(len=20) :: name, ch_err,tmpfmt info = psb_success_ @@ -495,9 +496,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) end if end if call psb_barrier(ctxt) @@ -549,13 +550,14 @@ program pdgenspmv use psb_base_mod use psb_util_mod use psb_d_pde3d_mod + implicit none ! input parameters character(len=20) :: kmethd, ptype character(len=5) :: afmt integer(psb_ipk_) :: idim - + logical :: tnd ! miscellaneous real(psb_dpk_), parameter :: one = done real(psb_dpk_) :: t1, t2, tprec, flops, tflops, tt1, tt2, bdwdth @@ -606,14 +608,14 @@ program pdgenspmv ! ! get parameters ! - call get_parms(ctxt,afmt,idim) - + call get_parms(ctxt,afmt,idim,tnd) + call psb_init_timers() ! ! allocate and fill in the coefficient matrix, rhs and initial guess ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info) + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,tnd=tnd) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -694,7 +696,7 @@ program pdgenspmv write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize end if - + call psb_print_timers(ctxt) ! ! cleanup storage and exit @@ -721,10 +723,11 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,afmt,idim) + subroutine get_parms(ctxt,afmt,idim,tnd) type(psb_ctxt_type) :: ctxt character(len=*) :: afmt integer(psb_ipk_) :: idim + logical :: tnd integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: intbuf(10), ip @@ -733,9 +736,11 @@ contains if (iam == 0) then read(psb_inp_unit,*) afmt read(psb_inp_unit,*) idim + read(psb_inp_unit,*) tnd endif call psb_bcast(ctxt,afmt) call psb_bcast(ctxt,idim) + call psb_bcast(ctxt,tnd) if (iam == 0) then write(psb_out_unit,'("Testing matrix : ell1")') @@ -743,6 +748,8 @@ contains write(psb_out_unit,'("Number of processors : ",i0)')np write(psb_out_unit,'("Data distribution : BLOCK")') write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage format ",a)') afmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd end if return diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index 62bb8b40..4748569c 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -868,8 +868,8 @@ program psb_d_pde3d call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - - call psb_exit(ctxt) + call psb_print_timers(ctxt) + call psb_exit(ctxt) stop 9999 call psb_error(ctxt) diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index cf7179ac..470bcf58 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -5,7 +5,7 @@ CSR Storage format for matrix A: CSR COO 200 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 -0008 MAXIT +0200 MAXIT 10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL INVK Block Solver ILU,ILUT,INVK,AINVT,AORTH From be7571f56868362159e9ad6ea459434009fae794 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 20 Dec 2023 13:45:43 +0100 Subject: [PATCH 029/116] Fix missing directive --- base/serial/impl/psb_c_csr_impl.F90 | 1 + base/serial/impl/psb_d_csr_impl.F90 | 1 + base/serial/impl/psb_s_csr_impl.F90 | 1 + base/serial/impl/psb_z_csr_impl.F90 | 1 + 4 files changed, 4 insertions(+) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index f6426f49..6c21f639 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -4310,6 +4310,7 @@ contains end subroutine csr_spspmm end subroutine psb_ccsrspspmm +#endif subroutine psb_c_ecsr_mold(a,b,info) use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index 40d97bc0..9f1d509c 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -4310,6 +4310,7 @@ contains end subroutine csr_spspmm end subroutine psb_dcsrspspmm +#endif subroutine psb_d_ecsr_mold(a,b,info) use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index abce0086..a4e1ab82 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -4310,6 +4310,7 @@ contains end subroutine csr_spspmm end subroutine psb_scsrspspmm +#endif subroutine psb_s_ecsr_mold(a,b,info) use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index b550e8f1..28ac121e 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -4310,6 +4310,7 @@ contains end subroutine csr_spspmm end subroutine psb_zcsrspspmm +#endif subroutine psb_z_ecsr_mold(a,b,info) use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold From 49e99a3e82a49d78726bc0b82eda0e07b5eb80db Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 22 Dec 2023 12:01:41 +0100 Subject: [PATCH 030/116] Fix conversion and product to enable overlap with GPU --- base/psblas/psb_dspmm.f90 | 1 + base/serial/impl/psb_d_mat_impl.F90 | 313 ++++++++++++++++++++-------- 2 files changed, 231 insertions(+), 83 deletions(-) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index 780b4d24..8e48c4c2 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -199,6 +199,7 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& block logical, parameter :: do_timings=.true. real(psb_dpk_) :: t1, t2, t3, t4, t5 + !write(0,*) 'Going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() if (do_timings) call psb_barrier(ctxt) if (do_timings) call psb_tic(mv_phase1) if (doswap_) call psi_swapdata(psb_swap_send_,& diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index 2a6fb9a5..caf725d1 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -1246,54 +1246,66 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then - - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_d_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_d_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_d_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) +!!$ if (present(mold)) then +!!$ +!!$ allocate(altmp, mold=mold,stat=info) +!!$ +!!$ else if (present(type)) then +!!$ +!!$ select case (psb_toupper(type)) +!!$ case ('CSR') +!!$ allocate(psb_d_csr_sparse_mat :: altmp, stat=info) +!!$ case ('COO') +!!$ allocate(psb_d_coo_sparse_mat :: altmp, stat=info) +!!$ case ('CSC') +!!$ allocate(psb_d_csc_sparse_mat :: altmp, stat=info) +!!$ case default +!!$ info = psb_err_format_unknown_ +!!$ call psb_errpush(info,name,a_err=type) +!!$ goto 9999 +!!$ end select +!!$ else +!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) +!!$ end if +!!$ +!!$ if (info /= psb_success_) then +!!$ info = psb_err_alloc_dealloc_ +!!$ call psb_errpush(info,name) +!!$ goto 9999 +!!$ end if +!!$ +!!$ +!!$ if (present(dupl)) then +!!$ call altmp%set_dupl(dupl) +!!$ else if (a%is_bld()) then +!!$ ! Does this make sense at all?? Who knows.. +!!$ call altmp%set_dupl(psb_dupl_def_) +!!$ end if +!!$ +!!$ if (debug) write(psb_err_unit,*) 'Converting from ',& +!!$ & a%get_fmt(),' to ',altmp%get_fmt() +!!$ +!!$ call altmp%cp_from_fmt(a%a, info) +!!$ +!!$ if (info /= psb_success_) then +!!$ info = psb_err_from_subroutine_ +!!$ call psb_errpush(info,name,a_err="mv_from") +!!$ goto 9999 +!!$ end if +!!$ +!!$ call move_alloc(altmp,b%a) + call inner_cp_alloc(a%a,b%a,info,type,mold) + if (info /= 0) goto 9999 + if (allocated(a%ad)) then + call inner_cp_alloc(a%ad,b%ad,info,type,mold) + if (info /= 0) goto 9999 end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) + if (allocated(a%and)) then + call inner_cp_alloc(a%and,b%and,info,type,mold) + if (info /= 0) goto 9999 end if - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%cp_from_fmt(a%a, info) - - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 - end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,6 +1315,69 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_alloc(a,b,info,type,mold) + class(psb_d_base_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + character(len=*), optional, intent(in) :: type + class(psb_d_base_sparse_mat), intent(in), optional :: mold + + class(psb_d_base_sparse_mat), allocatable :: altmp + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else +!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_alloc end subroutine psb_d_cscnv @@ -1345,46 +1420,57 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then - - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_d_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_d_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_d_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) +!!$ if (present(mold)) then +!!$ +!!$ allocate(altmp, mold=mold,stat=info) +!!$ +!!$ else if (present(type)) then +!!$ +!!$ select case (psb_toupper(type)) +!!$ case ('CSR') +!!$ allocate(psb_d_csr_sparse_mat :: altmp, stat=info) +!!$ case ('COO') +!!$ allocate(psb_d_coo_sparse_mat :: altmp, stat=info) +!!$ case ('CSC') +!!$ allocate(psb_d_csc_sparse_mat :: altmp, stat=info) +!!$ case default +!!$ info = psb_err_format_unknown_ +!!$ call psb_errpush(info,name,a_err=type) +!!$ goto 9999 +!!$ end select +!!$ else +!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) +!!$ end if +!!$ +!!$ if (info /= psb_success_) then +!!$ info = psb_err_alloc_dealloc_ +!!$ call psb_errpush(info,name) +!!$ goto 9999 +!!$ end if +!!$ +!!$ if (debug) write(psb_err_unit,*) 'Converting in-place from ',& +!!$ & a%get_fmt(),' to ',altmp%get_fmt() +!!$ +!!$ call altmp%mv_from_fmt(a%a, info) +!!$ +!!$ if (info /= psb_success_) then +!!$ info = psb_err_from_subroutine_ +!!$ call psb_errpush(info,name,a_err="mv_from") +!!$ goto 9999 +!!$ end if +!!$ +!!$ call move_alloc(altmp,a%a) + + call inner_mv_alloc(a%a,info,type,mold) + if (info /= 0) goto 9999 + if (allocated(a%ad)) then + call inner_mv_alloc(a%ad,info,type,mold) + if (info /= 0) goto 9999 end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + if (allocated(a%and)) then + call inner_mv_alloc(a%and,info,type,mold) + if (info /= 0) goto 9999 end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 - end if - - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,7 +1480,68 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return - +contains + subroutine inner_mv_alloc(a,info,type,mold) + class(psb_d_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + character(len=*), optional, intent(in) :: type + class(psb_d_base_sparse_mat), intent(in), optional :: mold + + class(psb_d_base_sparse_mat), allocatable :: altmp + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else +!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_alloc end subroutine psb_d_cscnv_ip From 4d051c777d52eea024c1e6da36483b98e650ba77 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 22 Dec 2023 12:01:59 +0100 Subject: [PATCH 031/116] Fix makefile and test program --- test/cudakern/Makefile | 5 ++++- test/cudakern/dpdegenmv.F90 | 38 +++++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile index fdd4f588..5d938973 100755 --- a/test/cudakern/Makefile +++ b/test/cudakern/Makefile @@ -24,9 +24,12 @@ DPGOBJS=dpdegenmv.o SPGOBJS=spdegenmv.o EXEDIR=./runs -all: pgen file +all: dir pgen file pgen: dpdegenmv spdegenmv file: s_file_spmv c_file_spmv d_file_spmv z_file_spmv +dpdegenmv spdegenmv s_file_spmv c_file_spmv d_file_spmv z_file_spmv: dir +dir: + (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) dpdegenmv: $(DPGOBJS) $(FLINK) $(LOPT) $(DPGOBJS) -fopenmp -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index db845d71..85059e81 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -70,6 +70,16 @@ contains ! ! functions parametrizing the differential equation ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! function b1(x,y,z) use psb_base_mod, only : psb_dpk_, done, dzero implicit none @@ -138,7 +148,7 @@ contains ! the rhs. ! subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& - & f,amold,vmold,imold,partition,nrl,iv) + & f,amold,vmold,imold,partition,nrl,iv,tnd) use psb_base_mod use psb_util_mod ! @@ -169,7 +179,7 @@ contains class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) - + logical, optional :: tnd ! Local variables. integer(psb_ipk_), parameter :: nb=20 @@ -198,6 +208,7 @@ contains real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb integer(psb_ipk_) :: err_act procedure(d_func_3d), pointer :: f_ + logical :: tnd_ character(len=20) :: name, ch_err,tmpfmt info = psb_success_ @@ -492,9 +503,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) end if end if call psb_barrier(ctxt) @@ -559,7 +570,7 @@ program pdgenmv ! input parameters character(len=5) :: acfmt, agfmt integer :: idim - + logical :: tnd ! miscellaneous real(psb_dpk_), parameter :: one = 1.d0 real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& @@ -646,14 +657,14 @@ program pdgenmv ! ! get parameters ! - call get_parms(ctxt,acfmt,agfmt,idim) - + call get_parms(ctxt,acfmt,agfmt,idim,tnd) + call psb_init_timers() ! ! allocate and fill in the coefficient matrix and initial vectors ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3) + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3,tnd=tnd) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -935,6 +946,7 @@ program pdgenmv write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize end if + call psb_print_timers(ctxt) ! ! cleanup storage and exit @@ -962,10 +974,11 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,acfmt,agfmt,idim) + subroutine get_parms(ctxt,acfmt,agfmt,idim,tnd) type(psb_ctxt_type) :: ctxt character(len=*) :: agfmt, acfmt integer :: idim + logical :: tnd integer :: np, iam integer :: intbuf(10), ip @@ -978,17 +991,22 @@ contains read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim + write(*,*) 'Try comm/comp overlap?' + read(psb_inp_unit,*) tnd endif call psb_bcast(ctxt,acfmt) call psb_bcast(ctxt,agfmt) call psb_bcast(ctxt,idim) - + call psb_bcast(ctxt,tnd) + if (iam == 0) then write(psb_out_unit,'("Testing matrix : ell1")') write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim write(psb_out_unit,'("Number of processors : ",i0)')np write(psb_out_unit,'("Data distribution : BLOCK")') write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage formats ",a)') acfmt,' ',agfmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd end if return From 3aa3c795e98fc2a8a4dc9f8ee3aa36e74f13fef8 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Sat, 23 Dec 2023 13:15:01 +0100 Subject: [PATCH 032/116] Refactor assembly and cnv --- base/modules/serial/psb_c_mat_mod.F90 | 15 +- base/modules/serial/psb_d_mat_mod.F90 | 15 +- base/modules/serial/psb_s_mat_mod.F90 | 15 +- base/modules/serial/psb_z_mat_mod.F90 | 15 +- base/psblas/psb_cspmm.f90 | 74 ++++-- base/psblas/psb_dspmm.f90 | 71 +++-- base/psblas/psb_sspmm.f90 | 74 ++++-- base/psblas/psb_zspmm.f90 | 74 ++++-- base/serial/impl/psb_c_mat_impl.F90 | 356 ++++++++++++++++++++----- base/serial/impl/psb_d_mat_impl.F90 | 369 +++++++++++++++----------- base/serial/impl/psb_s_mat_impl.F90 | 356 ++++++++++++++++++++----- base/serial/impl/psb_z_mat_impl.F90 | 356 ++++++++++++++++++++----- base/tools/psb_cspasb.f90 | 73 ++--- base/tools/psb_dspasb.f90 | 73 ++--- base/tools/psb_sspasb.f90 | 73 ++--- base/tools/psb_zspasb.f90 | 73 ++--- 16 files changed, 1457 insertions(+), 625 deletions(-) diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index aa891381..ee819535 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -204,6 +204,7 @@ module psb_c_mat_mod procedure, pass(a) :: cscnv_ip => psb_c_cscnv_ip procedure, pass(a) :: cscnv_base => psb_c_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_c_split_nd procedure, pass(a) :: clone => psb_cspmat_clone procedure, pass(a) :: move_alloc => psb_cspmat_type_move ! @@ -842,6 +843,18 @@ module psb_c_mat_mod ! ! + interface + subroutine psb_c_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + end subroutine psb_c_split_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -861,7 +874,6 @@ module psb_c_mat_mod end subroutine psb_c_cscnv end interface - interface subroutine psb_c_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat @@ -873,7 +885,6 @@ module psb_c_mat_mod end subroutine psb_c_cscnv_ip end interface - interface subroutine psb_c_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index c647e76b..82d2e822 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -204,6 +204,7 @@ module psb_d_mat_mod procedure, pass(a) :: cscnv_ip => psb_d_cscnv_ip procedure, pass(a) :: cscnv_base => psb_d_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_d_split_nd procedure, pass(a) :: clone => psb_dspmat_clone procedure, pass(a) :: move_alloc => psb_dspmat_type_move ! @@ -842,6 +843,18 @@ module psb_d_mat_mod ! ! + interface + subroutine psb_d_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + end subroutine psb_d_split_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -861,7 +874,6 @@ module psb_d_mat_mod end subroutine psb_d_cscnv end interface - interface subroutine psb_d_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat @@ -873,7 +885,6 @@ module psb_d_mat_mod end subroutine psb_d_cscnv_ip end interface - interface subroutine psb_d_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index 3e6b286a..d8a2e6ae 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -204,6 +204,7 @@ module psb_s_mat_mod procedure, pass(a) :: cscnv_ip => psb_s_cscnv_ip procedure, pass(a) :: cscnv_base => psb_s_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_s_split_nd procedure, pass(a) :: clone => psb_sspmat_clone procedure, pass(a) :: move_alloc => psb_sspmat_type_move ! @@ -842,6 +843,18 @@ module psb_s_mat_mod ! ! + interface + subroutine psb_s_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + end subroutine psb_s_split_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -861,7 +874,6 @@ module psb_s_mat_mod end subroutine psb_s_cscnv end interface - interface subroutine psb_s_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat @@ -873,7 +885,6 @@ module psb_s_mat_mod end subroutine psb_s_cscnv_ip end interface - interface subroutine psb_s_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index 148e9ab9..694d4efc 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -204,6 +204,7 @@ module psb_z_mat_mod procedure, pass(a) :: cscnv_ip => psb_z_cscnv_ip procedure, pass(a) :: cscnv_base => psb_z_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_z_split_nd procedure, pass(a) :: clone => psb_zspmat_clone procedure, pass(a) :: move_alloc => psb_zspmat_type_move ! @@ -842,6 +843,18 @@ module psb_z_mat_mod ! ! + interface + subroutine psb_z_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + end subroutine psb_z_split_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -861,7 +874,6 @@ module psb_z_mat_mod end subroutine psb_z_cscnv end interface - interface subroutine psb_z_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat @@ -873,7 +885,6 @@ module psb_z_mat_mod end subroutine psb_z_cscnv_ip end interface - interface subroutine psb_z_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index 25a6bc56..22c6408f 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_cspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,34 +194,44 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - + if (allocated(a%ad)) then block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_send_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t2= psb_wtime() - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) t3= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t4= psb_wtime() - call a%and%spmm(alpha,x%v,cone,y%v,info) - if (do_timings) t5= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 - end block - - else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,cone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if if(info /= psb_success_) then diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index 8e48c4c2..fa256276 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -194,48 +194,45 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - + if (allocated(a%ad)) then block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - !write(0,*) 'Going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() - if (do_timings) call psb_barrier(ctxt) - if (do_timings) call psb_tic(mv_phase1) - if (doswap_) call psi_swapdata(psb_swap_send_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) call psb_toc(mv_phase1) - if (do_timings) call psb_tic(mv_phase2) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) call psb_toc(mv_phase2) - if (do_timings) call psb_tic(mv_phase3) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) call psb_toc(mv_phase3) - if (do_timings) call psb_tic(mv_phase4) - if (do_timings) t4= psb_wtime() - call a%and%spmm(alpha,x%v,done,y%v,info) - if (do_timings) call psb_toc(mv_phase4) - - end block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,done,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block else block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - if (do_timings) call psb_barrier(ctxt) - - if (do_timings) call psb_tic(mv_phase11) - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - if (do_timings) call psb_toc(mv_phase11) - if (do_timings) call psb_tic(mv_phase12) - call psb_csmm(alpha,a,x,beta,y,info) - if (do_timings) call psb_toc(mv_phase12) - end block - end if + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block + end if if(info /= psb_success_) then info = psb_err_from_subroutine_non_ diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index cf8919f0..6c723831 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_sspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,34 +194,44 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - + if (allocated(a%ad)) then block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_send_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t2= psb_wtime() - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) t3= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t4= psb_wtime() - call a%and%spmm(alpha,x%v,sone,y%v,info) - if (do_timings) t5= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 - end block - - else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,sone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if if(info /= psb_success_) then diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index 629fcf2b..179e4fad 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_zspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,34 +194,44 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - + if (allocated(a%ad)) then block - logical, parameter :: do_timings=.true. - real(psb_dpk_) :: t1, t2, t3, t4, t5 - if (do_timings) call psb_barrier(ctxt) - if (do_timings) t1= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_send_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t2= psb_wtime() - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (do_timings) t3= psb_wtime() - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - if (do_timings) t4= psb_wtime() - call a%and%spmm(alpha,x%v,zone,y%v,info) - if (do_timings) t5= psb_wtime() - if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 - end block - - else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - end if - - call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,zone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if if(info /= psb_success_) then diff --git a/base/serial/impl/psb_c_mat_impl.F90 b/base/serial/impl/psb_c_mat_impl.F90 index df5c4cd9..bbac0406 100644 --- a/base/serial/impl/psb_c_mat_impl.F90 +++ b/base/serial/impl/psb_c_mat_impl.F90 @@ -1213,6 +1213,56 @@ subroutine psb_c_b_csclip(a,b,info,& end subroutine psb_c_b_csclip +subroutine psb_c_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_c_mat_mod, psb_protect_name => psb_c_split_nd + implicit none + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + type(psb_c_coo_sparse_mat) :: acoo + type(psb_c_csr_sparse_mat), allocatable :: aclip + type(psb_c_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_split_nd + subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1296,65 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_c_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_c_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_c_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1364,79 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_c_base_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_c_base_sparse_mat), intent(in), optional :: mold + + class(psb_c_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_c_cscnv subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1445,12 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) use psb_c_mat_mod, psb_protect_name => psb_c_cscnv_ip implicit none - class(psb_cspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_c_base_sparse_mat), intent(in), optional :: mold - class(psb_c_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1477,55 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_c_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_c_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_c_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1535,77 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_c_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_c_base_sparse_mat), intent(in), optional :: mold + class(psb_c_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_c_cscnv_ip diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index caf725d1..9af64b3f 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -1213,6 +1213,56 @@ subroutine psb_d_b_csclip(a,b,info,& end subroutine psb_d_b_csclip +subroutine psb_d_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_d_mat_mod, psb_protect_name => psb_d_split_nd + implicit none + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat), allocatable :: aclip + type(psb_d_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_split_nd + subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,65 +1296,64 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if -!!$ if (present(mold)) then -!!$ -!!$ allocate(altmp, mold=mold,stat=info) -!!$ -!!$ else if (present(type)) then -!!$ -!!$ select case (psb_toupper(type)) -!!$ case ('CSR') -!!$ allocate(psb_d_csr_sparse_mat :: altmp, stat=info) -!!$ case ('COO') -!!$ allocate(psb_d_coo_sparse_mat :: altmp, stat=info) -!!$ case ('CSC') -!!$ allocate(psb_d_csc_sparse_mat :: altmp, stat=info) -!!$ case default -!!$ info = psb_err_format_unknown_ -!!$ call psb_errpush(info,name,a_err=type) -!!$ goto 9999 -!!$ end select -!!$ else -!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) -!!$ end if -!!$ -!!$ if (info /= psb_success_) then -!!$ info = psb_err_alloc_dealloc_ -!!$ call psb_errpush(info,name) -!!$ goto 9999 -!!$ end if -!!$ -!!$ -!!$ if (present(dupl)) then -!!$ call altmp%set_dupl(dupl) -!!$ else if (a%is_bld()) then -!!$ ! Does this make sense at all?? Who knows.. -!!$ call altmp%set_dupl(psb_dupl_def_) -!!$ end if -!!$ -!!$ if (debug) write(psb_err_unit,*) 'Converting from ',& -!!$ & a%get_fmt(),' to ',altmp%get_fmt() -!!$ -!!$ call altmp%cp_from_fmt(a%a, info) -!!$ -!!$ if (info /= psb_success_) then -!!$ info = psb_err_from_subroutine_ -!!$ call psb_errpush(info,name,a_err="mv_from") -!!$ goto 9999 -!!$ end if -!!$ -!!$ call move_alloc(altmp,b%a) - call inner_cp_alloc(a%a,b%a,info,type,mold) - if (info /= 0) goto 9999 - if (allocated(a%ad)) then - call inner_cp_alloc(a%ad,b%ad,info,type,mold) - if (info /= 0) goto 9999 - end if - if (allocated(a%and)) then - call inner_cp_alloc(a%and,b%and,info,type,mold) - if (info /= 0) goto 9999 - end if + if (.false.) then + if (present(mold)) then + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a%a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if + end if call b%trim() call b%set_asb() @@ -1316,24 +1365,26 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) return contains - subroutine inner_cp_alloc(a,b,info,type,mold) + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) class(psb_d_base_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl character(len=*), optional, intent(in) :: type class(psb_d_base_sparse_mat), intent(in), optional :: mold - + class(psb_d_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act info = psb_success_ call psb_erractionsave(err_act) - + if (present(mold)) then - + allocate(altmp, mold=mold,stat=info) - + else if (present(type)) then - + select case (psb_toupper(type)) case ('CSR') allocate(psb_d_csr_sparse_mat :: altmp, stat=info) @@ -1347,38 +1398,45 @@ contains goto 9999 end select else -!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) end if - + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& & a%get_fmt(),' to ',altmp%get_fmt() - + call altmp%cp_from_fmt(a, info) - + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - + call move_alloc(altmp,b) - + call psb_erractionrestore(err_act) return - - + + 9999 call psb_error_handler(err_act) - - return - end subroutine inner_cp_alloc + return + end subroutine inner_cp_fmt end subroutine psb_d_cscnv subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) @@ -1387,13 +1445,12 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) use psb_d_mat_mod, psb_protect_name => psb_d_cscnv_ip implicit none - class(psb_dspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_d_base_sparse_mat), intent(in), optional :: mold - class(psb_d_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1420,57 +1477,55 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if -!!$ if (present(mold)) then -!!$ -!!$ allocate(altmp, mold=mold,stat=info) -!!$ -!!$ else if (present(type)) then -!!$ -!!$ select case (psb_toupper(type)) -!!$ case ('CSR') -!!$ allocate(psb_d_csr_sparse_mat :: altmp, stat=info) -!!$ case ('COO') -!!$ allocate(psb_d_coo_sparse_mat :: altmp, stat=info) -!!$ case ('CSC') -!!$ allocate(psb_d_csc_sparse_mat :: altmp, stat=info) -!!$ case default -!!$ info = psb_err_format_unknown_ -!!$ call psb_errpush(info,name,a_err=type) -!!$ goto 9999 -!!$ end select -!!$ else -!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) -!!$ end if -!!$ -!!$ if (info /= psb_success_) then -!!$ info = psb_err_alloc_dealloc_ -!!$ call psb_errpush(info,name) -!!$ goto 9999 -!!$ end if -!!$ -!!$ if (debug) write(psb_err_unit,*) 'Converting in-place from ',& -!!$ & a%get_fmt(),' to ',altmp%get_fmt() -!!$ -!!$ call altmp%mv_from_fmt(a%a, info) -!!$ -!!$ if (info /= psb_success_) then -!!$ info = psb_err_from_subroutine_ -!!$ call psb_errpush(info,name,a_err="mv_from") -!!$ goto 9999 -!!$ end if -!!$ -!!$ call move_alloc(altmp,a%a) - - call inner_mv_alloc(a%a,info,type,mold) - if (info /= 0) goto 9999 - if (allocated(a%ad)) then - call inner_mv_alloc(a%ad,info,type,mold) - if (info /= 0) goto 9999 + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) + else + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - if (allocated(a%and)) then - call inner_mv_alloc(a%and,info,type,mold) - if (info /= 0) goto 9999 + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 end if + call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1481,23 +1536,24 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) return contains - subroutine inner_mv_alloc(a,info,type,mold) - class(psb_d_base_sparse_mat), intent(inout), allocatable :: a + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_d_base_sparse_mat), intent(inout), allocatable :: a integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl character(len=*), optional, intent(in) :: type class(psb_d_base_sparse_mat), intent(in), optional :: mold - class(psb_d_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act info = psb_success_ call psb_erractionsave(err_act) - + if (present(mold)) then - + allocate(altmp, mold=mold,stat=info) - + else if (present(type)) then - + select case (psb_toupper(type)) case ('CSR') allocate(psb_d_csr_sparse_mat :: altmp, stat=info) @@ -1511,37 +1567,46 @@ contains goto 9999 end select else -!!$ allocate(altmp, mold=psb_get_mat_default(a),stat=info) allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) end if - + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& & a%get_fmt(),' to ',altmp%get_fmt() - + call altmp%mv_from_fmt(a, info) - + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - + call move_alloc(altmp,a) - + call psb_erractionrestore(err_act) return - - + + 9999 call psb_error_handler(err_act) - + return - end subroutine inner_mv_alloc + end subroutine inner_mv_fmt + end subroutine psb_d_cscnv_ip diff --git a/base/serial/impl/psb_s_mat_impl.F90 b/base/serial/impl/psb_s_mat_impl.F90 index ce7ce653..c0370774 100644 --- a/base/serial/impl/psb_s_mat_impl.F90 +++ b/base/serial/impl/psb_s_mat_impl.F90 @@ -1213,6 +1213,56 @@ subroutine psb_s_b_csclip(a,b,info,& end subroutine psb_s_b_csclip +subroutine psb_s_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_s_mat_mod, psb_protect_name => psb_s_split_nd + implicit none + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat), allocatable :: aclip + type(psb_s_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_split_nd + subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1296,65 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_s_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_s_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_s_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1364,79 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_s_base_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_s_base_sparse_mat), intent(in), optional :: mold + + class(psb_s_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_s_cscnv subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1445,12 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) use psb_s_mat_mod, psb_protect_name => psb_s_cscnv_ip implicit none - class(psb_sspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_s_base_sparse_mat), intent(in), optional :: mold - class(psb_s_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1477,55 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_s_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_s_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_s_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1535,77 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_s_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_s_base_sparse_mat), intent(in), optional :: mold + class(psb_s_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_s_cscnv_ip diff --git a/base/serial/impl/psb_z_mat_impl.F90 b/base/serial/impl/psb_z_mat_impl.F90 index 2cebf9e7..20815cb0 100644 --- a/base/serial/impl/psb_z_mat_impl.F90 +++ b/base/serial/impl/psb_z_mat_impl.F90 @@ -1213,6 +1213,56 @@ subroutine psb_z_b_csclip(a,b,info,& end subroutine psb_z_b_csclip +subroutine psb_z_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_z_mat_mod, psb_protect_name => psb_z_split_nd + implicit none + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + type(psb_z_coo_sparse_mat) :: acoo + type(psb_z_csr_sparse_mat), allocatable :: aclip + type(psb_z_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_split_nd + subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1296,65 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_z_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_z_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_z_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1364,79 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_z_base_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_z_base_sparse_mat), intent(in), optional :: mold + + class(psb_z_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_z_cscnv subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1445,12 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) use psb_z_mat_mod, psb_protect_name => psb_z_cscnv_ip implicit none - class(psb_zspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_z_base_sparse_mat), intent(in), optional :: mold - class(psb_z_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1477,55 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_z_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_z_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_z_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1535,77 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_z_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_z_base_sparse_mat), intent(in), optional :: mold + class(psb_z_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_z_cscnv_ip diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 8263e309..db8af75a 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -178,41 +178,44 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) end if if (bld_and_) then - block - character(len=1024) :: fname - type(psb_c_coo_sparse_mat) :: acoo - type(psb_c_csr_sparse_mat), allocatable :: aclip - type(psb_c_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.true. - allocate(aclip) - call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - allocate(a%ad,mold=a%a) - call a%ad%mv_from_coo(acoo,info) - call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - if (use_ecsr) then - allocate(andclip) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) - else - allocate(a%and,mold=a%a) - call a%and%mv_from_coo(acoo,info) - end if - if (.false.) then - write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' - open(25,file=fname) - call a%ad%print(25) - close(25) - write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' - open(25,file=fname) - call a%and%print(25) - close(25) - !call andclip%set_cols(n_col) - write(*,*) me,' ',trim(name),' ad ',& - &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col - write(*,*) me,' ',trim(name),' and ',& - &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col - end if - end block +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_c_coo_sparse_mat) :: acoo +!!$ type(psb_c_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_c_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 6beb0e6f..236568a1 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -178,41 +178,44 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) end if if (bld_and_) then - block - character(len=1024) :: fname - type(psb_d_coo_sparse_mat) :: acoo - type(psb_d_csr_sparse_mat), allocatable :: aclip - type(psb_d_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.true. - allocate(aclip) - call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - allocate(a%ad,mold=a%a) - call a%ad%mv_from_coo(acoo,info) - call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - if (use_ecsr) then - allocate(andclip) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) - else - allocate(a%and,mold=a%a) - call a%and%mv_from_coo(acoo,info) - end if - if (.false.) then - write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' - open(25,file=fname) - call a%ad%print(25) - close(25) - write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' - open(25,file=fname) - call a%and%print(25) - close(25) - !call andclip%set_cols(n_col) - write(*,*) me,' ',trim(name),' ad ',& - &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col - write(*,*) me,' ',trim(name),' and ',& - &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col - end if - end block +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_d_coo_sparse_mat) :: acoo +!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index f273c7f4..110097c5 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -178,41 +178,44 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) end if if (bld_and_) then - block - character(len=1024) :: fname - type(psb_s_coo_sparse_mat) :: acoo - type(psb_s_csr_sparse_mat), allocatable :: aclip - type(psb_s_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.true. - allocate(aclip) - call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - allocate(a%ad,mold=a%a) - call a%ad%mv_from_coo(acoo,info) - call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - if (use_ecsr) then - allocate(andclip) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) - else - allocate(a%and,mold=a%a) - call a%and%mv_from_coo(acoo,info) - end if - if (.false.) then - write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' - open(25,file=fname) - call a%ad%print(25) - close(25) - write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' - open(25,file=fname) - call a%and%print(25) - close(25) - !call andclip%set_cols(n_col) - write(*,*) me,' ',trim(name),' ad ',& - &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col - write(*,*) me,' ',trim(name),' and ',& - &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col - end if - end block +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_s_coo_sparse_mat) :: acoo +!!$ type(psb_s_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_s_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index 1a381303..2cb53368 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -178,41 +178,44 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) end if if (bld_and_) then - block - character(len=1024) :: fname - type(psb_z_coo_sparse_mat) :: acoo - type(psb_z_csr_sparse_mat), allocatable :: aclip - type(psb_z_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.true. - allocate(aclip) - call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) - allocate(a%ad,mold=a%a) - call a%ad%mv_from_coo(acoo,info) - call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) - if (use_ecsr) then - allocate(andclip) - call andclip%mv_from_coo(acoo,info) - call move_alloc(andclip,a%and) - else - allocate(a%and,mold=a%a) - call a%and%mv_from_coo(acoo,info) - end if - if (.false.) then - write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' - open(25,file=fname) - call a%ad%print(25) - close(25) - write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' - open(25,file=fname) - call a%and%print(25) - close(25) - !call andclip%set_cols(n_col) - write(*,*) me,' ',trim(name),' ad ',& - &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col - write(*,*) me,' ',trim(name),' and ',& - &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col - end if - end block +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_z_coo_sparse_mat) :: acoo +!!$ type(psb_z_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_z_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) From 097d63147aa9a0c15f2e069865e7881c660e8724 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 24 Jan 2024 17:58:39 +0100 Subject: [PATCH 033/116] Fix cuda dir makefile --- cuda/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cuda/Makefile b/cuda/Makefile index 0f03e359..49c5f0cb 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -47,7 +47,10 @@ COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o OBJS=$(COBJS) $(FOBJS) -lib: objs +lib: objs ilib cudalib spgpulib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs /bin/cp -p *$(.mod) $(MODDIR) @@ -60,9 +63,6 @@ spgpuobjs: spgpulib: $(MAKE) -C spgpu lib -lib: ilib cudalib spgpulib - ar cur $(LIBNAME) $(OBJS) - /bin/cp -p $(LIBNAME) $(LIBDIR) dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o From 6433dc797efac808b0628222d328bcc90d86fe8a Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 2 Feb 2024 15:27:42 +0100 Subject: [PATCH 034/116] Fix CUDA implementation of %set_scal and %zero --- cuda/psb_c_cuda_vect_mod.F90 | 4 +--- cuda/psb_d_cuda_vect_mod.F90 | 4 +--- cuda/psb_i_cuda_vect_mod.F90 | 4 +--- cuda/psb_s_cuda_vect_mod.F90 | 4 +--- cuda/psb_z_cuda_vect_mod.F90 | 4 +--- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index c140dadb..9d84d23b 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -668,8 +668,7 @@ contains implicit none class(psb_c_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=czero - call x%set_host() + call x%set_scal(czero) end subroutine c_cuda_zero subroutine c_cuda_asb_m(n, x, info) @@ -807,7 +806,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 44381c99..120bdce1 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -668,8 +668,7 @@ contains implicit none class(psb_d_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero - call x%set_host() + call x%set_scal(dzero) end subroutine d_cuda_zero subroutine d_cuda_asb_m(n, x, info) @@ -807,7 +806,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 index df8f4113..051c4bd0 100644 --- a/cuda/psb_i_cuda_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -651,8 +651,7 @@ contains implicit none class(psb_i_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=izero - call x%set_host() + call x%set_scal(izero) end subroutine i_cuda_zero subroutine i_cuda_asb_m(n, x, info) @@ -790,7 +789,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 7778eb50..996ec46d 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -668,8 +668,7 @@ contains implicit none class(psb_s_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=szero - call x%set_host() + call x%set_scal(szero) end subroutine s_cuda_zero subroutine s_cuda_asb_m(n, x, info) @@ -807,7 +806,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 53484911..9da93415 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -668,8 +668,7 @@ contains implicit none class(psb_z_vect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=zzero - call x%set_host() + call x%set_scal(zzero) end subroutine z_cuda_zero subroutine z_cuda_asb_m(n, x, info) @@ -807,7 +806,6 @@ contains if (present(first)) first_ = max(1,first) if (present(last)) last_ = min(last,last_) - if (x%is_host()) call x%sync() info = setScalDevice(val,first_,last_,1,x%deviceVect) call x%set_dev() From 14c4ff0f32ab7f6a92f964e124f1035919644e5d Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 12 Feb 2024 14:15:19 +0100 Subject: [PATCH 035/116] Added new methd for two combined axpbys --- base/modules/psblas/psb_c_psblas_mod.F90 | 14 +++++++++++++ base/modules/psblas/psb_d_psblas_mod.F90 | 14 +++++++++++++ base/modules/psblas/psb_s_psblas_mod.F90 | 14 +++++++++++++ base/modules/psblas/psb_z_psblas_mod.F90 | 14 +++++++++++++ base/modules/serial/psb_c_base_vect_mod.F90 | 19 ++++++++++++++++++ base/modules/serial/psb_c_vect_mod.F90 | 18 +++++++++++++++++ base/modules/serial/psb_d_base_vect_mod.F90 | 19 ++++++++++++++++++ base/modules/serial/psb_d_vect_mod.F90 | 18 +++++++++++++++++ base/modules/serial/psb_s_base_vect_mod.F90 | 19 ++++++++++++++++++ base/modules/serial/psb_s_vect_mod.F90 | 18 +++++++++++++++++ base/modules/serial/psb_z_base_vect_mod.F90 | 19 ++++++++++++++++++ base/modules/serial/psb_z_vect_mod.F90 | 18 +++++++++++++++++ cuda/psb_c_cuda_vect_mod.F90 | 22 +++++++++++++++++++++ cuda/psb_d_cuda_vect_mod.F90 | 22 +++++++++++++++++++++ cuda/psb_s_cuda_vect_mod.F90 | 22 +++++++++++++++++++++ cuda/psb_z_cuda_vect_mod.F90 | 22 +++++++++++++++++++++ test/pargen/psb_d_pde3d.F90 | 8 ++++---- 17 files changed, 296 insertions(+), 4 deletions(-) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 98deebd8..d660597a 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface + interface psb_abgdxyx + subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_c_vect_type, psb_cspmat_type + type(psb_c_vect_type), intent (inout) :: x + type(psb_c_vect_type), intent (inout) :: y + type(psb_c_vect_type), intent (inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_cabgdxyz_vect + end interface psb_abgdxyx + interface psb_geamax function psb_camax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_spk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index e4988387..734ed633 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface + interface psb_abgdxyx + subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_vect_type, psb_dspmat_type + type(psb_d_vect_type), intent (inout) :: x + type(psb_d_vect_type), intent (inout) :: y + type(psb_d_vect_type), intent (inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_dabgdxyz_vect + end interface psb_abgdxyx + interface psb_geamax function psb_damax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 93fe74b9..0f7d29e6 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface + interface psb_abgdxyx + subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_s_vect_type, psb_sspmat_type + type(psb_s_vect_type), intent (inout) :: x + type(psb_s_vect_type), intent (inout) :: y + type(psb_s_vect_type), intent (inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_sabgdxyz_vect + end interface psb_abgdxyx + interface psb_geamax function psb_samax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_spk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index 06be1b82..17674600 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface + interface psb_abgdxyx + subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_z_vect_type, psb_zspmat_type + type(psb_z_vect_type), intent (inout) :: x + type(psb_z_vect_type), intent (inout) :: y + type(psb_z_vect_type), intent (inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_zabgdxyz_vect + end interface psb_abgdxyx + interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index df15e0c9..e4f398a7 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,6 +155,8 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => c_base_abgdxyz + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1126,6 +1128,23 @@ contains end subroutine c_base_axpby_a2 + subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + + end subroutine c_base_abgdxyz + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 1a336d11..8b2941ff 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => c_vect_abgdxyz + procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_a => c_vect_mlt_a procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 @@ -771,6 +773,22 @@ contains end subroutine c_vect_axpby_a2 + subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_vect_type), intent(inout) :: x + class(psb_c_vect_type), intent(inout) :: y + class(psb_c_vect_type), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + + end subroutine c_vect_abgdxyz + + subroutine c_vect_mlt_v(x, y, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 87f5b0e4..7ad2d6e7 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,6 +155,8 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => d_base_abgdxyz + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1133,6 +1135,23 @@ contains end subroutine d_base_axpby_a2 + subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_base_vect_type), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + + end subroutine d_base_abgdxyz + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 88fa3262..ef75be87 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => d_vect_abgdxyz + procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_a => d_vect_mlt_a procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 @@ -778,6 +780,22 @@ contains end subroutine d_vect_axpby_a2 + subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_vect_type), intent(inout) :: x + class(psb_d_vect_type), intent(inout) :: y + class(psb_d_vect_type), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + + end subroutine d_vect_abgdxyz + + subroutine d_vect_mlt_v(x, y, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index fccd846b..4e9c0dd3 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,6 +155,8 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => s_base_abgdxyz + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1133,6 +1135,23 @@ contains end subroutine s_base_axpby_a2 + subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_base_vect_type), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + + end subroutine s_base_abgdxyz + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 7a54ecf0..34479856 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => s_vect_abgdxyz + procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_a => s_vect_mlt_a procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 @@ -778,6 +780,22 @@ contains end subroutine s_vect_axpby_a2 + subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_vect_type), intent(inout) :: x + class(psb_s_vect_type), intent(inout) :: y + class(psb_s_vect_type), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + + end subroutine s_vect_abgdxyz + + subroutine s_vect_mlt_v(x, y, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 2a14de21..60c3c854 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,6 +155,8 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => z_base_abgdxyz + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1126,6 +1128,23 @@ contains end subroutine z_base_axpby_a2 + subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_base_vect_type), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + + end subroutine z_base_abgdxyz + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index e8a34859..54ddfebe 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,6 +102,8 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => z_vect_abgdxyz + procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_a => z_vect_mlt_a procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2 @@ -771,6 +773,22 @@ contains end subroutine z_vect_axpby_a2 + subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_vect_type), intent(inout) :: x + class(psb_z_vect_type), intent(inout) :: y + class(psb_z_vect_type), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + + end subroutine z_vect_abgdxyz + + subroutine z_vect_mlt_v(x, y, info) use psi_serial_mod implicit none diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index c140dadb..db988e56 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -90,6 +90,7 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: dot_a => c_cuda_dot_a procedure, pass(y) :: axpby_v => c_cuda_axpby_v procedure, pass(y) :: axpby_a => c_cuda_axpby_a + procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz procedure, pass(y) :: mlt_v => c_cuda_mlt_v procedure, pass(y) :: mlt_a => c_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 @@ -911,6 +912,27 @@ contains end subroutine c_cuda_axpby_v + + subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + call z%psb_c_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) + + end subroutine c_cuda_abgdxyz + + subroutine c_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 44381c99..7f84807b 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -90,6 +90,7 @@ module psb_d_cuda_vect_mod procedure, pass(x) :: dot_a => d_cuda_dot_a procedure, pass(y) :: axpby_v => d_cuda_axpby_v procedure, pass(y) :: axpby_a => d_cuda_axpby_a + procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz procedure, pass(y) :: mlt_v => d_cuda_mlt_v procedure, pass(y) :: mlt_a => d_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 @@ -911,6 +912,27 @@ contains end subroutine d_cuda_axpby_v + + subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + call z%psb_d_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) + + end subroutine d_cuda_abgdxyz + + subroutine d_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 7778eb50..8858c6d9 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -90,6 +90,7 @@ module psb_s_cuda_vect_mod procedure, pass(x) :: dot_a => s_cuda_dot_a procedure, pass(y) :: axpby_v => s_cuda_axpby_v procedure, pass(y) :: axpby_a => s_cuda_axpby_a + procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz procedure, pass(y) :: mlt_v => s_cuda_mlt_v procedure, pass(y) :: mlt_a => s_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 @@ -911,6 +912,27 @@ contains end subroutine s_cuda_axpby_v + + subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + call z%psb_s_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) + + end subroutine s_cuda_abgdxyz + + subroutine s_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 53484911..a7243ff9 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -90,6 +90,7 @@ module psb_z_cuda_vect_mod procedure, pass(x) :: dot_a => z_cuda_dot_a procedure, pass(y) :: axpby_v => z_cuda_axpby_v procedure, pass(y) :: axpby_a => z_cuda_axpby_a + procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz procedure, pass(y) :: mlt_v => z_cuda_mlt_v procedure, pass(y) :: mlt_a => z_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 @@ -911,6 +912,27 @@ contains end subroutine z_cuda_axpby_v + + subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + call z%psb_z_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) + + end subroutine z_cuda_abgdxyz + + subroutine z_cuda_axpby_a(m,alpha, x, beta, y, info) use psi_serial_mod implicit none diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index 4748569c..6e895c00 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -592,9 +592,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold,bld_and=.false.) + call psb_spasb(a,desc_a,info,mold=amold) else - call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.false.) + call psb_spasb(a,desc_a,info,afmt=afmt) end if end if call psb_barrier(ctxt) @@ -868,8 +868,8 @@ program psb_d_pde3d call psb_errpush(info,name,a_err=ch_err) goto 9999 end if - call psb_print_timers(ctxt) - call psb_exit(ctxt) + + call psb_exit(ctxt) stop 9999 call psb_error(ctxt) From 45f00e6e1963142d6532a02007c910d5ab752e97 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 12 Feb 2024 15:10:58 +0100 Subject: [PATCH 036/116] Fixed comments --- base/modules/serial/psb_c_base_vect_mod.F90 | 23 ++++++++++++++++++--- base/modules/serial/psb_d_base_vect_mod.F90 | 23 ++++++++++++++++++--- base/modules/serial/psb_s_base_vect_mod.F90 | 23 ++++++++++++++++++--- base/modules/serial/psb_z_base_vect_mod.F90 | 23 ++++++++++++++++++--- 4 files changed, 80 insertions(+), 12 deletions(-) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index e4f398a7..793df3bc 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -1020,7 +1020,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine c_base_axpby_v(m,alpha, x, beta, y, info) @@ -1049,7 +1049,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1080,7 +1080,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine c_base_axpby_a(m,alpha, x, beta, y, info) @@ -1128,6 +1128,23 @@ contains end subroutine c_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_c_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 7ad2d6e7..29a2ccd8 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -1027,7 +1027,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine d_base_axpby_v(m,alpha, x, beta, y, info) @@ -1056,7 +1056,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1087,7 +1087,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine d_base_axpby_a(m,alpha, x, beta, y, info) @@ -1135,6 +1135,23 @@ contains end subroutine d_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_d_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 4e9c0dd3..61ae27d2 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -1027,7 +1027,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine s_base_axpby_v(m,alpha, x, beta, y, info) @@ -1056,7 +1056,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1087,7 +1087,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine s_base_axpby_a(m,alpha, x, beta, y, info) @@ -1135,6 +1135,23 @@ contains end subroutine s_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_s_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 60c3c854..53f3ea8e 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -1020,7 +1020,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine z_base_axpby_v(m,alpha, x, beta, y, info) @@ -1049,7 +1049,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1080,7 +1080,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine z_base_axpby_a(m,alpha, x, beta, y, info) @@ -1128,6 +1128,23 @@ contains end subroutine z_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_z_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none From ebc7c6b3b40fe21705db6f4d148128ae14410707 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 12 Feb 2024 16:29:48 +0100 Subject: [PATCH 037/116] Fix call to base%abgdxyz --- cuda/psb_c_cuda_vect_mod.F90 | 2 +- cuda/psb_d_cuda_vect_mod.F90 | 2 +- cuda/psb_s_cuda_vect_mod.F90 | 2 +- cuda/psb_z_cuda_vect_mod.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index db988e56..56cc80e6 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -923,7 +923,7 @@ contains complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_c_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) + call z%psb_c_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) !!$ !!$ if (x%is_dev()) call x%sync() !!$ diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 7f84807b..03e65f91 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -923,7 +923,7 @@ contains real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_d_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) + call z%psb_d_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) !!$ !!$ if (x%is_dev()) call x%sync() !!$ diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 8858c6d9..9616b3a6 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -923,7 +923,7 @@ contains real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_s_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) + call z%psb_s_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) !!$ !!$ if (x%is_dev()) call x%sync() !!$ diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index a7243ff9..1153fc61 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -923,7 +923,7 @@ contains complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_z_base_vect_type(m,alpha,beta,gamma,delta,x,y,info) + call z%psb_z_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) !!$ !!$ if (x%is_dev()) call x%sync() !!$ From 83ededd02b36c458c2c2ca23ac50b570d6623c8e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 13 Feb 2024 12:54:37 +0100 Subject: [PATCH 038/116] Implementatino of abgd_xyz --- base/modules/auxil/psi_c_serial_mod.f90 | 13 ++ base/modules/auxil/psi_d_serial_mod.f90 | 13 ++ base/modules/auxil/psi_e_serial_mod.f90 | 13 ++ base/modules/auxil/psi_i2_serial_mod.f90 | 13 ++ base/modules/auxil/psi_m_serial_mod.f90 | 13 ++ base/modules/auxil/psi_s_serial_mod.f90 | 13 ++ base/modules/auxil/psi_z_serial_mod.f90 | 13 ++ base/serial/psi_c_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_d_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_e_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_i2_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_m_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_s_serial_impl.F90 | 225 +++++++++++++++++++++++ base/serial/psi_z_serial_impl.F90 | 225 +++++++++++++++++++++++ 14 files changed, 1666 insertions(+) diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 0fdff04b..6926d6bd 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_cabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 0ce14dbb..42185d21 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_dabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index f0372e01..ffba06fd 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_e_serial_mod end subroutine psi_eaxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_eabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_egthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index 70dd95e1..d61a1146 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i2abgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index cfd1348e..76131d75 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_mabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 25c4a7ef..02b96311 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_sabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index b40cf05a..a86bdd70 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -99,6 +99,19 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_zabgdxyz + end interface psi_abgdxyz + interface psi_gth subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index a3898349..129e8484 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine caxpbyv2 + +subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='cabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == czero) then + if (gamma == czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = czero + end do + else if (delta /= czero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = delta*z(i) + end do + end if + else if (alpha /= czero) then + if (delta == czero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = czero + end do + else if (delta /= czero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= czero) then + + if (alpha == czero) then + + if (delta == czero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = czero ! gamma*y(i) + end do + + else if (delta /= czero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= czero) then + + if (delta == czero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= czero) then + + if (gamma == czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = czero + end do + + else if (delta /= czero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= czero) then + if (delta == czero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = czero + end do + + else if (delta /= czero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= czero) then + if (delta == czero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_cabgdxyz diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index 1b5b1442..bd0c82df 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine daxpbyv2 + +subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='dabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == dzero) then + if (gamma == dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = dzero + end do + else if (delta /= dzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = dzero + end do + else if (delta /= dzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= dzero) then + + if (alpha == dzero) then + + if (delta == dzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = dzero ! gamma*y(i) + end do + + else if (delta /= dzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + + if (delta == dzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= dzero) then + + if (gamma == dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = dzero + end do + + else if (delta /= dzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = dzero + end do + + else if (delta /= dzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_dabgdxyz diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 9cdcdf0e..8b17aeb8 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine eaxpbyv2 + +subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='eabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == ezero) then + if (gamma == ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = ezero + end do + else if (delta /= ezero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = delta*z(i) + end do + end if + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = ezero + end do + else if (delta /= ezero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= ezero) then + + if (alpha == ezero) then + + if (delta == ezero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = ezero ! gamma*y(i) + end do + + else if (delta /= ezero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + + if (delta == ezero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= ezero) then + + if (gamma == ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = ezero + end do + + else if (delta /= ezero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = ezero + end do + + else if (delta /= ezero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_eabgdxyz diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index d25617a9..9a2c36c6 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine i2axpbyv2 + +subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='i2abgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == i2zero) then + if (gamma == i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = i2zero + end do + else if (delta /= i2zero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = delta*z(i) + end do + end if + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = i2zero + end do + else if (delta /= i2zero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= i2zero) then + + if (alpha == i2zero) then + + if (delta == i2zero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = i2zero ! gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + + if (delta == i2zero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= i2zero) then + + if (gamma == i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = i2zero + end do + + else if (delta /= i2zero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = i2zero + end do + + else if (delta /= i2zero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_i2abgdxyz diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 05c8e60f..dd114a45 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine maxpbyv2 + +subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='mabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == mzero) then + if (gamma == mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = mzero + end do + else if (delta /= mzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = mzero + end do + else if (delta /= mzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= mzero) then + + if (alpha == mzero) then + + if (delta == mzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = mzero ! gamma*y(i) + end do + + else if (delta /= mzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + + if (delta == mzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= mzero) then + + if (gamma == mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = mzero + end do + + else if (delta /= mzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = mzero + end do + + else if (delta /= mzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_mabgdxyz diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 26a57e68..8e2dda0f 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine saxpbyv2 + +subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='sabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == szero) then + if (gamma == szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = szero + end do + else if (delta /= szero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = delta*z(i) + end do + end if + else if (alpha /= szero) then + if (delta == szero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = szero + end do + else if (delta /= szero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= szero) then + + if (alpha == szero) then + + if (delta == szero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = szero ! gamma*y(i) + end do + + else if (delta /= szero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= szero) then + + if (delta == szero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= szero) then + + if (gamma == szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = szero + end do + + else if (delta /= szero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= szero) then + if (delta == szero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = szero + end do + + else if (delta /= szero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= szero) then + if (delta == szero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_sabgdxyz diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 0b15b2d6..c6a7e01d 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1567,3 +1567,228 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine zaxpbyv2 + +subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='zabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == zzero) then + if (gamma == zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = zzero + end do + else if (delta /= zzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = zzero + end do + else if (delta /= zzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= zzero) then + + if (alpha == zzero) then + + if (delta == zzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = zzero ! gamma*y(i) + end do + + else if (delta /= zzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + + if (delta == zzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= zzero) then + + if (gamma == zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = zzero + end do + + else if (delta /= zzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = zzero + end do + + else if (delta /= zzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_zabgdxyz From 6c53b6ec79dce2d6d1d0c985f11f015622b9f7fd Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 13 Feb 2024 15:48:43 +0100 Subject: [PATCH 039/116] Fix typo in interface for psb_abgdxyz --- base/modules/psblas/psb_c_psblas_mod.F90 | 4 ++-- base/modules/psblas/psb_d_psblas_mod.F90 | 4 ++-- base/modules/psblas/psb_s_psblas_mod.F90 | 4 ++-- base/modules/psblas/psb_z_psblas_mod.F90 | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index d660597a..7f7f937c 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,7 +143,7 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface - interface psb_abgdxyx + interface psb_abgdxyz subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & @@ -155,7 +155,7 @@ module psb_c_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_cabgdxyz_vect - end interface psb_abgdxyx + end interface psb_abgdxyz interface psb_geamax function psb_camax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index 734ed633..12090956 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,7 +143,7 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface - interface psb_abgdxyx + interface psb_abgdxyz subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & @@ -155,7 +155,7 @@ module psb_d_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_dabgdxyz_vect - end interface psb_abgdxyx + end interface psb_abgdxyz interface psb_geamax function psb_damax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 0f7d29e6..7a7ce783 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,7 +143,7 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface - interface psb_abgdxyx + interface psb_abgdxyz subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & @@ -155,7 +155,7 @@ module psb_s_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_sabgdxyz_vect - end interface psb_abgdxyx + end interface psb_abgdxyz interface psb_geamax function psb_samax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index 17674600..bcfe9caa 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,7 +143,7 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface - interface psb_abgdxyx + interface psb_abgdxyz subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & @@ -155,7 +155,7 @@ module psb_z_psblas_mod type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info end subroutine psb_zabgdxyz_vect - end interface psb_abgdxyx + end interface psb_abgdxyz interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) From 29669b56a24868b42c9bee2836fae05f9a57f480 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 13 Feb 2024 16:07:06 +0100 Subject: [PATCH 040/116] Implementation of psb_abgdxyz --- base/psblas/psb_caxpby.f90 | 82 ++++++++++++++++++++++++++++++++++++++ base/psblas/psb_daxpby.f90 | 82 ++++++++++++++++++++++++++++++++++++++ base/psblas/psb_saxpby.f90 | 82 ++++++++++++++++++++++++++++++++++++++ base/psblas/psb_zaxpby.f90 | 82 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 328 insertions(+) diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index da3dd93b..a41e6ef2 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -741,3 +741,85 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) return end subroutine psb_caddconst_vect + + +subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_c_vect_type, psb_cspmat_type + type(psb_c_vect_type), intent (inout) :: x + type(psb_c_vect_type), intent (inout) :: y + type(psb_c_vect_type), intent (inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_c_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_cabgdxyz_vect + diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index c386f8f2..4805727e 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -741,3 +741,85 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) return end subroutine psb_daddconst_vect + + +subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_vect_type, psb_dspmat_type + type(psb_d_vect_type), intent (inout) :: x + type(psb_d_vect_type), intent (inout) :: y + type(psb_d_vect_type), intent (inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_d_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_dabgdxyz_vect + diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 78f4d01a..581d64cd 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -741,3 +741,85 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) return end subroutine psb_saddconst_vect + + +subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_s_vect_type, psb_sspmat_type + type(psb_s_vect_type), intent (inout) :: x + type(psb_s_vect_type), intent (inout) :: y + type(psb_s_vect_type), intent (inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_s_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_sabgdxyz_vect + diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 2258f38f..df13f242 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -741,3 +741,85 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) return end subroutine psb_zaddconst_vect + + +subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_z_vect_type, psb_zspmat_type + type(psb_z_vect_type), intent (inout) :: x + type(psb_z_vect_type), intent (inout) :: y + type(psb_z_vect_type), intent (inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_z_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_zabgdxyz_vect + From 5c3d5f023582a8b4773ad6df999ee038206d6954 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 13 Feb 2024 16:13:06 +0100 Subject: [PATCH 041/116] Silly bug in abgdxyz implementation --- base/psblas/psb_caxpby.f90 | 4 ++-- base/psblas/psb_daxpby.f90 | 4 ++-- base/psblas/psb_saxpby.f90 | 4 ++-- base/psblas/psb_zaxpby.f90 | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index a41e6ef2..f19f2caf 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -745,8 +745,8 @@ end subroutine psb_caddconst_vect subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - import :: psb_desc_type, psb_spk_, psb_ipk_, & - & psb_c_vect_type, psb_cspmat_type + use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect + implicit none type(psb_c_vect_type), intent (inout) :: x type(psb_c_vect_type), intent (inout) :: y type(psb_c_vect_type), intent (inout) :: z diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 4805727e..690c5080 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -745,8 +745,8 @@ end subroutine psb_daddconst_vect subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - import :: psb_desc_type, psb_dpk_, psb_ipk_, & - & psb_d_vect_type, psb_dspmat_type + use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect + implicit none type(psb_d_vect_type), intent (inout) :: x type(psb_d_vect_type), intent (inout) :: y type(psb_d_vect_type), intent (inout) :: z diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 581d64cd..4b48f363 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -745,8 +745,8 @@ end subroutine psb_saddconst_vect subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - import :: psb_desc_type, psb_spk_, psb_ipk_, & - & psb_s_vect_type, psb_sspmat_type + use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect + implicit none type(psb_s_vect_type), intent (inout) :: x type(psb_s_vect_type), intent (inout) :: y type(psb_s_vect_type), intent (inout) :: z diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index df13f242..6bacacda 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -745,8 +745,8 @@ end subroutine psb_zaddconst_vect subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - import :: psb_desc_type, psb_dpk_, psb_ipk_, & - & psb_z_vect_type, psb_zspmat_type + use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect + implicit none type(psb_z_vect_type), intent (inout) :: x type(psb_z_vect_type), intent (inout) :: y type(psb_z_vect_type), intent (inout) :: z From 3121c435822da38de4e22dfe0a7c99a519728243 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 13 Feb 2024 16:16:13 +0100 Subject: [PATCH 042/116] Silly bug in abgdxyz implementation --- base/psblas/psb_caxpby.f90 | 7 ++++--- base/psblas/psb_daxpby.f90 | 7 ++++--- base/psblas/psb_saxpby.f90 | 7 ++++--- base/psblas/psb_zaxpby.f90 | 7 ++++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index f19f2caf..3351149b 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -757,7 +757,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr character(len=20) :: name, ch_err name='psb_c_addconst_vect' @@ -792,7 +792,8 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ix = ione iy = ione - m = desc_a%get_global_rows() + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() ! check vector correctness call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) @@ -811,7 +812,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 690c5080..8d43b6ac 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -757,7 +757,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr character(len=20) :: name, ch_err name='psb_d_addconst_vect' @@ -792,7 +792,8 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ix = ione iy = ione - m = desc_a%get_global_rows() + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() ! check vector correctness call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) @@ -811,7 +812,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 4b48f363..6a5441cd 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -757,7 +757,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr character(len=20) :: name, ch_err name='psb_s_addconst_vect' @@ -792,7 +792,8 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ix = ione iy = ione - m = desc_a%get_global_rows() + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() ! check vector correctness call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) @@ -811,7 +812,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 6bacacda..75f16ea8 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -757,7 +757,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m + integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr character(len=20) :: name, ch_err name='psb_z_addconst_vect' @@ -792,7 +792,8 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ix = ione iy = ione - m = desc_a%get_global_rows() + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() ! check vector correctness call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) @@ -811,7 +812,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) From 9ced67634dc17248602b5871bbc3e2aa6ccdbdb8 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 14 Feb 2024 08:52:38 +0100 Subject: [PATCH 043/116] Fix KIND for NR in axpby --- base/psblas/psb_caxpby.f90 | 4 ++-- base/psblas/psb_daxpby.f90 | 4 ++-- base/psblas/psb_saxpby.f90 | 4 ++-- base/psblas/psb_zaxpby.f90 | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index 3351149b..7c22bb06 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -756,8 +756,8 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ! locals type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m character(len=20) :: name, ch_err name='psb_c_addconst_vect' diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 8d43b6ac..1de77647 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -756,8 +756,8 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ! locals type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m character(len=20) :: name, ch_err name='psb_d_addconst_vect' diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 6a5441cd..1b1f24e6 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -756,8 +756,8 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ! locals type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m character(len=20) :: name, ch_err name='psb_s_addconst_vect' diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 75f16ea8..0f37a1f4 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -756,8 +756,8 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& ! locals type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np, me,& - & err_act, iix, jjx, iiy, jjy - integer(psb_lpk_) :: ix, ijx, iy, ijy, m, nr + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m character(len=20) :: name, ch_err name='psb_z_addconst_vect' From 4e611bb078d54f1eea74e6439db722414c9c269a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 14 Feb 2024 15:55:55 +0100 Subject: [PATCH 044/116] Enable psi_abgdxyz --- base/modules/serial/psb_c_base_vect_mod.F90 | 15 +++++++++++---- base/modules/serial/psb_c_vect_mod.F90 | 2 +- base/modules/serial/psb_d_base_vect_mod.F90 | 15 +++++++++++---- base/modules/serial/psb_d_vect_mod.F90 | 2 +- base/modules/serial/psb_s_base_vect_mod.F90 | 15 +++++++++++---- base/modules/serial/psb_s_vect_mod.F90 | 2 +- base/modules/serial/psb_z_base_vect_mod.F90 | 15 +++++++++++---- base/modules/serial/psb_z_vect_mod.F90 | 2 +- base/serial/psi_c_serial_impl.F90 | 4 ++-- base/serial/psi_d_serial_impl.F90 | 4 ++-- base/serial/psi_e_serial_impl.F90 | 4 ++-- base/serial/psi_i2_serial_impl.F90 | 4 ++-- base/serial/psi_m_serial_impl.F90 | 4 ++-- base/serial/psi_s_serial_impl.F90 | 4 ++-- base/serial/psi_z_serial_impl.F90 | 4 ++-- 15 files changed, 62 insertions(+), 34 deletions(-) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 793df3bc..a4772103 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -1155,10 +1155,17 @@ contains complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) + if (.false.) then + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + else + if (x%is_dev().and.(alpha/=czero))) call x%sync() + if (y%is_dev().and.(beta/=czero)) call y%sync() + if (z%is_dev().and.(delta/=czero)) call z%sync() + call psi_cabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + end if end subroutine c_base_abgdxyz diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 8b2941ff..2eebb0da 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -1152,7 +1152,7 @@ contains end if end function c_vect_nrm2_weight - + function c_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 29a2ccd8..59b43fce 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -1162,10 +1162,17 @@ contains real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) + if (.false.) then + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + else + if (x%is_dev().and.(alpha/=dzero))) call x%sync() + if (y%is_dev().and.(beta/=dzero)) call y%sync() + if (z%is_dev().and.(delta/=dzero)) call z%sync() + call psi_dabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + end if end subroutine d_base_abgdxyz diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index ef75be87..bbb966ed 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -1159,7 +1159,7 @@ contains end if end function d_vect_nrm2_weight - + function d_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 61ae27d2..dee48ca5 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -1162,10 +1162,17 @@ contains real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) + if (.false.) then + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + else + if (x%is_dev().and.(alpha/=szero))) call x%sync() + if (y%is_dev().and.(beta/=szero)) call y%sync() + if (z%is_dev().and.(delta/=szero)) call z%sync() + call psi_sabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + end if end subroutine s_base_abgdxyz diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 34479856..0ffd199f 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -1159,7 +1159,7 @@ contains end if end function s_vect_nrm2_weight - + function s_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 53f3ea8e..0ab2f945 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -1155,10 +1155,17 @@ contains complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) + if (.false.) then + if (x%is_dev()) call x%sync() + + call y%axpby(m,alpha,x,beta,info) + call z%axpby(m,gamma,y,delta,info) + else + if (x%is_dev().and.(alpha/=zzero))) call x%sync() + if (y%is_dev().and.(beta/=zzero)) call y%sync() + if (z%is_dev().and.(delta/=zzero)) call z%sync() + call psi_zabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + end if end subroutine z_base_abgdxyz diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 54ddfebe..1ea1fd4a 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -1152,7 +1152,7 @@ contains end if end function z_vect_nrm2_weight - + function z_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index 129e8484..557220e5 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == czero) then if (gamma == czero) then if (alpha == czero) then @@ -1773,7 +1773,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= czero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index bd0c82df..d423b401 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == dzero) then if (gamma == dzero) then if (alpha == dzero) then @@ -1773,7 +1773,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= dzero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 8b17aeb8..c7977c35 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == ezero) then if (gamma == ezero) then if (alpha == ezero) then @@ -1773,7 +1773,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= ezero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index 9a2c36c6..ce4aff80 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == i2zero) then if (gamma == i2zero) then if (alpha == i2zero) then @@ -1773,7 +1773,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= i2zero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index dd114a45..8d9d19f4 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == mzero) then if (gamma == mzero) then if (alpha == mzero) then @@ -1773,7 +1773,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= mzero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 8e2dda0f..df251b27 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == szero) then if (gamma == szero) then if (alpha == szero) then @@ -1773,7 +1773,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= szero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index c6a7e01d..44ea5ae7 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1616,7 +1616,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_errpush(info,name,int_err) goto 9999 endif - + if (beta == zzero) then if (gamma == zzero) then if (alpha == zzero) then @@ -1773,7 +1773,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) end do else if (delta /= zzero) then - ! a n b n g n d 0 + ! a n b n g n d n !$omp parallel do private(i) do i=1,m y(i) = alpha*x(i)+beta*y(i) From 2a40b82b5830d17dfaa8a731abe76ec8bae5fdba Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 14 Feb 2024 16:01:16 +0100 Subject: [PATCH 045/116] Fix typo in base_vect_mod --- base/modules/serial/psb_c_base_vect_mod.F90 | 2 +- base/modules/serial/psb_d_base_vect_mod.F90 | 2 +- base/modules/serial/psb_s_base_vect_mod.F90 | 2 +- base/modules/serial/psb_z_base_vect_mod.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index a4772103..b158ac64 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -1161,7 +1161,7 @@ contains call y%axpby(m,alpha,x,beta,info) call z%axpby(m,gamma,y,delta,info) else - if (x%is_dev().and.(alpha/=czero))) call x%sync() + if (x%is_dev().and.(alpha/=czero)) call x%sync() if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() call psi_cabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 59b43fce..f53bc590 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -1168,7 +1168,7 @@ contains call y%axpby(m,alpha,x,beta,info) call z%axpby(m,gamma,y,delta,info) else - if (x%is_dev().and.(alpha/=dzero))) call x%sync() + if (x%is_dev().and.(alpha/=dzero)) call x%sync() if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() call psi_dabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index dee48ca5..12626c72 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -1168,7 +1168,7 @@ contains call y%axpby(m,alpha,x,beta,info) call z%axpby(m,gamma,y,delta,info) else - if (x%is_dev().and.(alpha/=szero))) call x%sync() + if (x%is_dev().and.(alpha/=szero)) call x%sync() if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() call psi_sabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 0ab2f945..fe990a9e 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -1161,7 +1161,7 @@ contains call y%axpby(m,alpha,x,beta,info) call z%axpby(m,gamma,y,delta,info) else - if (x%is_dev().and.(alpha/=zzero))) call x%sync() + if (x%is_dev().and.(alpha/=zzero)) call x%sync() if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() call psi_zabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) From b8f9badf954aec07365af71b4d7fc1209fcc890a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 14 Feb 2024 20:05:52 +0100 Subject: [PATCH 046/116] Fix interface between vect and base_vect%ABGD --- base/modules/serial/psb_c_vect_mod.F90 | 2 +- base/modules/serial/psb_d_vect_mod.F90 | 2 +- base/modules/serial/psb_s_vect_mod.F90 | 2 +- base/modules/serial/psb_z_vect_mod.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 2eebb0da..e0488def 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -784,7 +784,7 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) end subroutine c_vect_abgdxyz diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index bbb966ed..07007452 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -791,7 +791,7 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) end subroutine d_vect_abgdxyz diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 0ffd199f..aa16a04d 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -791,7 +791,7 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) end subroutine s_vect_abgdxyz diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 1ea1fd4a..58bf6b18 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -784,7 +784,7 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) end subroutine z_vect_abgdxyz From f4c7604f610fd91127bc3e68b8467a1638acb301 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 17 Feb 2024 09:40:09 +0100 Subject: [PATCH 047/116] Fix base implementation of abgdxyz to call set_host --- base/modules/serial/psb_c_base_vect_mod.F90 | 2 ++ base/modules/serial/psb_d_base_vect_mod.F90 | 2 ++ base/modules/serial/psb_s_base_vect_mod.F90 | 2 ++ base/modules/serial/psb_z_base_vect_mod.F90 | 2 ++ 4 files changed, 8 insertions(+) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index b158ac64..5a468d55 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -1165,6 +1165,8 @@ contains if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() call psi_cabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() end if end subroutine c_base_abgdxyz diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index f53bc590..8f583cd3 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -1172,6 +1172,8 @@ contains if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() call psi_dabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() end if end subroutine d_base_abgdxyz diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 12626c72..85bb3bda 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -1172,6 +1172,8 @@ contains if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() call psi_sabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() end if end subroutine s_base_abgdxyz diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index fe990a9e..b30b1586 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -1165,6 +1165,8 @@ contains if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() call psi_zabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() end if end subroutine z_base_abgdxyz From a41b209144ed25837f1b8c8196c1e4b87569b02a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 17 Feb 2024 17:18:59 +0100 Subject: [PATCH 048/116] Better AXPBY implementation in CUDA. --- cuda/spgpu/kernels/caxpby.cu | 40 ++++++++++++++++++++++++++++++----- cuda/spgpu/kernels/daxpby.cu | 41 +++++++++++++++++++++++++++++++----- cuda/spgpu/kernels/saxpby.cu | 30 +++++++++++++++++++++++--- cuda/spgpu/kernels/zaxpby.cu | 29 ++++++++++++++++++++++--- 4 files changed, 124 insertions(+), 16 deletions(-) diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index d3d326ef..16eb87ed 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -32,8 +32,9 @@ extern "C" __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; - - if (id < n) + unsigned int gridSize = blockDim.x * gridDim.x; + for ( ; id < n; id +=gridSize) + //if (id,n) { // Since z, x and y are accessed with the same offset by the same thread, // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). @@ -45,7 +46,30 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c } } +#if 1 +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex *z, int n, @@ -55,9 +79,15 @@ void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; - + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - dim3 grid(msize); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } @@ -86,7 +116,7 @@ void spgpuCaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on saxpby"); } - +#endif void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index 83724ce2..a0a163a2 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -16,6 +16,7 @@ #include "cudadebug.h" #include "cudalang.h" +#include extern "C" { @@ -31,8 +32,9 @@ extern "C" __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; - - if (id < n) + unsigned int gridSize = blockDim.x * gridDim.x; + for ( ; id < n; id +=gridSize) + //if (id,n) { // Since z, x and y are accessed with the same offset by the same thread, // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). @@ -44,8 +46,9 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double } } +#if 1 -void spgpuDaxpby_(spgpuHandle_t handle, +void spgpuDaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, @@ -54,9 +57,37 @@ void spgpuDaxpby_(spgpuHandle_t handle, __device double* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else +void spgpuDaxpby_(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - dim3 grid(msize); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } @@ -84,7 +115,7 @@ void spgpuDaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on daxpby"); } - +#endif void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, int n, diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu index 2c46f19e..42e2a7a7 100644 --- a/cuda/spgpu/kernels/saxpby.cu +++ b/cuda/spgpu/kernels/saxpby.cu @@ -30,8 +30,9 @@ extern "C" __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; - - if (id < n) + unsigned int gridSize = blockDim.x * gridDim.x; + for ( ; id < n; id +=gridSize) + //if (id,n) { // Since z, x and y are accessed with the same offset by the same thread, // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). @@ -44,6 +45,29 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp } +#if 1 +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else void spgpuSaxpby_(spgpuHandle_t handle, __device float *z, int n, @@ -83,7 +107,7 @@ void spgpuSaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on saxpby"); } - +#endif void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, diff --git a/cuda/spgpu/kernels/zaxpby.cu b/cuda/spgpu/kernels/zaxpby.cu index 7f9d5797..da438fc2 100644 --- a/cuda/spgpu/kernels/zaxpby.cu +++ b/cuda/spgpu/kernels/zaxpby.cu @@ -33,8 +33,9 @@ extern "C" __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; - - if (id < n) + unsigned int gridSize = blockDim.x * gridDim.x; + for ( ; id < n; id +=gridSize) + //if (id,n) { // Since z, x and y are accessed with the same offset by the same thread, // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). @@ -46,7 +47,29 @@ __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, } } +#if 1 +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + spgpuZaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else void spgpuZaxpby_(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, @@ -86,7 +109,7 @@ void spgpuZaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on daxpby"); } - +#endif void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, From 864872ecacff43d40eed244a1044c0eb293db117 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 17 Feb 2024 17:28:32 +0100 Subject: [PATCH 049/116] Intermediate implementation of abgdxyz on cuda --- cuda/psb_d_cuda_vect_mod.F90 | 39 ++++++++++++++++++++++++++++++------ cuda/spgpu/vector.h | 15 +++++++++++++- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 03e65f91..fe5d3a38 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -923,13 +923,40 @@ contains real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - call z%psb_d_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) -!!$ -!!$ if (x%is_dev()) call x%sync() -!!$ -!!$ call y%axpby(m,alpha,x,beta,info) -!!$ call z%axpby(m,gamma,y,delta,info) + + info = psb_success_ + if (.false.) then + + select type(xx => x) + type is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx Date: Sat, 17 Feb 2024 17:46:09 +0100 Subject: [PATCH 050/116] Intermediate impl of ABGDXYZ --- cuda/psb_d_cuda_vect_mod.F90 | 54 +++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index fe5d3a38..36fac14e 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -922,33 +922,47 @@ contains class(psb_d_vect_cuda), intent(inout) :: z real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done info = psb_success_ if (.false.) then - + gpu_done = .false. select type(xx => x) - type is (psb_d_vect_cuda) - ! Do something different here - if ((beta /= dzero).and.y%is_host())& - & call y%sync() - if (xx%is_host()) call xx%sync() - nx = getMultiVecDeviceSize(xx%deviceVect) - ny = getMultiVecDeviceSize(y%deviceVect) - if ((nx y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= dzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx Date: Sat, 17 Feb 2024 18:20:12 +0100 Subject: [PATCH 051/116] New implementation for ABGDXYZ in CUDA --- cuda/dvectordev.c | 24 +++++++++++ cuda/psb_d_cuda_vect_mod.F90 | 3 +- cuda/psb_d_vectordev_mod.F90 | 13 ++++++ cuda/spgpu/kernels/Makefile | 2 +- cuda/spgpu/kernels/dabgdxyz.cu | 79 ++++++++++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 cuda/spgpu/kernels/dabgdxyz.cu diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 39aa5b2a..785753dd 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -241,6 +241,30 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, return(i); } + +int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + +#if 1 + spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta, + (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_); +#else + for(j=0;jcount_;j++) + spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta, + (double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j); +#endif + return(i); +} + int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 36fac14e..8256eaa0 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -947,7 +947,8 @@ contains if ((nx + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma, double delta, + double* x, double *y, double *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + double t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (beta == 0.0) + t = PREC_DMUL(alpha,x[id]); + else + t = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + if (delta == 0.0) + z[id] = gamma * t; + else + z[id] = PREC_DADD(PREC_DMUL(gamma, t), PREC_DMUL(delta,z[id])); + y[id] = t; + } +} + + +void spgpuDabgdxyz(spgpuHandle_t handle, + int n, + double alpha, + double beta, + double gamma, + double delta, + __device double* x, + __device double* y, + __device double *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + num_mp = deviceProp.multiProcessorCount; + max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuDabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + From f9677bc8920a187bdb0184e2879da80932fe6a55 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 17 Feb 2024 18:42:56 +0100 Subject: [PATCH 052/116] Enabled new CUDA version of ABGDXYZ --- cuda/psb_d_cuda_vect_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 8256eaa0..f2ef2be3 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -927,7 +927,7 @@ contains info = psb_success_ - if (.false.) then + if (.true.) then gpu_done = .false. select type(xx => x) class is (psb_d_vect_cuda) From 1ba8dfc7b7079eed0e86e90539e9b7c7daffe701 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sun, 18 Feb 2024 10:31:32 +0100 Subject: [PATCH 053/116] Switch FOR and IF in AXPBY --- cuda/spgpu/kernels/caxpby.cu | 25 +++++++++++++++---------- cuda/spgpu/kernels/daxpby.cu | 25 +++++++++++++++---------- cuda/spgpu/kernels/saxpby.cu | 25 +++++++++++++++---------- cuda/spgpu/kernels/zaxpby.cu | 25 +++++++++++++++---------- 4 files changed, 60 insertions(+), 40 deletions(-) diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index 16eb87ed..33deecbc 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -33,16 +33,21 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; unsigned int gridSize = blockDim.x * gridDim.x; - for ( ; id < n; id +=gridSize) - //if (id,n) - { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). - - if (cuFloatComplex_isZero(beta)) - z[id] = cuCmulf(alpha,x[id]); - else - z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + if (cuFloatComplex_isZero(beta)) { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = cuCmulf(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + } } } diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index a0a163a2..ce7c0dd4 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -33,16 +33,21 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; unsigned int gridSize = blockDim.x * gridDim.x; - for ( ; id < n; id +=gridSize) - //if (id,n) - { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). - - if (beta == 0.0) - z[id] = PREC_DMUL(alpha,x[id]); - else - z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + if (beta == 0.0) { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = PREC_DMUL(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } } } diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu index 42e2a7a7..36c3cdbe 100644 --- a/cuda/spgpu/kernels/saxpby.cu +++ b/cuda/spgpu/kernels/saxpby.cu @@ -31,16 +31,21 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; unsigned int gridSize = blockDim.x * gridDim.x; - for ( ; id < n; id +=gridSize) - //if (id,n) - { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). - - if (beta == 0.0f) - z[id] = PREC_FMUL(alpha,x[id]); - else - z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + if (beta == 0.0f) { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = PREC_FMUL(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } } } diff --git a/cuda/spgpu/kernels/zaxpby.cu b/cuda/spgpu/kernels/zaxpby.cu index da438fc2..8aec3e17 100644 --- a/cuda/spgpu/kernels/zaxpby.cu +++ b/cuda/spgpu/kernels/zaxpby.cu @@ -34,16 +34,21 @@ __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; unsigned int gridSize = blockDim.x * gridDim.x; - for ( ; id < n; id +=gridSize) - //if (id,n) - { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). - - if (cuDoubleComplex_isZero(beta)) - z[id] = cuCmul(alpha,x[id]); - else - z[id] = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + if (cuDoubleComplex_isZero(beta)) { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = cuCmul(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + z[id] = cuCfma(beta, y[id], cuCmul(alpha, x[id])); + } } } From 35d68aa4e326dc0b11a18cdd4f15ec62ec3802f4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sun, 18 Feb 2024 16:44:37 +0100 Subject: [PATCH 054/116] Reuse calls to getDeviceProperties done at init time --- cuda/spgpu/kernels/caxpby.cu | 29 ++++++++++++++++++++++++----- cuda/spgpu/kernels/dabgdxyz.cu | 8 ++++---- cuda/spgpu/kernels/daxpby.cu | 34 ++++++++++++++++++++++++---------- cuda/spgpu/kernels/saxpby.cu | 33 +++++++++++++++++++++++++++------ cuda/spgpu/kernels/zaxpby.cu | 28 +++++++++++++++++++++++----- 5 files changed, 102 insertions(+), 30 deletions(-) diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index 33deecbc..3e97f75f 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -22,6 +22,9 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" } @@ -29,6 +32,8 @@ extern "C" #define BLOCK_SIZE 512 +#if 1 + __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -51,7 +56,6 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c } } -#if 1 void spgpuCaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, @@ -63,10 +67,8 @@ void spgpuCaxpby(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); @@ -75,6 +77,23 @@ void spgpuCaxpby(spgpuHandle_t handle, } #else + +__global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (cuFloatComplex_isZero(beta)) + z[id] = cuCmulf(alpha,x[id]); + else + z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + } +} + void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex *z, int n, diff --git a/cuda/spgpu/kernels/dabgdxyz.cu b/cuda/spgpu/kernels/dabgdxyz.cu index 525371d3..f2b18e02 100644 --- a/cuda/spgpu/kernels/dabgdxyz.cu +++ b/cuda/spgpu/kernels/dabgdxyz.cu @@ -22,6 +22,8 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); } @@ -65,10 +67,8 @@ void spgpuDabgdxyz(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index ce7c0dd4..fa87d996 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -22,6 +22,9 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" } @@ -29,6 +32,8 @@ extern "C" #define BLOCK_SIZE 512 + +#if 1 __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -36,23 +41,17 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double if (beta == 0.0) { for ( ; id < n; id +=gridSize) { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). z[id] = PREC_DMUL(alpha,x[id]); } } else { for ( ; id < n; id +=gridSize) { - // Since z, x and y are accessed with the same offset by the same thread, - // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); } } } -#if 1 - void spgpuDaxpby(spgpuHandle_t handle, __device double *z, int n, @@ -64,10 +63,8 @@ void spgpuDaxpby(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); @@ -75,6 +72,23 @@ void spgpuDaxpby(spgpuHandle_t handle, spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } #else + +__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0) + z[id] = PREC_DMUL(alpha,x[id]); + else + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } +} + void spgpuDaxpby_(spgpuHandle_t handle, __device double *z, int n, diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu index 36c3cdbe..2f06e39c 100644 --- a/cuda/spgpu/kernels/saxpby.cu +++ b/cuda/spgpu/kernels/saxpby.cu @@ -20,6 +20,9 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" } @@ -27,6 +30,8 @@ extern "C" #define BLOCK_SIZE 512 + +#if 1 __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -49,8 +54,6 @@ __global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alp } } - -#if 1 void spgpuSaxpby(spgpuHandle_t handle, __device float *z, int n, @@ -62,17 +65,35 @@ void spgpuSaxpby(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } + #else + +__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0f) + z[id] = PREC_FMUL(alpha,x[id]); + else + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } +} + + + void spgpuSaxpby_(spgpuHandle_t handle, __device float *z, int n, diff --git a/cuda/spgpu/kernels/zaxpby.cu b/cuda/spgpu/kernels/zaxpby.cu index 8aec3e17..8efc40d2 100644 --- a/cuda/spgpu/kernels/zaxpby.cu +++ b/cuda/spgpu/kernels/zaxpby.cu @@ -23,6 +23,9 @@ extern "C" { #include "core.h" #include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" } @@ -30,6 +33,7 @@ extern "C" #define BLOCK_SIZE 512 +#if 1 __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -52,7 +56,6 @@ __global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, } } -#if 1 void spgpuZaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, @@ -64,10 +67,8 @@ void spgpuZaxpby(spgpuHandle_t handle, int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; int num_mp, max_threads_mp, num_blocks_mp, num_blocks; dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); num_blocks_mp = max_threads_mp/BLOCK_SIZE; num_blocks = num_blocks_mp*num_mp; dim3 grid(num_blocks); @@ -75,6 +76,23 @@ void spgpuZaxpby(spgpuHandle_t handle, spgpuZaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } #else +__global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (cuDoubleComplex_isZero(beta)) + z[id] = cuCmul(alpha,x[id]); + else + z[id] = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + } +} + + void spgpuZaxpby_(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, From 0568a83734e7148c6c8676669009103d0f6c6455 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 19 Feb 2024 10:53:23 +0100 Subject: [PATCH 055/116] Fix ifdef and old code --- cuda/spgpu/kernels/caxpby.cu | 12 ++++-------- cuda/spgpu/kernels/daxpby.cu | 12 ++++-------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu index 3e97f75f..817fdf53 100644 --- a/cuda/spgpu/kernels/caxpby.cu +++ b/cuda/spgpu/kernels/caxpby.cu @@ -78,6 +78,7 @@ void spgpuCaxpby(spgpuHandle_t handle, #else + __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) { int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; @@ -94,6 +95,7 @@ __global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, c } } + void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex *z, int n, @@ -103,15 +105,9 @@ void spgpuCaxpby_(spgpuHandle_t handle, __device cuFloatComplex* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; - int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; - num_blocks_mp = max_threads_mp/BLOCK_SIZE; - num_blocks = num_blocks_mp*num_mp; - dim3 grid(num_blocks); + dim3 grid(msize); spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu index fa87d996..e4823b34 100644 --- a/cuda/spgpu/kernels/daxpby.cu +++ b/cuda/spgpu/kernels/daxpby.cu @@ -89,6 +89,7 @@ __global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double } } + void spgpuDaxpby_(spgpuHandle_t handle, __device double *z, int n, @@ -98,15 +99,9 @@ void spgpuDaxpby_(spgpuHandle_t handle, __device double* x) { int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; - int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - num_mp = deviceProp.multiProcessorCount; - max_threads_mp = deviceProp.maxThreadsPerMultiProcessor; - num_blocks_mp = max_threads_mp/BLOCK_SIZE; - num_blocks = num_blocks_mp*num_mp; - dim3 grid(num_blocks); + dim3 grid(msize); spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); } @@ -134,6 +129,7 @@ void spgpuDaxpby(spgpuHandle_t handle, cudaCheckError("CUDA error on daxpby"); } + #endif void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, From 93c71c43162fb6663cca9c2f0fff0e8f2ff4c47e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 20 Feb 2024 10:25:31 +0100 Subject: [PATCH 056/116] Fix %ZERO() on cuda --- cuda/psb_c_cuda_vect_mod.F90 | 6 ++-- cuda/psb_d_cuda_vect_mod.F90 | 60 ++++++------------------------------ cuda/psb_d_vectordev_mod.F90 | 13 -------- cuda/psb_i_cuda_vect_mod.F90 | 6 ++-- cuda/psb_s_cuda_vect_mod.F90 | 6 ++-- cuda/psb_z_cuda_vect_mod.F90 | 6 ++-- 6 files changed, 21 insertions(+), 76 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 56cc80e6..fca1c616 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -668,9 +668,9 @@ contains use psi_serial_mod implicit none class(psb_c_vect_cuda), intent(inout) :: x - - if (allocated(x%v)) x%v=czero - call x%set_host() + + call x%set_scal(czero) + end subroutine c_cuda_zero subroutine c_cuda_asb_m(n, x, info) diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index f2ef2be3..2220b26c 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -668,9 +668,9 @@ contains use psi_serial_mod implicit none class(psb_d_vect_cuda), intent(inout) :: x - - if (allocated(x%v)) x%v=dzero - call x%set_host() + + call x%set_scal(dzero) + end subroutine d_cuda_zero subroutine d_cuda_asb_m(n, x, info) @@ -922,56 +922,14 @@ contains class(psb_d_vect_cuda), intent(inout) :: z real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nx, ny, nz - logical :: gpu_done - info = psb_success_ + call z%psb_d_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info) +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ +!!$ call y%axpby(m,alpha,x,beta,info) +!!$ call z%axpby(m,gamma,y,delta,info) - if (.true.) then - gpu_done = .false. - select type(xx => x) - class is (psb_d_vect_cuda) - select type(yy => y) - class is (psb_d_vect_cuda) - select type(zz => z) - class is (psb_d_vect_cuda) - ! Do something different here - if ((beta /= dzero).and.yy%is_host())& - & call yy%sync() - if ((delta /= dzero).and.zz%is_host())& - & call zz%sync() - if (xx%is_host()) call xx%sync() - nx = getMultiVecDeviceSize(xx%deviceVect) - ny = getMultiVecDeviceSize(yy%deviceVect) - nz = getMultiVecDeviceSize(zz%deviceVect) - if ((nx Date: Tue, 20 Feb 2024 12:30:07 +0100 Subject: [PATCH 057/116] X_cuda_vect%abgdxyz --- cuda/psb_c_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++------ cuda/psb_d_cuda_vect_mod.F90 | 56 +++++++++++++++++++++++++++---- cuda/psb_i_cuda_vect_mod.F90 | 8 ++--- cuda/psb_s_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++------ cuda/psb_z_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++------ 5 files changed, 216 insertions(+), 40 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index fca1c616..9b3b6fb1 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_c_vect_cuda), intent(inout) :: z complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if ((beta /= czero).and.yy%is_host())& + & call yy%sync() + if ((delta /= czero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_c_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= czero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= czero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_c_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=czero call x%set_host() end subroutine c_cuda_multi_zero diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 2220b26c..c98d66f6 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_d_vect_cuda), intent(inout) :: z real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= dzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_i_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= izero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= izero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1477,7 +1477,7 @@ contains implicit none class(psb_i_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=izero call x%set_host() end subroutine i_cuda_multi_zero diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 80c60bc3..55ed4a7d 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_s_vect_cuda), intent(inout) :: z real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if ((beta /= szero).and.yy%is_host())& + & call yy%sync() + if ((delta /= szero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_s_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= szero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= szero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_s_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=szero call x%set_host() end subroutine s_cuda_multi_zero diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 9f801742..2114723b 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_z_vect_cuda), intent(inout) :: z complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if ((beta /= zzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= zzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_z_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= zzero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= zzero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_z_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=zzero call x%set_host() end subroutine z_cuda_multi_zero From 2a75d677d05da6616103c86e3ba769de16159d34 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 20 Feb 2024 13:04:40 +0100 Subject: [PATCH 058/116] ABGDXYZ in vectordev_mod --- cuda/psb_c_vectordev_mod.F90 | 13 ++++++++++++- cuda/psb_d_vectordev_mod.F90 | 13 ++++++++++++- cuda/psb_s_vectordev_mod.F90 | 13 ++++++++++++- cuda/psb_z_vectordev_mod.F90 | 13 ++++++++++++- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/cuda/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90 index b15b2371..88888f61 100644 --- a/cuda/psb_c_vectordev_mod.F90 +++ b/cuda/psb_c_vectordev_mod.F90 @@ -304,7 +304,6 @@ module psb_c_vectordev_mod end function asumMultiVecDeviceFloatComplex end interface - interface axpbyMultiVecDevice function axpbyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,beta,deviceVecB) & & result(res) bind(c,name='axpbyMultiVecDeviceFloatComplex') @@ -316,6 +315,18 @@ module psb_c_vectordev_mod end function axpbyMultiVecDeviceFloatComplex end interface + interface abgdxyzMultiVecDevice + function abgdxyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,& + & deviceVecY,deviceVecZ) & + & result(res) bind(c,name='abgdxyzMultiVecDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + type(c_float_complex), value :: alpha, beta,gamma,delta + type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ + end function abgdxyzMultiVecDeviceFloatComplex + end interface + interface axyMultiVecDevice function axyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB) & & result(res) bind(c,name='axyMultiVecDeviceFloatComplex') diff --git a/cuda/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90 index 802add96..176e8a6e 100644 --- a/cuda/psb_d_vectordev_mod.F90 +++ b/cuda/psb_d_vectordev_mod.F90 @@ -304,7 +304,6 @@ module psb_d_vectordev_mod end function asumMultiVecDeviceDouble end interface - interface axpbyMultiVecDevice function axpbyMultiVecDeviceDouble(n,alpha,deviceVecA,beta,deviceVecB) & & result(res) bind(c,name='axpbyMultiVecDeviceDouble') @@ -316,6 +315,18 @@ module psb_d_vectordev_mod end function axpbyMultiVecDeviceDouble end interface + interface abgdxyzMultiVecDevice + function abgdxyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,& + & deviceVecY,deviceVecZ) & + & result(res) bind(c,name='abgdxyzMultiVecDeviceDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + type(c_double), value :: alpha, beta,gamma,delta + type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ + end function abgdxyzMultiVecDeviceDouble + end interface + interface axyMultiVecDevice function axyMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB) & & result(res) bind(c,name='axyMultiVecDeviceDouble') diff --git a/cuda/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90 index 3ecabe70..73bb7445 100644 --- a/cuda/psb_s_vectordev_mod.F90 +++ b/cuda/psb_s_vectordev_mod.F90 @@ -304,7 +304,6 @@ module psb_s_vectordev_mod end function asumMultiVecDeviceFloat end interface - interface axpbyMultiVecDevice function axpbyMultiVecDeviceFloat(n,alpha,deviceVecA,beta,deviceVecB) & & result(res) bind(c,name='axpbyMultiVecDeviceFloat') @@ -316,6 +315,18 @@ module psb_s_vectordev_mod end function axpbyMultiVecDeviceFloat end interface + interface abgdxyzMultiVecDevice + function abgdxyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,& + & deviceVecY,deviceVecZ) & + & result(res) bind(c,name='abgdxyzMultiVecDeviceFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + type(c_float), value :: alpha, beta,gamma,delta + type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ + end function abgdxyzMultiVecDeviceFloat + end interface + interface axyMultiVecDevice function axyMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB) & & result(res) bind(c,name='axyMultiVecDeviceFloat') diff --git a/cuda/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90 index 8f07cd56..fa858acc 100644 --- a/cuda/psb_z_vectordev_mod.F90 +++ b/cuda/psb_z_vectordev_mod.F90 @@ -304,7 +304,6 @@ module psb_z_vectordev_mod end function asumMultiVecDeviceDoubleComplex end interface - interface axpbyMultiVecDevice function axpbyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,beta,deviceVecB) & & result(res) bind(c,name='axpbyMultiVecDeviceDoubleComplex') @@ -316,6 +315,18 @@ module psb_z_vectordev_mod end function axpbyMultiVecDeviceDoubleComplex end interface + interface abgdxyzMultiVecDevice + function abgdxyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,& + & deviceVecY,deviceVecZ) & + & result(res) bind(c,name='abgdxyzMultiVecDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: n + type(c_double_complex), value :: alpha, beta,gamma,delta + type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ + end function abgdxyzMultiVecDeviceDoubleComplex + end interface + interface axyMultiVecDevice function axyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB) & & result(res) bind(c,name='axyMultiVecDeviceDoubleComplex') From 2d3773df9887885d4758d4263871c69d0a53c6e4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 20 Feb 2024 13:05:16 +0100 Subject: [PATCH 059/116] CUDA kernels for ABGDXYZ --- cuda/cvectordev.c | 18 ++++++++ cuda/cvectordev.h | 3 ++ cuda/dvectordev.c | 7 --- cuda/dvectordev.h | 2 + cuda/spgpu/kernels/Makefile | 6 +-- cuda/spgpu/kernels/cabgdxyz.cu | 80 ++++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/sabgdxyz.cu | 79 +++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/zabgdxyz.cu | 80 ++++++++++++++++++++++++++++++++++ cuda/spgpu/vector.h | 36 +++++++++++++++ cuda/svectordev.c | 17 ++++++++ cuda/svectordev.h | 2 + cuda/zvectordev.c | 18 ++++++++ cuda/zvectordev.h | 3 ++ 13 files changed, 341 insertions(+), 10 deletions(-) create mode 100644 cuda/spgpu/kernels/cabgdxyz.cu create mode 100644 cuda/spgpu/kernels/sabgdxyz.cu create mode 100644 cuda/spgpu/kernels/zabgdxyz.cu diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index 518154d5..9db5202e 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -255,6 +255,24 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe return(i); } +int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCabgdxyz(handle,n, alpha,beta,gamma,delta, + (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_); + return(i); +} + int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB) { int i = 0; diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h index 27c8984a..fc18e328 100644 --- a/cuda/cvectordev.h +++ b/cuda/cvectordev.h @@ -69,6 +69,9 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY); +int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB, cuFloatComplex beta, void *deviceVecZ); diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index 785753dd..b4ca95f4 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -241,7 +241,6 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, return(i); } - int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) { int j=0, i=0; @@ -254,14 +253,8 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do if ((n > devVecY->size_) || (n>devVecX->size_ )) return SPGPU_UNSUPPORTED; -#if 1 spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta, (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_); -#else - for(j=0;jcount_;j++) - spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta, - (double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j); -#endif return(i); } diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h index 25905c60..81a2e8f6 100644 --- a/cuda/dvectordev.h +++ b/cuda/dvectordev.h @@ -67,6 +67,8 @@ int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA); int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); +int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB, double beta, void *deviceVecZ); diff --git a/cuda/spgpu/kernels/Makefile b/cuda/spgpu/kernels/Makefile index 5ada698a..3e668b4e 100644 --- a/cuda/spgpu/kernels/Makefile +++ b/cuda/spgpu/kernels/Makefile @@ -11,15 +11,15 @@ LIBNAME=$(UP)/libspgpu.a CINCLUDES=-I$(INCDIR) OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \ - cnrm2.o cscal.o cscat.o csetscal.o \ + cnrm2.o cscal.o cscat.o csetscal.o cabgdxyz.o\ dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dabgdxyz.o\ dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \ dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \ ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \ hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \ hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \ - saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o \ - zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o + saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\ + zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o objs: $(OBJS) lib: objs diff --git a/cuda/spgpu/kernels/cabgdxyz.cu b/cuda/spgpu/kernels/cabgdxyz.cu new file mode 100644 index 00000000..00dc6ab4 --- /dev/null +++ b/cuda/spgpu/kernels/cabgdxyz.cu @@ -0,0 +1,80 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex alpha, cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + cuFloatComplex * x, cuFloatComplex *y, cuFloatComplex *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuFloatComplex t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (cuFloatComplex_isZero(beta)) + t = cuCmulf(alpha,x[id]); + else + t = cuCfmaf(alpha, x[id], cuCmulf(beta,y[id])); + if (cuFloatComplex_isZero(delta)) + z[id] = cuCmulf(gamma, t); + else + z[id] = cuCfmafmulf(gamma, t, cuCmulf(delta,z[id])); + y[id] = t; + } +} + + +void spgpuCabgdxyz(spgpuHandle_t handle, + int n, + cuFloatComplex alpha, + cuFloatComplex beta, + cuFloatComplex gamma, + cuFloatComplex delta, + __device cuFloatComplex * x, + __device cuFloatComplex * y, + __device cuFloatComplex *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/sabgdxyz.cu b/cuda/spgpu/kernels/sabgdxyz.cu new file mode 100644 index 00000000..8c137ed3 --- /dev/null +++ b/cuda/spgpu/kernels/sabgdxyz.cu @@ -0,0 +1,79 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, float delta, + float* x, float *y, float *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + float t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (beta == 0.0) + t = PREC_FMUL(alpha,x[id]); + else + t = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + if (delta == 0.0) + z[id] = gamma * t; + else + z[id] = PREC_FADD(PREC_FMUL(gamma, t), PREC_FMUL(delta,z[id])); + y[id] = t; + } +} + + +void spgpuSabgdxyz(spgpuHandle_t handle, + int n, + float alpha, + float beta, + float gamma, + float delta, + __device float* x, + __device float* y, + __device float *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/zabgdxyz.cu b/cuda/spgpu/kernels/zabgdxyz.cu new file mode 100644 index 00000000..48def937 --- /dev/null +++ b/cuda/spgpu/kernels/zabgdxyz.cu @@ -0,0 +1,80 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta, + cuDoubleComplex gamma, cuDoubleComplex delta, + cuDoubleComplex * x, cuDoubleComplex *y, cuDoubleComplex *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuDoubleComplex t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (cuDoubleComplex_isZero(beta)) + t = cuCmul(alpha,x[id]); + else + t = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + if (cuDoubleComplex_isZero(delta)) + z[id] = cuCmul(gamma, t); + else + z[id] = cuCfma(gamma, t, cuCmul(delta,z[id])); + y[id] = t; + } +} + + +void spgpuZabgdxyz(spgpuHandle_t handle, + int n, + cuDoubleComplex alpha, + cuDoubleComplex beta, + cuDoubleComplex gamma, + cuDoubleComplex delta, + __device cuDoubleComplex * x, + __device cuDoubleComplex * y, + __device cuDoubleComplex *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/vector.h b/cuda/spgpu/vector.h index 69ffedf0..9fc3e658 100644 --- a/cuda/spgpu/vector.h +++ b/cuda/spgpu/vector.h @@ -181,6 +181,18 @@ void spgpuSaxpby(spgpuHandle_t handle, float alpha, __device float* x); + +void spgpuSabgdxyz(spgpuHandle_t handle, + int n, + float alpha, + float beta, + float gamma, + float delta, + __device float* x, + __device float *y, + __device float *z) +; + /** * \fn void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x, int count, int pitch) * Computes the single precision z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. @@ -755,6 +767,18 @@ void spgpuCaxpby(spgpuHandle_t handle, cuFloatComplex alpha, __device cuFloatComplex* x); + +void spgpuCabgdxyz(spgpuHandle_t handle, + int n, + cuFloatComplex alpha, + cuFloatComplex beta, + cuFloatComplex gamma, + cuFloatComplex delta, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + __device cuFloatComplex *z) +; + /** * \fn void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x, int count, int pitch) * Computes the single precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. @@ -1034,6 +1058,18 @@ void spgpuZaxpby(spgpuHandle_t handle, cuDoubleComplex alpha, __device cuDoubleComplex* x); + +void spgpuZabgdxyz(spgpuHandle_t handle, + int n, + cuDoubleComplex alpha, + cuDoubleComplex beta, + cuDoubleComplex gamma, + cuDoubleComplex delta, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + __device cuDoubleComplex *z) +; + /** * \fn void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x, int count, int pitch) * Computes the double precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. diff --git a/cuda/svectordev.c b/cuda/svectordev.c index bfa4061a..b84718f5 100644 --- a/cuda/svectordev.c +++ b/cuda/svectordev.c @@ -241,6 +241,23 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, return(i); } +int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSabgdxyz(handle,n, alpha,beta,gamma,delta, + (float*)devVecX->v_,(float*) devVecY->v_,(float*) devVecZ->v_); + return(i); +} + int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; diff --git a/cuda/svectordev.h b/cuda/svectordev.h index bf25fcb1..730f929a 100644 --- a/cuda/svectordev.h +++ b/cuda/svectordev.h @@ -67,6 +67,8 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA); int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); +int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB, float beta, void *deviceVecZ); diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index 0fb1c67e..d1f23f2a 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -234,6 +234,24 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti return(i); } +int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, + cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZabgdxyz(handle,n, alpha,beta,gamma,delta, + (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); + return(i); +} + int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, cuDoubleComplex beta, void* devMultiVecY) { int j=0, i=0; diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h index 96330a7a..4c32f11c 100644 --- a/cuda/zvectordev.h +++ b/cuda/zvectordev.h @@ -77,6 +77,9 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, cuDoubleComplex beta, void* devVecY); +int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, + cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, From d95077ffd66389d11e5782ca76f26432c05cc270 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 20 Feb 2024 13:46:58 +0100 Subject: [PATCH 060/116] Fix typo in vectordev_mod --- cuda/psb_c_vectordev_mod.F90 | 4 +--- cuda/psb_d_vectordev_mod.F90 | 4 +--- cuda/psb_i_vectordev_mod.F90 | 2 -- cuda/psb_s_vectordev_mod.F90 | 4 +--- cuda/psb_z_vectordev_mod.F90 | 4 +--- 5 files changed, 4 insertions(+), 14 deletions(-) diff --git a/cuda/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90 index 88888f61..20a4ac3f 100644 --- a/cuda/psb_c_vectordev_mod.F90 +++ b/cuda/psb_c_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_c_vectordev_mod use psb_base_vectordev_mod @@ -322,7 +320,7 @@ module psb_c_vectordev_mod use iso_c_binding integer(c_int) :: res integer(c_int), value :: n - type(c_float_complex), value :: alpha, beta,gamma,delta + complex(c_float_complex), value :: alpha, beta,gamma,delta type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ end function abgdxyzMultiVecDeviceFloatComplex end interface diff --git a/cuda/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90 index 176e8a6e..080b27fe 100644 --- a/cuda/psb_d_vectordev_mod.F90 +++ b/cuda/psb_d_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_d_vectordev_mod use psb_base_vectordev_mod @@ -322,7 +320,7 @@ module psb_d_vectordev_mod use iso_c_binding integer(c_int) :: res integer(c_int), value :: n - type(c_double), value :: alpha, beta,gamma,delta + real(c_double), value :: alpha, beta,gamma,delta type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ end function abgdxyzMultiVecDeviceDouble end interface diff --git a/cuda/psb_i_vectordev_mod.F90 b/cuda/psb_i_vectordev_mod.F90 index 84037aaf..ce3dc5e1 100644 --- a/cuda/psb_i_vectordev_mod.F90 +++ b/cuda/psb_i_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_i_vectordev_mod use psb_base_vectordev_mod diff --git a/cuda/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90 index 73bb7445..19776cbc 100644 --- a/cuda/psb_s_vectordev_mod.F90 +++ b/cuda/psb_s_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_s_vectordev_mod use psb_base_vectordev_mod @@ -322,7 +320,7 @@ module psb_s_vectordev_mod use iso_c_binding integer(c_int) :: res integer(c_int), value :: n - type(c_float), value :: alpha, beta,gamma,delta + real(c_float), value :: alpha, beta,gamma,delta type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ end function abgdxyzMultiVecDeviceFloat end interface diff --git a/cuda/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90 index fa858acc..07e4ba37 100644 --- a/cuda/psb_z_vectordev_mod.F90 +++ b/cuda/psb_z_vectordev_mod.F90 @@ -28,8 +28,6 @@ ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! - - module psb_z_vectordev_mod use psb_base_vectordev_mod @@ -322,7 +320,7 @@ module psb_z_vectordev_mod use iso_c_binding integer(c_int) :: res integer(c_int), value :: n - type(c_double_complex), value :: alpha, beta,gamma,delta + complex(c_double_complex), value :: alpha, beta,gamma,delta type(c_ptr), value :: deviceVecX, deviceVecY, deviceVecZ end function abgdxyzMultiVecDeviceDoubleComplex end interface From 0e269ed6418dd4e953ce57b6b04ce74b585f5a3e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 20 Feb 2024 13:57:32 +0100 Subject: [PATCH 061/116] typo in Cabgdxyz --- cuda/spgpu/kernels/cabgdxyz.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/spgpu/kernels/cabgdxyz.cu b/cuda/spgpu/kernels/cabgdxyz.cu index 00dc6ab4..a85b3873 100644 --- a/cuda/spgpu/kernels/cabgdxyz.cu +++ b/cuda/spgpu/kernels/cabgdxyz.cu @@ -49,7 +49,7 @@ __global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex alpha, cuFloatComplex if (cuFloatComplex_isZero(delta)) z[id] = cuCmulf(gamma, t); else - z[id] = cuCfmafmulf(gamma, t, cuCmulf(delta,z[id])); + z[id] = cuCfmaf(gamma, t, cuCmulf(delta,z[id])); y[id] = t; } } From b5d5f9766107f1d2b6c5a61885fab6df388b4cda Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 20 Feb 2024 14:04:23 +0100 Subject: [PATCH 062/116] Improve cuda%zero() --- cuda/psb_c_cuda_vect_mod.F90 | 4 +++- cuda/psb_d_cuda_vect_mod.F90 | 4 +++- cuda/psb_i_cuda_vect_mod.F90 | 4 +++- cuda/psb_s_cuda_vect_mod.F90 | 4 +++- cuda/psb_z_cuda_vect_mod.F90 | 4 +++- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 9b3b6fb1..7eee128f 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -668,7 +668,9 @@ contains use psi_serial_mod implicit none class(psb_c_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(czero) end subroutine c_cuda_zero diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index c98d66f6..1e6e9f2a 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -668,7 +668,9 @@ contains use psi_serial_mod implicit none class(psb_d_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(dzero) end subroutine d_cuda_zero diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 index a018713e..903c4a08 100644 --- a/cuda/psb_i_cuda_vect_mod.F90 +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -650,7 +650,9 @@ contains use psi_serial_mod implicit none class(psb_i_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(izero) end subroutine i_cuda_zero diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 55ed4a7d..3497c33e 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -668,7 +668,9 @@ contains use psi_serial_mod implicit none class(psb_s_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(szero) end subroutine s_cuda_zero diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 2114723b..8483544c 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -668,7 +668,9 @@ contains use psi_serial_mod implicit none class(psb_z_vect_cuda), intent(inout) :: x - + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() call x%set_scal(zzero) end subroutine z_cuda_zero From 86be8ebcd0452b108be6580d8af58fa55692a08a Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 4 Mar 2024 16:29:47 +0100 Subject: [PATCH 063/116] New method W%XYZW() --- base/modules/auxil/psi_c_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_d_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_e_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_i2_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_m_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_s_serial_mod.f90 | 14 ++++ base/modules/auxil/psi_z_serial_mod.f90 | 14 ++++ base/modules/serial/psb_c_base_vect_mod.F90 | 44 +++++++++---- base/modules/serial/psb_c_vect_mod.F90 | 17 +++++ base/modules/serial/psb_d_base_vect_mod.F90 | 44 +++++++++---- base/modules/serial/psb_d_vect_mod.F90 | 17 +++++ base/modules/serial/psb_s_base_vect_mod.F90 | 44 +++++++++---- base/modules/serial/psb_s_vect_mod.F90 | 17 +++++ base/modules/serial/psb_z_base_vect_mod.F90 | 44 +++++++++---- base/modules/serial/psb_z_vect_mod.F90 | 17 +++++ base/serial/psi_c_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_d_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_e_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_i2_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_m_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_s_serial_impl.F90 | 72 +++++++++++++++++++++ base/serial/psi_z_serial_impl.F90 | 72 +++++++++++++++++++++ 22 files changed, 790 insertions(+), 56 deletions(-) diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 6926d6bd..38b740a7 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_c_serial_mod end subroutine psi_cabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (inout) :: w(:) + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_cxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 42185d21..1d65c5f6 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_d_serial_mod end subroutine psi_dabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (inout) :: w(:) + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_dxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index ffba06fd..6f4e8c06 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_e_serial_mod end subroutine psi_eabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (inout) :: w(:) + integer(psb_epk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_exyzw + end interface psi_xyzw + interface psi_gth subroutine psi_egthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index d61a1146..ffa14059 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_i2_serial_mod end subroutine psi_i2abgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (inout) :: w(:) + integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i2xyzw + end interface psi_xyzw + interface psi_gth subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index 76131d75..5661fdbf 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_m_serial_mod end subroutine psi_mabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (inout) :: w(:) + integer(psb_mpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_mxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 02b96311..5cc17d58 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_s_serial_mod end subroutine psi_sabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (inout) :: w(:) + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_sxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index a86bdd70..8a3f053d 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -112,6 +112,20 @@ module psi_z_serial_mod end subroutine psi_zabgdxyz end interface psi_abgdxyz + interface psi_xyzw + subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (inout) :: w(:) + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_zxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 5a468d55..41bab5ab 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -156,6 +156,7 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => c_base_abgdxyz + procedure, pass(w) :: xyzw => c_base_xyzw ! ! Vector by vector multiplication. Need all variants @@ -1155,22 +1156,37 @@ contains complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (.false.) then - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) - else - if (x%is_dev().and.(alpha/=czero)) call x%sync() - if (y%is_dev().and.(beta/=czero)) call y%sync() - if (z%is_dev().and.(delta/=czero)) call z%sync() - call psi_cabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) - call y%set_host() - call z%set_host() - end if - + if (x%is_dev().and.(alpha/=czero)) call x%sync() + if (y%is_dev().and.(beta/=czero)) call y%sync() + if (z%is_dev().and.(delta/=czero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + end subroutine c_base_abgdxyz + subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + class(psb_c_base_vect_type), intent(inout) :: w + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=czero)) call x%sync() + if (y%is_dev().and.(b/=czero)) call y%sync() + if (z%is_dev().and.(d/=czero)) call z%sync() + if (w%is_dev().and.(f/=czero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine c_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index e0488def..865f9456 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -103,6 +103,7 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => c_vect_abgdxyz + procedure, pass(z) :: xyzw => c_vect_xyzw procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_a => c_vect_mlt_a @@ -788,6 +789,22 @@ contains end subroutine c_vect_abgdxyz + subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_vect_type), intent(inout) :: x + class(psb_c_vect_type), intent(inout) :: y + class(psb_c_vect_type), intent(inout) :: z + class(psb_c_vect_type), intent(inout) :: w + complex(psb_spk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine c_vect_xyzw + subroutine c_vect_mlt_v(x, y, info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 8f583cd3..1ad1ffa5 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -156,6 +156,7 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => d_base_abgdxyz + procedure, pass(w) :: xyzw => d_base_xyzw ! ! Vector by vector multiplication. Need all variants @@ -1162,22 +1163,37 @@ contains real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (.false.) then - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) - else - if (x%is_dev().and.(alpha/=dzero)) call x%sync() - if (y%is_dev().and.(beta/=dzero)) call y%sync() - if (z%is_dev().and.(delta/=dzero)) call z%sync() - call psi_dabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) - call y%set_host() - call z%set_host() - end if - + if (x%is_dev().and.(alpha/=dzero)) call x%sync() + if (y%is_dev().and.(beta/=dzero)) call y%sync() + if (z%is_dev().and.(delta/=dzero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + end subroutine d_base_abgdxyz + subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_base_vect_type), intent(inout) :: z + class(psb_d_base_vect_type), intent(inout) :: w + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=dzero)) call x%sync() + if (y%is_dev().and.(b/=dzero)) call y%sync() + if (z%is_dev().and.(d/=dzero)) call z%sync() + if (w%is_dev().and.(f/=dzero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine d_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 07007452..55dd8230 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -103,6 +103,7 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => d_vect_abgdxyz + procedure, pass(z) :: xyzw => d_vect_xyzw procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_a => d_vect_mlt_a @@ -795,6 +796,22 @@ contains end subroutine d_vect_abgdxyz + subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_vect_type), intent(inout) :: x + class(psb_d_vect_type), intent(inout) :: y + class(psb_d_vect_type), intent(inout) :: z + class(psb_d_vect_type), intent(inout) :: w + real(psb_dpk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine d_vect_xyzw + subroutine d_vect_mlt_v(x, y, info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 85bb3bda..26b82c31 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -156,6 +156,7 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => s_base_abgdxyz + procedure, pass(w) :: xyzw => s_base_xyzw ! ! Vector by vector multiplication. Need all variants @@ -1162,22 +1163,37 @@ contains real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (.false.) then - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) - else - if (x%is_dev().and.(alpha/=szero)) call x%sync() - if (y%is_dev().and.(beta/=szero)) call y%sync() - if (z%is_dev().and.(delta/=szero)) call z%sync() - call psi_sabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) - call y%set_host() - call z%set_host() - end if - + if (x%is_dev().and.(alpha/=szero)) call x%sync() + if (y%is_dev().and.(beta/=szero)) call y%sync() + if (z%is_dev().and.(delta/=szero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + end subroutine s_base_abgdxyz + subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_base_vect_type), intent(inout) :: z + class(psb_s_base_vect_type), intent(inout) :: w + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=szero)) call x%sync() + if (y%is_dev().and.(b/=szero)) call y%sync() + if (z%is_dev().and.(d/=szero)) call z%sync() + if (w%is_dev().and.(f/=szero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine s_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index aa16a04d..a50b2a0a 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -103,6 +103,7 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => s_vect_abgdxyz + procedure, pass(z) :: xyzw => s_vect_xyzw procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_a => s_vect_mlt_a @@ -795,6 +796,22 @@ contains end subroutine s_vect_abgdxyz + subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_vect_type), intent(inout) :: x + class(psb_s_vect_type), intent(inout) :: y + class(psb_s_vect_type), intent(inout) :: z + class(psb_s_vect_type), intent(inout) :: w + real(psb_spk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine s_vect_xyzw + subroutine s_vect_mlt_v(x, y, info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index b30b1586..a3afc9c1 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -156,6 +156,7 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => z_base_abgdxyz + procedure, pass(w) :: xyzw => z_base_xyzw ! ! Vector by vector multiplication. Need all variants @@ -1155,22 +1156,37 @@ contains complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info - if (.false.) then - if (x%is_dev()) call x%sync() - - call y%axpby(m,alpha,x,beta,info) - call z%axpby(m,gamma,y,delta,info) - else - if (x%is_dev().and.(alpha/=zzero)) call x%sync() - if (y%is_dev().and.(beta/=zzero)) call y%sync() - if (z%is_dev().and.(delta/=zzero)) call z%sync() - call psi_zabgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) - call y%set_host() - call z%set_host() - end if - + if (x%is_dev().and.(alpha/=zzero)) call x%sync() + if (y%is_dev().and.(beta/=zzero)) call y%sync() + if (z%is_dev().and.(delta/=zzero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + end subroutine z_base_abgdxyz + subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_base_vect_type), intent(inout) :: z + class(psb_z_base_vect_type), intent(inout) :: w + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=zzero)) call x%sync() + if (y%is_dev().and.(b/=zzero)) call y%sync() + if (z%is_dev().and.(d/=zzero)) call z%sync() + if (w%is_dev().and.(f/=zzero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine z_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 58bf6b18..21e0c546 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -103,6 +103,7 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 procedure, pass(z) :: abgdxyz => z_vect_abgdxyz + procedure, pass(z) :: xyzw => z_vect_xyzw procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_a => z_vect_mlt_a @@ -788,6 +789,22 @@ contains end subroutine z_vect_abgdxyz + subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_vect_type), intent(inout) :: x + class(psb_z_vect_type), intent(inout) :: y + class(psb_z_vect_type), intent(inout) :: z + class(psb_z_vect_type), intent(inout) :: w + complex(psb_dpk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine z_vect_xyzw + subroutine z_vect_mlt_v(x, y, info) use psi_serial_mod diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index 557220e5..e230a1e0 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_cabgdxyz + +subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (inout) :: w(:) + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='cabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==czero).or.(b==czero).or. & + & (c==czero).or.(d==czero).or.& + & (e==czero).or.(f==czero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_cxyzw diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index d423b401..bf1b2917 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_dabgdxyz + +subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (inout) :: w(:) + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='dabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==dzero).or.(b==dzero).or. & + & (c==dzero).or.(d==dzero).or.& + & (e==dzero).or.(f==dzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_dxyzw diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index c7977c35..911ab4ec 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_eabgdxyz + +subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (inout) :: w(:) + integer(psb_epk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='eabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==ezero).or.(b==ezero).or. & + & (c==ezero).or.(d==ezero).or.& + & (e==ezero).or.(f==ezero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_exyzw diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index ce4aff80..fb42dfcd 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_i2abgdxyz + +subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (inout) :: w(:) + integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='i2abgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==i2zero).or.(b==i2zero).or. & + & (c==i2zero).or.(d==i2zero).or.& + & (e==i2zero).or.(f==i2zero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_i2xyzw diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 8d9d19f4..346fd897 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_mabgdxyz + +subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (inout) :: w(:) + integer(psb_mpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='mabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==mzero).or.(b==mzero).or. & + & (c==mzero).or.(d==mzero).or.& + & (e==mzero).or.(f==mzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_mxyzw diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index df251b27..52f86bcd 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_sabgdxyz + +subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (inout) :: w(:) + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='sabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==szero).or.(b==szero).or. & + & (c==szero).or.(d==szero).or.& + & (e==szero).or.(f==szero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_sxyzw diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 44ea5ae7..7e680273 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1792,3 +1792,75 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) return end subroutine psi_zabgdxyz + +subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (inout) :: w(:) + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='zabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==zzero).or.(b==zzero).or. & + & (c==zzero).or.(d==zzero).or.& + & (e==zzero).or.(f==zzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_zxyzw From a11f328e62785f3e1684fb667b4a512ce7f4e77e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 5 Mar 2024 12:42:21 +0100 Subject: [PATCH 064/116] Added CUDA version of XYZW --- cuda/cvectordev.c | 22 +++++++++++++ cuda/cvectordev.h | 5 +++ cuda/dvectordev.c | 19 +++++++++++ cuda/dvectordev.h | 3 ++ cuda/psb_c_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++++++-- cuda/psb_c_vectordev_mod.F90 | 12 +++++++ cuda/psb_d_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++++++-- cuda/psb_d_vectordev_mod.F90 | 12 +++++++ cuda/psb_s_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++++++-- cuda/psb_s_vectordev_mod.F90 | 12 +++++++ cuda/psb_z_cuda_vect_mod.F90 | 64 ++++++++++++++++++++++++++++++++++-- cuda/psb_z_vectordev_mod.F90 | 12 +++++++ cuda/spgpu/kernels/Makefile | 3 +- cuda/spgpu/vector.h | 46 ++++++++++++++++++++++++++ cuda/svectordev.c | 21 ++++++++++++ cuda/svectordev.h | 3 ++ cuda/zvectordev.c | 24 +++++++++++++- cuda/zvectordev.h | 5 +++ 18 files changed, 445 insertions(+), 10 deletions(-) diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c index 9db5202e..cdfda481 100644 --- a/cuda/cvectordev.c +++ b/cuda/cvectordev.c @@ -273,6 +273,28 @@ int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex return(i); } +int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCxyzw(handle,n, a,b,c,d,e,f, + (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_, + (cuFloatComplex *) devVecZ->v_,(cuFloatComplex *) devVecW->v_); + return(i); +} + int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB) { int i = 0; diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h index fc18e328..62693e27 100644 --- a/cuda/cvectordev.h +++ b/cuda/cvectordev.h @@ -72,6 +72,11 @@ int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, cuFloatComplex gamma, cuFloatComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB, cuFloatComplex beta, void *deviceVecZ); diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c index b4ca95f4..723f48d8 100644 --- a/cuda/dvectordev.c +++ b/cuda/dvectordev.c @@ -258,6 +258,25 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do return(i); } +int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDxyzw(handle,n, a,b,c,d,e,f, + (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_,(double*) devVecW->v_); + return(i); +} + int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h index 81a2e8f6..c2bfa1b5 100644 --- a/cuda/dvectordev.h +++ b/cuda/dvectordev.h @@ -69,6 +69,9 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB, double beta, void *deviceVecZ); diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 7eee128f..727249df 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -914,7 +914,6 @@ contains end subroutine c_cuda_axpby_v - subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none @@ -975,9 +974,70 @@ contains call z%axpby(m,gamma,y,delta,info) end if - end subroutine c_cuda_abgdxyz + subroutine c_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + class(psb_c_vect_cuda), intent(inout) :: w + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, nw + logical :: gpu_done + + info = psb_success_ + + gpu_done = .false. + if ((a==czero).or.(b==czero).or. & + & (c==czero).or.(d==czero).or.& + & (e==czero).or.(f==czero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + select type(xx => x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nxpitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSxyzw(handle,n, a,b,c,d,e,f, + (float*)devVecX->v_,(float*) devVecY->v_, + (float*) devVecZ->v_,(float*) devVecW->v_); + return(i); +} + int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) { int i = 0; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; diff --git a/cuda/svectordev.h b/cuda/svectordev.h index 730f929a..363c0108 100644 --- a/cuda/svectordev.h +++ b/cuda/svectordev.h @@ -69,6 +69,9 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB, float beta, void *deviceVecZ); diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c index d1f23f2a..e9f0cec7 100644 --- a/cuda/zvectordev.c +++ b/cuda/zvectordev.c @@ -251,7 +251,29 @@ int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); return(i); } - + +int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZxyzw(handle,n, a,b,c,d,e,f, + (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_, + (cuDoubleComplex *) devVecZ->v_,(cuDoubleComplex *) devVecW->v_); + return(i); +} + int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, cuDoubleComplex beta, void* devMultiVecY) { int j=0, i=0; diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h index 4c32f11c..ae623bdb 100644 --- a/cuda/zvectordev.h +++ b/cuda/zvectordev.h @@ -80,6 +80,11 @@ int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, void *deviceVecB); int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, From 48455190ecd4e78a43be5a5d6f9c9749cce606a2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 5 Mar 2024 13:57:03 +0100 Subject: [PATCH 065/116] Add GPU version of XYZW --- cuda/spgpu/kernels/cxyzw.cu | 78 +++++++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/dxyzw.cu | 78 +++++++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/sxyzw.cu | 78 +++++++++++++++++++++++++++++++++++++ cuda/spgpu/kernels/zxyzw.cu | 78 +++++++++++++++++++++++++++++++++++++ 4 files changed, 312 insertions(+) create mode 100644 cuda/spgpu/kernels/cxyzw.cu create mode 100644 cuda/spgpu/kernels/dxyzw.cu create mode 100644 cuda/spgpu/kernels/sxyzw.cu create mode 100644 cuda/spgpu/kernels/zxyzw.cu diff --git a/cuda/spgpu/kernels/cxyzw.cu b/cuda/spgpu/kernels/cxyzw.cu new file mode 100644 index 00000000..d2b332b1 --- /dev/null +++ b/cuda/spgpu/kernels/cxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuCxyzw_krn(int n, cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + cuFloatComplex * x, cuFloatComplex *y, + cuFloatComplex *z, cuFloatComplex *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuFloatComplex ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = cuCfmaf(a, x[id], cuCmulf(b,y[id])); + tz = cuCfmaf(c, ty, cuCmulf(d,z[id])); + w[id] = cuCfmaf(e, tz, cuCmulf(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuCxyzw(spgpuHandle_t handle, + int n, + cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + __device cuFloatComplex * x, + __device cuFloatComplex * y, + __device cuFloatComplex * z, + __device cuFloatComplex *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/dxyzw.cu b/cuda/spgpu/kernels/dxyzw.cu new file mode 100644 index 00000000..afd36651 --- /dev/null +++ b/cuda/spgpu/kernels/dxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDxyzw_krn(int n, double a, double b, + double c, double d, + double e, double f, + double * x, double *y, + double *z, double *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + double ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = PREC_DADD(PREC_DADD(a, x[id]), PREC_DMUL(b,y[id])); + tz = PREC_DADD(PREC_DADD(c, ty), PREC_DMUL(d,z[id])); + w[id] = PREC_DADD(PREC_DADD(e, tz), PREC_DMUL(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuDxyzw(spgpuHandle_t handle, + int n, + double a, double b, + double c, double d, + double e, double f, + __device double * x, + __device double * y, + __device double * z, + __device double *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuDxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/sxyzw.cu b/cuda/spgpu/kernels/sxyzw.cu new file mode 100644 index 00000000..9cedd02f --- /dev/null +++ b/cuda/spgpu/kernels/sxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSxyzw_krn(int n, float a, float b, + float c, float d, + float e, float f, + float * x, float *y, + float *z, float *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + float ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = PREC_FADD(PREC_FMUL(a, x[id]), PREC_FMUL(b,y[id])); + tz = PREC_FADD(PREC_FMUL(c, ty), PREC_FMUL(d,z[id])); + w[id] = PREC_FADD(PREC_FMUL(e, tz), PREC_FMUL(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuSxyzw(spgpuHandle_t handle, + int n, + float a, float b, + float c, float d, + float e, float f, + __device float * x, + __device float * y, + __device float * z, + __device float *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/zxyzw.cu b/cuda/spgpu/kernels/zxyzw.cu new file mode 100644 index 00000000..7a61edee --- /dev/null +++ b/cuda/spgpu/kernels/zxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuZxyzw_krn(int n, cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + cuDoubleComplex * x, cuDoubleComplex *y, + cuDoubleComplex *z, cuDoubleComplex *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuDoubleComplex ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = cuCfma(a, x[id], cuCmul(b,y[id])); + tz = cuCfma(c, ty, cuCmul(d,z[id])); + w[id] = cuCfma(e, tz, cuCmul(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuZxyzw(spgpuHandle_t handle, + int n, + cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + __device cuDoubleComplex * x, + __device cuDoubleComplex * y, + __device cuDoubleComplex * z, + __device cuDoubleComplex *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + From e0a4d362fa5c89705bbe1a77c89e0cfab9826af9 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 14 Mar 2024 12:08:57 +0100 Subject: [PATCH 066/116] Define flag TRACK_CUDA_MALLOC --- cuda/cuda_util.c | 10 +++++++++- cuda/fcusparse_fct.h | 15 +++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index c0e5c6e5..3fe61cc0 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -37,11 +37,19 @@ static int hasUVA=-1; static struct cudaDeviceProp *prop=NULL; static spgpuHandle_t psb_cuda_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL; - +#if defined(TRACK_CUDA_MALLOC) +static long long total_cuda_mem = 0; +#endif int allocRemoteBuffer(void** buffer, int count) { cudaError_t err = cudaMalloc(buffer, count); +#if defined(TRACK_CUDA_MALLOC) + total_cuda_mem += count; + fprintf(stderr,"Tracking CUDA allocRemoteBuffer for %ld bytes total %ld address %p\n", + count, total_cuda_mem, *buffer); +#endif + if (err == cudaSuccess) { return SPGPU_SUCCESS; diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 06facdc0..689bdc93 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -178,7 +178,8 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, CHECK_CUDA(cudaFree(cMat->mvbuffer)); cMat->mvbuffer = NULL; } - CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz); cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle, @@ -215,7 +216,9 @@ int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, CHECK_CUDA(cudaFree(cMat->mvbuffer)); cMat->mvbuffer = NULL; } - CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz); + cMat->mvbsize = bfsz; } CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -287,7 +290,9 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, CHECK_CUDA(cudaFree(cMat->svbuffer)); cMat->svbuffer = NULL; } - CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz); + cMat->svbsize=bfsz; CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -382,7 +387,9 @@ int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) /* cMat->svbuffer = NULL; */ /* } */ if (bfsz > 0) { - CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz); + } else { cMat->svbuffer=NULL; } From 472f16f0df839b7ee678b795477daf411e4bd663 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 19 Mar 2024 14:00:03 +0100 Subject: [PATCH 067/116] Fix compilation with --enable-serial --- base/modules/penv/psi_c_collective_mod.F90 | 6 +++--- base/modules/penv/psi_d_collective_mod.F90 | 6 +++--- base/modules/penv/psi_e_collective_mod.F90 | 6 +++--- base/modules/penv/psi_i2_collective_mod.F90 | 6 +++--- base/modules/penv/psi_m_collective_mod.F90 | 6 +++--- base/modules/penv/psi_s_collective_mod.F90 | 6 +++--- base/modules/penv/psi_z_collective_mod.F90 | 6 +++--- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/base/modules/penv/psi_c_collective_mod.F90 b/base/modules/penv/psi_c_collective_mod.F90 index 8da302d0..80a4b6a1 100644 --- a/base/modules/penv/psi_c_collective_mod.F90 +++ b/base/modules/penv/psi_c_collective_mod.F90 @@ -107,7 +107,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -175,7 +175,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -245,7 +245,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_d_collective_mod.F90 b/base/modules/penv/psi_d_collective_mod.F90 index 9639d650..67f95f55 100644 --- a/base/modules/penv/psi_d_collective_mod.F90 +++ b/base/modules/penv/psi_d_collective_mod.F90 @@ -747,7 +747,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -815,7 +815,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -885,7 +885,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_e_collective_mod.F90 b/base/modules/penv/psi_e_collective_mod.F90 index b9ab089b..5d66eed6 100644 --- a/base/modules/penv/psi_e_collective_mod.F90 +++ b/base/modules/penv/psi_e_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_i2_collective_mod.F90 b/base/modules/penv/psi_i2_collective_mod.F90 index 339e4281..88d40b66 100644 --- a/base/modules/penv/psi_i2_collective_mod.F90 +++ b/base/modules/penv/psi_i2_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_m_collective_mod.F90 b/base/modules/penv/psi_m_collective_mod.F90 index 8f45d398..c97ac5a3 100644 --- a/base/modules/penv/psi_m_collective_mod.F90 +++ b/base/modules/penv/psi_m_collective_mod.F90 @@ -585,7 +585,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -653,7 +653,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -723,7 +723,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_s_collective_mod.F90 b/base/modules/penv/psi_s_collective_mod.F90 index 6ffaae05..6dcc5253 100644 --- a/base/modules/penv/psi_s_collective_mod.F90 +++ b/base/modules/penv/psi_s_collective_mod.F90 @@ -747,7 +747,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -815,7 +815,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -885,7 +885,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) diff --git a/base/modules/penv/psi_z_collective_mod.F90 b/base/modules/penv/psi_z_collective_mod.F90 index 8b3ec277..ff5e6a2d 100644 --- a/base/modules/penv/psi_z_collective_mod.F90 +++ b/base/modules/penv/psi_z_collective_mod.F90 @@ -107,7 +107,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(1) = dat #else call psb_info(ctxt,iam,np) @@ -175,7 +175,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) @@ -245,7 +245,7 @@ contains logical :: collective_start, collective_end, collective_sync #if defined(SERIAL_MPI) - resv(0) = dat + resv(:) = dat(:) #else call psb_info(ctxt,iam,np) From 373d841bce7fab5672f43c48e49c995e42262dfa Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 22 Mar 2024 15:44:51 +0100 Subject: [PATCH 068/116] Don't need renaming of psi_gth and psi_sct --- base/modules/serial/psb_serial_mod.f90 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/base/modules/serial/psb_serial_mod.f90 b/base/modules/serial/psb_serial_mod.f90 index 627b318e..a25c1c37 100644 --- a/base/modules/serial/psb_serial_mod.f90 +++ b/base/modules/serial/psb_serial_mod.f90 @@ -36,9 +36,7 @@ module psb_serial_mod use psb_string_mod use psb_sort_mod - use psi_serial_mod, & - & psb_gth => psi_gth,& - & psb_sct => psi_sct + use psi_serial_mod use psb_s_serial_mod use psb_d_serial_mod From 3a25d7b04a7931e5fb4e793dfafbf3adeb011ff0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 22 Mar 2024 17:10:30 +0100 Subject: [PATCH 069/116] Fixes for LLVM compilation --- base/comm/internals/psi_cswapdata_a.F90 | 4 ++-- base/comm/internals/psi_cswaptran_a.F90 | 4 ++-- base/comm/internals/psi_dswapdata_a.F90 | 4 ++-- base/comm/internals/psi_dswaptran_a.F90 | 4 ++-- base/comm/internals/psi_eswapdata_a.F90 | 4 ++-- base/comm/internals/psi_eswaptran_a.F90 | 4 ++-- base/comm/internals/psi_i2swapdata_a.F90 | 4 ++-- base/comm/internals/psi_i2swaptran_a.F90 | 4 ++-- base/comm/internals/psi_mswapdata_a.F90 | 4 ++-- base/comm/internals/psi_mswaptran_a.F90 | 4 ++-- base/comm/internals/psi_sswapdata_a.F90 | 4 ++-- base/comm/internals/psi_sswaptran_a.F90 | 4 ++-- base/comm/internals/psi_zswapdata_a.F90 | 4 ++-- base/comm/internals/psi_zswaptran_a.F90 | 4 ++-- cuda/psb_c_cuda_vect_mod.F90 | 2 +- ext/psb_c_ell_mat_mod.f90 | 8 ++++++++ ext/psb_d_ell_mat_mod.f90 | 8 ++++++++ ext/psb_s_ell_mat_mod.f90 | 8 ++++++++ ext/psb_z_ell_mat_mod.f90 | 8 ++++++++ 19 files changed, 61 insertions(+), 29 deletions(-) diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90 index 715b674e..153fa4d1 100644 --- a/base/comm/internals/psi_cswapdata_a.F90 +++ b/base/comm/internals/psi_cswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90 index a7f2c687..bfb192c1 100644 --- a/base/comm/internals/psi_cswaptran_a.F90 +++ b/base/comm/internals/psi_cswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index aff32517..4422597e 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90 index ed13df40..a5b3fbfc 100644 --- a/base/comm/internals/psi_dswaptran_a.F90 +++ b/base/comm/internals/psi_dswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90 index 6a644563..74a5aea8 100644 --- a/base/comm/internals/psi_eswapdata_a.F90 +++ b/base/comm/internals/psi_eswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90 index 78ed7d8b..e10b574c 100644 --- a/base/comm/internals/psi_eswaptran_a.F90 +++ b/base/comm/internals/psi_eswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swapdata_a.F90 b/base/comm/internals/psi_i2swapdata_a.F90 index 42b4498e..75e0c870 100644 --- a/base/comm/internals/psi_i2swapdata_a.F90 +++ b/base/comm/internals/psi_i2swapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swaptran_a.F90 b/base/comm/internals/psi_i2swaptran_a.F90 index f94bf29e..0615e2ba 100644 --- a/base/comm/internals/psi_i2swaptran_a.F90 +++ b/base/comm/internals/psi_i2swaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90 index e71f3a52..01277d8e 100644 --- a/base/comm/internals/psi_mswapdata_a.F90 +++ b/base/comm/internals/psi_mswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90 index 3a780142..ee8c1d21 100644 --- a/base/comm/internals/psi_mswaptran_a.F90 +++ b/base/comm/internals/psi_mswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90 index 044dc141..47830330 100644 --- a/base/comm/internals/psi_sswapdata_a.F90 +++ b/base/comm/internals/psi_sswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90 index 434cec4c..ae54ee21 100644 --- a/base/comm/internals/psi_sswaptran_a.F90 +++ b/base/comm/internals/psi_sswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90 index 2d265c76..49617d2d 100644 --- a/base/comm/internals/psi_zswapdata_a.F90 +++ b/base/comm/internals/psi_zswapdata_a.F90 @@ -191,7 +191,7 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -676,7 +676,7 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90 index 508d4045..586fa11b 100644 --- a/base/comm/internals/psi_zswaptran_a.F90 +++ b/base/comm/internals/psi_zswaptran_a.F90 @@ -195,7 +195,7 @@ subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ @@ -688,7 +688,7 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - volatile :: sndbuf, rcvbuf + !volatile :: sndbuf, rcvbuf character(len=20) :: name info=psb_success_ diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 38480e34..2c2a4f61 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -30,7 +30,7 @@ ! -module psb_c_cuda_vect_mod +module psb_c_cuda_vect_mod use iso_c_binding use psb_const_mod use psb_error_mod diff --git a/ext/psb_c_ell_mat_mod.f90 b/ext/psb_c_ell_mat_mod.f90 index 8eaf01ba..6954946f 100644 --- a/ext/psb_c_ell_mat_mod.f90 +++ b/ext/psb_c_ell_mat_mod.f90 @@ -94,6 +94,7 @@ module psb_c_ell_mat_mod procedure, pass(a) :: print => psb_c_ell_print procedure, pass(a) :: free => c_ell_free procedure, pass(a) :: mold => psb_c_ell_mold + procedure, pass(a) :: get_nrm => c_ell_get_nrm end type psb_c_ell_sparse_mat @@ -459,6 +460,13 @@ contains res = 'ELL' end function c_ell_get_fmt + function c_ell_get_nrm(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function c_ell_get_nrm + function c_ell_get_nzeros(a) result(res) implicit none class(psb_c_ell_sparse_mat), intent(in) :: a diff --git a/ext/psb_d_ell_mat_mod.f90 b/ext/psb_d_ell_mat_mod.f90 index 3e34d63e..cc945baf 100644 --- a/ext/psb_d_ell_mat_mod.f90 +++ b/ext/psb_d_ell_mat_mod.f90 @@ -94,6 +94,7 @@ module psb_d_ell_mat_mod procedure, pass(a) :: print => psb_d_ell_print procedure, pass(a) :: free => d_ell_free procedure, pass(a) :: mold => psb_d_ell_mold + procedure, pass(a) :: get_nrm => d_ell_get_nrm end type psb_d_ell_sparse_mat @@ -459,6 +460,13 @@ contains res = 'ELL' end function d_ell_get_fmt + function d_ell_get_nrm(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function d_ell_get_nrm + function d_ell_get_nzeros(a) result(res) implicit none class(psb_d_ell_sparse_mat), intent(in) :: a diff --git a/ext/psb_s_ell_mat_mod.f90 b/ext/psb_s_ell_mat_mod.f90 index 5f09913a..9e3dd8b4 100644 --- a/ext/psb_s_ell_mat_mod.f90 +++ b/ext/psb_s_ell_mat_mod.f90 @@ -94,6 +94,7 @@ module psb_s_ell_mat_mod procedure, pass(a) :: print => psb_s_ell_print procedure, pass(a) :: free => s_ell_free procedure, pass(a) :: mold => psb_s_ell_mold + procedure, pass(a) :: get_nrm => s_ell_get_nrm end type psb_s_ell_sparse_mat @@ -459,6 +460,13 @@ contains res = 'ELL' end function s_ell_get_fmt + function s_ell_get_nrm(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function s_ell_get_nrm + function s_ell_get_nzeros(a) result(res) implicit none class(psb_s_ell_sparse_mat), intent(in) :: a diff --git a/ext/psb_z_ell_mat_mod.f90 b/ext/psb_z_ell_mat_mod.f90 index 52dc62b1..0e1f0e00 100644 --- a/ext/psb_z_ell_mat_mod.f90 +++ b/ext/psb_z_ell_mat_mod.f90 @@ -94,6 +94,7 @@ module psb_z_ell_mat_mod procedure, pass(a) :: print => psb_z_ell_print procedure, pass(a) :: free => z_ell_free procedure, pass(a) :: mold => psb_z_ell_mold + procedure, pass(a) :: get_nrm => z_ell_get_nrm end type psb_z_ell_sparse_mat @@ -459,6 +460,13 @@ contains res = 'ELL' end function z_ell_get_fmt + function z_ell_get_nrm(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function z_ell_get_nrm + function z_ell_get_nzeros(a) result(res) implicit none class(psb_z_ell_sparse_mat), intent(in) :: a From 0023b8ac7803dc9c05a2de87e2b9af2479e1bc6c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 25 Mar 2024 15:41:47 +0100 Subject: [PATCH 070/116] Compile adjcncy_fnd_owner --- base/internals/psi_adjcncy_fnd_owner.F90 | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index 639cdb5d..83e18623 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -87,7 +87,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) integer(psb_lpk_) :: mglob, ih type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me - logical, parameter :: gettime=.true., debug=.false. + logical, parameter :: debug=.false. integer(psb_mpk_) :: xchg_alg logical, parameter :: do_timings=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 @@ -132,10 +132,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) goto 9999 end if - if (gettime) then - t0 = psb_wtime() - end if - nadj = size(adj) nidx = size(idx) call psb_realloc(nidx,iprc,info) From 59e6df73a4f7b65730d3ee774e99732812df960b Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 1 Apr 2024 15:57:44 +0200 Subject: [PATCH 071/116] Make sure configure recognizes FLANG --- configure | 21 +++++++++++++-------- configure.ac | 15 ++++++++++----- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/configure b/configure index 3a774ee1..0421872d 100755 --- a/configure +++ b/configure @@ -3452,7 +3452,7 @@ ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran + for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -3501,7 +3501,7 @@ fi fi if test -z "$FC"; then ac_ct_FC=$FC - for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran + for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -3928,7 +3928,7 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in xlc pgcc icc gcc cc + for ac_prog in xlc pgcc clang icc gcc cc do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -3977,7 +3977,7 @@ fi fi if test -z "$CC"; then ac_ct_CC=$CC - for ac_prog in xlc pgcc icc gcc cc + for ac_prog in xlc pgcc clang icc gcc cc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4424,7 +4424,7 @@ if test -z "$CXX"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then - for ac_prog in CC xlc++ icpc g++ + for ac_prog in CC xlc++ clang++ icpc g++ do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -4473,7 +4473,7 @@ fi fi if test -z "$CXX"; then ac_ct_CXX=$CXX - for ac_prog in CC xlc++ icpc g++ + for ac_prog in CC xlc++ clang++ icpc g++ do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -6874,10 +6874,12 @@ if test x"$psblas_cv_fc" == "x" ; then # Sun compiler detection psblas_cv_fc="sun" - elif eval "$MPIFC -V 2>&1 | grep Portland 2>/dev/null" ; then + elif eval "$MPIFC --version 2>&1 | grep flang-new 2>/dev/null" ; then # Portland group compiler detection - psblas_cv_fc="pg" + psblas_cv_fc="flang" + psblas_cv_define_prepend="" + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" elif eval "$MPIFC -V 2>&1 | grep Intel.*Fortran.*Compiler 2>/dev/null" ; then # Intel compiler identification @@ -7301,6 +7303,9 @@ if test "X$FCOPT" == "X" ; then elif test "X$psblas_cv_fc" == X"pg" ; then # other compilers .. FCOPT="-fast $FCOPT" + elif test "X$psblas_cv_fc" == X"flang" ; then + # other compilers .. + FCOPT="-O3" # NOTE : PG & Sun use -fast instead -O3 elif test "X$psblas_cv_fc" == X"sun" ; then # other compilers .. diff --git a/configure.ac b/configure.ac index 62aad4f8..c5cff358 100755 --- a/configure.ac +++ b/configure.ac @@ -100,17 +100,17 @@ dnl We set our own FC flags, ignore those from AC_PROG_FC but not those from the dnl environment variable. Same for C dnl save_FCFLAGS="$FCFLAGS"; -AC_PROG_FC([ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran]) +AC_PROG_FC([ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran]) FCFLAGS="$save_FCFLAGS"; save_CFLAGS="$CFLAGS"; -AC_PROG_CC([xlc pgcc icc gcc cc ]) +AC_PROG_CC([xlc pgcc clang icc gcc cc ]) if test "x$ac_cv_prog_cc_stdc" == "xno" ; then AC_MSG_ERROR([Problem : Need a C99 compiler ! ]) else C99OPT="$ac_cv_prog_cc_stdc"; fi CFLAGS="$save_CFLAGS"; -AC_PROG_CXX([CC xlc++ icpc g++]) +AC_PROG_CXX([CC xlc++ clang++ icpc g++]) dnl AC_PROG_F90 doesn't exist, at the time of writing this ! dnl AC_PROG_F90 @@ -248,10 +248,12 @@ if test x"$psblas_cv_fc" == "x" ; then # Sun compiler detection psblas_cv_fc="sun" - elif eval "$MPIFC -V 2>&1 | grep Portland 2>/dev/null" ; then + elif eval "$MPIFC --version 2>&1 | grep flang-new 2>/dev/null" ; then # Portland group compiler detection - psblas_cv_fc="pg" + psblas_cv_fc="flang" + psblas_cv_define_prepend="" + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" elif eval "$MPIFC -V 2>&1 | grep Intel.*Fortran.*Compiler 2>/dev/null" ; then # Intel compiler identification @@ -427,6 +429,9 @@ if test "X$FCOPT" == "X" ; then elif test "X$psblas_cv_fc" == X"pg" ; then # other compilers .. FCOPT="-fast $FCOPT" + elif test "X$psblas_cv_fc" == X"flang" ; then + # other compilers .. + FCOPT="-O3" # NOTE : PG & Sun use -fast instead -O3 elif test "X$psblas_cv_fc" == X"sun" ; then # other compilers .. From a2f92e616f5c25467b7b36a2bf7ff7a7b1db472e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 1 Apr 2024 15:58:32 +0200 Subject: [PATCH 072/116] Put VOLATILE under ifdef for FLANG --- base/comm/internals/psi_cswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_cswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_dswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_dswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_eswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_eswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_i2swapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_i2swaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_mswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_mswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_sswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_sswaptran_a.F90 | 8 ++++++-- base/comm/internals/psi_zswapdata_a.F90 | 8 ++++++-- base/comm/internals/psi_zswaptran_a.F90 | 8 ++++++-- 14 files changed, 84 insertions(+), 28 deletions(-) diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90 index 153fa4d1..acf7f342 100644 --- a/base/comm/internals/psi_cswapdata_a.F90 +++ b/base/comm/internals/psi_cswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90 index bfb192c1..0aa5d6f6 100644 --- a/base/comm/internals/psi_cswaptran_a.F90 +++ b/base/comm/internals/psi_cswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index 4422597e..ff562994 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90 index a5b3fbfc..a0506a2b 100644 --- a/base/comm/internals/psi_dswaptran_a.F90 +++ b/base/comm/internals/psi_dswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90 index 74a5aea8..acc6a6bf 100644 --- a/base/comm/internals/psi_eswapdata_a.F90 +++ b/base/comm/internals/psi_eswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90 index e10b574c..ce12a7e7 100644 --- a/base/comm/internals/psi_eswaptran_a.F90 +++ b/base/comm/internals/psi_eswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swapdata_a.F90 b/base/comm/internals/psi_i2swapdata_a.F90 index 75e0c870..e2a8ad4e 100644 --- a/base/comm/internals/psi_i2swapdata_a.F90 +++ b/base/comm/internals/psi_i2swapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swaptran_a.F90 b/base/comm/internals/psi_i2swaptran_a.F90 index 0615e2ba..36652565 100644 --- a/base/comm/internals/psi_i2swaptran_a.F90 +++ b/base/comm/internals/psi_i2swaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90 index 01277d8e..c7f1965b 100644 --- a/base/comm/internals/psi_mswapdata_a.F90 +++ b/base/comm/internals/psi_mswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90 index ee8c1d21..1e9fcede 100644 --- a/base/comm/internals/psi_mswaptran_a.F90 +++ b/base/comm/internals/psi_mswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90 index 47830330..c802578c 100644 --- a/base/comm/internals/psi_sswapdata_a.F90 +++ b/base/comm/internals/psi_sswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90 index ae54ee21..c4290be1 100644 --- a/base/comm/internals/psi_sswaptran_a.F90 +++ b/base/comm/internals/psi_sswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90 index 49617d2d..ec21072c 100644 --- a/base/comm/internals/psi_zswapdata_a.F90 +++ b/base/comm/internals/psi_zswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90 index 586fa11b..c1062839 100644 --- a/base/comm/internals/psi_zswaptran_a.F90 +++ b/base/comm/internals/psi_zswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf - !volatile :: sndbuf, rcvbuf +#if !defined(FLANG) + volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ From 4347c663c298088cb128b7c155536d3b5d1c09af Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 4 Apr 2024 10:52:41 +0200 Subject: [PATCH 073/116] Change conftest **argv to recognize CUDA_VERSION. --- config/pac.m4 | 2 +- configure | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/pac.m4 b/config/pac.m4 index 52185439..0d22392f 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2251,7 +2251,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then #include #include -int main(int argc, char *argv[]) +int main(int argc, char **argv) { printf("%d",CUDA_VERSION); return(0); diff --git a/configure b/configure index 0421872d..3d8c5cd6 100755 --- a/configure +++ b/configure @@ -10786,7 +10786,7 @@ printf %s "checking for CUDA version... " >&6; } #include #include -int main(int argc, char *argv) +int main(int argc, char **argv) { printf("%d",CUDA_VERSION); return(0); From 0760e4d553e84cc495a3d6707f5bd206c41238fa Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 4 Apr 2024 10:53:20 +0200 Subject: [PATCH 074/116] Fix C function declarations for compilation with LLVM/clang in CUDA --- cuda/cuda_util.c | 14 ++++++++++++++ cuda/cuda_util.h | 1 + cuda/elldev.c | 2 +- cuda/elldev.h | 1 + cuda/fcusparse_fct.h | 2 +- 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 3fe61cc0..b7d0199e 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -378,22 +378,36 @@ void cudaSync() { cudaError_t err; err = cudaDeviceSynchronize(); +#if 0 if (err == cudaSuccess) return SPGPU_SUCCESS; else { fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); return SPGPU_UNSPECIFIED; } +#else + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); + } + return ; +#endif } void cudaReset() { cudaError_t err; err = cudaDeviceReset(); +#if 0 if (err != cudaSuccess) { fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); return SPGPU_UNSPECIFIED; } +#else + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); + } + return ; +#endif } diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 95c8d1dc..4eafb5bf 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -54,6 +54,7 @@ int freeRemoteBuffer(void* buffer); int gpuInit(int dev); int getDeviceCount(); int getDevice(); +int getDeviceHasUVA(); int setDevice(int dev); int getGPUMultiProcessors(); int getGPUMemoryBusWidth(); diff --git a/cuda/elldev.c b/cuda/elldev.c index 3b79a863..a5d893c1 100644 --- a/cuda/elldev.c +++ b/cuda/elldev.c @@ -112,7 +112,7 @@ void zeroEllDevice(void *remoteMatrix) else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE) cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex)); else - return SPGPU_UNSUPPORTED; // Unsupported params + return ; // Unsupported params //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); diff --git a/cuda/elldev.h b/cuda/elldev.h index c1001439..5305057a 100644 --- a/cuda/elldev.h +++ b/cuda/elldev.h @@ -95,6 +95,7 @@ typedef struct EllDeviceParams unsigned int firstIndex; } EllDeviceParams; +int computeEllAllocPitch(int rowsCount); int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize, unsigned int nnzeros, unsigned int columns, unsigned int elementType, diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 689bdc93..28d900da 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -320,7 +320,7 @@ int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, } #if CUDA_VERSION >= 11030 -T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) { int64_t tr,tc,tz; tr = cMat->m; From 2f575894fcf0f31de8df480c2549ff8184abb196 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 15 Apr 2024 11:18:51 +0200 Subject: [PATCH 075/116] Fix --with-cudacc in configure --- configure | 4 +- configure.ac | 2 +- prec/impl/psb_cilu_fct.f90 | 438 ------------------------------------ prec/impl/psb_dilu_fct.f90 | 441 ------------------------------------- prec/impl/psb_silu_fct.f90 | 440 ------------------------------------ prec/impl/psb_zilu_fct.f90 | 438 ------------------------------------ 6 files changed, 3 insertions(+), 1760 deletions(-) delete mode 100644 prec/impl/psb_cilu_fct.f90 delete mode 100644 prec/impl/psb_dilu_fct.f90 delete mode 100644 prec/impl/psb_silu_fct.f90 delete mode 100644 prec/impl/psb_zilu_fct.f90 diff --git a/configure b/configure index 3d8c5cd6..b7e5f329 100755 --- a/configure +++ b/configure @@ -12296,7 +12296,7 @@ fi CCOPT : ${CCOPT} CUDA : ${HAVE_CUDA} - CUDA_CC : ${CUDA_CC} + CUDA_CC : ${pac_cv_cudacc} BLAS : ${BLAS_LIBS} @@ -12329,7 +12329,7 @@ printf "%s\n" "$as_me: CCOPT : ${CCOPT} CUDA : ${HAVE_CUDA} - CUDA_CC : ${CUDA_CC} + CUDA_CC : ${pac_cv_cudacc} BLAS : ${BLAS_LIBS} diff --git a/configure.ac b/configure.ac index c5cff358..10f9e2f4 100755 --- a/configure.ac +++ b/configure.ac @@ -969,7 +969,7 @@ AC_MSG_NOTICE([ CCOPT : ${CCOPT} CUDA : ${HAVE_CUDA} - CUDA_CC : ${CUDA_CC} + CUDA_CC : ${pac_cv_cudacc} BLAS : ${BLAS_LIBS} diff --git a/prec/impl/psb_cilu_fct.f90 b/prec/impl/psb_cilu_fct.f90 deleted file mode 100644 index d54769bf..00000000 --- a/prec/impl/psb_cilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_cilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_cspmat_type),intent(in) :: a - type(psb_c_csr_sparse_mat),intent(inout) :: l,u - type(psb_cspmat_type),intent(in), optional, target :: blck - complex(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_cspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_cilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_cilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_cilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_cspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_c_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_cilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = czero - - ! - ! - select type(aa => a%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = czero - - select type(aa => b%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_cilu_fctint -end subroutine psb_cilu_fct diff --git a/prec/impl/psb_dilu_fct.f90 b/prec/impl/psb_dilu_fct.f90 deleted file mode 100644 index b97b88ec..00000000 --- a/prec/impl/psb_dilu_fct.f90 +++ /dev/null @@ -1,441 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_dilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_dspmat_type),intent(in) :: a - type(psb_d_csr_sparse_mat),intent(inout) :: l,u - type(psb_dspmat_type),intent(in), optional, target :: blck - real(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_dspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_dilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_dilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_dilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_dspmat_type), target :: a - type(psb_dspmat_type), target :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_d_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - - name='psb_dilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = dzero - ! - ! - select type(aa => a%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = dzero - - select type(aa => b%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_dilu_fctint -end subroutine psb_dilu_fct diff --git a/prec/impl/psb_silu_fct.f90 b/prec/impl/psb_silu_fct.f90 deleted file mode 100644 index 85b58bad..00000000 --- a/prec/impl/psb_silu_fct.f90 +++ /dev/null @@ -1,440 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_silu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_sspmat_type),intent(in) :: a - type(psb_s_csr_sparse_mat),intent(inout) :: l,u - type(psb_sspmat_type),intent(in), optional, target :: blck - real(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_sspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_silu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_silu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_silu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_sspmat_type) :: a - type(psb_sspmat_type) :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_s_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_silu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = szero - ! - ! - select type(aa => a%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = szero - - select type(aa => b%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_silu_fctint -end subroutine psb_silu_fct diff --git a/prec/impl/psb_zilu_fct.f90 b/prec/impl/psb_zilu_fct.f90 deleted file mode 100644 index e5ea4b0d..00000000 --- a/prec/impl/psb_zilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_zilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_zspmat_type),intent(in) :: a - type(psb_z_csr_sparse_mat),intent(inout) :: l,u - type(psb_zspmat_type),intent(in), optional, target :: blck - complex(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_zspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_zilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_zilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_zilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_zspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_z_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_zilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = zzero - - ! - ! - select type(aa => a%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = zzero - - select type(aa => b%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_zilu_fctint -end subroutine psb_zilu_fct From 553531eefbffbb7f4f0092055350b88aa3288ee4 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 15 Apr 2024 12:16:43 +0200 Subject: [PATCH 076/116] Take out obsolete ilu_fct source files --- prec/impl/Makefile | 8 +- prec/impl/psb_cilu_fct.f90 | 438 ------------------------------------ prec/impl/psb_dilu_fct.f90 | 441 ------------------------------------- prec/impl/psb_silu_fct.f90 | 440 ------------------------------------ prec/impl/psb_zilu_fct.f90 | 438 ------------------------------------ 5 files changed, 4 insertions(+), 1761 deletions(-) delete mode 100644 prec/impl/psb_cilu_fct.f90 delete mode 100644 prec/impl/psb_dilu_fct.f90 delete mode 100644 prec/impl/psb_silu_fct.f90 delete mode 100644 prec/impl/psb_zilu_fct.f90 diff --git a/prec/impl/Makefile b/prec/impl/Makefile index bc5ef2e1..2b6b1dc5 100644 --- a/prec/impl/Makefile +++ b/prec/impl/Makefile @@ -7,16 +7,16 @@ HERE=.. OBJS=psb_s_prec_type_impl.o psb_d_prec_type_impl.o \ psb_c_prec_type_impl.o psb_z_prec_type_impl.o \ psb_d_diagprec_impl.o psb_d_bjacprec_impl.o psb_d_nullprec_impl.o \ - psb_dilu_fct.o psb_d_ilu0_fact.o psb_d_iluk_fact.o psb_d_ilut_fact.o \ + psb_d_ilu0_fact.o psb_d_iluk_fact.o psb_d_ilut_fact.o \ psb_dprecbld.o psb_dprecinit.o \ psb_s_diagprec_impl.o psb_s_bjacprec_impl.o psb_s_nullprec_impl.o \ - psb_silu_fct.o psb_s_ilu0_fact.o psb_s_iluk_fact.o psb_s_ilut_fact.o \ + psb_s_ilu0_fact.o psb_s_iluk_fact.o psb_s_ilut_fact.o \ psb_sprecbld.o psb_sprecinit.o \ psb_c_diagprec_impl.o psb_c_bjacprec_impl.o psb_c_nullprec_impl.o \ - psb_cilu_fct.o psb_c_ilu0_fact.o psb_c_iluk_fact.o psb_c_ilut_fact.o \ + psb_c_ilu0_fact.o psb_c_iluk_fact.o psb_c_ilut_fact.o \ psb_cprecbld.o psb_cprecinit.o \ psb_z_diagprec_impl.o psb_z_bjacprec_impl.o psb_z_nullprec_impl.o \ - psb_zilu_fct.o psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ + psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \ psb_zprecbld.o psb_zprecinit.o \ psb_c_sparsify.o psb_d_sparsify.o psb_s_sparsify.o psb_z_sparsify.o \ psb_crwclip.o psb_drwclip.o psb_srwclip.o psb_zrwclip.o \ diff --git a/prec/impl/psb_cilu_fct.f90 b/prec/impl/psb_cilu_fct.f90 deleted file mode 100644 index d54769bf..00000000 --- a/prec/impl/psb_cilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_cilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_cspmat_type),intent(in) :: a - type(psb_c_csr_sparse_mat),intent(inout) :: l,u - type(psb_cspmat_type),intent(in), optional, target :: blck - complex(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_cspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_cilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_cilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_cilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_cspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_c_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_cilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = czero - - ! - ! - select type(aa => a%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = czero - - select type(aa => b%a) - type is (psb_c_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = cone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_cilu_fctint -end subroutine psb_cilu_fct diff --git a/prec/impl/psb_dilu_fct.f90 b/prec/impl/psb_dilu_fct.f90 deleted file mode 100644 index b97b88ec..00000000 --- a/prec/impl/psb_dilu_fct.f90 +++ /dev/null @@ -1,441 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_dilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_dspmat_type),intent(in) :: a - type(psb_d_csr_sparse_mat),intent(inout) :: l,u - type(psb_dspmat_type),intent(in), optional, target :: blck - real(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_dspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_dilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_dilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_dilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_dspmat_type), target :: a - type(psb_dspmat_type), target :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_d_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - - name='psb_dilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = dzero - ! - ! - select type(aa => a%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = dzero - - select type(aa => b%a) - type is (psb_d_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = done/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_dilu_fctint -end subroutine psb_dilu_fct diff --git a/prec/impl/psb_silu_fct.f90 b/prec/impl/psb_silu_fct.f90 deleted file mode 100644 index 85b58bad..00000000 --- a/prec/impl/psb_silu_fct.f90 +++ /dev/null @@ -1,440 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_silu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_sspmat_type),intent(in) :: a - type(psb_s_csr_sparse_mat),intent(inout) :: l,u - type(psb_sspmat_type),intent(in), optional, target :: blck - real(psb_spk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1,l2,m,err_act - type(psb_sspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_silu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_silu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_silu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - use psb_mat_mod - - implicit none - - type(psb_sspmat_type) :: a - type(psb_sspmat_type) :: b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - real(psb_spk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - real(psb_spk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_s_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_silu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = szero - ! - ! - select type(aa => a%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') dia - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = szero - - select type(aa => b%a) - type is (psb_s_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < s_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = sone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_silu_fctint -end subroutine psb_silu_fct diff --git a/prec/impl/psb_zilu_fct.f90 b/prec/impl/psb_zilu_fct.f90 deleted file mode 100644 index e5ea4b0d..00000000 --- a/prec/impl/psb_zilu_fct.f90 +++ /dev/null @@ -1,438 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -subroutine psb_zilu_fct(a,l,u,d,info,blck) - - ! - ! This routine copies and factors "on the fly" from A and BLCK - ! into L/D/U. - ! - ! - use psb_base_mod - implicit none - ! .. Scalar Arguments .. - integer(psb_ipk_), intent(out) :: info - ! .. Array Arguments .. - type(psb_zspmat_type),intent(in) :: a - type(psb_z_csr_sparse_mat),intent(inout) :: l,u - type(psb_zspmat_type),intent(in), optional, target :: blck - complex(psb_dpk_), intent(inout) :: d(:) - ! .. Local Scalars .. - integer(psb_ipk_) :: l1, l2,m,err_act - type(psb_zspmat_type), pointer :: blck_ - character(len=20) :: name, ch_err - name='psb_ilu_fct' - info = psb_success_ - call psb_erractionsave(err_act) - ! .. Executable Statements .. - ! - - if (present(blck)) then - blck_ => blck - else - allocate(blck_,stat=info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') - goto 9999 - end if - - call blck_%csall(izero,izero,info,ione) - - endif - - call psb_zilu_fctint(m,a%get_nrows(),a,blck_%get_nrows(),blck_,& - & d,l%val,l%ja,l%irp,u%val,u%ja,u%irp,l1,l2,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_zilu_fctint' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - call l%set_triangle() - call l%set_lower() - call l%set_unit() - call u%set_triangle() - call u%set_upper() - call u%set_unit() - call l%set_nrows(m) - call l%set_ncols(m) - call u%set_nrows(m) - call u%set_ncols(m) - - if (present(blck)) then - blck_ => null() - else - call blck_%free() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_free' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - deallocate(blck_) - endif - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - -contains - subroutine psb_zilu_fctint(m,ma,a,mb,b,& - & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) - implicit none - - type(psb_zspmat_type) :: a,b - integer(psb_ipk_) :: m,ma,mb,l1,l2,info - integer(psb_ipk_), dimension(:) :: lia1,lia2,uia1,uia2 - complex(psb_dpk_), dimension(:) :: laspk,uaspk,d - - integer(psb_ipk_) :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act, nz - complex(psb_dpk_) :: dia,temp - integer(psb_ipk_), parameter :: nrb=60 - type(psb_z_coo_sparse_mat) :: trw - integer(psb_ipk_) :: int_err(5) - character(len=20) :: name, ch_err - - name='psb_zilu_fctint' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - call trw%allocate(izero,izero,ione) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='psb_sp_all' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - lia2(1) = 1 - uia2(1) = 1 - l1=0 - l2=0 - m = ma+mb - - do i = 1, ma - d(i) = zzero - - ! - ! - select type(aa => a%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i), aa%irp(i+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i,i+irb-1,trw,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - nz = trw%get_nzeros() - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select -!!$ - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloop: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloop - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - ! write(psb_err_unit,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) - dia = dia - temp*uaspk(jj) - ! write(psb_err_unit,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) - cycle updateloop - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloop - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloop - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - info = psb_err_pivot_too_small_ - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! write(psb_err_unit,*)'diag(',i,')=',d(i) - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - do i = ma+1, m - d(i) = zzero - - select type(aa => b%a) - type is (psb_z_csr_sparse_mat) - do j = aa%irp(i-ma), aa%irp(i-ma+1) - 1 - k = aa%ja(j) - ! write(psb_err_unit,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = aa%val(j) - lia1(l1) = k - else if (k == i) then - d(i) = aa%val(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = aa%val(j) - uia1(l2) = k - end if - enddo - - class default - - if ((mod(i,nrb) == 1).or.(nrb == 1)) then - irb = min(ma-i+1,nrb) - call aa%csget(i-ma,i-ma+irb-1,trw,info) - nz = trw%get_nzeros() - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='a%csget' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > nz ) exit - if (trw%ia(ktrw) > i) exit - k = trw%ja(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%val(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%val(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%val(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - end select - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! for milu al=1.; for ilu al=0. - ! al = 1.d0 - ! dia = dia - al*temp*aup(jj) - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < d_epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = psb_err_pivot_too_small_ - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - - call trw%free() - - call psb_erractionrestore(err_act) - return - -9999 continue - call psb_erractionrestore(err_act) - if (err_act == psb_act_abort_) then - call psb_error() - return - end if - return - end subroutine psb_zilu_fctint -end subroutine psb_zilu_fct From e18de650f2379f6cb35d0eac81145a0e03e1340c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Apr 2024 13:53:04 +0200 Subject: [PATCH 077/116] Take out debug print --- cuda/fcusparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index 1b37272c..e8e46c63 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -53,7 +53,7 @@ int FcusparseCreate() if (ret == CUSPARSE_STATUS_SUCCESS) cusparse_handle = handle; } - fprintf(stderr,"Created cusparses_handle\n"); + //fprintf(stderr,"Created cusparses_handle\n"); return (ret); } From aca1848401829ccfc8de8c8834341649589d6b17 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Apr 2024 13:53:23 +0200 Subject: [PATCH 078/116] New timings in CG --- base/internals/psi_desc_index.F90 | 2 +- base/tools/psb_icdasb.F90 | 2 +- krylov/psb_dcg.F90 | 31 +++++++++++++++++++++++++++---- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/base/internals/psi_desc_index.F90 b/base/internals/psi_desc_index.F90 index 35c8d921..6423c116 100644 --- a/base/internals/psi_desc_index.F90 +++ b/base/internals/psi_desc_index.F90 @@ -137,7 +137,7 @@ subroutine psi_i_desc_index(desc,index_in,dep_list,& & idxr, idxs, iszs, iszr, nesd, nerv, ixp, idx integer(psb_mpk_) :: icomm, minfo - logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. + logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1, idx_phase4=-1 logical, parameter :: usempi=.false. integer(psb_ipk_) :: debug_level, debug_unit diff --git a/base/tools/psb_icdasb.F90 b/base/tools/psb_icdasb.F90 index 31d92133..04f95738 100644 --- a/base/tools/psb_icdasb.F90 +++ b/base/tools/psb_icdasb.F90 @@ -67,7 +67,7 @@ subroutine psb_icdasb(desc,info,ext_hv,mold) integer(psb_mpk_) :: icomm integer(psb_ipk_) :: np,me logical :: ext_hv_ - logical, parameter :: do_timings=.true. + logical, parameter :: do_timings=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1 integer(psb_ipk_), save :: idx_total=-1 diff --git a/krylov/psb_dcg.F90 b/krylov/psb_dcg.F90 index 669573be..caebb712 100644 --- a/krylov/psb_dcg.F90 +++ b/krylov/psb_dcg.F90 @@ -129,6 +129,8 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& logical :: do_cond character(len=20) :: name character(len=*), parameter :: methdname='CG' + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: cg_vect=-1, cg_mv=-1, cg_prec=-1 info = psb_success_ name = 'psb_dcg' @@ -149,6 +151,12 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(info,name) goto 9999 endif + if ((do_timings).and.(cg_vect==-1)) & + & cg_vect = psb_get_timer_idx("CG: vector ops ") + if ((do_timings).and.(cg_mv==-1)) & + & cg_mv = psb_get_timer_idx("CG: MV product") + if ((do_timings).and.(cg_prec==-1)) & + & cg_prec = psb_get_timer_idx("CG: preconditioner") mglob = desc_a%get_global_rows() @@ -219,17 +227,21 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& ! = ! = r0 = b-Ax0 ! = + if (do_timings) call psb_tic(cg_vect) if (itx>= itmax_) exit restart - it = 0 call psb_geaxpby(done,b,dzero,r,desc_a,info) + if (do_timings) call psb_toc(cg_vect) + if (do_timings) call psb_tic(cg_mv) if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) goto 9999 end if - + if (do_timings) call psb_toc(cg_mv) + + if (do_timings) call psb_tic(cg_vect) rho = dzero call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info) @@ -237,13 +249,18 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(psb_err_from_subroutine_non_,name) goto 9999 End If - + if (do_timings) call psb_toc(cg_vect) + iteration: do it = it + 1 itx = itx + 1 - + if (do_timings) call psb_tic(cg_prec) + call prec%apply(r,z,desc_a,info,work=aux) + if (do_timings) call psb_toc(cg_prec) + if (do_timings) call psb_tic(cg_vect) + rho_old = rho rho = psb_gedot(r,z,desc_a,info) @@ -254,13 +271,18 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& if (debug_level >= psb_debug_ext_)& & write(debug_unit,*) me,' ',trim(name),& & ': CG Iteration breakdown rho' + if (do_timings) call psb_toc(cg_vect) exit iteration endif beta = rho/rho_old call psb_geaxpby(done,z,beta,p,desc_a,info) end if + if (do_timings) call psb_toc(cg_vect) + if (do_timings) call psb_tic(cg_mv) call psb_spmm(done,a,p,dzero,q,desc_a,info,work=aux) + if (do_timings) call psb_toc(cg_mv) + if (do_timings) call psb_tic(cg_vect) sigma = psb_gedot(p,q,desc_a,info) if (sigma == dzero) then if (debug_level >= psb_debug_ext_)& @@ -293,6 +315,7 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& end do iteration end do restart + if (do_timings) call psb_toc(cg_vect) if (do_cond) then if (me == psb_root_) then #if defined(HAVE_LAPACK) From ba8c32c507160763fc81406ddc16c5c67f07a4ca Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Apr 2024 14:27:35 +0200 Subject: [PATCH 079/116] Define merge_nd method --- base/modules/serial/psb_c_mat_mod.F90 | 13 +++++-- base/modules/serial/psb_d_mat_mod.F90 | 13 +++++-- base/modules/serial/psb_s_mat_mod.F90 | 13 +++++-- base/modules/serial/psb_z_mat_mod.F90 | 13 +++++-- base/serial/impl/psb_c_mat_impl.F90 | 50 +++++++++++++++++++++++++++ base/serial/impl/psb_d_mat_impl.F90 | 50 +++++++++++++++++++++++++++ base/serial/impl/psb_s_mat_impl.F90 | 50 +++++++++++++++++++++++++++ base/serial/impl/psb_z_mat_impl.F90 | 50 +++++++++++++++++++++++++++ 8 files changed, 240 insertions(+), 12 deletions(-) diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index ee819535..e7c84b00 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -205,6 +205,7 @@ module psb_c_mat_mod procedure, pass(a) :: cscnv_base => psb_c_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base procedure, pass(a) :: split_nd => psb_c_split_nd + procedure, pass(a) :: merge_nd => psb_c_merge_nd procedure, pass(a) :: clone => psb_cspmat_clone procedure, pass(a) :: move_alloc => psb_cspmat_type_move ! @@ -849,11 +850,17 @@ module psb_c_mat_mod class(psb_cspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: n_rows, n_cols integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_),optional, intent(in) :: dupl -!!$ character(len=*), optional, intent(in) :: type -!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold end subroutine psb_c_split_nd end interface + + interface + subroutine psb_c_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_merge_nd + end interface ! ! CSCNV: switches to a different internal derived type. diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 82d2e822..fe09d83a 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -205,6 +205,7 @@ module psb_d_mat_mod procedure, pass(a) :: cscnv_base => psb_d_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base procedure, pass(a) :: split_nd => psb_d_split_nd + procedure, pass(a) :: merge_nd => psb_d_merge_nd procedure, pass(a) :: clone => psb_dspmat_clone procedure, pass(a) :: move_alloc => psb_dspmat_type_move ! @@ -849,11 +850,17 @@ module psb_d_mat_mod class(psb_dspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: n_rows, n_cols integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_),optional, intent(in) :: dupl -!!$ character(len=*), optional, intent(in) :: type -!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold end subroutine psb_d_split_nd end interface + + interface + subroutine psb_d_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_merge_nd + end interface ! ! CSCNV: switches to a different internal derived type. diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index d8a2e6ae..868583b2 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -205,6 +205,7 @@ module psb_s_mat_mod procedure, pass(a) :: cscnv_base => psb_s_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base procedure, pass(a) :: split_nd => psb_s_split_nd + procedure, pass(a) :: merge_nd => psb_s_merge_nd procedure, pass(a) :: clone => psb_sspmat_clone procedure, pass(a) :: move_alloc => psb_sspmat_type_move ! @@ -849,11 +850,17 @@ module psb_s_mat_mod class(psb_sspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: n_rows, n_cols integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_),optional, intent(in) :: dupl -!!$ character(len=*), optional, intent(in) :: type -!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold end subroutine psb_s_split_nd end interface + + interface + subroutine psb_s_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_merge_nd + end interface ! ! CSCNV: switches to a different internal derived type. diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index 694d4efc..48b670de 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -205,6 +205,7 @@ module psb_z_mat_mod procedure, pass(a) :: cscnv_base => psb_z_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base procedure, pass(a) :: split_nd => psb_z_split_nd + procedure, pass(a) :: merge_nd => psb_z_merge_nd procedure, pass(a) :: clone => psb_zspmat_clone procedure, pass(a) :: move_alloc => psb_zspmat_type_move ! @@ -849,11 +850,17 @@ module psb_z_mat_mod class(psb_zspmat_type), intent(inout) :: a integer(psb_ipk_), intent(in) :: n_rows, n_cols integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_),optional, intent(in) :: dupl -!!$ character(len=*), optional, intent(in) :: type -!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold end subroutine psb_z_split_nd end interface + + interface + subroutine psb_z_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_merge_nd + end interface ! ! CSCNV: switches to a different internal derived type. diff --git a/base/serial/impl/psb_c_mat_impl.F90 b/base/serial/impl/psb_c_mat_impl.F90 index bbac0406..532cb9b8 100644 --- a/base/serial/impl/psb_c_mat_impl.F90 +++ b/base/serial/impl/psb_c_mat_impl.F90 @@ -1263,6 +1263,56 @@ subroutine psb_c_split_nd(a,n_rows,n_cols,info) end subroutine psb_c_split_nd +subroutine psb_c_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_c_mat_mod, psb_protect_name => psb_c_merge_nd + implicit none + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + type(psb_c_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_merge_nd + subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index 9af64b3f..e48654ce 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -1263,6 +1263,56 @@ subroutine psb_d_split_nd(a,n_rows,n_cols,info) end subroutine psb_d_split_nd +subroutine psb_d_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_d_mat_mod, psb_protect_name => psb_d_merge_nd + implicit none + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + type(psb_d_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_merge_nd + subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod diff --git a/base/serial/impl/psb_s_mat_impl.F90 b/base/serial/impl/psb_s_mat_impl.F90 index c0370774..77ac81e6 100644 --- a/base/serial/impl/psb_s_mat_impl.F90 +++ b/base/serial/impl/psb_s_mat_impl.F90 @@ -1263,6 +1263,56 @@ subroutine psb_s_split_nd(a,n_rows,n_cols,info) end subroutine psb_s_split_nd +subroutine psb_s_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_s_mat_mod, psb_protect_name => psb_s_merge_nd + implicit none + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + type(psb_s_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_merge_nd + subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod diff --git a/base/serial/impl/psb_z_mat_impl.F90 b/base/serial/impl/psb_z_mat_impl.F90 index 20815cb0..18d6c03d 100644 --- a/base/serial/impl/psb_z_mat_impl.F90 +++ b/base/serial/impl/psb_z_mat_impl.F90 @@ -1263,6 +1263,56 @@ subroutine psb_z_split_nd(a,n_rows,n_cols,info) end subroutine psb_z_split_nd +subroutine psb_z_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_z_mat_mod, psb_protect_name => psb_z_merge_nd + implicit none + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + type(psb_z_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_merge_nd + subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod From 025350a361dbfc8775dd1fa4028d79aa3567ebee Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 23 Apr 2024 14:06:27 +0200 Subject: [PATCH 080/116] Make sure realloc is always called with size >0 --- base/serial/impl/psb_c_csc_impl.F90 | 8 ++++---- base/serial/impl/psb_c_csr_impl.F90 | 8 ++++---- base/serial/impl/psb_d_csc_impl.F90 | 8 ++++---- base/serial/impl/psb_d_csr_impl.F90 | 8 ++++---- base/serial/impl/psb_s_csc_impl.F90 | 8 ++++---- base/serial/impl/psb_s_csr_impl.F90 | 8 ++++---- base/serial/impl/psb_z_csc_impl.F90 | 8 ++++---- base/serial/impl/psb_z_csr_impl.F90 | 8 ++++---- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/base/serial/impl/psb_c_csc_impl.F90 b/base/serial/impl/psb_c_csc_impl.F90 index 54332d06..fe7227dd 100644 --- a/base/serial/impl/psb_c_csc_impl.F90 +++ b/base/serial/impl/psb_c_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_c_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_lc_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index 6c21f639..9354098b 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_c_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -6281,7 +6281,7 @@ subroutine psb_lc_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_d_csc_impl.F90 b/base/serial/impl/psb_d_csc_impl.F90 index 1761b051..d9fa2874 100644 --- a/base/serial/impl/psb_d_csc_impl.F90 +++ b/base/serial/impl/psb_d_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_d_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_ld_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index 9f1d509c..bc0efcc9 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_d_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -6281,7 +6281,7 @@ subroutine psb_ld_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_s_csc_impl.F90 b/base/serial/impl/psb_s_csc_impl.F90 index a66b7dc0..c135c9ef 100644 --- a/base/serial/impl/psb_s_csc_impl.F90 +++ b/base/serial/impl/psb_s_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_s_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_ls_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index a4e1ab82..ba70e021 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_s_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -6281,7 +6281,7 @@ subroutine psb_ls_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_z_csc_impl.F90 b/base/serial/impl/psb_z_csc_impl.F90 index e5516bd9..becb7003 100644 --- a/base/serial/impl/psb_z_csc_impl.F90 +++ b/base/serial/impl/psb_z_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_z_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_lz_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 28ac121e..23a4fb84 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_z_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -6281,7 +6281,7 @@ subroutine psb_lz_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat call b%set_nzeros(a%get_nzeros()) From 5e2e1e34fde0e9f82105820878a58b6ce890aa62 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 25 Apr 2024 18:59:24 +0200 Subject: [PATCH 081/116] Introduce set_host() in inner_vect_sv --- base/serial/impl/psb_c_base_mat_impl.F90 | 5 +++-- base/serial/impl/psb_d_base_mat_impl.F90 | 5 +++-- base/serial/impl/psb_s_base_mat_impl.F90 | 5 +++-- base/serial/impl/psb_z_base_mat_impl.F90 | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index 17f2cdc8..2a2ed693 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -2060,8 +2060,6 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2161,8 +2159,11 @@ subroutine psb_c_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 69112529..73c2c920 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -2060,8 +2060,6 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2161,8 +2159,11 @@ subroutine psb_d_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 4a99a684..1c52d982 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -2060,8 +2060,6 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2161,8 +2159,11 @@ subroutine psb_s_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index 404027c5..ef7b2dd6 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -2060,8 +2060,6 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2161,8 +2159,11 @@ subroutine psb_z_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ From d444a12879c02f7ef2505c67e49b7d1ff0ce6af6 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 26 Apr 2024 11:35:44 +0200 Subject: [PATCH 082/116] Condition call to x%sync() in vect_mv --- base/serial/impl/psb_c_base_mat_impl.F90 | 4 ++-- base/serial/impl/psb_d_base_mat_impl.F90 | 4 ++-- base/serial/impl/psb_s_base_mat_impl.F90 | 4 ++-- base/serial/impl/psb_z_base_mat_impl.F90 | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index 2a2ed693..f4bc43cc 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_c_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_c_base_vect_mv diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 73c2c920..1a8dc084 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_d_base_vect_mv diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 1c52d982..0b47d472 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_s_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_s_base_vect_mv diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index ef7b2dd6..2d97f698 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_z_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_z_base_vect_mv From e72c0f0bf96c3fc56c83620230caac5d1463ee7c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 29 Apr 2024 15:04:23 +0200 Subject: [PATCH 083/116] Fix OMP impl of sparse-sparse product --- base/serial/impl/psb_c_csr_impl.F90 | 54 +++++++++++++++++------------ base/serial/impl/psb_d_csr_impl.F90 | 54 +++++++++++++++++------------ base/serial/impl/psb_s_csr_impl.F90 | 54 +++++++++++++++++------------ base/serial/impl/psb_z_csr_impl.F90 | 54 +++++++++++++++++------------ 4 files changed, 128 insertions(+), 88 deletions(-) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index 9354098b..276d3d1c 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index bc0efcc9..a0aaeeee 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index ba70e021..9d5dc145 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 23a4fb84..7f11c3bd 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d From b99aa7a90f8b369019e985d3090d8ab2d13d63f0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 29 Apr 2024 15:07:49 +0200 Subject: [PATCH 084/116] Switch off OMP in HASH g2l_ins --- base/modules/desc/psb_hash_map_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 058dbb8d..752e0baf 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -683,7 +683,7 @@ contains mglob = idxmap%get_gr() nrow = idxmap%get_lr() !write(0,*) me,name,' before loop ',psb_errstatus_fatal() -#ifdef OPENMP +#if 0 && defined(OPENMP) !call OMP_init_lock(ins_lck) if (idxmap%is_bld()) then From 188dee6842d79fa70ec3a435ef03120edc5d3217 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 04:58:31 -0400 Subject: [PATCH 085/116] Add indx_map%inc_lc() method --- base/modules/desc/psb_indx_map_mod.f90 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.f90 index 7753db23..422be5f3 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.f90 @@ -158,6 +158,7 @@ module psb_indx_map_mod procedure, pass(idxmap) :: set_lr => base_set_lr procedure, pass(idxmap) :: set_lc => base_set_lc + procedure, pass(idxmap) :: inc_lc => base_inc_lc procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy @@ -235,7 +236,7 @@ module psb_indx_map_mod & base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,& & base_get_mpic, base_sizeof, base_set_null, & & base_set_grl, base_set_gcl, & - & base_set_lr, base_set_lc, base_set_ctxt,& + & base_set_lr, base_set_lc, base_inc_lc, base_set_ctxt,& & base_set_mpic, base_get_fmt, base_asb, base_free,& & base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,& & base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,& @@ -573,6 +574,14 @@ contains idxmap%local_cols = val end subroutine base_set_lc + subroutine base_inc_lc(idxmap) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + !$omp atomic + idxmap%local_cols = idxmap%local_cols + 1 + !$omp end atomic + end subroutine base_inc_lc + subroutine base_set_p_adjcncy(idxmap,val) use psb_realloc_mod use psb_sort_mod From fa86c914111ef02c0b10fa41d850bba32e8d2b52 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 05:04:48 -0400 Subject: [PATCH 086/116] Fix OpenMP version of hash_map and hash --- base/modules/desc/psb_hash_map_mod.F90 | 504 ++++++++++--------------- base/modules/desc/psb_hash_mod.F90 | 103 ++--- 2 files changed, 261 insertions(+), 346 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 058dbb8d..2ccb9b28 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -363,6 +363,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is if (mask(i)) then ip = idx(i) @@ -388,7 +391,7 @@ contains endif end if enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 @@ -404,6 +407,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is ip = idx(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -427,14 +433,12 @@ contains idx(i) = lip endif enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 info = -1 - end if - end if end subroutine hash_g2lv1 @@ -493,6 +497,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is if (mask(i)) then ip = idxin(i) @@ -518,6 +525,7 @@ contains endif end if enddo + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 @@ -533,6 +541,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is ip = idxin(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -556,14 +567,12 @@ contains idxout(i) = lip endif enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 info = -1 - end if - end if end subroutine hash_g2lv2 @@ -649,7 +658,7 @@ contains & err_act integer(psb_lpk_) :: mglob, ip, nxt, tlip type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: me, np + integer(psb_ipk_) :: me, np,ith character(len=20) :: name,ch_err logical, allocatable :: mask_(:) !!$ logical :: use_openmp = .true. @@ -683,363 +692,243 @@ contains mglob = idxmap%get_gr() nrow = idxmap%get_lr() !write(0,*) me,name,' before loop ',psb_errstatus_fatal() -#ifdef OPENMP - !call OMP_init_lock(ins_lck) - - if (idxmap%is_bld()) then - - isLoopValid = .true. - ncol = idxmap%get_lc() - if (present(mask)) then - mask_ = mask - else - allocate(mask_(size(idx))) - mask_ = .true. - end if +#if defined(OPENMP) + isLoopValid = .true. + if (idxmap%is_bld()) then if (present(lidx)) then - if (present(mask)) then - !$omp critical(hash_g2l_ins) - - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz,lidx) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) + if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - if (mask(i)) then - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + if (mask(i)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob) ) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - !call OMP_set_lock(ins_lck) - - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = lidx(i) - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - - if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip - - - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) - - if (info /= psb_success_) then - !write(0,*) 'Error spot 1' - call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - - isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) - end if - end if + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + tlip = lip + nxt = lidx(i) + if (nxt <= nrow) then + idx(i) = -1 + else + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) + if (lip > 0) then + idx(i) = lip else - idx(i) = -1 + call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + & pad=-1_psb_lpk_,addsz=laddsz) + if (info /= psb_success_) then + info=1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='psb_ensure_size',i_err=(/info/)) + isLoopValid = .false. + end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(max(ncol,nxt)) + endif + idx(i) = lip + info = psb_success_ + else + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. + end if end if - !call OMP_unset_lock(ins_lck) - end if - else - idx(i) = lip + endif + !$omp end critical(hash_g2l_ins) end if else idx(i) = -1 end if + enddo + !$omp end parallel do - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if - else - !$omp critical(hash_g2l_ins) + else if (.not.present(mask)) then - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz,lidx) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob) ) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - !call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = lidx(i) - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - - if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip - - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) - - if (info /= psb_success_) then - !write(0,*) 'Error spot 2' - call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - - isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) - end if - end if + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + tlip = lip + nxt = lidx(i) + if (nxt <= nrow) then + idx(i) = -1 + else + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) + if (lip > 0) then + idx(i) = lip else - idx(i) = -1 + call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + & pad=-1_psb_lpk_,addsz=laddsz) + if (info /= psb_success_) then + info=1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='psb_ensure_size',i_err=(/info/)) + isLoopValid = .false. + end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(max(ncol,nxt)) + endif + idx(i) = lip + info = psb_success_ + else + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. + end if end if - !call OMP_unset_lock(ins_lck) - end if - else - idx(i) = lip + endif + !$omp end critical(hash_g2l_ins) end if - - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if + enddo + !$omp end parallel do end if + else if (.not.present(lidx)) then - if(present(mask)) then - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) - !$omp critical(hash_g2l_ins) + if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle if (mask(i)) then - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob)) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) - ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. - !write(0,*) me,name,' b hic 1 ',psb_errstatus_fatal() + ncol = idxmap%get_lc() call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - !write(0,*) me,name,' a hic 1 ',psb_errstatus_fatal() - if (lip < 0) then - !call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing, so this lock is relatively rare). - ncol = idxmap%get_lc() - nxt = ncol + 1 - !write(0,*) me,name,' b hic 2 ',psb_errstatus_fatal() - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - !write(0,*) me,name,' a hic 2 ',psb_errstatus_fatal() + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + ncol = idxmap%get_lc() + nxt = ncol + 1 + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found - !write(0,*) me,name,' b hsik ',psb_errstatus_fatal() - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - if (psb_errstatus_fatal()) write(0,*) me,name,' a hsik ',info,omp_get_thread_num() - !write(0,*) me,name,' a hsik ',psb_errstatus_fatal() - lip = tlip - - if (info >= 0) then - !write(0,*) 'Error before spot 3', info - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& + idx(i) = lip + else + call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) - if (psb_errstatus_fatal()) write(0,*) me,name,' a esz ',info,omp_get_thread_num() if (info /= psb_success_) then - !write(0,*) 'Error spot 3', info + info=1 call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - + & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) end if - end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) + endif + idx(i) = lip + info = psb_success_ else - idx(i) = -1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. end if - !call OMP_unset_lock(ins_lck) end if - else - idx(i) = lip + !$omp end critical(hash_g2l_ins) end if else idx(i) = -1 end if + enddo + !$omp end parallel do - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) + else if (.not.present(mask)) then - if (.not. isLoopValid) then - goto 9999 - end if - else - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) - !$omp critical(hash_g2l_ins) + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob)) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) - ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. + ncol = idxmap%get_lc() call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - !call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = ncol + 1 - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + ncol = idxmap%get_lc() + nxt = ncol + 1 + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip - - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& + idx(i) = lip + else + call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) - if (info /= psb_success_) then - !write(0,*) 'Error spot 4' + info=1 call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - + & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) end if - end if + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) + endif + idx(i) = lip + info = psb_success_ else - idx(i) = -1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. end if - !call OMP_unset_lock(ins_lck) end if - - else - idx(i) = lip + !$omp end critical(hash_g2l_ins) end if - - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if - + enddo + !$omp end parallel do end if end if else @@ -1047,7 +936,7 @@ contains idx = -1 info = -1 end if - !call OMP_destroy_lock(ins_lck) + if (.not. isLoopValid) goto 9999 #else !!$ else if (.not.use_openmp) then isLoopValid = .true. @@ -1066,13 +955,13 @@ contains call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) if (lip < 0) then - tlip = lip nxt = lidx(i) if (nxt <= nrow) then idx(i) = -1 cycle endif call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == tlip) then ncol = max(ncol,nxt) @@ -1747,6 +1636,9 @@ contains ! for a width of psb_hash_bits ! if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n if (mask(i)) then key = x(i) @@ -1784,7 +1676,11 @@ contains end if end if end do + !$omp end parallel do else + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,glb_lc,nrm) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1820,6 +1716,7 @@ contains x(i) = tmp end if end do + !$omp end parallel do end if end subroutine hash_inner_cnv1 @@ -1842,6 +1739,9 @@ contains ! for a width of psb_hash_bits ! if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n if (mask(i)) then key = x(i) @@ -1882,9 +1782,12 @@ contains end if end if end do - + !$omp end parallel do else + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1923,6 +1826,7 @@ contains y(i) = tmp end if end do + !$omp end parallel do end if end subroutine hash_inner_cnv2 diff --git a/base/modules/desc/psb_hash_mod.F90 b/base/modules/desc/psb_hash_mod.F90 index eb5556a2..18b1142d 100644 --- a/base/modules/desc/psb_hash_mod.F90 +++ b/base/modules/desc/psb_hash_mod.F90 @@ -383,12 +383,12 @@ contains integer(psb_lpk_), intent(out) :: val integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: hsize,hmask, hk, hd + integer(psb_ipk_) :: hsize,hmask, hk, hd, i + logical :: redo info = HashOK hsize = hash%hsize hmask = hash%hmask - val = -1 hk = iand(psb_hashval(key),hmask) if (hk == 0) then hd = 1 @@ -400,56 +400,57 @@ contains info = HashOutOfMemory return end if - + val = -1 + !$omp atomic hash%nsrch = hash%nsrch + 1 + !$omp end atomic do + !$omp atomic hash%nacc = hash%nacc + 1 + !$omp end atomic if (hash%table(hk,1) == key) then val = hash%table(hk,2) info = HashDuplicate - !write(0,*) 'In searchinskey 1 : ', info, HashDuplicate return end if + redo = .false. !$omp critical(hashsearchins) - if (hash%table(hk,1) == key) then - val = hash%table(hk,2) - info = HashDuplicate - else - if (hash%table(hk,1) == HashFreeEntry) then - if (hash%nk == hash%hsize -1) then - ! - ! Note: because of the way we allocate things at CDALL - ! time this is really unlikely; if we get here, we - ! have at least as many halo indices as internals, which - ! means we're already in trouble. But we try to keep going. - ! - call psb_hash_realloc(hash,info) - if (info /= HashOk) then - info = HashOutOfMemory - !return - else - call psb_hash_searchinskey(key,val,nextval,hash,info) - !return - end if + if (hash%table(hk,1) == HashFreeEntry) then + if (hash%nk == hash%hsize -1) then + ! + ! Note: because of the way we allocate things at CDALL + ! time this is really unlikely; if we get here, we + ! have at least as many halo indices as internals, which + ! means we're already in trouble. But we try to keep going. + ! + call psb_hash_realloc(hash,info) + if (info /= HashOk) then + info = HashOutOfMemory else - hash%nk = hash%nk + 1 - hash%table(hk,1) = key - hash%table(hk,2) = nextval - val = nextval - !return + redo = .true. end if + else + hash%nk = hash%nk + 1 + hash%table(hk,1) = key + hash%table(hk,2) = nextval + val = nextval + info = HashOk end if + else if (hash%table(hk,1) == key) then + val = hash%table(hk,2) + info = HashDuplicate + else + info = HashNotFound end if !$omp end critical(hashsearchins) - if (info /= HashOk) then - write(0,*) 'In searchinskey 2: ', info + if (redo) then + call psb_hash_searchinskey(key,val,nextval,hash,info) return end if - if (val > 0) return + if (val > 0) exit hk = hk - hd if (hk < 0) hk = hk + hsize end do - !write(0,*) 'In searchinskey 3: ', info end subroutine psb_hash_lsearchinskey recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info) @@ -459,10 +460,11 @@ contains integer(psb_ipk_) :: hsize,hmask, hk, hd logical :: redo + info = HashOK hsize = hash%hsize hmask = hash%hmask - + hk = iand(psb_hashval(key),hmask) if (hk == 0) then hd = 1 @@ -475,17 +477,22 @@ contains return end if val = -1 + val = -1 + !$omp atomic hash%nsrch = hash%nsrch + 1 + !$omp end atomic do + !$omp atomic hash%nacc = hash%nacc + 1 + !$omp end atomic if (hash%table(hk,1) == key) then val = hash%table(hk,2) info = HashDuplicate return end if redo = .false. - !$OMP CRITICAL - if (hash%table(hk,1) == HashFreeEntry) then + !$omp critical(hashsearchins) + if (hash%table(hk,1) == HashFreeEntry) then if (hash%nk == hash%hsize -1) then ! ! Note: because of the way we allocate things at CDALL @@ -496,24 +503,28 @@ contains call psb_hash_realloc(hash,info) if (info /= HashOk) then info = HashOutOfMemory - !return else redo = .true. -!!$ call psb_hash_searchinskey(key,val,nextval,hash,info) -!!$ return end if else hash%nk = hash%nk + 1 hash%table(hk,1) = key hash%table(hk,2) = nextval val = nextval - !return + info = HashOk end if + else if (hash%table(hk,1) == key) then + val = hash%table(hk,2) + info = HashDuplicate + else + info = HashNotFound end if - !$OMP END CRITICAL - if (redo) call psb_hash_searchinskey(key,val,nextval,hash,info) - if (info /= HashOk) return - if (val > 0) return + !$omp end critical(hashsearchins) + if (redo) then + call psb_hash_searchinskey(key,val,nextval,hash,info) + return + end if + if (val > 0) exit hk = hk - hd if (hk < 0) hk = hk + hsize end do @@ -551,7 +562,7 @@ contains end if if (hash%table(hk,1) == HashFreeEntry) then val = HashFreeEntry -! !$ info = HashNotFound + info = HashNotFound return end if hk = hk - hd @@ -591,7 +602,7 @@ contains end if if (hash%table(hk,1) == HashFreeEntry) then val = HashFreeEntry -! !$ info = HashNotFound + info = HashNotFound return end if hk = hk - hd From 98a9005602469c2e711486984102a152e5dcb1a8 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 05:50:45 -0400 Subject: [PATCH 087/116] Further advances on OpenMP versions of various index maps. --- base/modules/desc/psb_gen_block_map_mod.F90 | 48 +++++++++++++++------ base/modules/desc/psb_hash_map_mod.F90 | 10 ++++- base/modules/desc/psb_list_map_mod.F90 | 35 +++++++++++++-- 3 files changed, 75 insertions(+), 18 deletions(-) diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index f0c433e0..650bb430 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -215,7 +215,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_,info) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -229,9 +231,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_,info) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%min_glob_row + idx(i) - 1 @@ -243,7 +247,7 @@ contains info = -1 end if end do - + !$omp end parallel do end if end subroutine block_ll2gv1 @@ -277,7 +281,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap,owned_,info) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -291,9 +297,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap,owned_,info) & + !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then idxout(i) = idxmap%min_glob_row + idxin(i) - 1 @@ -305,7 +313,7 @@ contains info = -1 end if end do - + !$omp end parallel do end if if (is > im) then @@ -392,6 +400,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,nv,tidx) do i=1, is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and. & @@ -408,7 +419,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,ip,lip,tidx,info) do i=1,is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and.& @@ -424,8 +439,8 @@ contains end if end if end do + !$omp end parallel do else -!!$ write(0,*) 'Block status: invalid ',idxmap%get_state() idx(1:is) = -1 info = -1 end if @@ -433,6 +448,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,nv,tidx) do i=1, is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -447,8 +465,11 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,ip,lip,tidx,info) do i=1,is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -462,6 +483,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -953,7 +975,9 @@ contains end if info = psb_success_ else - info = -5 + write(0,*) 'From has_search_ins:',info,ip,lip,nxt,& + & idxmap%min_glob_row,idxmap%max_glob_row + info = -6 return end if idxout(i) = lip + idxmap%local_rows @@ -1131,7 +1155,7 @@ contains idxmap%global_cols = ntot idxmap%local_rows = nl idxmap%local_cols = nl - idxmap%ctxt = ctxt + idxmap%ctxt = ctxt idxmap%state = psb_desc_bld_ idxmap%mpic = psb_get_mpi_comm(ctxt) idxmap%min_glob_row = vnl(iam)+1 diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 2ccb9b28..96c917d4 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -221,6 +221,9 @@ contains if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -233,9 +236,12 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -246,7 +252,7 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do end if end subroutine hash_l2gv1 diff --git a/base/modules/desc/psb_list_map_mod.F90 b/base/modules/desc/psb_list_map_mod.F90 index 6b61cf52..913145da 100644 --- a/base/modules/desc/psb_list_map_mod.F90 +++ b/base/modules/desc/psb_list_map_mod.F90 @@ -178,7 +178,10 @@ contains end if if (present(mask)) then - + + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then @@ -191,9 +194,12 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -204,7 +210,8 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do + end if end subroutine list_ll2gv1 @@ -298,6 +305,9 @@ contains if (present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,ix) do i=1,is if (mask(i)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -309,6 +319,7 @@ contains end if end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -317,6 +328,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,ix) do i=1, is if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) @@ -326,6 +340,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -365,6 +380,9 @@ contains if (present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idxin,idxout,idxmap,owned_) & + !$omp private(i,ix) do i=1,is if (mask(i)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -376,6 +394,7 @@ contains end if end if end do + !$omp end parallel do else idxout(1:is) = -1 info = -1 @@ -384,6 +403,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idxin,idxout,idxmap,owned_) & + !$omp private(i,ix) do i=1, is if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) @@ -393,6 +415,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else idxout(1:is) = -1 info = -1 @@ -541,6 +564,10 @@ contains else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,laddsz,lidx) & + !$omp private(i,ix,info) + ! $ o m p reduction(.AND.:isLoopValid) do i=1, is if (info /= 0) cycle if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -579,8 +606,8 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if - else if (.not.present(lidx)) then if (present(mask)) then From 773b79e7bc7437ad270c8b8459c88b2fe1a927c4 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 06:11:15 -0400 Subject: [PATCH 088/116] OpenMP in repl_map --- base/modules/desc/psb_repl_map_mod.F90 | 94 ++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 14 deletions(-) diff --git a/base/modules/desc/psb_repl_map_mod.F90 b/base/modules/desc/psb_repl_map_mod.F90 index fe51b7b1..738d6de2 100644 --- a/base/modules/desc/psb_repl_map_mod.F90 +++ b/base/modules/desc/psb_repl_map_mod.F90 @@ -169,7 +169,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -179,9 +181,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then ! do nothing @@ -189,7 +193,7 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do end if end subroutine repl_l2gv1 @@ -223,7 +227,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -233,9 +239,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap) & + !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then idxout(i) = idxin(i) @@ -243,7 +251,7 @@ contains idxout(i) = -1 end if end do - + !$omp end parallel do end if if (is > im) info = -3 @@ -324,6 +332,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -333,7 +344,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap) & + !$omp private(i) do i=1,is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -344,6 +359,7 @@ contains end if end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -352,6 +368,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -359,7 +378,11 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap) & + !$omp private(i) do i=1,is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -367,6 +390,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -409,6 +433,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -418,7 +445,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap) & + !$omp private(i) do i=1,im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -428,6 +459,7 @@ contains end if end if end do + !$omp end parallel do else idxout(1:im) = -1 info = -1 @@ -436,6 +468,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -443,7 +478,11 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap) & + !$omp private(i) do i=1,im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -451,6 +490,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else idxout(1:im) = -1 info = -1 @@ -557,6 +597,9 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,lidx,is,idxmap) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -566,9 +609,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,lidx,is,idxmap) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -576,9 +621,13 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,is,idxmap) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -588,8 +637,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,is,idxmap) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -597,6 +649,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if end if else @@ -644,6 +697,9 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -653,9 +709,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -663,9 +721,13 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -675,8 +737,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -684,6 +749,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do end if end if else From e711c53fab67b1f1e87ad933704cff97e845a9aa Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 7 May 2024 07:07:18 -0400 Subject: [PATCH 089/116] Make sure we compile when LPK /= IPK --- base/modules/desc/psb_hash_map_mod.F90 | 36 ++++++++++++++++---------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 09cde3d4..952aef30 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -733,10 +733,11 @@ contains if (lip > 0) then idx(i) = lip else - call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == lip) then - call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) if (info /= psb_success_) then info=1 @@ -745,7 +746,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(max(ncol,nxt)) + ncol = max(ncol,nxt) + call idxmap%set_lc(ncol) endif idx(i) = lip info = psb_success_ @@ -794,10 +796,11 @@ contains if (lip > 0) then idx(i) = lip else - call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == lip) then - call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) if (info /= psb_success_) then info=1 @@ -806,7 +809,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(max(ncol,nxt)) + ncol = max(ncol,nxt) + call idxmap%set_lc(ncol) endif idx(i) = lip info = psb_success_ @@ -852,10 +856,11 @@ contains if (lip > 0) then idx(i) = lip else - call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == lip) then - call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) if (info /= psb_success_) then info=1 @@ -863,8 +868,9 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(nxt) + ncol = nxt + idxmap%loc_to_glob(ncol) = ip + call idxmap%set_lc(ncol) endif idx(i) = lip info = psb_success_ @@ -909,10 +915,11 @@ contains if (lip > 0) then idx(i) = lip else - call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info) + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == lip) then - call psb_ensure_size(nxt,idxmap%loc_to_glob,info,& + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) if (info /= psb_success_) then info=1 @@ -920,8 +927,9 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(nxt) + ncol = nxt + idxmap%loc_to_glob(ncol) = ip + call idxmap%set_lc(ncol) endif idx(i) = lip info = psb_success_ From 7dc64692cc5bc75b70e4e22eb013b9a4fdb80a02 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 8 May 2024 14:46:27 +0200 Subject: [PATCH 090/116] Fix for OpenMP runs in hash_map_mod --- base/modules/desc/psb_hash_map_mod.F90 | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 952aef30..8695fb93 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -660,7 +660,7 @@ contains logical, intent(in), optional :: mask(:) integer(psb_ipk_), intent(in), optional :: lidx(:) - integer(psb_ipk_) :: i, is, lip, nrow, ncol, & + integer(psb_ipk_) :: i, is, lip, nrow, ncol, inxt,& & err_act integer(psb_lpk_) :: mglob, ip, nxt, tlip type(psb_ctxt_type) :: ctxt @@ -706,7 +706,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp private(i,ip,lip,tlip,nxt,inxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then @@ -746,8 +746,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - ncol = max(ncol,nxt) - call idxmap%set_lc(ncol) + inxt = max(ncol,nxt) + call idxmap%set_lc(inxt) endif idx(i) = lip info = psb_success_ @@ -770,7 +770,7 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp private(i,ip,lip,tlip,nxt,inxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) @@ -809,8 +809,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - ncol = max(ncol,nxt) - call idxmap%set_lc(ncol) + inxt = max(ncol,nxt) + call idxmap%set_lc(inxt) endif idx(i) = lip info = psb_success_ @@ -832,7 +832,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp private(i,ip,lip,tlip,nxt,inxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then @@ -868,9 +868,9 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - ncol = nxt - idxmap%loc_to_glob(ncol) = ip - call idxmap%set_lc(ncol) + inxt = nxt + idxmap%loc_to_glob(inxt) = ip + call idxmap%set_lc(inxt) endif idx(i) = lip info = psb_success_ @@ -892,7 +892,7 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp private(i,ip,lip,tlip,nxt,inxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) @@ -927,9 +927,9 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - ncol = nxt - idxmap%loc_to_glob(ncol) = ip - call idxmap%set_lc(ncol) + inxt = nxt + idxmap%loc_to_glob(inxt) = ip + call idxmap%set_lc(inxt) endif idx(i) = lip info = psb_success_ From 7ec394ce1cc946697b134f33a54315b0d5921c10 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 9 May 2024 11:29:23 +0200 Subject: [PATCH 091/116] Rename indx_map_mod and put SET_LR/C under ifdef --- ..._indx_map_mod.f90 => psb_indx_map_mod.F90} | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) rename base/modules/desc/{psb_indx_map_mod.f90 => psb_indx_map_mod.F90} (97%) diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.F90 similarity index 97% rename from base/modules/desc/psb_indx_map_mod.f90 rename to base/modules/desc/psb_indx_map_mod.F90 index 422be5f3..fa3e5a69 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.F90 @@ -153,12 +153,25 @@ module psb_indx_map_mod procedure, pass(idxmap) :: set_gci => base_set_gci procedure, pass(idxmap) :: set_grl => base_set_grl procedure, pass(idxmap) :: set_gcl => base_set_gcl +#if defined(IPK4) && defined(LPK8) + generic, public :: set_gr => set_grl, set_gri + generic, public :: set_gc => set_gcl, set_gci +#else generic, public :: set_gr => set_grl generic, public :: set_gc => set_gcl - - procedure, pass(idxmap) :: set_lr => base_set_lr - procedure, pass(idxmap) :: set_lc => base_set_lc +#endif + procedure, pass(idxmap) :: set_lri => base_set_lri + procedure, pass(idxmap) :: set_lrl => base_set_lrl + procedure, pass(idxmap) :: set_lci => base_set_lci + procedure, pass(idxmap) :: set_lcl => base_set_lcl procedure, pass(idxmap) :: inc_lc => base_inc_lc +#if defined(IPK4) && defined(LPK8) + generic, public :: set_lr => set_lrl, set_lri + generic, public :: set_lc => set_lcl, set_lci +#else + generic, public :: set_lr => set_lri + generic, public :: set_lc => set_lci +#endif procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy @@ -236,7 +249,8 @@ module psb_indx_map_mod & base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,& & base_get_mpic, base_sizeof, base_set_null, & & base_set_grl, base_set_gcl, & - & base_set_lr, base_set_lc, base_inc_lc, base_set_ctxt,& + & base_set_lri, base_set_lci, base_set_lrl, base_set_lcl, & + & base_inc_lc, base_set_ctxt,& & base_set_mpic, base_get_fmt, base_asb, base_free,& & base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,& & base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,& @@ -558,21 +572,39 @@ contains idxmap%global_cols = val end subroutine base_set_gcl - subroutine base_set_lr(idxmap,val) + subroutine base_set_lri(idxmap,val) implicit none class(psb_indx_map), intent(inout) :: idxmap integer(psb_ipk_), intent(in) :: val idxmap%local_rows = val - end subroutine base_set_lr + end subroutine base_set_lri - subroutine base_set_lc(idxmap,val) + subroutine base_set_lci(idxmap,val) implicit none class(psb_indx_map), intent(inout) :: idxmap integer(psb_ipk_), intent(in) :: val + !$omp critical + idxmap%local_cols = val + !$omp end critical + end subroutine base_set_lci + + subroutine base_set_lrl(idxmap,val) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_lpk_), intent(in) :: val + idxmap%local_rows = val + end subroutine base_set_lrl + + subroutine base_set_lcl(idxmap,val) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_lpk_), intent(in) :: val + !$omp critical idxmap%local_cols = val - end subroutine base_set_lc + !$omp end critical + end subroutine base_set_lcl subroutine base_inc_lc(idxmap) implicit none From d8ed01218dcf12c7698b6f2471cebcb05c174055 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 9 May 2024 12:30:06 +0200 Subject: [PATCH 092/116] Cleanup hash_map using new indx_map%set_lc --- base/modules/desc/psb_hash_map_mod.F90 | 28 ++++++++++++-------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 8695fb93..c3d833c6 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -660,7 +660,7 @@ contains logical, intent(in), optional :: mask(:) integer(psb_ipk_), intent(in), optional :: lidx(:) - integer(psb_ipk_) :: i, is, lip, nrow, ncol, inxt,& + integer(psb_ipk_) :: i, is, lip, nrow, ncol,& & err_act integer(psb_lpk_) :: mglob, ip, nxt, tlip type(psb_ctxt_type) :: ctxt @@ -706,7 +706,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,inxt,info) & + !$omp private(i,ip,lip,tlip,nxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then @@ -746,8 +746,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - inxt = max(ncol,nxt) - call idxmap%set_lc(inxt) + nxt = max(ncol,nxt) + call idxmap%set_lc(nxt) endif idx(i) = lip info = psb_success_ @@ -770,7 +770,7 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,inxt,info) & + !$omp private(i,ip,lip,tlip,nxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) @@ -809,8 +809,8 @@ contains isLoopValid = .false. end if idxmap%loc_to_glob(nxt) = ip - inxt = max(ncol,nxt) - call idxmap%set_lc(inxt) + nxt = max(ncol,nxt) + call idxmap%set_lc(nxt) endif idx(i) = lip info = psb_success_ @@ -832,7 +832,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & !$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,inxt,info) & + !$omp private(i,ip,lip,tlip,nxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is if (mask(i)) then @@ -868,9 +868,8 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - inxt = nxt - idxmap%loc_to_glob(inxt) = ip - call idxmap%set_lc(inxt) + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) endif idx(i) = lip info = psb_success_ @@ -892,7 +891,7 @@ contains !$omp parallel do default(none) schedule(dynamic) & !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - !$omp private(i,ip,lip,tlip,nxt,inxt,info) & + !$omp private(i,ip,lip,tlip,nxt,info) & !$omp reduction(.AND.:isLoopValid) do i = 1, is ip = idx(i) @@ -927,9 +926,8 @@ contains & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. end if - inxt = nxt - idxmap%loc_to_glob(inxt) = ip - call idxmap%set_lc(inxt) + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) endif idx(i) = lip info = psb_success_ From d01b8145c6f4de5b3beb4ebabc88c6ec69e52bfe Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 9 May 2024 17:08:45 +0200 Subject: [PATCH 093/116] Fix cuda makefile dependencies --- cuda/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/Makefile b/cuda/Makefile index 49c5f0cb..33787f4e 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -64,7 +64,7 @@ spgpulib: $(MAKE) -C spgpu lib -dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o +diagdev_mod.o dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o From a613e963dbf823664a4aa90413931b12e8584705 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 10 May 2024 10:10:54 +0200 Subject: [PATCH 094/116] First step in fix for coo_impl on OpenMP --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 5c90e287..84517ead 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4205,7 +4205,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index f6a173d1..1b122fd8 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4205,7 +4205,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 4c12d8fc..3df01ba6 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4205,7 +4205,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 44ee89b5..356914de 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4205,7 +4205,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) From ecccb1391437fea6dc6984129e79e32140293091 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 10 May 2024 13:53:35 +0200 Subject: [PATCH 095/116] Fix COO fix_coo_inner_rowmajor not to overflow on integers. --- base/serial/impl/psb_c_coo_impl.F90 | 109 +++++++++++++++------------- base/serial/impl/psb_d_coo_impl.F90 | 109 +++++++++++++++------------- base/serial/impl/psb_s_coo_impl.F90 | 109 +++++++++++++++------------- base/serial/impl/psb_z_coo_impl.F90 | 109 +++++++++++++++------------- 4 files changed, 240 insertions(+), 196 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 84517ead..cc3c6842 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4268,7 +4268,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4302,10 +4302,13 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if if (use_buffers) then - iaux(:) = 0 #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnr = maxval(iaux(1:nr)) #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4342,22 +4347,21 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4386,67 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(maxnr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4564,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 1b122fd8..e523d83d 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4268,7 +4268,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4302,10 +4302,13 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if if (use_buffers) then - iaux(:) = 0 #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnr = maxval(iaux(1:nr)) #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4342,22 +4347,21 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4386,67 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(maxnr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4564,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 3df01ba6..b46731fc 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4268,7 +4268,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4302,10 +4302,13 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if if (use_buffers) then - iaux(:) = 0 #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnr = maxval(iaux(1:nr)) #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4342,22 +4347,21 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4386,67 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(maxnr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4564,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 356914de..a7e527e5 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4268,7 +4268,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4302,10 +4302,13 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if if (use_buffers) then - iaux(:) = 0 #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnr = maxval(iaux(1:nr)) #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4342,22 +4347,21 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4386,67 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(maxnr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4564,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then From 70f51b9da89cb36897bedbdb80f29fbc439ac21e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 15 May 2024 12:05:15 +0200 Subject: [PATCH 096/116] Improve handling of fix_coo buffers with OpenMP --- base/serial/impl/psb_c_coo_impl.F90 | 81 +++++++++++++++++------------ base/serial/impl/psb_d_coo_impl.F90 | 81 +++++++++++++++++------------ base/serial/impl/psb_s_coo_impl.F90 | 81 +++++++++++++++++------------ base/serial/impl/psb_z_coo_impl.F90 | 81 +++++++++++++++++------------ 4 files changed, 188 insertions(+), 136 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index cc3c6842..1b015ab1 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4174,7 +4174,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,7 +4300,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then + !if (use_buffers) then #if defined(OPENMP) !$omp workshare iaux(:) = 0 @@ -4322,7 +4321,14 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = maxval(iaux(1:nr)) + maxnr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnr) + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do + !$OMP END PARALLEL DO #else iaux(:) = 0 !srt_inp = .true. @@ -4338,8 +4344,12 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnr = 0 + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4417,7 +4427,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(maxnr+2)) + allocate(ixt(2*maxnr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4721,7 +4731,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4743,38 +4753,41 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnr+2)) + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j + last_elem = j end if - - last_elem = j - end if - end do + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index e523d83d..e714ef5e 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4174,7 +4174,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,7 +4300,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then + !if (use_buffers) then #if defined(OPENMP) !$omp workshare iaux(:) = 0 @@ -4322,7 +4321,14 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = maxval(iaux(1:nr)) + maxnr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnr) + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do + !$OMP END PARALLEL DO #else iaux(:) = 0 !srt_inp = .true. @@ -4338,8 +4344,12 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnr = 0 + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4417,7 +4427,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(maxnr+2)) + allocate(ixt(2*maxnr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4721,7 +4731,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4743,38 +4753,41 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnr+2)) + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j + last_elem = j end if - - last_elem = j - end if - end do + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index b46731fc..029b9dbb 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4174,7 +4174,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,7 +4300,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then + !if (use_buffers) then #if defined(OPENMP) !$omp workshare iaux(:) = 0 @@ -4322,7 +4321,14 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = maxval(iaux(1:nr)) + maxnr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnr) + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do + !$OMP END PARALLEL DO #else iaux(:) = 0 !srt_inp = .true. @@ -4338,8 +4344,12 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnr = 0 + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4417,7 +4427,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(maxnr+2)) + allocate(ixt(2*maxnr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4721,7 +4731,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4743,38 +4753,41 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnr+2)) + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j + last_elem = j end if - - last_elem = j - end if - end do + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index a7e527e5..32dd80b9 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4174,7 +4174,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,7 +4300,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then + !if (use_buffers) then #if defined(OPENMP) !$omp workshare iaux(:) = 0 @@ -4322,7 +4321,14 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = maxval(iaux(1:nr)) + maxnr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnr) + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do + !$OMP END PARALLEL DO #else iaux(:) = 0 !srt_inp = .true. @@ -4338,8 +4344,12 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnr = 0 + do i=1,nr + maxnr = max(maxnr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4417,7 +4427,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(maxnr+2)) + allocate(ixt(2*maxnr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4721,7 +4731,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4743,38 +4753,41 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnr+2)) + ! --------------------------------------------------- + + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j + last_elem = j end if - - last_elem = j - end if - end do + end do + end block !$OMP END PARALLEL #else i = 1 From c8cc2275d07182637c75309cd5c42b4a8595e0c4 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 15 May 2024 16:39:22 +0200 Subject: [PATCH 097/116] Fix cuda/makefile for make -j --- cuda/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda/Makefile b/cuda/Makefile index 33787f4e..7e428629 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -64,7 +64,7 @@ spgpulib: $(MAKE) -C spgpu lib -diagdev_mod.o dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o +hdiagdev_mod.o diagdev_mod.o dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o From 35096a2ef9da6a698af6c7b479f3fb26498da518 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 16 May 2024 10:05:11 +0200 Subject: [PATCH 098/116] Cosmetic changes to coo_impl --- base/serial/impl/psb_c_coo_impl.F90 | 24 ++++++++++++------------ base/serial/impl/psb_d_coo_impl.F90 | 24 ++++++++++++------------ base/serial/impl/psb_s_coo_impl.F90 | 24 ++++++++++++------------ base/serial/impl/psb_z_coo_impl.F90 | 24 ++++++++++++------------ 4 files changed, 48 insertions(+), 48 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 1b015ab1..60155771 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4267,7 +4267,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret, maxnr + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4305,9 +4305,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !$omp workshare iaux(:) = 0 !$omp end workshare - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux,maxnr) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4321,12 +4321,12 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP private(i) shared(nr,iaux)& - !$OMP reduction(max:maxnr) + !$OMP reduction(max:maxnzr) do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do !$OMP END PARALLEL DO #else @@ -4344,9 +4344,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do - maxnr = 0 + maxnzr = 0 do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do #endif !end if @@ -4371,7 +4371,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4427,7 +4427,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4731,7 +4731,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4755,7 +4755,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) ! --------------------------------------------------- first_elem = 0 diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index e714ef5e..d92b0dc5 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4267,7 +4267,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret, maxnr + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4305,9 +4305,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !$omp workshare iaux(:) = 0 !$omp end workshare - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux,maxnr) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4321,12 +4321,12 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP private(i) shared(nr,iaux)& - !$OMP reduction(max:maxnr) + !$OMP reduction(max:maxnzr) do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do !$OMP END PARALLEL DO #else @@ -4344,9 +4344,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do - maxnr = 0 + maxnzr = 0 do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do #endif !end if @@ -4371,7 +4371,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4427,7 +4427,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4731,7 +4731,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4755,7 +4755,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) ! --------------------------------------------------- first_elem = 0 diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 029b9dbb..01d82399 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4267,7 +4267,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret, maxnr + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4305,9 +4305,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !$omp workshare iaux(:) = 0 !$omp end workshare - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux,maxnr) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4321,12 +4321,12 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP private(i) shared(nr,iaux)& - !$OMP reduction(max:maxnr) + !$OMP reduction(max:maxnzr) do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do !$OMP END PARALLEL DO #else @@ -4344,9 +4344,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do - maxnr = 0 + maxnzr = 0 do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do #endif !end if @@ -4371,7 +4371,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4427,7 +4427,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4731,7 +4731,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4755,7 +4755,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) ! --------------------------------------------------- first_elem = 0 diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 32dd80b9..c17ce628 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4267,7 +4267,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret, maxnr + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4305,9 +4305,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !$omp workshare iaux(:) = 0 !$omp end workshare - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux,maxnr) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4321,12 +4321,12 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO - maxnr = 0 + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP private(i) shared(nr,iaux)& - !$OMP reduction(max:maxnr) + !$OMP reduction(max:maxnzr) do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do !$OMP END PARALLEL DO #else @@ -4344,9 +4344,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do - maxnr = 0 + maxnzr = 0 do i=1,nr - maxnr = max(maxnr,iaux(i)) + maxnzr = max(maxnzr,iaux(i)) end do #endif !end if @@ -4371,7 +4371,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4427,7 +4427,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf ! the number of unique values in 'kaux' block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) do j=1,nr first_elem = iaux(j) @@ -4731,7 +4731,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnr) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4755,7 +4755,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf block integer(psb_ipk_), allocatable :: ixt(:) - allocate(ixt(2*maxnr+2)) + allocate(ixt(2*maxnzr+2)) ! --------------------------------------------------- first_elem = 0 From e19284eb6cb36d599ea3a9c73aafbd6f16ecfb98 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 16 May 2024 10:10:08 +0200 Subject: [PATCH 099/116] Small omp addition --- base/serial/impl/psb_c_coo_impl.F90 | 3 ++- base/serial/impl/psb_d_coo_impl.F90 | 3 ++- base/serial/impl/psb_s_coo_impl.F90 | 3 ++- base/serial/impl/psb_z_coo_impl.F90 | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 60155771..d6117546 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_c_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index d92b0dc5..4cb4c3ec 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_d_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 01d82399..f706db33 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_s_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index c17ce628..c368ce91 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_z_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo From 12a4c21fedff07f50328f2ed856e0187522469ab Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 20 May 2024 11:47:32 +0200 Subject: [PATCH 100/116] Fixes for OpenMP compilation in map_mod --- base/modules/desc/psb_gen_block_map_mod.F90 | 4 ++-- base/modules/desc/psb_repl_map_mod.F90 | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index 650bb430..82a4cc15 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -282,7 +282,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap,owned_,info) & + !$omp shared(mask,idxin,idxout,idxmap,owned_,info,im) & !$omp private(i) do i=1, im if (mask(i)) then @@ -300,7 +300,7 @@ contains !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap,owned_,info) & + !$omp shared(idxin,idxout,idxmap,owned_,info,im) & !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then diff --git a/base/modules/desc/psb_repl_map_mod.F90 b/base/modules/desc/psb_repl_map_mod.F90 index 738d6de2..f68ae3b8 100644 --- a/base/modules/desc/psb_repl_map_mod.F90 +++ b/base/modules/desc/psb_repl_map_mod.F90 @@ -228,7 +228,7 @@ contains if (present(mask)) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap) & + !$omp shared(mask,idxin,idxout,idxmap,im) & !$omp private(i) do i=1, im if (mask(i)) then @@ -242,7 +242,7 @@ contains !$omp end parallel do else if (.not.present(mask)) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap) & + !$omp shared(idxin,idxout,idxmap,im) & !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -333,7 +333,7 @@ contains if (idxmap%is_asb()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap) & + !$omp shared(mask,idx,idxmap,is) & !$omp private(i) do i=1, is if (mask(i)) then @@ -347,7 +347,7 @@ contains !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idx,idxmap) & + !$omp shared(mask,idx,idxmap,is) & !$omp private(i) do i=1,is if (mask(i)) then @@ -369,7 +369,7 @@ contains if (idxmap%is_asb()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap) & + !$omp shared(idx,idxmap,is) & !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -381,7 +381,7 @@ contains !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idx,idxmap) & + !$omp shared(idx,idxmap,is) & !$omp private(i) do i=1,is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -434,7 +434,7 @@ contains if (idxmap%is_asb()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap) & + !$omp shared(mask,idxin,idxout,idxmap,im) & !$omp private(i) do i=1, im if (mask(i)) then @@ -448,7 +448,7 @@ contains !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(mask,idxin,idxout,idxmap) & + !$omp shared(mask,idxin,idxout,idxmap,im) & !$omp private(i) do i=1,im if (mask(i)) then @@ -469,7 +469,7 @@ contains if (idxmap%is_asb()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap) & + !$omp shared(idxin,idxout,idxmap,im) & !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -481,7 +481,7 @@ contains !$omp end parallel do else if (idxmap%is_valid()) then !$omp parallel do default(none) schedule(dynamic) & - !$omp shared(idxin,idxout,idxmap) & + !$omp shared(idxin,idxout,idxmap,im) & !$omp private(i) do i=1,im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then From ee66db5efde31451f7673bf05bbb40fe482c9055 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 21 May 2024 11:25:38 +0200 Subject: [PATCH 101/116] Refactor interface to cusparse in preparation for CSR Adaptive --- cuda/ccusparse.c | 59 +-------------------------- cuda/ccusparse.h | 100 ++++++++++++++++++++++++++++++++++++++++++++++ cuda/dcusparse.c | 59 +-------------------------- cuda/dcusparse.h | 101 ++++++++++++++++++++++++++++++++++++++++++++++ cuda/scusparse.c | 58 +-------------------------- cuda/scusparse.h | 102 +++++++++++++++++++++++++++++++++++++++++++++++ cuda/zcusparse.c | 59 +-------------------------- cuda/zcusparse.h | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 408 insertions(+), 231 deletions(-) create mode 100644 cuda/ccusparse.h create mode 100644 cuda/dcusparse.h create mode 100644 cuda/scusparse.h create mode 100644 cuda/zcusparse.h diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index bab6ede0..3a6556ce 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -38,62 +38,5 @@ #include "cintrf.h" #include "fcusparse.h" - -/* Double precision real */ -#define TYPE float complex -#define CUSPARSE_BASE_TYPE CUDA_C_32F -#define T_CSRGDeviceMat c_CSRGDeviceMat -#define T_Cmat c_Cmat -#define T_spmvCSRGDevice c_spmvCSRGDevice -#define T_spsvCSRGDevice c_spsvCSRGDevice -#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc -#define T_CSRGDeviceFree c_CSRGDeviceFree -#define T_CSRGHost2Device c_CSRGHost2Device -#define T_CSRGDevice2Host c_CSRGDevice2Host -#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode -#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType -#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms - -#if CUDA_SHORT_VERSION <= 10 -#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis -#define cusparseTcsrmv cusparseCcsrmv -#define cusparseTcsrsv_solve cusparseCcsrsv_solve -#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis -#define T_HYBGDeviceMat c_HYBGDeviceMat -#define T_Hmat c_Hmat -#define T_HYBGDeviceFree c_HYBGDeviceFree -#define T_spmvHYBGDevice c_spmvHYBGDevice -#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc -#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType -#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase -#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType -#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode -#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis -#define T_spsvHYBGDevice c_spsvHYBGDevice -#define T_HYBGHost2Device c_HYBGHost2Device -#define cusparseThybmv cusparseChybmv -#define cusparseThybsv_solve cusparseChybsv_solve -#define cusparseThybsv_analysis cusparseChybsv_analysis -#define cusparseTcsr2hyb cusparseCcsr2hyb - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve -#else - -#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer -#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr -#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr -#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr - -#endif - +#include "ccusparse.h" #include "fcusparse_fct.h" - diff --git a/cuda/ccusparse.h b/cuda/ccusparse.h new file mode 100644 index 00000000..f101c73d --- /dev/null +++ b/cuda/ccusparse.h @@ -0,0 +1,100 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef CCUSPARSE_ +#define CCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE float complex +#define CUSPARSE_BASE_TYPE CUDA_C_32F +#define T_CSRGDeviceMat c_CSRGDeviceMat +#define T_Cmat c_Cmat +#define T_spmvCSRGDevice c_spmvCSRGDevice +#define T_spsvCSRGDevice c_spsvCSRGDevice +#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc +#define T_CSRGDeviceFree c_CSRGDeviceFree +#define T_CSRGHost2Device c_CSRGHost2Device +#define T_CSRGDevice2Host c_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseCcsrmv +#define cusparseTcsrsv_solve cusparseCcsrsv_solve +#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis +#define T_HYBGDeviceMat c_HYBGDeviceMat +#define T_Hmat c_Hmat +#define T_HYBGDeviceFree c_HYBGDeviceFree +#define T_spmvHYBGDevice c_spmvHYBGDevice +#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice c_spsvHYBGDevice +#define T_HYBGHost2Device c_HYBGHost2Device +#define cusparseThybmv cusparseChybmv +#define cusparseThybsv_solve cusparseChybsv_solve +#define cusparseThybsv_analysis cusparseChybsv_analysis +#define cusparseTcsr2hyb cusparseCcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr + +#endif + +#endif diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index 657ca5be..6bb726c9 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -38,62 +38,5 @@ #include "cintrf.h" #include "fcusparse.h" - -/* Double precision real */ -#define TYPE double -#define CUSPARSE_BASE_TYPE CUDA_R_64F -#define T_CSRGDeviceMat d_CSRGDeviceMat -#define T_Cmat d_Cmat -#define T_spmvCSRGDevice d_spmvCSRGDevice -#define T_spsvCSRGDevice d_spsvCSRGDevice -#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc -#define T_CSRGDeviceFree d_CSRGDeviceFree -#define T_CSRGHost2Device d_CSRGHost2Device -#define T_CSRGDevice2Host d_CSRGDevice2Host -#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode -#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType -#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms - -#if CUDA_SHORT_VERSION <= 10 -#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis -#define cusparseTcsrmv cusparseDcsrmv -#define cusparseTcsrsv_solve cusparseDcsrsv_solve -#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis -#define T_HYBGDeviceMat d_HYBGDeviceMat -#define T_Hmat d_Hmat -#define T_HYBGDeviceFree d_HYBGDeviceFree -#define T_spmvHYBGDevice d_spmvHYBGDevice -#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc -#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType -#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase -#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType -#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode -#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis -#define T_spsvHYBGDevice d_spsvHYBGDevice -#define T_HYBGHost2Device d_HYBGHost2Device -#define cusparseThybmv cusparseDhybmv -#define cusparseThybsv_solve cusparseDhybsv_solve -#define cusparseThybsv_analysis cusparseDhybsv_analysis -#define cusparseTcsr2hyb cusparseDcsr2hyb - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve -#else - -#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer -#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr -#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr -#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr - -#endif - +#include "dcusparse.h" #include "fcusparse_fct.h" - diff --git a/cuda/dcusparse.h b/cuda/dcusparse.h new file mode 100644 index 00000000..d7875650 --- /dev/null +++ b/cuda/dcusparse.h @@ -0,0 +1,101 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef DCUSPARSE_ +#define DCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE double +#define CUSPARSE_BASE_TYPE CUDA_R_64F +#define T_CSRGDeviceMat d_CSRGDeviceMat +#define T_Cmat d_Cmat +#define T_spmvCSRGDevice d_spmvCSRGDevice +#define T_spsvCSRGDevice d_spsvCSRGDevice +#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc +#define T_CSRGDeviceFree d_CSRGDeviceFree +#define T_CSRGHost2Device d_CSRGHost2Device +#define T_CSRGDevice2Host d_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseDcsrmv +#define cusparseTcsrsv_solve cusparseDcsrsv_solve +#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis +#define T_HYBGDeviceMat d_HYBGDeviceMat +#define T_Hmat d_Hmat +#define T_HYBGDeviceFree d_HYBGDeviceFree +#define T_spmvHYBGDevice d_spmvHYBGDevice +#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice d_spsvHYBGDevice +#define T_HYBGHost2Device d_HYBGHost2Device +#define cusparseThybmv cusparseDhybmv +#define cusparseThybsv_solve cusparseDhybsv_solve +#define cusparseThybsv_analysis cusparseDhybsv_analysis +#define cusparseTcsr2hyb cusparseDcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr + +#endif + +#endif + diff --git a/cuda/scusparse.c b/cuda/scusparse.c index d4db9b7c..f54a35fd 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -38,62 +38,6 @@ #include "cintrf.h" #include "fcusparse.h" - -/* Double precision real */ -#define TYPE float -#define CUSPARSE_BASE_TYPE CUDA_R_32F -#define T_CSRGDeviceMat s_CSRGDeviceMat -#define T_Cmat s_Cmat -#define T_spmvCSRGDevice s_spmvCSRGDevice -#define T_spsvCSRGDevice s_spsvCSRGDevice -#define T_CSRGDeviceAlloc s_CSRGDeviceAlloc -#define T_CSRGDeviceFree s_CSRGDeviceFree -#define T_CSRGHost2Device s_CSRGHost2Device -#define T_CSRGDevice2Host s_CSRGDevice2Host -#define T_CSRGDeviceSetMatFillMode s_CSRGDeviceSetMatFillMode -#define T_CSRGDeviceSetMatDiagType s_CSRGDeviceSetMatDiagType -#define T_CSRGDeviceGetParms s_CSRGDeviceGetParms - -#if CUDA_SHORT_VERSION <= 10 -#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsmAnalysis s_CSRGDeviceCsrsmAnalysis -#define cusparseTcsrmv cusparseScsrmv -#define cusparseTcsrsv_solve cusparseScsrsv_solve -#define cusparseTcsrsv_analysis cusparseScsrsv_analysis -#define T_HYBGDeviceMat s_HYBGDeviceMat -#define T_Hmat s_Hmat -#define T_HYBGDeviceFree s_HYBGDeviceFree -#define T_spmvHYBGDevice s_spmvHYBGDevice -#define T_HYBGDeviceAlloc s_HYBGDeviceAlloc -#define T_HYBGDeviceSetMatDiagType s_HYBGDeviceSetMatDiagType -#define T_HYBGDeviceSetMatIndexBase s_HYBGDeviceSetMatIndexBase -#define T_HYBGDeviceSetMatType s_HYBGDeviceSetMatType -#define T_HYBGDeviceSetMatFillMode s_HYBGDeviceSetMatFillMode -#define T_HYBGDeviceHybsmAnalysis s_HYBGDeviceHybsmAnalysis -#define T_spsvHYBGDevice s_spsvHYBGDevice -#define T_HYBGHost2Device s_HYBGHost2Device -#define cusparseThybmv cusparseShybmv -#define cusparseThybsv_solve cusparseShybsv_solve -#define cusparseThybsv_analysis cusparseShybsv_analysis -#define cusparseTcsr2hyb cusparseScsr2hyb - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis s_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseScsrsv2_solve -#else - -#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer -#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr -#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr -#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr - -#endif - +#include "scusparse.h" #include "fcusparse_fct.h" diff --git a/cuda/scusparse.h b/cuda/scusparse.h new file mode 100644 index 00000000..724bdc2a --- /dev/null +++ b/cuda/scusparse.h @@ -0,0 +1,102 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef SCUSPARSE_ +#define SCUSPARSE_ + + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE float +#define CUSPARSE_BASE_TYPE CUDA_R_32F +#define T_CSRGDeviceMat s_CSRGDeviceMat +#define T_Cmat s_Cmat +#define T_spmvCSRGDevice s_spmvCSRGDevice +#define T_spsvCSRGDevice s_spsvCSRGDevice +#define T_CSRGDeviceAlloc s_CSRGDeviceAlloc +#define T_CSRGDeviceFree s_CSRGDeviceFree +#define T_CSRGHost2Device s_CSRGHost2Device +#define T_CSRGDevice2Host s_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode s_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType s_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms s_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis s_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseScsrmv +#define cusparseTcsrsv_solve cusparseScsrsv_solve +#define cusparseTcsrsv_analysis cusparseScsrsv_analysis +#define T_HYBGDeviceMat s_HYBGDeviceMat +#define T_Hmat s_Hmat +#define T_HYBGDeviceFree s_HYBGDeviceFree +#define T_spmvHYBGDevice s_spmvHYBGDevice +#define T_HYBGDeviceAlloc s_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType s_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase s_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType s_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode s_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis s_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice s_spsvHYBGDevice +#define T_HYBGHost2Device s_HYBGHost2Device +#define cusparseThybmv cusparseShybmv +#define cusparseThybsv_solve cusparseShybsv_solve +#define cusparseThybsv_analysis cusparseShybsv_analysis +#define cusparseTcsr2hyb cusparseScsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis s_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr + +#endif + +#endif + diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index a70a6573..2ae0c04e 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -38,62 +38,5 @@ #include "cintrf.h" #include "fcusparse.h" - -/* Double precision real */ -#define TYPE double complex -#define CUSPARSE_BASE_TYPE CUDA_C_64F -#define T_CSRGDeviceMat z_CSRGDeviceMat -#define T_Cmat z_Cmat -#define T_spmvCSRGDevice z_spmvCSRGDevice -#define T_spsvCSRGDevice z_spsvCSRGDevice -#define T_CSRGDeviceAlloc z_CSRGDeviceAlloc -#define T_CSRGDeviceFree z_CSRGDeviceFree -#define T_CSRGHost2Device z_CSRGHost2Device -#define T_CSRGDevice2Host z_CSRGDevice2Host -#define T_CSRGDeviceSetMatFillMode z_CSRGDeviceSetMatFillMode -#define T_CSRGDeviceSetMatDiagType z_CSRGDeviceSetMatDiagType -#define T_CSRGDeviceGetParms z_CSRGDeviceGetParms - -#if CUDA_SHORT_VERSION <= 10 -#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsmAnalysis z_CSRGDeviceCsrsmAnalysis -#define cusparseTcsrmv cusparseZcsrmv -#define cusparseTcsrsv_solve cusparseZcsrsv_solve -#define cusparseTcsrsv_analysis cusparseZcsrsv_analysis -#define T_HYBGDeviceMat z_HYBGDeviceMat -#define T_Hmat z_Hmat -#define T_HYBGDeviceFree z_HYBGDeviceFree -#define T_spmvHYBGDevice z_spmvHYBGDevice -#define T_HYBGDeviceAlloc z_HYBGDeviceAlloc -#define T_HYBGDeviceSetMatDiagType z_HYBGDeviceSetMatDiagType -#define T_HYBGDeviceSetMatIndexBase z_HYBGDeviceSetMatIndexBase -#define T_HYBGDeviceSetMatType z_HYBGDeviceSetMatType -#define T_HYBGDeviceSetMatFillMode z_HYBGDeviceSetMatFillMode -#define T_HYBGDeviceHybsmAnalysis z_HYBGDeviceHybsmAnalysis -#define T_spsvHYBGDevice z_spsvHYBGDevice -#define T_HYBGHost2Device z_HYBGHost2Device -#define cusparseThybmv cusparseZhybmv -#define cusparseThybsv_solve cusparseZhybsv_solve -#define cusparseThybsv_analysis cusparseZhybsv_analysis -#define cusparseTcsr2hyb cusparseZcsr2hyb - -#elif CUDA_VERSION < 11030 - -#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType -#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase -#define T_CSRGDeviceCsrsv2Analysis z_CSRGDeviceCsrsv2Analysis -#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize -#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis -#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve -#else - -#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer -#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr -#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr -#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr - -#endif - +#include "zcusparse.h" #include "fcusparse_fct.h" - diff --git a/cuda/zcusparse.h b/cuda/zcusparse.h new file mode 100644 index 00000000..364e8ecc --- /dev/null +++ b/cuda/zcusparse.h @@ -0,0 +1,101 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef ZCUSPARSE_ +#define ZCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "fcusparse.h" + + +/* Double precision real */ +#define TYPE double complex +#define CUSPARSE_BASE_TYPE CUDA_C_64F +#define T_CSRGDeviceMat z_CSRGDeviceMat +#define T_Cmat z_Cmat +#define T_spmvCSRGDevice z_spmvCSRGDevice +#define T_spsvCSRGDevice z_spsvCSRGDevice +#define T_CSRGDeviceAlloc z_CSRGDeviceAlloc +#define T_CSRGDeviceFree z_CSRGDeviceFree +#define T_CSRGHost2Device z_CSRGHost2Device +#define T_CSRGDevice2Host z_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode z_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType z_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms z_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis z_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseZcsrmv +#define cusparseTcsrsv_solve cusparseZcsrsv_solve +#define cusparseTcsrsv_analysis cusparseZcsrsv_analysis +#define T_HYBGDeviceMat z_HYBGDeviceMat +#define T_Hmat z_Hmat +#define T_HYBGDeviceFree z_HYBGDeviceFree +#define T_spmvHYBGDevice z_spmvHYBGDevice +#define T_HYBGDeviceAlloc z_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType z_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase z_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType z_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode z_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis z_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice z_spsvHYBGDevice +#define T_HYBGHost2Device z_HYBGHost2Device +#define cusparseThybmv cusparseZhybmv +#define cusparseThybsv_solve cusparseZhybsv_solve +#define cusparseThybsv_analysis cusparseZhybsv_analysis +#define cusparseTcsr2hyb cusparseZcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis z_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr + +#endif + +#endif + From 2e3f862e4292277e3c6ec43b332251fab4096496 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 21 May 2024 12:31:02 +0200 Subject: [PATCH 102/116] Start refactoring cusparse.h --- cuda/ccusparse.h | 4 ++-- cuda/dcusparse.h | 4 ++-- cuda/fcusparse.h | 3 +++ cuda/fcusparse_fct.h | 4 ++-- cuda/scusparse.h | 6 ++---- cuda/zcusparse.h | 5 ++--- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cuda/ccusparse.h b/cuda/ccusparse.h index f101c73d..a5380a0a 100644 --- a/cuda/ccusparse.h +++ b/cuda/ccusparse.h @@ -38,8 +38,6 @@ #include #include #include "cintrf.h" -#include "fcusparse.h" - /* Double precision real */ #define TYPE float complex @@ -97,4 +95,6 @@ #endif +#include "fcusparse.h" + #endif diff --git a/cuda/dcusparse.h b/cuda/dcusparse.h index d7875650..34849b1e 100644 --- a/cuda/dcusparse.h +++ b/cuda/dcusparse.h @@ -38,8 +38,6 @@ #include #include #include "cintrf.h" -#include "fcusparse.h" - /* Double precision real */ #define TYPE double @@ -97,5 +95,7 @@ #endif +#include "fcusparse.h" + #endif diff --git a/cuda/fcusparse.h b/cuda/fcusparse.h index 7d2972f8..6ef22db2 100644 --- a/cuda/fcusparse.h +++ b/cuda/fcusparse.h @@ -65,4 +65,7 @@ cusparseHandle_t *getHandle(); } \ } + #endif + + diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index 28d900da..ecba8484 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -27,7 +27,7 @@ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ - + typedef struct T_CSRGDeviceMat { #if CUDA_SHORT_VERSION <= 10 @@ -129,7 +129,7 @@ int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, int *irp, int *ja, TYPE *val); #endif - + int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY) { diff --git a/cuda/scusparse.h b/cuda/scusparse.h index 724bdc2a..479f5069 100644 --- a/cuda/scusparse.h +++ b/cuda/scusparse.h @@ -31,7 +31,6 @@ #ifndef SCUSPARSE_ #define SCUSPARSE_ - #include #include @@ -39,8 +38,6 @@ #include #include #include "cintrf.h" -#include "fcusparse.h" - /* Double precision real */ #define TYPE float @@ -98,5 +95,6 @@ #endif -#endif +#include "fcusparse.h" +#endif diff --git a/cuda/zcusparse.h b/cuda/zcusparse.h index 364e8ecc..b63aa103 100644 --- a/cuda/zcusparse.h +++ b/cuda/zcusparse.h @@ -38,8 +38,6 @@ #include #include #include "cintrf.h" -#include "fcusparse.h" - /* Double precision real */ #define TYPE double complex @@ -97,5 +95,6 @@ #endif -#endif +#include "fcusparse.h" +#endif From d71d355b6858fa27c5cbfcdba3a32ab9d25f16c0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 21 May 2024 12:59:44 +0200 Subject: [PATCH 103/116] Refactor cusparse includes.. --- cuda/ccusparse.c | 1 + cuda/dcusparse.c | 1 + cuda/fcusparse_dat.h | 136 +++++++++++++++++++++++++++++++++++++++++++ cuda/fcusparse_fct.h | 102 -------------------------------- cuda/scusparse.c | 2 +- cuda/zcusparse.c | 1 + 6 files changed, 140 insertions(+), 103 deletions(-) create mode 100644 cuda/fcusparse_dat.h diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index 3a6556ce..a5fee796 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -39,4 +39,5 @@ #include "fcusparse.h" #include "ccusparse.h" +#include "fcusparse_dat.h" #include "fcusparse_fct.h" diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index 6bb726c9..41f93603 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -39,4 +39,5 @@ #include "fcusparse.h" #include "dcusparse.h" +#include "fcusparse_dat.h" #include "fcusparse_fct.h" diff --git a/cuda/fcusparse_dat.h b/cuda/fcusparse_dat.h new file mode 100644 index 00000000..4b7cd6ce --- /dev/null +++ b/cuda/fcusparse_dat.h @@ -0,0 +1,136 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ +#ifndef FCUSPARSE_DAT_ +#define FCUSPARSE_DAT_ + + +typedef struct T_CSRGDeviceMat +{ +#if CUDA_SHORT_VERSION <= 10 + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; +#elif CUDA_VERSION < 11030 + cusparseMatDescr_t descr; + csrsv2Info_t triang; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#else + cusparseSpMatDescr_t *spmvDescr; + cusparseSpSVDescr_t *spsvDescr; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#endif + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_CSRGDeviceMat; + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Cmat +{ + T_CSRGDeviceMat *mat; +} T_Cmat; + +#if CUDA_SHORT_VERSION <= 10 +typedef struct T_HYBGDeviceMat +{ + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; + cusparseHybMat_t hybA; + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_HYBGDeviceMat; + + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Hmat +{ + T_HYBGDeviceMat *mat; +} T_Hmat; +#endif + +int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); +int T_CSRGDeviceFree(T_Cmat *Mat); + + +int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); + +int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); + +#if CUDA_SHORT_VERSION <= 10 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); +#elif CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#else + +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); +#endif + + + +#if CUDA_SHORT_VERSION <= 10 + + +int T_HYBGDeviceFree(T_Hmat *Matrix); +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +#endif + +#endif diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h index ecba8484..12be21bd 100644 --- a/cuda/fcusparse_fct.h +++ b/cuda/fcusparse_fct.h @@ -28,108 +28,6 @@ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ -typedef struct T_CSRGDeviceMat -{ -#if CUDA_SHORT_VERSION <= 10 - cusparseMatDescr_t descr; - cusparseSolveAnalysisInfo_t triang; -#elif CUDA_VERSION < 11030 - cusparseMatDescr_t descr; - csrsv2Info_t triang; - size_t mvbsize, svbsize; - void *mvbuffer, *svbuffer; -#else - cusparseSpMatDescr_t *spmvDescr; - cusparseSpSVDescr_t *spsvDescr; - size_t mvbsize, svbsize; - void *mvbuffer, *svbuffer; -#endif - int m, n, nz; - TYPE *val; - int *irp; - int *ja; -} T_CSRGDeviceMat; - -/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ -typedef struct T_Cmat -{ - T_CSRGDeviceMat *mat; -} T_Cmat; - -#if CUDA_SHORT_VERSION <= 10 -typedef struct T_HYBGDeviceMat -{ - cusparseMatDescr_t descr; - cusparseSolveAnalysisInfo_t triang; - cusparseHybMat_t hybA; - int m, n, nz; - TYPE *val; - int *irp; - int *ja; -} T_HYBGDeviceMat; - - -/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ -typedef struct T_Hmat -{ - T_HYBGDeviceMat *mat; -} T_Hmat; -#endif - -int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, - TYPE beta, void *deviceY); -int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, - TYPE beta, void *deviceY); -int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); -int T_CSRGDeviceFree(T_Cmat *Mat); - - -int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, - int *irp, int *ja, TYPE *val); -int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, - int *irp, int *ja, TYPE *val); - -int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); - -#if CUDA_SHORT_VERSION <= 10 -int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); -int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); -#elif CUDA_VERSION < 11030 -int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); -int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); -#else - -int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); -int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); -int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); -int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); -#endif - - - -#if CUDA_SHORT_VERSION <= 10 - - -int T_HYBGDeviceFree(T_Hmat *Matrix); -int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, - TYPE beta, void *deviceY); -int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); -int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); -int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); -int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); -int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); -int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); -int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, - TYPE beta, void *deviceY); -int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, - int *irp, int *ja, TYPE *val); -#endif - int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, TYPE beta, void *deviceY) { diff --git a/cuda/scusparse.c b/cuda/scusparse.c index f54a35fd..da21506c 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -39,5 +39,5 @@ #include "fcusparse.h" #include "scusparse.h" +#include "fcusparse_dat.h" #include "fcusparse_fct.h" - diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 2ae0c04e..62b94b3b 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -39,4 +39,5 @@ #include "fcusparse.h" #include "zcusparse.h" +#include "fcusparse_dat.h" #include "fcusparse_fct.h" From a177e94ba5f7fa97dab023357a17e90704dc50ff Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 21 May 2024 17:49:40 +0200 Subject: [PATCH 104/116] Fix comments, --- base/modules/desc/psb_desc_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index 226d9d2b..ad0bc74c 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -409,7 +409,7 @@ contains ! ! Since the hashed lists take up (somewhat) more than 2*N_COL integers, ! it makes no sense to use them if you don't have at least - ! 3 processes, no matter what the size of the process. + ! 3 processes, no matter what the size of the index space. ! val = psb_cd_is_large_size(m) .and. (np > 2) end function psb_cd_choose_large_state From 42293c62b6c8b731f64f38c0b9b989dd474cfbc7 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 21 May 2024 17:49:48 +0200 Subject: [PATCH 105/116] Fix usage of sync() --- base/serial/impl/psb_c_base_mat_impl.F90 | 3 +++ base/serial/impl/psb_d_base_mat_impl.F90 | 3 +++ base/serial/impl/psb_s_base_mat_impl.F90 | 3 +++ base/serial/impl/psb_z_base_mat_impl.F90 | 3 +++ 4 files changed, 12 insertions(+) diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index f4bc43cc..fc170a6a 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -2060,6 +2060,8 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2080,6 +2082,7 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 1a8dc084..01a942d1 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -2060,6 +2060,8 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2080,6 +2082,7 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 0b47d472..205d0358 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -2060,6 +2060,8 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2080,6 +2082,7 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index 2d97f698..915993f0 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -2060,6 +2060,8 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2080,6 +2082,7 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) From b9ad357648e3b719ea6b35b2ff1ffa364e6a9eba Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 May 2024 16:25:49 +0200 Subject: [PATCH 106/116] Improve temp memory allocation in fix_coo --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index d6117546..1a1cb3ab 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 4cb4c3ec..0f7da5c0 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index f706db33..c6340a91 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index c368ce91..87e2c8d8 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) From a38867be25d96751bb9e9dbdb4ce11cca6fe517c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 May 2024 16:51:58 +0200 Subject: [PATCH 107/116] Fix allocation in coo_impl --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 1a1cb3ab..b97f493c 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index 0f7da5c0..c8a22914 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index c6340a91..4b05cb17 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 87e2c8d8..4b15ef84 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) From 39cfcd3893f9770308b14ad253c92614fe24fade Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 May 2024 16:54:10 +0200 Subject: [PATCH 108/116] Fix allocation in coo_impl --- base/serial/impl/psb_c_coo_impl.F90 | 2 +- base/serial/impl/psb_d_coo_impl.F90 | 2 +- base/serial/impl/psb_s_coo_impl.F90 | 2 +- base/serial/impl/psb_z_coo_impl.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index b97f493c..ccf7e42d 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index c8a22914..beb438aa 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 4b05cb17..d833cf96 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 4b15ef84..ea6b2492 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(MAX((nzin+2),(nc+2),(nr+2)),stat=info)) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) From e3a55967a560802d8543f33057b9bc78d6a41371 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Sun, 23 Jun 2024 16:03:10 +0200 Subject: [PATCH 109/116] Modify CUDA code to compile with 12.4/12.5 --- cuda/CUDA/psi_cuda_CopyCooToElg.cuh | 19 +++++++++---------- cuda/CUDA/psi_cuda_CopyCooToHlg.cuh | 2 -- cuda/CUDA/psi_cuda_c_CopyCooToElg.cu | 4 +--- cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu | 3 --- cuda/CUDA/psi_cuda_d_CopyCooToElg.cu | 3 --- cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu | 3 --- cuda/CUDA/psi_cuda_s_CopyCooToElg.cu | 3 --- cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu | 3 --- cuda/CUDA/psi_cuda_z_CopyCooToElg.cu | 3 --- cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu | 3 --- cuda/ccusparse.c | 1 - cuda/cintrf.h | 7 +++---- cuda/dcusparse.c | 1 - cuda/diagdev.h | 1 + cuda/dnsdev.h | 1 + cuda/elldev.h | 1 + cuda/fcusparse.c | 1 - cuda/fcusparse.h | 1 + cuda/hdiagdev.h | 1 + cuda/hlldev.h | 2 +- cuda/scusparse.c | 1 - cuda/vectordev.h | 4 ++-- cuda/zcusparse.c | 1 - 23 files changed, 21 insertions(+), 48 deletions(-) diff --git a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh index 98aac050..5b723be1 100644 --- a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh +++ b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh @@ -1,8 +1,6 @@ #include #include -#include "cintrf.h" -#include "vectordev.h" #include "psi_cuda_common.cuh" @@ -62,11 +60,9 @@ __global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, ir += ldv; } idiag[i]=idval; - } - void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, int i, int nr, int nza, int baseIdx, int hacksz, int ldv, int nzm, int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, @@ -76,8 +72,10 @@ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_)(spgpuHandle_t handle, int nrws, i dim3 grid ((nrws + THREAD_BLOCK - 1) / THREAD_BLOCK); CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn) - <<< grid, block, 0, handle->currentStream >>>(i,nrws, nr, nza, baseIdx, hacksz, ldv, nzm, - rS,devIdisp,devJa,devVal,idiag, rP,cM); + <<< grid, block, 0, handle->currentStream >>>(i,nrws, nr, nza, baseIdx, + hacksz, ldv, nzm, + rS,devIdisp,devJa,devVal, + idiag, rP,cM); } @@ -89,16 +87,17 @@ GEN_PSI_FUNC_NAME(TYPE_SYMBOL) (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, int *idiag, int *rP, VALUE_TYPE *cM) -{ int i,j, nrws; +{ int i, nrws; //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); - //fprintf(stderr,"Loop on j: %d\n",j); for (i=0; i #include -#include "cintrf.h" -#include "vectordev.h" #include "psi_cuda_common.cuh" diff --git a/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu index e069ff1e..651a8d9f 100644 --- a/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu +++ b/cuda/CUDA/psi_cuda_c_CopyCooToElg.cu @@ -2,9 +2,7 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE cuFloatComplex #define TYPE_SYMBOL c #include "psi_cuda_CopyCooToElg.cuh" + diff --git a/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu index f2b5c86d..e36728b1 100644 --- a/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu +++ b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE cuFloatComplex #define TYPE_SYMBOL c #include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu index f306ffe1..233bae06 100644 --- a/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu +++ b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE double #define TYPE_SYMBOL d #include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu index 9c0e371e..94e076ae 100644 --- a/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu +++ b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE double #define TYPE_SYMBOL d #include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu index 76e10de1..e083708c 100644 --- a/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu +++ b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE float #define TYPE_SYMBOL s #include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu index c2d76c0a..90ad5fdf 100644 --- a/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu +++ b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE float #define TYPE_SYMBOL s #include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu index a57ad637..b5ec817d 100644 --- a/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu +++ b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE cuDoubleComplex #define TYPE_SYMBOL z #include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu index 2ff9b869..24d39ec4 100644 --- a/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu +++ b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu @@ -2,9 +2,6 @@ #include #include "cintrf.h" -#include "vectordev.h" - - #define VALUE_TYPE cuDoubleComplex #define TYPE_SYMBOL z #include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c index a5fee796..6b5c8ea6 100644 --- a/cuda/ccusparse.c +++ b/cuda/ccusparse.c @@ -35,7 +35,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" #include "ccusparse.h" diff --git a/cuda/cintrf.h b/cuda/cintrf.h index 3a1f6476..7119378a 100644 --- a/cuda/cintrf.h +++ b/cuda/cintrf.h @@ -37,10 +37,9 @@ #include #include "core.h" -#include "cuda_util.h" -#include "vector.h" -#include "vectordev.h" - +//#include "cuda_util.h" +//#include "vector.h" +//#include "vectordev.h" #define ELL_PITCH_ALIGN_S 32 #define ELL_PITCH_ALIGN_D 16 diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c index 41f93603..9af4ce38 100644 --- a/cuda/dcusparse.c +++ b/cuda/dcusparse.c @@ -35,7 +35,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" #include "dcusparse.h" diff --git a/cuda/diagdev.h b/cuda/diagdev.h index 2efbea92..3a062fb7 100644 --- a/cuda/diagdev.h +++ b/cuda/diagdev.h @@ -33,6 +33,7 @@ #define _DIAGDEV_H_ #include "cintrf.h" +#include "vectordev.h" #include "dia.h" struct DiagDevice diff --git a/cuda/dnsdev.h b/cuda/dnsdev.h index 1c335bf9..aa536105 100644 --- a/cuda/dnsdev.h +++ b/cuda/dnsdev.h @@ -34,6 +34,7 @@ #define _DNSDEV_H_ #include "cintrf.h" +#include "vectordev.h" #include "cuComplex.h" #include "cublas_v2.h" diff --git a/cuda/elldev.h b/cuda/elldev.h index 5305057a..4e69bb3a 100644 --- a/cuda/elldev.h +++ b/cuda/elldev.h @@ -34,6 +34,7 @@ #define _ELLDEV_H_ #include "cintrf.h" +#include "vectordev.h" #include "cuComplex.h" #include "ell.h" diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c index e8e46c63..094348ce 100644 --- a/cuda/fcusparse.c +++ b/cuda/fcusparse.c @@ -34,7 +34,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" static cusparseHandle_t *cusparse_handle=NULL; diff --git a/cuda/fcusparse.h b/cuda/fcusparse.h index 6ef22db2..02a45fa3 100644 --- a/cuda/fcusparse.h +++ b/cuda/fcusparse.h @@ -40,6 +40,7 @@ #include #endif #include "cintrf.h" +#include "vectordev.h" int FcusparseCreate(); int FcusparseDestroy(); diff --git a/cuda/hdiagdev.h b/cuda/hdiagdev.h index 5cd9f803..c02fcc69 100644 --- a/cuda/hdiagdev.h +++ b/cuda/hdiagdev.h @@ -33,6 +33,7 @@ #define _HDIAGDEV_H_ #include "cintrf.h" +#include "vectordev.h" #include "hdia.h" struct HdiagDevice diff --git a/cuda/hlldev.h b/cuda/hlldev.h index e4f8259e..3b47f5ea 100644 --- a/cuda/hlldev.h +++ b/cuda/hlldev.h @@ -34,7 +34,7 @@ #include "cintrf.h" #include "hell.h" - +#include "vectordev.h" typedef struct hlldevice { diff --git a/cuda/scusparse.c b/cuda/scusparse.c index da21506c..b0229907 100644 --- a/cuda/scusparse.c +++ b/cuda/scusparse.c @@ -35,7 +35,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" #include "scusparse.h" diff --git a/cuda/vectordev.h b/cuda/vectordev.h index 8eca7063..df5fbd82 100644 --- a/cuda/vectordev.h +++ b/cuda/vectordev.h @@ -28,13 +28,12 @@ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ - #pragma once //#include "utils.h" #include "cuda_runtime.h" //#include "common.h" -#include "cintrf.h" +//#include "cintrf.h" #include struct MultiVectDevice @@ -85,3 +84,4 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams * int getMultiVecDeviceSize(void* deviceVec); int getMultiVecDeviceCount(void* deviceVec); int getMultiVecDevicePitch(void* deviceVec); + diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c index 62b94b3b..93142d22 100644 --- a/cuda/zcusparse.c +++ b/cuda/zcusparse.c @@ -35,7 +35,6 @@ #include #include -#include "cintrf.h" #include "fcusparse.h" #include "zcusparse.h" From 9f2b8a2623a5b7c5e3cbfdac29d45cdf8fd3e7c7 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 24 Jun 2024 08:18:07 +0200 Subject: [PATCH 110/116] Cleanup --- cuda/cintrf.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cuda/cintrf.h b/cuda/cintrf.h index 7119378a..58778e6c 100644 --- a/cuda/cintrf.h +++ b/cuda/cintrf.h @@ -37,9 +37,7 @@ #include #include "core.h" -//#include "cuda_util.h" -//#include "vector.h" -//#include "vectordev.h" + #define ELL_PITCH_ALIGN_S 32 #define ELL_PITCH_ALIGN_D 16 From 681ea2fff762419ad1d70299f30507f99d18b46b Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 11 Jul 2024 12:10:14 +0200 Subject: [PATCH 111/116] Updated docs --- config/pac.m4 | 69 + configure.ac | 34 +- docs/html/psblaslibraryext.png | Bin 0 -> 29336 bytes docs/{psblas-3.8.pdf => psblas-3.9.pdf} | 40109 +++++++++++++--------- docs/src/Makefile | 5 +- docs/src/biblio.tex | 25 +- docs/src/cuda.tex | 244 + docs/src/ext-intro.tex | 412 + docs/src/figures/dia.pdf | Bin 0 -> 15952 bytes docs/src/figures/ell.pdf | Bin 0 -> 16892 bytes docs/src/figures/hdia.pdf | Bin 0 -> 18361 bytes docs/src/figures/hll.pdf | Bin 0 -> 18339 bytes docs/src/figures/mat.pdf | Bin 0 -> 6413 bytes docs/src/figures/psblaslibraryext.png | Bin 0 -> 29336 bytes docs/src/userguide.pdf | 1 + docs/src/userguide.tex | 12 +- docs/src/userhtml.tex | 10 +- rsb/Makefile | 53 + rsb/impl/Makefile | 30 + rsb/impl/psb_d_cp_rsb_from_coo.F90 | 78 + rsb/impl/psb_d_cp_rsb_to_coo.f90 | 77 + rsb/impl/psb_d_mv_rsb_from_coo.f90 | 114 + rsb/impl/psb_d_rsb_csmv.F90 | 138 + rsb/psb_d_rsb_mat_mod.f90 | 487 + rsb/psb_rsb_mod.F90 | 50 + rsb/psb_rsb_penv_mod.F90 | 99 + rsb/rsb_int.c | 110 + rsb/rsb_int.h | 2 + rsb/rsb_mod.F90 | 235 + 29 files changed, 25661 insertions(+), 16733 deletions(-) create mode 100644 docs/html/psblaslibraryext.png rename docs/{psblas-3.8.pdf => psblas-3.9.pdf} (70%) create mode 100644 docs/src/cuda.tex create mode 100644 docs/src/ext-intro.tex create mode 100644 docs/src/figures/dia.pdf create mode 100644 docs/src/figures/ell.pdf create mode 100644 docs/src/figures/hdia.pdf create mode 100644 docs/src/figures/hll.pdf create mode 100644 docs/src/figures/mat.pdf create mode 100644 docs/src/figures/psblaslibraryext.png create mode 120000 docs/src/userguide.pdf create mode 100755 rsb/Makefile create mode 100755 rsb/impl/Makefile create mode 100644 rsb/impl/psb_d_cp_rsb_from_coo.F90 create mode 100644 rsb/impl/psb_d_cp_rsb_to_coo.f90 create mode 100644 rsb/impl/psb_d_mv_rsb_from_coo.f90 create mode 100644 rsb/impl/psb_d_rsb_csmv.F90 create mode 100644 rsb/psb_d_rsb_mat_mod.f90 create mode 100644 rsb/psb_rsb_mod.F90 create mode 100644 rsb/psb_rsb_penv_mod.F90 create mode 100644 rsb/rsb_int.c create mode 100644 rsb/rsb_int.h create mode 100644 rsb/rsb_mod.F90 diff --git a/config/pac.m4 b/config/pac.m4 index 0d22392f..72ad3d6a 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2018,6 +2018,75 @@ CPPFLAGS="$SAVE_CPPFLAGS"; ])dnl +dnl @synopsis PAC_ARG_WITH_LIBRSB +dnl +dnl Test for --with-librsb="pathname". +dnl +dnl Defines the path to LIBRSB build dir. +dnl +dnl note: Renamed after PAC_ARG_WITH_LIBS as in the Trilinos package. +dnl +dnl Example use: +dnl +dnl PAC_ARG_WITH_LIBRSB +dnl +dnl tests for --with-librsb and pre-pends to LIBRSB_PATH +dnl +dnl @author Salvatore Filippone +dnl + +AC_DEFUN(PAC_ARG_WITH_LIBRSB, + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + + AC_ARG_WITH(librsb, + AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example, + --with-librsb=/opt/packages/librsb]), + [pac_cv_librsb_dir=$withval], + [pac_cv_librsb_dir='']) + + if test "x$pac_cv_librsb_dir" != "x"; then + LIBS="-L$pac_cv_librsb_dir $LIBS" + RSB_INCLUDES="-I$pac_cv_librsb_dir" + # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + RSB_LIBDIR="-L$pac_cv_librsb_dir" + fi + #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir]) + AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h], + [pac_rsb_header_ok=yes], + [pac_rsb_header_ok=no; RSB_INCLUDES=""]) + + if test "x$pac_rsb_header_ok" == "xyes" ; then + RSB_LIBS="-lrsb $RSB_LIBDIR" + # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + # AC_TRY_LINK_FUNC(spgpuCreate, + # [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + # [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + # AC_MSG_RESULT($pac_gpu_lib_ok) + # if test "x$pac_cv_have_spgpu" == "xyes" ; then + # AC_MSG_NOTICE([Have found SPGPU]) + RSBLIBNAME="librsb.a"; + LIBRSB_DIR="$pac_cv_librsb_dir"; + # SPGPU_DEFINES="-DHAVE_SPGPU"; + LIBRSB_INCDIR="$LIBRSB_DIR"; + LIBRSB_INCLUDES="-I$LIBRSB_INCDIR"; + LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR"; + # CUDA_DIR="$pac_cv_cuda_dir"; + LIBRSB_DEFINES="-DHAVE_RSB"; + LRSB=-lpsb_rsb + # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES"; + CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES"; + fi +# fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +]) +dnl + + dnl @synopsis PAC_CHECK_SPGPU dnl dnl Will try to find the spgpu library and headers. diff --git a/configure.ac b/configure.ac index 10f9e2f4..5d68eda1 100755 --- a/configure.ac +++ b/configure.ac @@ -204,7 +204,7 @@ PAC_ARG_WITH_FLAGS(module-path,MODULE_PATH) # we just gave the user the chance to append values to these variables -############################################################################### + dnl Library oriented Autotools facilities (we don't care about this for now) @@ -844,6 +844,30 @@ if test "x$pac_cv_ipk_size" != "x4"; then fi +############################################################################### +PAC_ARG_WITH_LIBRSB +LIBRSB_DIR="$pac_cv_librsb_dir"; +AC_MSG_CHECKING([for LIBRSB install dir]) +case $LIBRSB_DIR in + /*) ;; + *) dnl AC_MSG_ERROR([The LIBRSB installation dir must be an absolute pathname + dnl specified with --with-librsb=/path/to/librsb]) +esac +dnl if test ! -d "$LIBRSB_DIR" ; then +dnl AC_MSG_ERROR([Could not find LIBRSB build dir $LIBRSB_DIR!]) +dnl fi +pac_cv_status_file="$LIBRSB_DIR/librsb.a" +if test ! -f "$pac_cv_status_file" ; then + AC_MSG_RESULT([no]) + #AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.]) +else + AC_MSG_RESULT([$LIBRSB_DIR]) + RSBTARGETLIB=rsbd; + RSBTARGETOBJ=rsbobj; +fi + + + ############################################################################### @@ -944,6 +968,14 @@ AC_SUBST(CUDEFINES) AC_SUBST(CUDAD) AC_SUBST(CUDALD) AC_SUBST(LCUDA) +AC_SUBST(LIBRSB_LIBS) +AC_SUBST(LIBRSB_INCLUDES) +AC_SUBST(LIBRSB_INCDIR) +AC_SUBST(LIBRSB_DIR) +AC_SUBST(LIBRSB_DEFINES) +AC_SUBST(LRSB) + + ############################################################################### # the following files will be created by Automake diff --git a/docs/html/psblaslibraryext.png b/docs/html/psblaslibraryext.png new file mode 100644 index 0000000000000000000000000000000000000000..da3b3a9ce0a0edbc627df73fa0872b6244e4ce33 GIT binary patch literal 29336 zcmXtA1yoeu*PQ_gX+*k3y1P3CLApC7q=s&#r35Kq2tm5LyTd`cJBDuQ_@2N2`dDig zFzc;1_uYH$Is5Fr??kGq$YY|Bpn*Uj%y$Yh>L3t!8+e7FA_MQBrV3;Lzg|G3-f5x& zKYpl|5y1a1ofY&TAP`pXzZW=(8H*Hnlh{pG*GB(m6Xb-V4bGBl0 za3{wKVj_Gcz zVg9Z#Zjclzb%+;Iz6U6W0@j2qkt!vKBM~IUiV-B|yo4QV^wclL`g`q6c|7mq`BTLh#)zx zeg``g>X2_U+4+Zk0&sqW8<`_Av3;@rxpcd5s{g&8CjkEp;sU)b%;=L(Lfjo7Dp3^% z23DK6+eCGSH7SYN#43cSTj+r*g=&j=mwy;40Cxe!W}oSCxMW~<{)ny;pT+{7gmpX~ z`S~UR35UlrYB@-MWN0#0?}$#fQSF9~&VW~-L71!o!c21L-aV`9@VC;nCcE3LR_mpS zMP0gSU3wySCueYsfm!`Dnu$d{sVSc2+8T#mA91l*c2g8sPyt_vKg2 z(x}u@Q;FLK2vmi8U3L&~eU{(5PyzDbaqUQDBuLUQzbkA&Df)j`(QZJo5uJkz>&2d5 z=8?xnq#rW+livQlm!U;oOVUXH0>qhJA>+v3S1&r?-hX|aD>u;bH{4jrJRTDH5y=cS zDNxrs5@>Lx25k!&mAVBx2s(AcH`i_ios!jasT@6W9QlC>ktu2of>5yj8~y_+uhmNd zxDO~wrcN5)2+N=g%!SCO{p@dbGV(#1tn|0~yuv@Ah-l){%z*Jzb zLG6E5W}dI_yAKx6GYI0k>|AeH0YQtOqhHMFhe8Y-Jt0r}U;ekaMia)U# z{;JJlKYO)}IX~xeZk2bw3B3sTmw$vP?1qo)UIztE2IFH6RJClm<8OSMSungID<1XE zNuU6gT>*)l!EV^GTaXfS4NdI`4(xu%s8{S3Riu@Y-W)4dz#RSCMs6%qiOXwKLPN~p z%KSP!_m*$Qx4{ppy;gxbrCn7EWs2J=0&wtupNR#0rVDxycRIyZ<>o*uaji}@_T5=; zk0N^OrQBw@NeIq{$5i5C1cUWBt(qJARoBWTyV^rt?XhhIvg6?7uVWYK*98%=NfZ3I?hgj)*XCeo*)0a{#+>m>r~j zOQts?x?b@iL0rDbj~xDB_;31;D0#Q*y|IFN6hH9pNI5VY|CA!zaJ(O0kvD#%$UA3( zHDv+8^X)1otvX5oE;`&xfL0C?C-kq8H z=8>7_A+%#yT#Fj(ED@mxe8_&U`fn?($o_kP`)!VjWS%%k0Bul(Y~6G?{f5^OUqgU# zB^!xRa2OR+HamiQJnB>!jRN6Ia`d2aZKSAWP;ecB6{ftZ^{`aY9!IH%Mv1t5#mlhd zJ)_}|)mqH{=?7C5>=davK1}VXBrq^TNkDWITjC8=baId!X5`p7K5jTD-7yZJg?VkoCB*b z(9>v9d~^B@=W?!F%1*9#DLtyp{Y~cYZ|LAF1aMOFdmK7X6E>!KaI{(Uk>go9FsdMx zBaqlGH7ZGUOnTMNwLQKLZ+MVcWA=d|@Np!geFf-3J8`{#UMgT@1fe(omJN6mJ7n_4 z3``DO+VO+!@=`}&zL#_`MFSoIDdkEx>~i=7xQ(qQ-lr}QPQQ;^Lk=(h*iBGr3*6KW<-51Ya((Z$gcZ}br;a**eyvCx zRuX_`{M%x+?7%ujKr&xQ+<;sy)(wfu@Y&iblJ{IRPI!rkT&xT>ZbODdwW&DWS{FDM z4uPF&RzDl5m;n}H7Q~|dMgG%fn6-f{s7H0>l?L5aa{K0ay77z!?OmwR)J7INFM0pj z!lyzmc(*BQc#Hu~VT??riB0c|UsPgVFQgtf3}KtmG3n$xCidCE9W%EPdx0Drh710J z#=iy7JsdA&WEhKoBh9H_BCs9O8J<6YVYkB3RUKCFHz7A-cEhZlg5ubnv^fjStpjG8 z1SuGVrcIkI7H?YQec;lJ}#fOL5VTYm@Chuw4d*3_o~VR<&! z#Rpp(fx{a{C%GE*6gPB=oii3L%V`H0>uLSGb2OMY{lBc#9+y^)2fa`QL1gRNtWIjZ z)Ue`fa?(v|fx4H=!1J(uZu4NE)M3?-)TJNm7knZ5OWLhUy?XFeV5`L7sn zY!)&KU-OWBC@-+BoHmE7OQo#xbc=(H(r@c^fu{zt9ORaJ%JQT2Tc04l_v)|4NS$vfD>bCI8Tei&T#_2pGSx*y*T`y zo5-PDnesy0;A?XBu%DFX1fni25P!YlY8u*CBB1aj5hhaC%9^btzBU)7%p&Vq9}nl` zf{=rjT>C#b1>zbuf{ctYWCSg^;&Abwn(aXlv45E|;v^@Z1qdS2PTrh_y9aush0lAd zZytRG%lPN))Et~2z3cSd(m;mu7s?+~FMfCcY3iNg+c97RK`{q?;~^_g$#xK2_);8% z;^!%4#&?Bqxr@Z~H7GY|glbOmKxWzTmv+G9&KRn#wM(4P$PmG&4_@|JP6ll(a1=ca z!%UsB>0R8y7=cGBi`^~4@Q`nuM0moO zoF#ujT8xb-OK}i2TE$DbO;*=<)X>c*(3jVCKXJ0~YC(x|2`Eyxv#(=kPGqh*f>I1_ zNu(Aq?S^9=UvT8_Nn*ujol|HnL}zO~@F>E2Opn+XP5dS~O)MC+&_>bst5uDC9 zYDXvMW6Lw|(@3}ayziGD$NE`s&U$Gan<~$=I|$f5dXA0K9=uo+tdqvcA+Q@RNC$VC z$zn$>N0^{lw;V~ccmz-SWUK7Ka!;xur0oI$>5Dr?zRcgtNLrLuHHE_}S~4JF>PI*s zg`d;T8KD;fpcfQDFKcy);WXLL*goc~Hz?!Lx8>1a zv?zb!1)EonDc_qms|vPPQD+S1xp;HohYbB-N;6MHQYb%q~2EVcOX#h=MTp&(L@X8^B=T#ys-wX zv4oZm6PZ1^7Y|IUXF4rQ`ot@MMrs-4hM&NccOq`kj>eBq(tdM0(wJ$5sZ69k1U z1@w??QFxxf>j*F44&A-){Bg(*Ia-)WIz<>tOXU@OWyTzyLOHFGPYVlYxGgb|WsQ%L z#~3-S_!f0Hg2!Z5RS286FZ^VK%bMpy^~@3U>&0uc=up@1UKEH=Cg-WAf5qC3z*lup3l)jX$%Ng#VSWSVIsV3{*lH<*CGU)9A++5{;CreP`?vtD~&r$qNQ|jy2*C z_ljxM@xDgj{+}}z)0WMOopeXoH19mpP%$A-4}ui|AEIh_;R(m=5d@f@=N)=3!vmZb z;k7yy9o3`*v1o9({kXtxNnO-8oQ>~=j8UQ(o$Jg7sqLh6Zn#)~9P5B?0Vt53cicY% zjGT3zO>ovv9!O1m6Aa)FGXc+)Q3kbkb90S*DhB`pO-!%WPf~t&@&__5erRrXv~9I$ z%B361d;ZK~O=>-+Zn*b}<>Na3gruqlfoZfABF^yuKbxaa4#HieVwrihmW4%E&F?2e z0K-rytb~qNVb}S*Q#MIdqiub6Ca%8|o__C_Cy=T<@W7cjptMWOHM4hbsa*ycxdCBL zZcI>&gx2jd;Tan?xBbo@TrZXk$o5qBKm3hKisDkTlpBy zY|6HJZcXq7c4kgYE1-2~o*B%De{ok!1rizT;QV#9eSoy%vdWY$nrZd4M2{PQBB_Tv z*nQlEP}c8`Kf&_y0od=H<4EWe60QF5V>jr@%W_Tfmrg+3QgcROLvr^Ayy&@3OAOSG z>Um*wGO|ULa%ZI#Ts6-(Y|h!0SG%=YOXtMoL);&pzPN1qc8PbcPfcNVY5-a6*K!V= zN5vUdfq2?fcx5(&KUnJVR8~$4VB^MuXSzPTmKM<(@z>gHnS2oGjRF;v8gHM=zv2M0 z30b`NITm#I57V|Bc}hRl{Y>TL!2J0IQ!dsrd1FbfwmOB``nMqAq_qHxI5x-{WW&*~ z&-ajtLn+fxkzpEL20l$MKD(bM>lYd^#III%l2bps>8Hc4RJ4a6E=0-xj{p_O<5l53FVK&5N^y`1)Mc8Xg=ab>DzlsK8$Y%|o^0UA}p z$ZU=0A4kYMbNiXKhApj6oA(>RO8K!0}?g+YF!pYqh&9OQf3i7*wWCnVer%VD=hS(1e&ghP=L?>Eg0Xbe}Sb$xd`cp67E+HWqM$mVpiHI2Ij zJq_KUIFz!=O@A{%Yn31CI{03HZ)R1c#)_xzWha35zU0U0p=?nxU8THDz;h>qB7XS& zKFCXxjhv?$sE>Ry^c#+D#XYw1l^?(lnCpr1CEnIb=z#*J$d2#7qD z8nLY2h$Q}G@(itgua$>8lJZcl*`5RuQvu+n$Y-F*C$d=lS*h3e%Ml&-*8z(l?pesz z>Co=vo}byL+k`TXbX)c6_mu4|*n36YwC{;YbnbE%HG2zyZJtgHvP+}Dn3COiC?6}t z32*$Kd<(RB@X)%1@9thlc}4)il^(NM^y8qbPpJ{$?oxoDXe8v_<};1PmtNPK8Qk?9 zr~O#@kEVcqZNgRq`-j%aQ=Bf%B=k151#YdkRA?j(M+w5!{;j17K$bWnfx;h+c;81c ziW(#2%1-*JXY(Pz-Mtlcl|*%;%hKcCsmMHidYVjEHDF#M6nTYKLxXyTG{w0T1PK6x z);DHN$T~0slLF}kacxV6mQyU;4yCaM1mQ&d8}4M{+CQa=XVB5Ek;1dgdrB`H{d~{DvU^Uv>!rISw4yXPyg+s$$m=ck zZrI9WbGCg0>WlDNzi{c2zOdo1LYkUWxvjOxp-4##gUs@h#z?~>c8Qsr#r+>D`YIMr zWKY_=C6k1~(N@vkdUjIK-26j2=!Fc(7DNX7(m}}HbLV$frn=$pxOah>&_fT9|N4gt z$8{KCJ|_#`n{CW7U>&;{7^2*`pFhVEzB)GL()*TT5SAMr_07EzM=v}|&7|OoAPEjl zj~HujQY-S~wm8HCdNo^SfGFz~%)&NRncM{W!x7saH9J?Y%lEDC&3C<)mIHGu>s|Nw zSd415T;Et3HHU3B8w{*RQ@@-LPlsezW+SYY&&MG8PW!h%9n;U6PtZQf^!Wu5;<5iJLNvv!$q|DwJc-bg z>O>}~@E1y@qAQt{AwZRJ=Oa2dW)YO1ls7(7s?FZ8afjZUL9muQ;rvW*U3s>P;{_mz zUYy@!VV2eAT~KAk?F&J;&futYLAqWA#<`dm5c-c#P#y~6Cxwk+jRq#9gL9}%1V|LWzd6kb^H8P>~;KZ$e6Gti0gAMHRR8 zpclr#7M{=-sza=WQ2un8O@pKnupK>mI%P@6f@)Z?BzqCqyoy z9#YjK6?J+?3c`uums28JSq$>F?=bfd-gA(Qu0d%S<|rj4pHP9_T2M*Vd%YWd{8DW< z6DO3-sK@_&@%Ae`a9~>pTf)Kca#L{iyb1qW8>)#?@Nqe{dt&+8k%EdT#D&j0zblG#-eh{Sh(# zVoKO?x3GPaQPVpckg8G6z#l()AgI@Xij7# z`anVwf5V--eaC8Cg*fCSU~IYBvtQ5R&TJ{fG9;8=Eckin!suqN<5rDYd^W+J-$ zpKt^mb~lxXum98@q8dc67ePIGENsqHu;^x`fI=(=I;xj6Imb9N?e|QWp-3xrCw0cF zcTv3JgvOJ`T3RF-pWeFS!Tnl+1i%AGJTlQ$86Yu&X}0LBI8&oUKnWVl6HR`3qL})d zPvq+K1F(Hz4)yhSyo`t~zEqJXcQ=oSHwQsILShn|60sNOVgi)9@dve5!hlHt~s%iJ+)tHmPZs1s`mTH6Y%QZ83O$+|y3KRk+>T$xN^F9}=!N}Rx1f`sm^SbMjnRTOXeu9tm~ZIB&)9))>uoqhg`NAnufA0sC2R$~w%lC#j*_Z>l!4silhujm2&QEg^=N&w@$e1Oyt*KpOQ{KRJ85KWhGcHG=Sakls7hvX7%CYaR!|0E| z_CiY(j<1~gjm~`3T!3nc3xBXjomnrdf%bl#`phv;Mj?tvXgKlc9UYT#>>^yJRBofIKd(l{|W#MDYVjY<~YpdL8NlzSZ;O-X# zMCNaSx%s7Bu-lrCOTM@N^fTr?8Z`$upswn#=dYNUO7w1H4U}mMBfn*nHY(E#)hxTd zfsVxUP#NFBNieMsSHH&?En?Gfmw{R{mJW;`*QXn|s^6;Zy$yQ$FA-=;BDPIkiv(>d zW80--0a8IZAll~24>J6cd63ujJ(yk=oBRe|yNIhVIgjDxP>8F-OE{Z59O3jFawp&4p;)D=`zi&87zmwcPHO}XY0U$2A2wvUKQk5O&U^`N01%Jb`J_#q zbfB`8xE?&+QIz)6SL$`XC?9bRp>c5e@*BK`%d%qR2nbBdhaWLjllqp*EfyG+flT|ME zd~}Ul&)BF5kKVOJhL)V9fc4cIa$c(wPRyX5x5FI%BCtI!jZlI1kq>wg7cIZ>XIx{o z)=I|k3FA+F{-stOozmc;x2=DflqG^FkA1yfu|1ACYe8ydrGxP=GFJ*U#Q^Q94#)%` z!0dr+amWyqB2x$1u+D)yExrgsp-4q|otQhLcSt+P>r~ChAnSLr z(n11huePlKpNXUyeGzu`Q*h68xBS>vVu^2Cuvvzai-2Q|3x=`}?zUBTo4dcULyB{B z#pE=cF2~LvPL4q@swU!37CAdX@ z>Lxy?Exz5bG2ou~=T9k@)43#?#mFTHGokjcLb~^durOpCEkVMAK zLxGjgF7L+0DnKjm;%?l08u(aA#c0M@s@Rk`O<(?$_q zGMnYWB0-8q*O)#vhtuad2<4YKBWESQ2+xv|zO;r|f9h;;I?@sjn?*f(9r47t%<<6~ z8-pZat$uMieTK5P;N<@JHkH$K*~xP~y31%Pc4OoT0q8(x+Gnhj06jcq1Mss5U`RB& zirN0y=+?)eq#~PAQHeYmC#I3V?g@!R#CcVG$x!1>?>fwJ9XYm~`o3LS-iE5W=ETU$ z=LZUrMMVjDx6b&kJ$oNf$&1t41wp^%YFdNo*tNgs9}@q=1v9nSRsKq0BPV$CdbZGb zr_`r8UAw{Ks&9}Ly(64ES?Lc~*%m)FQ7>k%Abu-1lWQhvEolR zh;?;!OuxIQAz zuBIoS!*%{+El+II>(+kDS0*u}m;~zzA6qhV>*Yk2M=WbQKfdlTkqrfV2oGTLV@rOd zALz2LfoH{Gic_ljIk`43kkA0NFWYmqQM*Gs_pHr+uzXo)ec~CF6x|g9DWVp3Ih^C{ z9T=eDnKBCtMebCBUT{WpuC}=6y!kKMVW83=Q>vA$dvxb#qt!2v*e|Vi&8+wA2LXe< zo(E140Sa=w=)Jg@o(I8nO@-}rrjY{>q4pRLe_;z<1M|w$Dal8kSPNrT0`yoi!5pbK zt6hTf`uZd=v3mzK@7F;FAU_8FB&UnEk%icGIS^Lhh#-pf*g{b264O>zcG%L?iuie@ zy?lA4$E-h1eBvt_LAAwD547Ie0XA9lEZw3kQnELhZ1Cu5bLrqX-fcAtJPxl^FEmYU zZGutjRYMHf{OW41*Mx-3NhWL1@S)Mo1E3@j!@Q|TP~HjL?2Jg?*a!Xu++N$q=^d|E zNM`qVQX6&=@L$zoT3|?rU3VXb&aRyardX`4QBXX;Z!-?hj4bOwWneG)+l1byv+|0a zz?_5V+xPD@{L$D-N=h0e&IIUd#6vEABtWsaU5n}LRbo?0fDZa6Aop7If7jrEH`#OELEpx)W_fq@*-&vPtnSa^;9mOuA`VBt&zB>oT+3o_aNB*?M!A(RlE5!D;Qsm1ALWjT*;ERpmrR z;dl9J>9O#u@aSbb_#ipiT+flVk4FRxapO#7H)1C6MtQISYIBp_*2r2VU$ ziEpGvy6O^JF>7WAvAa9Z+ArSvM-Q?8E6d`QOZ^p%&?9*pE*w{<@ z&T`w&j<`fbK(nhNy1FA{WAg62%ki#SE-svkii$XRG-R5$Z{U8S7Ny1>Y$kv&%9kb7 zRHyKA>if-kO)yjMnZM`CCDK)%=po0l>`6r*^cKDeOK(?Y`~gQoy3Y-C)w%rO(aLZZ z!b*yLJ4#j2Al@wJ#ouXNGya-^UqMMpT2nLeBh1GWR3m}d0nJ&QH3sxtq@YvqEU?d> zu8+cMVM7~2e&CFa9l!IA{{(9E0flcqwMgGo>6&4Y&-#3~L zh5or0W-C+Bhev(z==^vZGr;WBN*_>a%k={qLqS)Tn3RO{{oTz8jIxsp2zoSLtG}Y4 zY?H}g?I`lG-;Y|M0!LBHSfiCu_-Zg78;=FekRT?rJz%8=3WgLb{1T-GTms zSdbg{^#gT+uQXe$XKW`yJ7QR?*6%$YHn|CpkG;o zW}%zb6nfla+E=>x18Ye>pxl7jhtAB7KVvQ44n8Tn$`GCJ=??zW;zV{w>n^+cWb`Lj zr*@>oB{KE5_Akoeo+r`yxrTcN5|Z9~J>QxTW8Ar_XE4W(SMdl8=D|l4*{BvrA?n;u zY{QT=W)=M@o@fB@=O#x(%E6T=ou~PA8H=vYzZ2uO!}m=+$WoceLJf?px*NGoq`iN5 z0S{epG5LlrAJ4ocghq%DqsnDHBJwC??HlL?%Dwc-2})ZB*5}X9&oO-t_1&tgK|pmZ zW;#$a)C}F|Rs>qbuE-^>xz-z)q|!t4rtx}m20;{3I=@p`--dyK@*Pt@QGYaX@Tv3q zat04bn#iKva~ci?_=-*FD$?3f{=kt(tP;!@CH(0sCJ)VDPhF9R@6$8;3zN*#A(X(4 zGx?ceY}uvbR&<}uTy{sC8FdW}TYI^o?(S|5k3-#cBXrOtkwuB6&AA-UAEh@iZ`2^9 zoCP@Lvn-}w!sw;CgCV=I%2}8}nCYOippwR`G5ytB1{e38=*c%FUAmkq&{PNC4 zVBWQ(A}e3hstrGYb|)@P{o9C@2tR32s|>h5XRM-T#oO^a_C~pcOMixHF*2Qz3{La-?7eo=5;KW2PWI9qrP*AY2tPDSi z2V^YDOH|*p|EI%CPXJB>MwN%M`0eNr#qc0#vI69H-Km$R8;A9%ki09&gm#q+bt8~k zx)&nx=o8bx@ez<%>2NH@q;P37liOH;2_hSzCfa*~wcnO5|r^^O1f740=XK4TIg1h!oky=esi2_pD|5Xgc&j{i96 z{igDi=QXPdT1xcP9*>vfo@{rKM(=&JNDE3>*1WRr$nda~v~)0_O#A?nNM1&hIcPK~ zz2EoGUVrdEOYk1Zhri7KS(${Q@xY+S`^%R{mwQpT&*9^d2UhUUvtApnG&L?0qG0SXw?+>H&_BNS=4{bzC{n@xNRS6}c|%x`jGqW&uG$Ly&|}pY zHBWSZv4Q>W`h=Zn9)w{LO`b^h&OJ7lIzsq&P&PGxYN@Ws3ICqi!$U2RiwEUi7CMiZ zJkwx_gtZSC>*+^u^pxM6!wMw9Z@r1N@%GBB+m>gHO@y+;aqvr*#Mo2YG-d+^E}tc6 zaBwg?>@W|gIk=RRacgU9`LwJgDE=8eL~s7m*`swaF}?>1EMH@s5*sC;g`6z@6f9%0X)pCRj0R6xUT!%MT~ZXD<@k+-(fqk?80@g%)=#>=Me?qjG*J!)5czo=yvwD zokQXemqPE`&0p+@28#MzMym^BId%lZIO8FUw-KNxzgMTmb)f^`1^+5r`@M#$YS8eo zB0zsL11TYig0mOV3*h&)fO84a_&6dJ;;xMt_F}gNp#m%H^-2Pn#?F{w7$-$`v0u>; z#k_1fD*4RJo!+;qX4ss|9L8K%e9?w`k<`pP42~u~M?^ICn{uqpRaq!*%L~FEEqB!L z0P$fHYJw(llVo?S(b2iTX|V2{kd;NCVPp8^Cyh^0ngtAxRMp7p)eHg4kGF=bUk6hX zU&U0xBc992ggr3ia@AblOb`H_jAEd*4~X0vh4DZAX7;CxpfXB2pQYJ>bsw{&>sLSr zX~R~Qx_;sAWc4BIF8I_-ssi=F(=Oa3bC$XFZ`X9U;1(t(<{%7~Oc_xd`P*7rPA=>} zS>E!8_Y?rC+KvHgvp;LvZXl;AT`OWU{o&EQ(Pkg%&;C?8S*6)B?yrK(*E9LQ$)awK zu`F&PWWKY1kUSB~2(23(eXm=#B21sjz|6uTxNz?ks)b3m%7Qx00>pm;J%hpty^et!RaW?L`H zztaDyY@W0;H(^sj9(!QP1`8DKTh2R8*Lu^0?X9yo4Fmbc!ZG~7`Okf4qp!5i-OvPnhW~5BRmL+Z?H@Z3P9K_#4c~Gf;ZhW^<3JFCqJFU!|yAJmmL!nH+!eh%w2w5|2lRj$7BV7OD z;?!mLv&*;o#fv@X=UTOHeE{YT4-W&HRHpOIshQi+0{+tz*2NeixD~Xfe_OPby*PVZ zg%Q-Vnz;m3^gD%#;cpa{{Axs74Xi_{u?Yy9>VPa>8q>)Cb=P2;)h+W75!|#fD>Hd! zSPGa$9f}mH*I8*L3kZm{z!ZSPkYD0bTGZ)hDBHG#K~@|IF|+fRcVAL~NeG<#FYehZI3ppscq;;+)C-cEV-_IeZ|g=~vM!Wsl8r^5! zgW~jw04volQpHR%;wym@57p#ygCDJ*HZjS59Q}@$Bb@d8dF5%>j-v}dt=#+C*Nt>60VUn)CC7BV3om<~y28tA?2*tvYt5^X*?wm;DpZfVVZT z#h>4{4>n)*cMmtq({Q9Gp-{hC)dK2yYpckA^_=jBw5z&ow{C4>EWM7cz^!2PY57Lgtc& zv9G}maGEX{+|=}mv_YcPz}h;0LRNxE6Ey_*nbW-4odwTZED?05RUlJfW~T7G^)Uf%8tCh1K@lYQ1_xb;+`Q4MNPkLdJ9 zj{ZO70Hp4YZ#ZWG4XkF4&TRl0yqV2Du!$T0+3PYAvV~$dklZdp{8=*A6M*KxFj3CO zve}V(KuffovGRb^fEISE3gVBli-La&>sm9y>^10hk8!@OgK59g=;`YCdV24)@mtoh z4e~-AxgR+HBqlo$vbKMK{L&C8Wjb+V0LvI zr)Fu6bGUgxm;VXiolG;g1SlkxoA3qTI$)fYEWe#d6T{4?#T8o22zhgpGq>bq2DVaH z3zBP&sq`wpp_A@~A2#lQLRr-EBdmf{q?TDO^N1t%70CH=SB-~CiwtsL8~${G992ZMgkxTKWR1}!@RtWlb}H%1UCZNH1~f5!MY?}5oH%+t zYT);C<5OT7g6tD3D=X&s`1qfXWbFq1U@wvwbdg)KyZj{#m^W9DuS}g)R6|aEz6FLr zWCVufdxzAGK-_q0$4`Tx(;gjPKl7fuVkW#5fBT9`4_H%Vs@pqpKrGNutu%IF9_{Xy z($xGz&UpTefs6<+MA!O(A&^XfA!-2}L@TZ!tP(;?a>kwl%BLbgB&4R{rS$c@hrz@F zIU?e#>se4O(%1(n(`-)`{Qci6cWX?$4`d(Sd9*CguQdNc6#jn-NHj<*VY3nNFVG9X z@xX>G#lL;+XiBy9A+vbJB^X}!b;{jZHLdf}D1omjP6#N&?kR)}TuD|Z8UkO_L1ztg zg*^^aCn8haocYo*iHV8-vm`2z5zJ6P#pGOMIKjXu?bg3A{~lGMeIY9=%fQO2#njO&h@J4GpzLFI zO0_RYp!Ce9#^s;V$<%B6Y*&n_d;9PK2$vj>*D1>ZkMOzltI8flO)yxf)XyJH>~ZZ< zo&x+?2q>4RtIQ%IH~~Hb?pJmCVG*cj-gF!a1&t-z1ss7KlJLnrp6@%7n1^(rWk*i9v-)sd=OiXYDJa~<-o&e`HgR#r5WR>-R zGl6JpdTK}Cv9YMQrO%3^8ySEVDKS}&C{{c(Cu`4XY{be6Jm+_8?Q=2vStEblIiG7} z;#iwoR94m(iC4>jZ3}Q5H>Z7V%~VESo;fMyPEi^e zIQ0M=S&-V?H^MSDt|GU}YkyRR0Y%g0Qh~GH;eB7`vbuO^5lJ>pGF- zgw*HHwfwxcKeo0k$Nj88gUF|8TLPZ|AUA#iBBMCZ7%b~vbUIX{jyOuo5NkGI$nE`@u4eJ)Q{SLb)Gegy+eKCSE- zf)~2Qq3;jxCBTl~!O?#Rz1Rbh(LpnLi&35++aSN^m<_4jqlNm_3AJxJB6y4|oc zM}NQuv;xW5c5(rVgjZ!NoxBkl|81m#Bq%cy&^7_Yd^VjX4RFC>El!VUdjpiAS*v>* zokSKy0?t$($4f=(TpML>LgYYCSfLpZ$60S;P2u^oPr8=^dF}KskUdqrcGVVQ;}{HD zH(sruO$GPe>?k#3mh`GR^`G}`{J@NlkKbLW1I|kKF8)1UCIy4L1)tnOI|{Lzd{c=? zL>BX^y1x(Fi)o{YR*MNe8DNeI@7{e@k>|fs$PEet zy`>WpI~LgX?$?bU6*-ns)|;0CH7R4??bYd4S|*^mMDU}d&1{z6%c0rZcZ_u&XVjv~ zDJ=!gHgs=-!QkD=g19Z7Uf{qfy>yAY;SrXESP(vttbilbNYR{#udg=A0*&9WH5OI? zLr!nIg5hJ0&7hhAUo`N)?*)(scbeCmoqbu=RM<-Qa2)9Zw1<$o-d>rIkdS}nQ(0Fx zwPt|$160WKp2cohg`gz0$eg$@n5q7*m z;OoM2Ac;XnM8;61#yz70e&dv{W0_(HeE?PxC}qEWae{jGkzR_@oUWqS3X37M-ky z5@BFKZS%cOhK3cE=~Nh_Vqp;^je;^0$ObHc1A8EgQshntwNK~UL#Z6`Dmdihfs`Vo z8M(dpBx8X-G^GRs@SI=UeC1^v4LV>NBL-n>}T;}g$WIGwQSlh9ttg$dXp3O z^mxzwTZ0{3DdS2Om!mQv70oFOun>ZNk9)J?Xr4=u{0CJT-Vu`@Z^-EXTpd+~2dW!7tqfffO{fUma;u=6#sYW_Xb%2!0UN0+ zL{NMXgOmdy?cfyXhRH)R@Lk3zgP1wlL4#WD-ns#?Ccgr2MoLQRPl*mYKn$>Oa&p3s z!$wV`BO3%S%m5d1B&Rl(X=b6vZ_Hqq8HT<^mjA2itHY{%p0^JnptN*%cS?hBC~1+D zZjkQgD0S#mKuQTgIwYhUY3VL$1nI7K`Fwvb*Tuyj=VABR*`1ktX71UOM?vi4iH$)v zs-o*>J1dx+JfWyM!Q+2p$r?+E>qIeEZY+6u6E#iF)Dp$BX}{1W(ldhs!U{lO11w=7 zv2C*wmwc|$PmTpOI^`_^KMyY>7Fe53dW`yPMLxYPe=w*Y`Jq~n`_gz{U0NCuR8Qta5ebToPv3z`00AcE{y){vI(_0s z5jD%!&)yv!9Sv*y(n`n`j0>kDo}rktt5{K{ZL=8yCnNd9S!R4q>!VyW#StkhTNm5& z?+k#QSGT-2=XM}Kbs*)VSDKD@g%yMH>sJ6d4UKR3+Gj?q++8^d&JlYrlReKIlbn=+saAYh~hJ>T8Y|eMZ(YIV3;|N&vN%(t7 z9;?22`c6?%@%LYf11p^6$HO}jABW&A>q@=@lxXo8f8bk*80~?>g|R3pP3C}QvIFBM zrNc+rk$o23C+M}mu%XVqn8wejh%T?Lz8bd%9wXt_!QpVJ6i!2MZ3RycDM5wwHm+nG`rqq73;^^F84|J{3+SbDHl50DQ_yO@i>IYEi8jDdu_og38imXMaQ#mX<&zgrdPXr#zzIP<>SFg!*| zc(^*CSO#WPnA|$rbVaIx_*?+&G#WOMW9U27LubDkwx0}uT|VLt!Ajri=?XEK%Qe@6 z%goswZ?Gurab#Ild*544Oh%*x(z_Dx4(T>{7grGC;=XrW?22l_Imn%~8*L2=%0HHm zt_mJX;1(=rgtFsgl-7 z&Y~}KVjG>VUU@Gp1B8Hx-=&0%jLbZwhyvRpG4}y)0-D>&czU8CR*`mVjHey4DXdFb zG)DB)3gM3kSodw~Id`ptFei|y)x(ni-ja{Bu9C7cQ~_;&X>%wsG4Ttid*vfS2IzY= z0MF==9E>SduYP^YKt#*^TMaBqzr4lc$t}R-&;NgI4VowHwRrlZL_556*!Fiy%HN6a zN!8X5YCJu=mYYW?6UpM{A`qqV;Ke_kk69hoE*g$WkQ+(j#2s-9a5^(07uBZ$wDkxO zQfRlHhgT315QNs(3q7V(^YZeT_dnh;!Qw!@_Zb)o#`6imNIho%lp58wqq&^6iT^ocKtw{y#AKvR1RlzI-XU;^ z_{PwR(g^@bP;{w*WZ_S8{se^y)jYhAI-mW0wvgWH>i(luzZ-@EnzEVmc^C5AY6acD zi;XWne0XG5(~A3@L!F%~3N6Aqe5!LnCPj}50mtZw!#nW#>Akz?(A$NZHa&POZWcXv z&@iJe2gR}KXSIgA9)S*)clxD-EG#Twfe)KoTU+);a|6+p>}fttAPI*pQp3JL2O*zA z+?y1A?zQ9x{7Tn*)q?glH8q^6M+or^lj53u)lHRfdUYK89)If3pFcQwcyz+JS9QQi zb&jYBq30t$Yy9Uy4}=`ebIt^t%$Ymw9RW_Ia2=ZWeWRF>j{DaUy=k4>V0N`)C^WA$ zE8{1YfWRNj&_^-<5THJ)h>ypir>AHBxW(Nga(jw$ozBub4(SJt6i-Ljc6_$JO$A-t6VJI&UACJsN?T{k9~&si*JS+uOG%m6}e7j@X$rzwmxN+{n)bH?6dP zFK)Gl*07b6glL%Kmy<_?gMtVvpmQ)qT>J8UVzKv>^6&IAE?9N8`d%?r=VT020(9L} zf>2D@&z(D%XS1SKMjoKJtQQD-4!-d`CaYsnU&cqNLhXaYJ-b1=f@sP}uTsV(7wM0v zV`CtukQe!B{`gLYN*|4553yUV&aqX`%m~OS@$DifW_(6_K!b$yL0`M_J2rU~-l|h9 zq#qcwh#9@nJqg-h^fAbooJ2ibPWy-u7VuPQ#_N}r+4H{5L}>Bk564M;hZ_CAuVY9Z z_%*8Qy9|r9iYIj(_N^x8rfR=AmMZa+swhLAA}jnt(SucVa$np9+gNTcp<1Q-SfLDG zob*vipju(I52pPbY2I4dVG;xtx0CRm%+kw&fsbRlfY0)0&Br&Ll9!j)q)zE#1jFRe zBY~n@R!<&!N`qgz_G#G{@z0+tjC`ZvoAx)#gYjP4jcc`azZZwtY|XpJ*oHdoKtW`Aw}|#-=*-MrIrUf<>!exBizCtK;dWfy zd;jCJ%xcs7b!VZ<-A`v@sf&)&bC`qv)LdNH+QnLgcBmElTkanyi7Yd5HSl1f7M19M zLYSHb1-luem(^B_W;6T$xSmU62tG?#_+UZvuX{_fcQ}mDwxps0hcHTZE4l|Yedt@w z_?;^4pEXR*dS=v~PDt%fBYtAEcmIqqj8smt;R@)1-hCLhdo?<&6-T))0&3#xx}}UE z^18T`re~MSeGEjmiq~NS_JX;Eg}+`rt)hvHiqZ;}+C>n{aTB3w$KOWop`&$0qxppB z{8yRhZpJ+$V9BEC^o9#Des<_JjAC!U5!?8V94%_KwTFWv0NU=e00nJ{CBn+YV zx16uBHf3A7(}c+=%RdfE5T3ye8$G|))Jzsgc6t+%kws0s+2~&$i71pG@t1x=v;m>X zsSCj=hd9q8HknN-UQYj=w9#>=H(O$1e%18>ZzEVL@a(eJW)7m%(G*+_ACX3 z=#DGNjPUQ{)nSVv1cX*{cKYnE$IUbZH-e^bZcx>A=0kMuD-Z-GAIQ{?#-ohoEIWC1 zOJ$6&;b@kYmX|BR*m?4WsayQ984i#`YYNS$EKU9tx=IM{4Mxh!k!@!$hOnhX%;z;n zvKb7I#k{}Z#Yx_N9m(dhmBt{<;w34b$_yVU7bX#WyS-SKQEjq5(BMWGeZO!JJ?o?E zwtST;Nay#aF=xn%?pc&(f}qQGLO~WK>c3{qC45FkMwFeM9V1Dmn`}@R?dtCClSXW- z%5_6XMFs+-&5?s#^qHj?1hPlIs&{EAXBT0hx_WZ_e)nSOigRxMu8#F3@*wL-@3EG3 z2O*_M=&TK|*``ast%Rf1$E{AGNJb&e*uIVp9WFWDRCnF?BVt$ED0|D(oG}Ch1mD9L zDsXv;G4wCCjm3QVlUbZKu$uh1>vv4ilKNl*UR+pU(c?0$Vlut~erGRlkGeS;*1383 z5VrhV4N|s_L%;W0(2PkOYbT#GlQF6s{qxq)O(l`M?u*D}-OZ{$mA=~jK4~{e%Jxf+ z8YHv@(^Uv}Q7T?23IYZkPEy~Rq>OjvQ49GXgH(bvY-d#Lyqa0B2l8% z%%Evj(dRr4rlyPrHE-^BPKRFD9cI`46$>1!VKb7xi~UA3O}j6+zRvhZbZb~nJNCLZ zxLqn$j063kna=5sn0-$}P&sS&s$hv_oEO&?TSmUon(SS{NWW%J2+rk&WNX{}-?|*1 z)|-Dexqa{-r$X!N47v8NP+l>rkdHpUWRSeXtKF|husZooCu)3Vd%v9_mhl3u@duNd4lBvP#rzLIS}i`7GT(wc~`ud&ln6MRft^Vify}kD62R z_UsuJ|4#vXGFk?KDeIBHT-96n%qfYXFxT*^QH!n;S4$bk2HMbnfPGJ@>dHJw6_moSdBS zfP(?xO{$QF#LAO44yZd6XgvYZ2K7s5jXL?a*5l8v7F$KeKT-|mx@ro^niUaNHLaa#s(`YWnfx&E(|=MZj(>_8fFVIBXi;31R2#Z@1)tiI)*^ zXwx~qZRCdRgwz_hI@g}c{!w^~P1$fE-YV|om+mx#~3+Kytufy^k6@Na{m_J>QCb=hdaGBm#1qK#Kh`O6~`7yx=w1@R<~Wc<$%+Oq`q7(LN7=YNCxRuk~x$b3XM&LaviyXA;L&L`?D{VUqI7My`z9?((Hip>PL==x(JKmiOpE=vTp5C{a3>LB)&XjB0RG9 ziHamMwU(E7+#KPezZWLL^44uUjFR}&MTC)%u$XrLejYuX%%)J8sjKstDCNPX`q$x< zb%!C@Fc_e#=<3P~;*8%53t~lE#F*gin_3-s)AH(U;R-c)q;k2qD8aXUX+=Jf?5MP3 zBGY$5xDGl`(#XMihC=5&1S2xt~%B#dYIOu9M zDyKYi%lI~$qOT-l$d~1Jov9(&Drw7Mh%JOtVY?Pv{@^#>Ls7roi*CZ-7bSaj+ks19 zY2wT`?KMKt(px)fA!XE#yiTBed980Pk8M|UVLJcG@Nzv@{|D6$<)~YO{?$p)>Y&>I z4#EBsw`G4ai;a`l0pd5Eml!X@5lWuAKmQ7kay@JyX!{8X^QW(#9n`J7h8kR%Uu`C< zTi3nC&9yn@G5ugDvf5|HLrY`oe6?3{pd#&Ee}kSr&QjsK(HfAxhvwmRv6@}(&_P!2 zBo`EP_|NNUT!q;j#>U=T`^7PMnUnqNzq@`FAt_ujrPb+)UYs!s z*Q$H=P1GdvY*9(ORR2i{3KC0-t30258yVN#J-Yqk^HiVRVFaceGg=&6lCUYl{hNaK z{3rGPLz%;5lJ3S43zw^&mKwFX%Beh}PSBXw4I+L=1E>!XmTaG>#X4^OSo4!`n|o5m zrlm!-HXlE|zdmN7M~DGJ~~pB*lbV1W;j}HRKk#EuPE}Ds^RW z;ENF_Z)kW|zZL6+bbgDQV5;lgZ!xH+9#TN{Hf|`L6!B9OVJW6jKmeBg()v^mMf!_L zU=qJ()QMTY9ECTg2uui=@As!1CKVIWh{9;r0s+g<#PAM+_ue@4Z{O(dZSMODzDeH3qLFb=#bJLNc+ASdkl!kE)bKh25 zO^JKC*gH?t`i%x1OSJLI8XSiUv%Sdu!r}L%+w{^TL9ek~p{JENW?T@;)7|`IA3X)d z8uMkNW&F)v%1czw0}=l9&$ISWgAWuwPw*-ZlUe$4W8{($Qf^P_OPW@@zcjQONR+F) zO<(qs4oOSgSD^4&4UGK8!<0CG7qlew#~=hEacv^{`1|^`Ik&AH<10o>>T;70I!f~e z**<&mn0ES{jb@CzyyjslxUi-zrr*KswhFxRkY9m2SdgLz;YMoPR=?!kZ$4D?ZgX+F zZ7UiKI;xPO#aw+!O|q~+8?=Al{?WjqT;uVi*-?nrHF${8Mq0c%TRb}WBz&4ab!OUp zI;~r(zTvI~kQ4jH8^NIG$FWcNbd)~wi*Y%`?ce4W+HX3 zII`-j@M0Q`X2n^^|6Cji3CUR`ylhWiPL5W1_AYRJ{X2`Uza7HVL+3*;V|x1B;Ye#h z*a>Gw@$F}Tm_K}DCc-bErQvx+LL|L1m)=r(MJ%10X$j)YXOXWp3~Q$UlB1qL&i~oA zFNt$_KPz!nkZ@|nqL+S`0mF`u8^m5#8m@9`(v2Z+3KuB-BnS2C~NL0{0TZCwb;TP-h@nCX6zVwz|z`;|98q62xKR1cWKNw zy-})>!OA;?fFUV)JV-9jKX9}#s)~&Uxu{maY;m_)*QZxFHTAorlU?1ri)l_~<+dv# zRjp7b23kjqHurY2<`2m@9attG*=yalRQX@L$So}$+E=r-W@>0?Sh#oh^2%*(HNLhk z!M89u#FVqlzpd3)F2SrTOL#$eRMR4HMfRl}Du>dWrc*qbCp<8tV>LLe`R_Mn>HYnC zQymKBNOM2qpLfHuMdja-4@;)|cs6MO?0Cljds;?BfrV;kH<9j0ToLpTXsOgSP?iyT z8M1mH6RX|Nt(h3cHs_lr!8h%6m)F{7Rc+Fc!z6^edKqfToz{HtcPTJWo^NTi?BB7+ zpByprphVa3a}Ov!c`LghC&1>?(r|mo^e;030f8~gcY<$ZWkcPcwAt`0hzedk$6d`c zWhA4pJzGv@RU0I++L=pWWkif)E_~*bG&S&}g{O8@2wvUD& z=~K4JVy1GuQQx>EQ+xc&ELZ0SRM@dl8i4a+Dx+(DAb~OLmDTlCK`RekQ)u@cGU-`$cS298}|bAlZ!)qY6~nKBTHx z&s|w)xY-12_q_bjy*4AOD&!@o~Lhaai2Bl>Q=fS`hlpse}=~^ z_{?8TxZVI_@{L_xqupt)xzc)sl3c`Hfwa+cmK-{_fys5d@w?qqj0$;;%&dC6BltICVHb6|cJq#Pf7A3T>o2R!wU%o?!Nnd{Nlp;J zu1KSqA~}b%*ZN&J{-`Z#xUO8apRZN%^_;gC{1ah4GR6^iJ;w+llKu)6_ET=__vZ!c zagv8EeUbK`C*rrV%11V|F1ana;yU zv3y;79MjJGnC)RV0os6rBTa8!!Q41oX^z^^IN7K+`|FL44rtIJARPF;VMn|2eQTs+ zk0mZ3z~Q1y=Cf#^*7Y^{_TPPZd3i1npPG&Q>nyx-<(d-{yI*&I#y`b2nn6;YltWh|HyVfA043z%+!BH$)q@T8$dxa9coC@>U?o811*nTk+&Fh zI?d!C$qT_5fz)@M`Kqa5FZpaySjO3IrU8nm@@INUN&g#;zej(Wjf<`J-Z1R^U_u4`N*G@qj87E9@tGy%fp&1Y>+|h!T;cOl? zuvgqPUHFi65TdTk0opYw8QGbdT{JE2U>YAP56?fc3|~JA=Dr3haq)#F+mdJKe5=iB zi&CtiykMTyD7RoeV7V*9OT*3TTzcZPI!rRaLJW%u{1jm6W26hlXvHBI9*%%H8CYvGHE- zUG9v;YKBhZ;&LfgegTRp6@p_0x#%~$xj5);5B66cGX`cQ&n+hcZRB=y0b}@4%zT71%>|BPTzT!K&y`PPi(NAyb=p3rw`l{(M48Q8M zwZ=E~^a*2aN``xPprf-IBzrK1Fe5vMvTFnCWJ`-gCLd`l7NuAr`LWBtivntgoC_Ho z-IX*Bxds3Eh@!*nh+ikR^wC!&kLgv^HV4wbD>@Je?$P6S1oSG(%ZFDH(4S-jVz#Xt zVetFIJ*SbUQgVYjf&N#OzlLLyLa!kUAb}YjgGOXY@svgt;@rTlt)m-G1N?QLdV%Jf-6nTJ;U5J)E&n!0~g(?m-_1qMkf1Hm{5RVD2gskP1AkjeSVFFdU=H_&dt(b0d zgIb!Jc%RcZlF~S@`Y`AjWp^`hPp0<0I6m~T2iO)P9{w2K;oijM?7yxjfzAIm3h!*M zP7GE6tj7%13p`#BJ*%}kdqQ~4vOe=kqBC(0YKE2r#;{Pw2T~$SNpKZ7k}hl1u8xR>#nFq*1pP%Y+RULuHWRjBLIR=KU3xVxu4zJOkSz3} zz)LmSCs>l@CIaUJw>NTsDRteCA0`J&&DTubau(pPko$H#zvs}I8!iFCH#a$Zq3H!~ zBE{n&?|pUqDhqaE7mf4m1B6sI^h5l@l{$xcL0dkeq*<7;xR(`>4J%0+PyJ~DpVq1< z+@=+*QIOOi`A!NA7iGm!&weCJT*&{jVwi+St){Ln_v_b}r0GG@SXP6treDx}b064e z>L9gB4BbVzl?DqXBLL!Yp@6YZN2wOz83$hF-_LTF19Qyg)hl^~5u}fWG9IVWTi2?Y z^K2Ok75CljoUs9~$3p{s4$B@e^YHNK&unJDw8^AXQ;7me(UB=(x^oq1n}ZiSvZ6eL zIT9@8YzSUCx|Xx&=LK<2znY$(AE-{0Q3q#6|LxoS%8j}#ScUUW70YQxPsR1TD9 z#hgYsqQ8dx#x%))JeYi7J}BCe17wUc2;9_ss->d?VD0k02<{=M89brRJ!0cwLDVsD z%V71V!rdR^qw4XGh^oRyqfv%T^_i)ts0cZ)z<}tD|L*U11Cl^~<=%o&M$qb;N4xP@ zvf1-2W}iUr9(3&i$$ZP@dPJ%3k7`Hto>YdElr*_iV|vMrSDEpCYu$;#vQKF1mZj)$ z2s%+_EfIZ1+8P<1h(n9g=J220o14E9Fe|~4Hkq$=7)u?p>Isg^EOV1v8*z9?)M1mH z^?nXx#V2#zba1PlIc}*G^4x&y{Nf__+c!qY4tJ0UI3ra~S7}?HXhfM*K~W|yNsXhe zX{}mol`P^{0I{GsO<2@1Ga8L6#DF1ca$@33dAZtFbSfpV5c$3HaJ2s^D(Q`~6_R$D zQQ@{LjBO@n;?{5h{OiZd-X8NYFuBt|@Kwz7Wv zkNKO``H1cKM?2zVkStCgQh}gL7q!3COy=O=0Gt!z_naJQ-{KAvYO(J07CDDA!zj~Q zV#}{~5Vc+4N&|akXWa%6l--*P1vcYi48afRnhEFk9E|4AsNmKCVvhx}y{`MK7S^U{ z_`*#VFX5khW=KMN8{IL!P`?s*)lI z41-IIgDpPolJ!}q3|l`aE-h7=^-PH20vk7moB3$IB_2)GGZh_UNpLw$LX|oQd>-dA zbkU$hpy`^0Yz~-#GKm;l8Ukc)eqO`am@8%|EhRLqn$LZh`7|$2ZncWds;I*YcT__B z#8eqbJ)9<_!sO7XDm~!leD!d(6Nvr8EO^P$`FXY&@uQL-2;?_mAK$5d7_#~l>WSnH z1!irWf@lLYKT?mJ<&5vF>-7!Bbgrs9UVn}^>#Z5s+|AI~2K6IMrD1n6rTdR$5ehBc znUy?HkqN z*w~(hpKdR^9=hZdA*nJ@GQ>Be zU0+(QJteZNxk0Ee#jCBWJBi{TO90QEUbr#+(%5b{dHc4f2Y5&Dbsk9&eRbayA&o!4khU91gQ)6Reqv$Xka;{Kz0om5}AHjuG8I@r{_9G8= z5UC~kF$VOv94jX%I8I05upOqDxno6wq z>#0c^Fzr3k08)kKE(SX$c0b3%dn87=$+6=T$H?L1;}>>};EghZ;}#uC$k|T{-&TVx zed>Otq~wE#?Ut0Dj-Ecmi0<*8iJYe=tX<2}TQb{gIWn) zj@*A^P;&NnQs=yav?g~SA0OjT4tM^Y{Q}6xlRwwRA(Rus6g|3VAQ!<~hvb~TX2(ye zVPHTGbJwkih>ev49SN*W8Spo~(ZuF^?+lXVVenXpE=^Q^ULG|MkH%KCZUqbAq+@1Z zlD8eW1H~fD0oHxNjhLKfOK$B+S_~2(mW=Vl$Quq|@)DcVb<6sM#16YQKMS^22?z8} zsHbP`y<#Pdj~IiQmGyO~F>TJp=^z+p4wxMnTmtDZnq6C69a5|%fu9v)2&4|!U-6;W z)H`Us#OC_oAxB}DOc{|1Bc9VWsWm$xI#}YH)$oWy?Gb-s^RM6phr_qIVOmwXzP=4p z&9Z_$K)`MGf&yS??5u)-Km<=uGMM8% z90in2mFjzPa&rMC_-oK>WgP|vhN2h7T9B+MAix>ocG+1a=qUnhdB6;iLn?IHfmt|B z1XZ-WJbJB5Us~5~U>NQ9R&vBdUYIyv-r`tQ>%Du8gKi4V6KFX3KXBO71NJ*t3VQQ( z=66zBTEzCY4FJ3MpAr%f8W%YM7}PEL?~Q>c+!javGIvIc`xO*t9{ndznzB9{wHofI z;Nzahh=Z+)fM0V9Esd(!+m}qmBNCb&pPXo6n$01U{O^ssWM@&~+shE6r0z!r;!Oi^ z^-AB{yIz}ZvIaVl2V8t*B|sAg21s89&T;^G3)~t5EP2Zt0|SiUm)9ftlx^UyxNsgp zIpD)^Y3H7-1nRLfF`)tpfyMhX2ns&3ii!?ZpcBRa4HgwuLE)04=y0FJP8cx`27fsGRx*pwJ(nso;@Gg#Hv`_?741-5z4|r9U})=Fn4K zh%HhCXl4QG0_3yAOB$nJofxD9&HkT|0k)%>M%wmjwyB69jbFRyP#8XXVs6pc)PK zf(oGFzb_#7Z0O0KJ{5h|sDO;8erzIXLkH0gF`0Zl>*eU^8m|(M&`TR|xY0^$W7wsD z2w)*0$Xb_jw62c`(%v8VRBPy|Fs#6`612brqWn$*!?J8Dk=~@4}P4rmoqZT zV!b8Cb(jVYb>^~5R!>q5A>I=P;FYEWyO{_JGc&B)EFo0?|Fp&>GO9{7))7ZPvOTL$ zW@g{h!pdql8y^V*1u1DZHYI|VOgPp~DwerDIS9bFoGdFr1J5Lrz@~L~cX!dp z$p1qn|2@%4hVTP$ZqkRC+1UX)UVH2BF9zzb0R6##m4*wOM_-};AHk=lN*DCd+9^|? zAkmK5`CJR)Hb25qo*r3YZc2oT$w#FCGz$>BtKl)JLfzeeu=7J!L;&)P=HcvrF9pS7 zHaayoH?O(ozF?njEf2tJ4GF>_?D+Ist+zAsZ*BwhFVEL}E$`@RF?PHAuSo zPJ+=qf}jFt!trE9(B+q1O%CA3f{CynEp9;X0b@?U<2INZujY_TK_}3EHOpv?F0-{DbFNukIv zOZhEhz8>AS#{}Cn;ZpwO;)(RNYD2i^kBad&IVf$AnO{?VlX-5O^VC zTT+#7V4#G(VEQmg=r_=jL(j-q)bZl^C=3APo*8FQODUg+kUKZq@RQcWo>%0;75*OV zI6;3HJ^BWFdhj_5tsO#zG$FXxNXFm42@LZIW69SAOX%-+ify#)IA#9^dr72*e0VN(1rlpm`bH_WYzD8|yd- zqfQ&zP1I|z-iT+_34@?0&W$@wIjc$779m~@8ZLK@8O`&JiL;+Z3hjs;7KN1AOYAI; zD~=Cvj=6GZlq(uYO(GSd;81KENb6$4cxiXh-D$zUbX+9MKyU>R>cxNigw&_kQ}gwF zpa4ZWTxm2sm> -stream -1 0 5 38 6 77 9 117 10 144 13 185 14 217 17 263 18 299 21 345 -22 379 25 425 26 464 29 515 30 562 33 608 34 643 37 684 38 727 41 773 -42 816 45 867 46 905 49 956 50 1026 53 1077 54 1147 57 1198 58 1270 61 1321 -62 1393 65 1444 66 1522 69 1573 70 1638 73 1689 74 1740 77 1791 78 1851 81 1903 -82 2006 85 2058 86 2161 89 2213 90 2285 93 2337 94 2409 97 2461 98 2542 101 2594 -102 2631 105 2678 106 2716 109 2768 110 2810 113 2862 114 2938 117 2990 118 3069 121 3121 -122 3211 125 3263 126 3358 129 3410 130 3492 133 3544 134 3618 137 3670 138 3740 141 3792 -142 3883 145 3936 146 4006 149 4059 150 4114 153 4167 154 4236 157 4289 158 4345 161 4398 -162 4459 165 4512 166 4569 169 4622 170 4679 173 4732 174 4814 177 4867 178 4920 181 4973 -182 5010 185 5057 186 5103 189 5155 190 5190 193 5242 194 5317 197 5369 198 5450 201 5502 +/First 865 +/Length 16060 +>> +stream +1 0 5 38 6 195 9 235 10 329 13 370 14 488 17 534 18 672 21 718 +22 846 25 892 26 1045 29 1096 30 1292 33 1338 34 1471 37 1512 38 1691 41 1737 +42 1913 45 1964 46 2112 49 2163 50 2450 53 2501 54 2788 57 2839 58 3136 61 3187 +62 3484 65 3535 66 3862 69 3913 70 4181 73 4232 74 4442 77 4493 78 4751 81 4803 +82 5234 85 5286 86 5717 89 5769 90 6063 93 6115 94 6409 97 6461 98 6827 101 6879 +102 7018 105 7065 106 7212 109 7264 110 7431 113 7483 114 7817 117 7869 118 8218 121 8270 +122 8677 125 8729 126 9164 129 9216 130 9595 133 9647 134 9965 137 10017 138 10285 141 10337 +142 10701 145 10754 146 11064 149 11117 150 11346 153 11399 154 11683 157 11736 158 11955 161 12008 +162 12255 165 12308 166 12547 169 12600 170 12839 173 12892 174 13220 177 13273 178 13489 181 13542 +182 13681 185 13728 186 13918 189 13970 190 14099 193 14151 194 14480 197 14532 198 14906 201 14958 % 1 0 obj << /S /GoTo /D (title.0) >> % 5 0 obj -(PSBLAS-v3.8.0 User's Guide) +(\376\377\000P\000S\000B\000L\000A\000S\000-\000v\0003\000.\0009\000.\0000\000\040\000U\000s\000e\000r\000'\000s\000\040\000G\000u\000i\000d\000e) % 6 0 obj << /S /GoTo /D (section.1) >> % 9 0 obj -(1 Introduction) +(\376\377\0001\000\040\000I\000n\000t\000r\000o\000d\000u\000c\000t\000i\000o\000n) % 10 0 obj << /S /GoTo /D (section.2) >> % 13 0 obj -(2 General overview) +(\376\377\0002\000\040\000G\000e\000n\000e\000r\000a\000l\000\040\000o\000v\000e\000r\000v\000i\000e\000w) % 14 0 obj << /S /GoTo /D (subsection.2.1) >> % 17 0 obj -(2.1 Basic Nomenclature) +(\376\377\0002\000.\0001\000\040\000B\000a\000s\000i\000c\000\040\000N\000o\000m\000e\000n\000c\000l\000a\000t\000u\000r\000e) % 18 0 obj << /S /GoTo /D (subsection.2.2) >> % 21 0 obj -(2.2 Library contents) +(\376\377\0002\000.\0002\000\040\000L\000i\000b\000r\000a\000r\000y\000\040\000c\000o\000n\000t\000e\000n\000t\000s) % 22 0 obj << /S /GoTo /D (subsection.2.3) >> % 25 0 obj -(2.3 Application structure) +(\376\377\0002\000.\0003\000\040\000A\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 26 0 obj << /S /GoTo /D (subsubsection.2.3.1) >> % 29 0 obj -(2.3.1 User-defined index mappings) +(\376\377\0002\000.\0003\000.\0001\000\040\000U\000s\000e\000r\000-\000d\000e\000f\000i\000n\000e\000d\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000s) % 30 0 obj << /S /GoTo /D (subsection.2.4) >> % 33 0 obj -(2.4 Programming model) +(\376\377\0002\000.\0004\000\040\000P\000r\000o\000g\000r\000a\000m\000m\000i\000n\000g\000\040\000m\000o\000d\000e\000l) % 34 0 obj << /S /GoTo /D (section.3) >> % 37 0 obj -(3 Data Structures and Classes) +(\376\377\0003\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000s\000\040\000a\000n\000d\000\040\000C\000l\000a\000s\000s\000e\000s) % 38 0 obj << /S /GoTo /D (subsection.3.1) >> % 41 0 obj -(3.1 Descriptor data structure) +(\376\377\0003\000.\0001\000\040\000D\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 42 0 obj << /S /GoTo /D (subsubsection.3.1.1) >> % 45 0 obj -(3.1.1 Descriptor Methods) +(\376\377\0003\000.\0001\000.\0001\000\040\000D\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000M\000e\000t\000h\000o\000d\000s) % 46 0 obj << /S /GoTo /D (subsubsection.3.1.2) >> % 49 0 obj -(3.1.2 get\137local\137rows \204 Get number of local rows) +(\376\377\0003\000.\0001\000.\0002\000\040\000g\000e\000t\000\137\000l\000o\000c\000a\000l\000\137\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000l\000o\000c\000a\000l\000\040\000r\000o\000w\000s) % 50 0 obj << /S /GoTo /D (subsubsection.3.1.3) >> % 53 0 obj -(3.1.3 get\137local\137cols \204 Get number of local cols) +(\376\377\0003\000.\0001\000.\0003\000\040\000g\000e\000t\000\137\000l\000o\000c\000a\000l\000\137\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000l\000o\000c\000a\000l\000\040\000c\000o\000l\000s) % 54 0 obj << /S /GoTo /D (subsubsection.3.1.4) >> % 57 0 obj -(3.1.4 get\137global\137rows \204 Get number of global rows) +(\376\377\0003\000.\0001\000.\0004\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000r\000o\000w\000s) % 58 0 obj << /S /GoTo /D (subsubsection.3.1.5) >> % 61 0 obj -(3.1.5 get\137global\137cols \204 Get number of global cols) +(\376\377\0003\000.\0001\000.\0005\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000c\000o\000l\000s) % 62 0 obj << /S /GoTo /D (subsubsection.3.1.6) >> % 65 0 obj -(3.1.6 get\137global\137indices \204 Get vector of global indices) +(\376\377\0003\000.\0001\000.\0006\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000i\000n\000d\000i\000c\000e\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000v\000e\000c\000t\000o\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s) % 66 0 obj << /S /GoTo /D (subsubsection.3.1.7) >> % 69 0 obj -(3.1.7 get\137context \204 Get communication context) +(\376\377\0003\000.\0001\000.\0007\000\040\000g\000e\000t\000\137\000c\000o\000n\000t\000e\000x\000t\000\040\040\024\000\040\000G\000e\000t\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000c\000o\000n\000t\000e\000x\000t) % 70 0 obj << /S /GoTo /D (subsubsection.3.1.8) >> % 73 0 obj -(3.1.8 Clone \204 clone current object) +(\376\377\0003\000.\0001\000.\0008\000\040\000C\000l\000o\000n\000e\000\040\040\024\000\040\000c\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 74 0 obj << /S /GoTo /D (subsubsection.3.1.9) >> % 77 0 obj -(3.1.9 CNV \204 convert internal storage format) +(\376\377\0003\000.\0001\000.\0009\000\040\000C\000N\000V\000\040\040\024\000\040\000c\000o\000n\000v\000e\000r\000t\000\040\000i\000n\000t\000e\000r\000n\000a\000l\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 78 0 obj << /S /GoTo /D (subsubsection.3.1.10) >> % 81 0 obj -(3.1.10 psb\137cd\137get\137large\137threshold \204 Get threshold for index mapping switch) +(\376\377\0003\000.\0001\000.\0001\0000\000\040\000p\000s\000b\000\137\000c\000d\000\137\000g\000e\000t\000\137\000l\000a\000r\000g\000e\000\137\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\000f\000o\000r\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000\040\000s\000w\000i\000t\000c\000h) % 82 0 obj << /S /GoTo /D (subsubsection.3.1.11) >> % 85 0 obj -(3.1.11 psb\137cd\137set\137large\137threshold \204 Set threshold for index mapping switch) +(\376\377\0003\000.\0001\000.\0001\0001\000\040\000p\000s\000b\000\137\000c\000d\000\137\000s\000e\000t\000\137\000l\000a\000r\000g\000e\000\137\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\040\024\000\040\000S\000e\000t\000\040\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\000f\000o\000r\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000\040\000s\000w\000i\000t\000c\000h) % 86 0 obj << /S /GoTo /D (subsubsection.3.1.12) >> % 89 0 obj -(3.1.12 get\137p\137adjcncy \204 Get process adjacency list) +(\376\377\0003\000.\0001\000.\0001\0002\000\040\000g\000e\000t\000\137\000p\000\137\000a\000d\000j\000c\000n\000c\000y\000\040\040\024\000\040\000G\000e\000t\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000a\000d\000j\000a\000c\000e\000n\000c\000y\000\040\000l\000i\000s\000t) % 90 0 obj << /S /GoTo /D (subsubsection.3.1.13) >> % 93 0 obj -(3.1.13 set\137p\137adjcncy \204 Set process adjacency list) +(\376\377\0003\000.\0001\000.\0001\0003\000\040\000s\000e\000t\000\137\000p\000\137\000a\000d\000j\000c\000n\000c\000y\000\040\040\024\000\040\000S\000e\000t\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000a\000d\000j\000a\000c\000e\000n\000c\000y\000\040\000l\000i\000s\000t) % 94 0 obj << /S /GoTo /D (subsubsection.3.1.14) >> % 97 0 obj -(3.1.14 fnd\137owner \204 Find the owner process of a set of indices) +(\376\377\0003\000.\0001\000.\0001\0004\000\040\000f\000n\000d\000\137\000o\000w\000n\000e\000r\000\040\040\024\000\040\000F\000i\000n\000d\000\040\000t\000h\000e\000\040\000o\000w\000n\000e\000r\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000o\000f\000\040\000a\000\040\000s\000e\000t\000\040\000o\000f\000\040\000i\000n\000d\000i\000c\000e\000s) % 98 0 obj << /S /GoTo /D (subsubsection.3.1.15) >> % 101 0 obj -(3.1.15 Named Constants) +(\376\377\0003\000.\0001\000.\0001\0005\000\040\000N\000a\000m\000e\000d\000\040\000C\000o\000n\000s\000t\000a\000n\000t\000s) % 102 0 obj << /S /GoTo /D (subsection.3.2) >> % 105 0 obj -(3.2 Sparse Matrix class) +(\376\377\0003\000.\0002\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000c\000l\000a\000s\000s) % 106 0 obj << /S /GoTo /D (subsubsection.3.2.1) >> % 109 0 obj -(3.2.1 Sparse Matrix Methods) +(\376\377\0003\000.\0002\000.\0001\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000M\000e\000t\000h\000o\000d\000s) % 110 0 obj << /S /GoTo /D (subsubsection.3.2.2) >> % 113 0 obj -(3.2.2 get\137nrows \204 Get number of rows in a sparse matrix) +(\376\377\0003\000.\0002\000.\0002\000\040\000g\000e\000t\000\137\000n\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000r\000o\000w\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 114 0 obj << /S /GoTo /D (subsubsection.3.2.3) >> % 117 0 obj -(3.2.3 get\137ncols \204 Get number of columns in a sparse matrix) +(\376\377\0003\000.\0002\000.\0003\000\040\000g\000e\000t\000\137\000n\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000c\000o\000l\000u\000m\000n\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 118 0 obj << /S /GoTo /D (subsubsection.3.2.4) >> % 121 0 obj -(3.2.4 get\137nnzeros \204 Get number of nonzero elements in a sparse matrix) +(\376\377\0003\000.\0002\000.\0004\000\040\000g\000e\000t\000\137\000n\000n\000z\000e\000r\000o\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000n\000o\000n\000z\000e\000r\000o\000\040\000e\000l\000e\000m\000e\000n\000t\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 122 0 obj << /S /GoTo /D (subsubsection.3.2.5) >> % 125 0 obj -(3.2.5 get\137size \204 Get maximum number of nonzero elements in a sparse matrix) +(\376\377\0003\000.\0002\000.\0005\000\040\000g\000e\000t\000\137\000s\000i\000z\000e\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000a\000x\000i\000m\000u\000m\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000n\000o\000n\000z\000e\000r\000o\000\040\000e\000l\000e\000m\000e\000n\000t\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 126 0 obj << /S /GoTo /D (subsubsection.3.2.6) >> % 129 0 obj -(3.2.6 sizeof \204 Get memory occupation in bytes of a sparse matrix) +(\376\377\0003\000.\0002\000.\0006\000\040\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n\000\040\000i\000n\000\040\000b\000y\000t\000e\000s\000\040\000o\000f\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 130 0 obj << /S /GoTo /D (subsubsection.3.2.7) >> % 133 0 obj -(3.2.7 get\137fmt \204 Short description of the dynamic type) +(\376\377\0003\000.\0002\000.\0007\000\040\000g\000e\000t\000\137\000f\000m\000t\000\040\040\024\000\040\000S\000h\000o\000r\000t\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000i\000o\000n\000\040\000o\000f\000\040\000t\000h\000e\000\040\000d\000y\000n\000a\000m\000i\000c\000\040\000t\000y\000p\000e) % 134 0 obj << /S /GoTo /D (subsubsection.3.2.8) >> % 137 0 obj -(3.2.8 is\137bld, is\137upd, is\137asb \204 Status check) +(\376\377\0003\000.\0002\000.\0008\000\040\000i\000s\000\137\000b\000l\000d\000,\000\040\000i\000s\000\137\000u\000p\000d\000,\000\040\000i\000s\000\137\000a\000s\000b\000\040\040\024\000\040\000S\000t\000a\000t\000u\000s\000\040\000c\000h\000e\000c\000k) % 138 0 obj << /S /GoTo /D (subsubsection.3.2.9) >> % 141 0 obj -(3.2.9 is\137lower, is\137upper, is\137triangle, is\137unit \204 Format check) +(\376\377\0003\000.\0002\000.\0009\000\040\000i\000s\000\137\000l\000o\000w\000e\000r\000,\000\040\000i\000s\000\137\000u\000p\000p\000e\000r\000,\000\040\000i\000s\000\137\000t\000r\000i\000a\000n\000g\000l\000e\000,\000\040\000i\000s\000\137\000u\000n\000i\000t\000\040\040\024\000\040\000F\000o\000r\000m\000a\000t\000\040\000c\000h\000e\000c\000k) % 142 0 obj << /S /GoTo /D (subsubsection.3.2.10) >> % 145 0 obj -(3.2.10 cscnv \204 Convert to a different storage format) +(\376\377\0003\000.\0002\000.\0001\0000\000\040\000c\000s\000c\000n\000v\000\040\040\024\000\040\000C\000o\000n\000v\000e\000r\000t\000\040\000t\000o\000\040\000a\000\040\000d\000i\000f\000f\000e\000r\000e\000n\000t\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 146 0 obj << /S /GoTo /D (subsubsection.3.2.11) >> % 149 0 obj -(3.2.11 csclip \204 Reduce to a submatrix) +(\376\377\0003\000.\0002\000.\0001\0001\000\040\000c\000s\000c\000l\000i\000p\000\040\040\024\000\040\000R\000e\000d\000u\000c\000e\000\040\000t\000o\000\040\000a\000\040\000s\000u\000b\000m\000a\000t\000r\000i\000x) % 150 0 obj << /S /GoTo /D (subsubsection.3.2.12) >> % 153 0 obj -(3.2.12 clean\137zeros \204 Eliminate zero coefficients) +(\376\377\0003\000.\0002\000.\0001\0002\000\040\000c\000l\000e\000a\000n\000\137\000z\000e\000r\000o\000s\000\040\040\024\000\040\000E\000l\000i\000m\000i\000n\000a\000t\000e\000\040\000z\000e\000r\000o\000\040\000c\000o\000e\000f\000f\000i\000c\000i\000e\000n\000t\000s) % 154 0 obj << /S /GoTo /D (subsubsection.3.2.13) >> % 157 0 obj -(3.2.13 get\137diag \204 Get main diagonal) +(\376\377\0003\000.\0002\000.\0001\0003\000\040\000g\000e\000t\000\137\000d\000i\000a\000g\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000a\000i\000n\000\040\000d\000i\000a\000g\000o\000n\000a\000l) % 158 0 obj << /S /GoTo /D (subsubsection.3.2.14) >> % 161 0 obj -(3.2.14 clip\137diag \204 Cut out main diagonal) +(\376\377\0003\000.\0002\000.\0001\0004\000\040\000c\000l\000i\000p\000\137\000d\000i\000a\000g\000\040\040\024\000\040\000C\000u\000t\000\040\000o\000u\000t\000\040\000m\000a\000i\000n\000\040\000d\000i\000a\000g\000o\000n\000a\000l) % 162 0 obj << /S /GoTo /D (subsubsection.3.2.15) >> % 165 0 obj -(3.2.15 tril \204 Return the lower triangle) +(\376\377\0003\000.\0002\000.\0001\0005\000\040\000t\000r\000i\000l\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000t\000h\000e\000\040\000l\000o\000w\000e\000r\000\040\000t\000r\000i\000a\000n\000g\000l\000e) % 166 0 obj << /S /GoTo /D (subsubsection.3.2.16) >> % 169 0 obj -(3.2.16 triu \204 Return the upper triangle) +(\376\377\0003\000.\0002\000.\0001\0006\000\040\000t\000r\000i\000u\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000t\000h\000e\000\040\000u\000p\000p\000e\000r\000\040\000t\000r\000i\000a\000n\000g\000l\000e) % 170 0 obj << /S /GoTo /D (subsubsection.3.2.17) >> % 173 0 obj -(3.2.17 psb\137set\137mat\137default \204 Set default storage format) +(\376\377\0003\000.\0002\000.\0001\0007\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000m\000a\000t\000\137\000d\000e\000f\000a\000u\000l\000t\000\040\040\024\000\040\000S\000e\000t\000\040\000d\000e\000f\000a\000u\000l\000t\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 174 0 obj << /S /GoTo /D (subsubsection.3.2.18) >> % 177 0 obj -(3.2.18 clone \204 Clone current object) +(\376\377\0003\000.\0002\000.\0001\0008\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000C\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 178 0 obj << /S /GoTo /D (subsubsection.3.2.19) >> % 181 0 obj -(3.2.19 Named Constants) +(\376\377\0003\000.\0002\000.\0001\0009\000\040\000N\000a\000m\000e\000d\000\040\000C\000o\000n\000s\000t\000a\000n\000t\000s) % 182 0 obj << /S /GoTo /D (subsection.3.3) >> % 185 0 obj -(3.3 Dense Vector Data Structure) +(\376\377\0003\000.\0003\000\040\000D\000e\000n\000s\000e\000\040\000V\000e\000c\000t\000o\000r\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e) % 186 0 obj << /S /GoTo /D (subsubsection.3.3.1) >> % 189 0 obj -(3.3.1 Vector Methods) +(\376\377\0003\000.\0003\000.\0001\000\040\000V\000e\000c\000t\000o\000r\000\040\000M\000e\000t\000h\000o\000d\000s) % 190 0 obj << /S /GoTo /D (subsubsection.3.3.2) >> % 193 0 obj -(3.3.2 get\137nrows \204 Get number of rows in a dense vector) +(\376\377\0003\000.\0003\000.\0002\000\040\000g\000e\000t\000\137\000n\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000r\000o\000w\000s\000\040\000i\000n\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000v\000e\000c\000t\000o\000r) % 194 0 obj << /S /GoTo /D (subsubsection.3.3.3) >> % 197 0 obj -(3.3.3 sizeof \204 Get memory occupation in bytes of a dense vector) +(\376\377\0003\000.\0003\000.\0003\000\040\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n\000\040\000i\000n\000\040\000b\000y\000t\000e\000s\000\040\000o\000f\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000v\000e\000c\000t\000o\000r) % 198 0 obj << /S /GoTo /D (subsubsection.3.3.4) >> % 201 0 obj -(3.3.4 set \204 Set contents of the vector) +(\376\377\0003\000.\0003\000.\0004\000\040\000s\000e\000t\000\040\040\024\000\040\000S\000e\000t\000\040\000c\000o\000n\000t\000e\000n\000t\000s\000\040\000o\000f\000\040\000t\000h\000e\000\040\000v\000e\000c\000t\000o\000r) endstream endobj @@ -225,226 +225,447 @@ endobj << /Type /ObjStm /N 100 -/First 877 -/Length 6351 ->> -stream -202 0 206 52 207 123 210 175 211 227 214 274 215 322 218 369 219 407 222 449 -223 488 226 535 227 598 230 645 231 693 234 740 235 801 238 848 239 908 242 955 -243 1019 246 1066 247 1119 250 1166 251 1233 254 1280 255 1333 258 1380 259 1447 262 1495 -263 1556 266 1604 267 1672 270 1720 271 1794 274 1842 275 1902 278 1950 279 2005 282 2053 -283 2109 286 2157 287 2214 290 2256 291 2295 294 2342 295 2401 298 2448 299 2498 302 2545 -303 2609 306 2656 307 2722 310 2764 311 2805 314 2852 315 2925 318 2972 319 3048 322 3095 -323 3173 326 3220 327 3290 330 3337 331 3407 334 3454 335 3536 338 3583 339 3645 342 3692 -343 3778 346 3825 347 3892 350 3940 351 4000 354 4048 355 4136 358 4184 359 4246 362 4294 -363 4362 366 4410 367 4471 370 4519 371 4578 374 4626 375 4707 378 4755 379 4839 382 4887 -383 4971 386 5019 387 5063 390 5111 391 5158 394 5206 395 5250 398 5298 399 5345 402 5393 +/First 919 +/Length 15874 +>> +stream +202 0 206 52 207 358 210 410 211 621 214 668 215 865 218 912 219 1059 222 1101 +223 1250 226 1297 227 1554 230 1601 231 1777 234 1824 235 2068 238 2115 239 2354 242 2401 +243 2660 246 2707 247 2911 250 2958 251 3235 254 3282 255 3486 258 3533 259 3810 262 3858 +263 4105 266 4153 267 4438 270 4486 271 4804 274 4852 275 5091 278 5139 279 5350 282 5398 +283 5614 286 5662 287 5883 290 5925 291 6074 294 6121 295 6355 298 6402 299 6588 302 6635 +303 6897 306 6944 307 7216 310 7258 311 7420 314 7467 315 7774 318 7821 319 8143 322 8190 +323 8522 326 8569 327 8861 330 8908 331 9200 334 9247 335 9602 338 9649 339 9901 342 9948 +343 10335 346 10382 347 10659 350 10707 351 10949 354 10997 355 11388 358 11436 359 11688 362 11736 +363 12018 366 12066 367 12313 370 12361 371 12598 374 12646 375 13005 378 13053 379 13394 382 13442 +383 13783 386 13831 387 13972 390 14020 391 14176 394 14224 395 14365 398 14413 399 14569 402 14617 % 202 0 obj << /S /GoTo /D (subsubsection.3.3.5) >> % 206 0 obj -(3.3.5 get\137vect \204 Get a copy of the vector contents) +(\376\377\0003\000.\0003\000.\0005\000\040\000g\000e\000t\000\137\000v\000e\000c\000t\000\040\040\024\000\040\000G\000e\000t\000\040\000a\000\040\000c\000o\000p\000y\000\040\000o\000f\000\040\000t\000h\000e\000\040\000v\000e\000c\000t\000o\000r\000\040\000c\000o\000n\000t\000e\000n\000t\000s) % 207 0 obj << /S /GoTo /D (subsubsection.3.3.6) >> % 210 0 obj -(3.3.6 clone \204 Clone current object) +(\376\377\0003\000.\0003\000.\0006\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000C\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 211 0 obj << /S /GoTo /D (subsection.3.4) >> % 214 0 obj -(3.4 Preconditioner data structure) +(\376\377\0003\000.\0004\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 215 0 obj << /S /GoTo /D (subsection.3.5) >> % 218 0 obj -(3.5 Heap data structure) +(\376\377\0003\000.\0005\000\040\000H\000e\000a\000p\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 219 0 obj << /S /GoTo /D (section.4) >> % 222 0 obj -(4 Computational routines) +(\376\377\0004\000\040\000C\000o\000m\000p\000u\000t\000a\000t\000i\000o\000n\000a\000l\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 223 0 obj << /S /GoTo /D (subsection.4.1) >> % 226 0 obj -(4.1 psb\137geaxpby \204 General Dense Matrix Sum) +(\376\377\0004\000.\0001\000\040\000p\000s\000b\000\137\000g\000e\000a\000x\000p\000b\000y\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000S\000u\000m) % 227 0 obj << /S /GoTo /D (subsection.4.2) >> % 230 0 obj -(4.2 psb\137gedot \204 Dot Product) +(\376\377\0004\000.\0002\000\040\000p\000s\000b\000\137\000g\000e\000d\000o\000t\000\040\040\024\000\040\000D\000o\000t\000\040\000P\000r\000o\000d\000u\000c\000t) % 231 0 obj << /S /GoTo /D (subsection.4.3) >> % 234 0 obj -(4.3 psb\137gedots \204 Generalized Dot Product) +(\376\377\0004\000.\0003\000\040\000p\000s\000b\000\137\000g\000e\000d\000o\000t\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\000D\000o\000t\000\040\000P\000r\000o\000d\000u\000c\000t) % 235 0 obj << /S /GoTo /D (subsection.4.4) >> % 238 0 obj -(4.4 psb\137normi \204 Infinity-Norm of Vector) +(\376\377\0004\000.\0004\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\000i\000\040\040\024\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 239 0 obj << /S /GoTo /D (subsection.4.5) >> % 242 0 obj -(4.5 psb\137geamaxs \204 Generalized Infinity Norm) +(\376\377\0004\000.\0005\000\040\000p\000s\000b\000\137\000g\000e\000a\000m\000a\000x\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000\040\000N\000o\000r\000m) % 243 0 obj << /S /GoTo /D (subsection.4.6) >> % 246 0 obj -(4.6 psb\137norm1 \204 1-Norm of Vector) +(\376\377\0004\000.\0006\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0001\000\040\040\024\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 247 0 obj << /S /GoTo /D (subsection.4.7) >> % 250 0 obj -(4.7 psb\137geasums \204 Generalized 1-Norm of Vector) +(\376\377\0004\000.\0007\000\040\000p\000s\000b\000\137\000g\000e\000a\000s\000u\000m\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 251 0 obj << /S /GoTo /D (subsection.4.8) >> % 254 0 obj -(4.8 psb\137norm2 \204 2-Norm of Vector) +(\376\377\0004\000.\0008\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0002\000\040\040\024\000\040\0002\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 255 0 obj << /S /GoTo /D (subsection.4.9) >> % 258 0 obj -(4.9 psb\137genrm2s \204 Generalized 2-Norm of Vector) +(\376\377\0004\000.\0009\000\040\000p\000s\000b\000\137\000g\000e\000n\000r\000m\0002\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\0002\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 259 0 obj << /S /GoTo /D (subsection.4.10) >> % 262 0 obj -(4.10 psb\137norm1 \204 1-Norm of Sparse Matrix) +(\376\377\0004\000.\0001\0000\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0001\000\040\040\024\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 263 0 obj << /S /GoTo /D (subsection.4.11) >> % 266 0 obj -(4.11 psb\137normi \204 Infinity Norm of Sparse Matrix) +(\376\377\0004\000.\0001\0001\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\000i\000\040\040\024\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000\040\000N\000o\000r\000m\000\040\000o\000f\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 267 0 obj << /S /GoTo /D (subsection.4.12) >> % 270 0 obj -(4.12 psb\137spmm \204 Sparse Matrix by Dense Matrix Product) +(\376\377\0004\000.\0001\0002\000\040\000p\000s\000b\000\137\000s\000p\000m\000m\000\040\040\024\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000b\000y\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000P\000r\000o\000d\000u\000c\000t) % 271 0 obj << /S /GoTo /D (subsection.4.13) >> % 274 0 obj -(4.13 psb\137spsm \204 Triangular System Solve) +(\376\377\0004\000.\0001\0003\000\040\000p\000s\000b\000\137\000s\000p\000s\000m\000\040\040\024\000\040\000T\000r\000i\000a\000n\000g\000u\000l\000a\000r\000\040\000S\000y\000s\000t\000e\000m\000\040\000S\000o\000l\000v\000e) % 275 0 obj << /S /GoTo /D (subsection.4.14) >> % 278 0 obj -(4.14 psb\137gemlt \204 Entrywise Product) +(\376\377\0004\000.\0001\0004\000\040\000p\000s\000b\000\137\000g\000e\000m\000l\000t\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000P\000r\000o\000d\000u\000c\000t) % 279 0 obj << /S /GoTo /D (subsection.4.15) >> % 282 0 obj -(4.15 psb\137gediv \204 Entrywise Division) +(\376\377\0004\000.\0001\0005\000\040\000p\000s\000b\000\137\000g\000e\000d\000i\000v\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000D\000i\000v\000i\000s\000i\000o\000n) % 283 0 obj << /S /GoTo /D (subsection.4.16) >> % 286 0 obj -(4.16 psb\137geinv \204 Entrywise Inversion) +(\376\377\0004\000.\0001\0006\000\040\000p\000s\000b\000\137\000g\000e\000i\000n\000v\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000I\000n\000v\000e\000r\000s\000i\000o\000n) % 287 0 obj << /S /GoTo /D (section.5) >> % 290 0 obj -(5 Communication routines) +(\376\377\0005\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 291 0 obj << /S /GoTo /D (subsection.5.1) >> % 294 0 obj -(5.1 psb\137halo \204 Halo Data Communication) +(\376\377\0005\000.\0001\000\040\000p\000s\000b\000\137\000h\000a\000l\000o\000\040\040\024\000\040\000H\000a\000l\000o\000\040\000D\000a\000t\000a\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n) % 295 0 obj << /S /GoTo /D (subsection.5.2) >> % 298 0 obj -(5.2 psb\137ovrl \204 Overlap Update) +(\376\377\0005\000.\0002\000\040\000p\000s\000b\000\137\000o\000v\000r\000l\000\040\040\024\000\040\000O\000v\000e\000r\000l\000a\000p\000\040\000U\000p\000d\000a\000t\000e) % 299 0 obj << /S /GoTo /D (subsection.5.3) >> % 302 0 obj -(5.3 psb\137gather \204 Gather Global Dense Matrix) +(\376\377\0005\000.\0003\000\040\000p\000s\000b\000\137\000g\000a\000t\000h\000e\000r\000\040\040\024\000\040\000G\000a\000t\000h\000e\000r\000\040\000G\000l\000o\000b\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 303 0 obj << /S /GoTo /D (subsection.5.4) >> % 306 0 obj -(5.4 psb\137scatter \204 Scatter Global Dense Matrix) +(\376\377\0005\000.\0004\000\040\000p\000s\000b\000\137\000s\000c\000a\000t\000t\000e\000r\000\040\040\024\000\040\000S\000c\000a\000t\000t\000e\000r\000\040\000G\000l\000o\000b\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 307 0 obj << /S /GoTo /D (section.6) >> % 310 0 obj -(6 Data management routines) +(\376\377\0006\000\040\000D\000a\000t\000a\000\040\000m\000a\000n\000a\000g\000e\000m\000e\000n\000t\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 311 0 obj << /S /GoTo /D (subsection.6.1) >> % 314 0 obj -(6.1 psb\137cdall \204 Allocates a communication descriptor) +(\376\377\0006\000.\0001\000\040\000p\000s\000b\000\137\000c\000d\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 315 0 obj << /S /GoTo /D (subsection.6.2) >> % 318 0 obj -(6.2 psb\137cdins \204 Communication descriptor insert routine) +(\376\377\0006\000.\0002\000\040\000p\000s\000b\000\137\000c\000d\000i\000n\000s\000\040\040\024\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000i\000n\000s\000e\000r\000t\000\040\000r\000o\000u\000t\000i\000n\000e) % 319 0 obj << /S /GoTo /D (subsection.6.3) >> % 322 0 obj -(6.3 psb\137cdasb \204 Communication descriptor assembly routine) +(\376\377\0006\000.\0003\000\040\000p\000s\000b\000\137\000c\000d\000a\000s\000b\000\040\040\024\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000a\000s\000s\000e\000m\000b\000l\000y\000\040\000r\000o\000u\000t\000i\000n\000e) % 323 0 obj << /S /GoTo /D (subsection.6.4) >> % 326 0 obj -(6.4 psb\137cdcpy \204 Copies a communication descriptor) +(\376\377\0006\000.\0004\000\040\000p\000s\000b\000\137\000c\000d\000c\000p\000y\000\040\040\024\000\040\000C\000o\000p\000i\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 327 0 obj << /S /GoTo /D (subsection.6.5) >> % 330 0 obj -(6.5 psb\137cdfree \204 Frees a communication descriptor) +(\376\377\0006\000.\0005\000\040\000p\000s\000b\000\137\000c\000d\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 331 0 obj << /S /GoTo /D (subsection.6.6) >> % 334 0 obj -(6.6 psb\137cdbldext \204 Build an extended communication descriptor) +(\376\377\0006\000.\0006\000\040\000p\000s\000b\000\137\000c\000d\000b\000l\000d\000e\000x\000t\000\040\040\024\000\040\000B\000u\000i\000l\000d\000\040\000a\000n\000\040\000e\000x\000t\000e\000n\000d\000e\000d\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 335 0 obj << /S /GoTo /D (subsection.6.7) >> % 338 0 obj -(6.7 psb\137spall \204 Allocates a sparse matrix) +(\376\377\0006\000.\0007\000\040\000p\000s\000b\000\137\000s\000p\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 339 0 obj << /S /GoTo /D (subsection.6.8) >> % 342 0 obj -(6.8 psb\137spins \204 Insert a set of coefficients into a sparse matrix) +(\376\377\0006\000.\0008\000\040\000p\000s\000b\000\137\000s\000p\000i\000n\000s\000\040\040\024\000\040\000I\000n\000s\000e\000r\000t\000\040\000a\000\040\000s\000e\000t\000\040\000o\000f\000\040\000c\000o\000e\000f\000f\000i\000c\000i\000e\000n\000t\000s\000\040\000i\000n\000t\000o\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 343 0 obj << /S /GoTo /D (subsection.6.9) >> % 346 0 obj -(6.9 psb\137spasb \204 Sparse matrix assembly routine) +(\376\377\0006\000.\0009\000\040\000p\000s\000b\000\137\000s\000p\000a\000s\000b\000\040\040\024\000\040\000S\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000a\000s\000s\000e\000m\000b\000l\000y\000\040\000r\000o\000u\000t\000i\000n\000e) % 347 0 obj << /S /GoTo /D (subsection.6.10) >> % 350 0 obj -(6.10 psb\137spfree \204 Frees a sparse matrix) +(\376\377\0006\000.\0001\0000\000\040\000p\000s\000b\000\137\000s\000p\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 351 0 obj << /S /GoTo /D (subsection.6.11) >> % 354 0 obj -(6.11 psb\137sprn \204 Reinit sparse matrix structure for psblas routines.) +(\376\377\0006\000.\0001\0001\000\040\000p\000s\000b\000\137\000s\000p\000r\000n\000\040\040\024\000\040\000R\000e\000i\000n\000i\000t\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e\000\040\000f\000o\000r\000\040\000p\000s\000b\000l\000a\000s\000\040\000r\000o\000u\000t\000i\000n\000e\000s\000.) % 355 0 obj << /S /GoTo /D (subsection.6.12) >> % 358 0 obj -(6.12 psb\137geall \204 Allocates a dense matrix) +(\376\377\0006\000.\0001\0002\000\040\000p\000s\000b\000\137\000g\000e\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 359 0 obj << /S /GoTo /D (subsection.6.13) >> % 362 0 obj -(6.13 psb\137geins \204 Dense matrix insertion routine) +(\376\377\0006\000.\0001\0003\000\040\000p\000s\000b\000\137\000g\000e\000i\000n\000s\000\040\040\024\000\040\000D\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000i\000n\000s\000e\000r\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e) % 363 0 obj << /S /GoTo /D (subsection.6.14) >> % 366 0 obj -(6.14 psb\137geasb \204 Assembly a dense matrix) +(\376\377\0006\000.\0001\0004\000\040\000p\000s\000b\000\137\000g\000e\000a\000s\000b\000\040\040\024\000\040\000A\000s\000s\000e\000m\000b\000l\000y\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 367 0 obj << /S /GoTo /D (subsection.6.15) >> % 370 0 obj -(6.15 psb\137gefree \204 Frees a dense matrix) +(\376\377\0006\000.\0001\0005\000\040\000p\000s\000b\000\137\000g\000e\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 371 0 obj << /S /GoTo /D (subsection.6.16) >> % 374 0 obj -(6.16 psb\137gelp \204 Applies a left permutation to a dense matrix) +(\376\377\0006\000.\0001\0006\000\040\000p\000s\000b\000\137\000g\000e\000l\000p\000\040\040\024\000\040\000A\000p\000p\000l\000i\000e\000s\000\040\000a\000\040\000l\000e\000f\000t\000\040\000p\000e\000r\000m\000u\000t\000a\000t\000i\000o\000n\000\040\000t\000o\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 375 0 obj << /S /GoTo /D (subsection.6.17) >> % 378 0 obj -(6.17 psb\137glob\137to\137loc \204 Global to local indices convertion) +(\376\377\0006\000.\0001\0007\000\040\000p\000s\000b\000\137\000g\000l\000o\000b\000\137\000t\000o\000\137\000l\000o\000c\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000t\000o\000\040\000l\000o\000c\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s\000\040\000c\000o\000n\000v\000e\000r\000t\000i\000o\000n) % 379 0 obj << /S /GoTo /D (subsection.6.18) >> % 382 0 obj -(6.18 psb\137loc\137to\137glob \204 Local to global indices conversion) +(\376\377\0006\000.\0001\0008\000\040\000p\000s\000b\000\137\000l\000o\000c\000\137\000t\000o\000\137\000g\000l\000o\000b\000\040\040\024\000\040\000L\000o\000c\000a\000l\000\040\000t\000o\000\040\000g\000l\000o\000b\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s\000\040\000c\000o\000n\000v\000e\000r\000s\000i\000o\000n) % 383 0 obj << /S /GoTo /D (subsection.6.19) >> % 386 0 obj -(6.19 psb\137is\137owned \204 ) +(\376\377\0006\000.\0001\0009\000\040\000p\000s\000b\000\137\000i\000s\000\137\000o\000w\000n\000e\000d\000\040\040\024\000\040) % 387 0 obj << /S /GoTo /D (subsection.6.20) >> % 390 0 obj -(6.20 psb\137owned\137index \204 ) +(\376\377\0006\000.\0002\0000\000\040\000p\000s\000b\000\137\000o\000w\000n\000e\000d\000\137\000i\000n\000d\000e\000x\000\040\040\024\000\040) % 391 0 obj << /S /GoTo /D (subsection.6.21) >> % 394 0 obj -(6.21 psb\137is\137local \204 ) +(\376\377\0006\000.\0002\0001\000\040\000p\000s\000b\000\137\000i\000s\000\137\000l\000o\000c\000a\000l\000\040\040\024\000\040) % 395 0 obj << /S /GoTo /D (subsection.6.22) >> % 398 0 obj -(6.22 psb\137local\137index \204 ) +(\376\377\0006\000.\0002\0002\000\040\000p\000s\000b\000\137\000l\000o\000c\000a\000l\000\137\000i\000n\000d\000e\000x\000\040\040\024\000\040) % 399 0 obj << /S /GoTo /D (subsection.6.23) >> % 402 0 obj -(6.23 psb\137get\137boundary \204 Extract list of boundary elements) +(\376\377\0006\000.\0002\0003\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000b\000o\000u\000n\000d\000a\000r\000y\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000l\000i\000s\000t\000\040\000o\000f\000\040\000b\000o\000u\000n\000d\000a\000r\000y\000\040\000e\000l\000e\000m\000e\000n\000t\000s) + +endstream +endobj +404 0 obj +<< +/Type /ObjStm +/N 100 +/First 928 +/Length 16303 +>> +stream +403 0 407 48 408 376 411 424 412 766 415 814 416 1030 419 1078 420 1241 423 1283 +424 1470 427 1517 428 1834 431 1881 432 2269 435 2316 436 2626 439 2673 440 2961 443 3008 +444 3256 447 3303 448 3512 451 3559 452 3906 455 3953 456 4172 459 4219 460 4410 463 4458 +464 4624 467 4672 468 4858 471 4906 472 5092 475 5140 476 5407 479 5455 480 5722 483 5770 +484 6009 487 6057 488 6218 491 6266 492 6442 495 6484 496 6593 499 6640 500 6994 503 7041 +504 7435 507 7482 508 7853 511 7900 512 8366 515 8408 516 8489 519 8536 520 8990 523 9037 +524 9483 527 9530 528 9986 531 10033 532 10489 535 10536 536 10992 539 11039 540 11505 543 11548 +544 11707 547 11755 548 11991 551 12039 552 12260 555 12308 556 12584 559 12632 560 12977 563 13025 +564 13271 567 13319 568 13525 571 13568 572 13697 575 13745 576 14035 579 14078 580 14169 583 14217 +584 14374 587 14422 588 14614 591 14662 592 14816 595 14864 596 15023 599 15066 600 15238 603 15281 +% 403 0 obj +<< /S /GoTo /D (subsection.6.24) >> +% 407 0 obj +(\376\377\0006\000.\0002\0004\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000o\000v\000e\000r\000l\000a\000p\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000l\000i\000s\000t\000\040\000o\000f\000\040\000o\000v\000e\000r\000l\000a\000p\000\040\000e\000l\000e\000m\000e\000n\000t\000s) +% 408 0 obj +<< /S /GoTo /D (subsection.6.25) >> +% 411 0 obj +(\376\377\0006\000.\0002\0005\000\040\000p\000s\000b\000\137\000s\000p\000\137\000g\000e\000t\000r\000o\000w\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000r\000o\000w\000\050\000s\000\051\000\040\000f\000r\000o\000m\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) +% 412 0 obj +<< /S /GoTo /D (subsection.6.26) >> +% 415 0 obj +(\376\377\0006\000.\0002\0006\000\040\000p\000s\000b\000\137\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000M\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n) +% 416 0 obj +<< /S /GoTo /D (subsection.6.27) >> +% 419 0 obj +(\376\377\0006\000.\0002\0007\000\040\000S\000o\000r\000t\000i\000n\000g\000\040\000u\000t\000i\000l\000i\000t\000i\000e\000s\000\040\040\024\000\040) +% 420 0 obj +<< /S /GoTo /D (section.7) >> +% 423 0 obj +(\376\377\0007\000\040\000P\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000\040\000r\000o\000u\000t\000i\000n\000e\000s) +% 424 0 obj +<< /S /GoTo /D (subsection.7.1) >> +% 427 0 obj +(\376\377\0007\000.\0001\000\040\000p\000s\000b\000\137\000i\000n\000i\000t\000\040\040\024\000\040\000I\000n\000i\000t\000i\000a\000l\000i\000z\000e\000s\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 428 0 obj +<< /S /GoTo /D (subsection.7.2) >> +% 431 0 obj +(\376\377\0007\000.\0002\000\040\000p\000s\000b\000\137\000i\000n\000f\000o\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000i\000n\000f\000o\000r\000m\000a\000t\000i\000o\000n\000\040\000a\000b\000o\000u\000t\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 432 0 obj +<< /S /GoTo /D (subsection.7.3) >> +% 435 0 obj +(\376\377\0007\000.\0003\000\040\000p\000s\000b\000\137\000e\000x\000i\000t\000\040\040\024\000\040\000E\000x\000i\000t\000\040\000f\000r\000o\000m\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 436 0 obj +<< /S /GoTo /D (subsection.7.4) >> +% 439 0 obj +(\376\377\0007\000.\0004\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000m\000p\000i\000\137\000c\000o\000m\000m\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000e\000\040\000M\000P\000I\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000o\000r) +% 440 0 obj +<< /S /GoTo /D (subsection.7.5) >> +% 443 0 obj +(\376\377\0007\000.\0005\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000m\000p\000i\000\137\000r\000a\000n\000k\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000e\000\040\000M\000P\000I\000\040\000r\000a\000n\000k) +% 444 0 obj +<< /S /GoTo /D (subsection.7.6) >> +% 447 0 obj +(\376\377\0007\000.\0006\000\040\000p\000s\000b\000\137\000w\000t\000i\000m\000e\000\040\040\024\000\040\000W\000a\000l\000l\000\040\000c\000l\000o\000c\000k\000\040\000t\000i\000m\000i\000n\000g) +% 448 0 obj +<< /S /GoTo /D (subsection.7.7) >> +% 451 0 obj +(\376\377\0007\000.\0007\000\040\000p\000s\000b\000\137\000b\000a\000r\000r\000i\000e\000r\000\040\040\024\000\040\000S\000i\000n\000c\000h\000r\000o\000n\000i\000z\000a\000t\000i\000o\000n\000\040\000p\000o\000i\000n\000t\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 452 0 obj +<< /S /GoTo /D (subsection.7.8) >> +% 455 0 obj +(\376\377\0007\000.\0008\000\040\000p\000s\000b\000\137\000a\000b\000o\000r\000t\000\040\040\024\000\040\000A\000b\000o\000r\000t\000\040\000a\000\040\000c\000o\000m\000p\000u\000t\000a\000t\000i\000o\000n) +% 456 0 obj +<< /S /GoTo /D (subsection.7.9) >> +% 459 0 obj +(\376\377\0007\000.\0009\000\040\000p\000s\000b\000\137\000b\000c\000a\000s\000t\000\040\040\024\000\040\000B\000r\000o\000a\000d\000c\000a\000s\000t\000\040\000d\000a\000t\000a) +% 460 0 obj +<< /S /GoTo /D (subsection.7.10) >> +% 463 0 obj +(\376\377\0007\000.\0001\0000\000\040\000p\000s\000b\000\137\000s\000u\000m\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000s\000u\000m) +% 464 0 obj +<< /S /GoTo /D (subsection.7.11) >> +% 467 0 obj +(\376\377\0007\000.\0001\0001\000\040\000p\000s\000b\000\137\000m\000a\000x\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000a\000x\000i\000m\000u\000m) +% 468 0 obj +<< /S /GoTo /D (subsection.7.12) >> +% 471 0 obj +(\376\377\0007\000.\0001\0002\000\040\000p\000s\000b\000\137\000m\000i\000n\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000i\000n\000i\000m\000u\000m) +% 472 0 obj +<< /S /GoTo /D (subsection.7.13) >> +% 475 0 obj +(\376\377\0007\000.\0001\0003\000\040\000p\000s\000b\000\137\000a\000m\000x\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000a\000x\000i\000m\000u\000m\000\040\000a\000b\000s\000o\000l\000u\000t\000e\000\040\000v\000a\000l\000u\000e) +% 476 0 obj +<< /S /GoTo /D (subsection.7.14) >> +% 479 0 obj +(\376\377\0007\000.\0001\0004\000\040\000p\000s\000b\000\137\000a\000m\000n\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000i\000n\000i\000m\000u\000m\000\040\000a\000b\000s\000o\000l\000u\000t\000e\000\040\000v\000a\000l\000u\000e) +% 480 0 obj +<< /S /GoTo /D (subsection.7.15) >> +% 483 0 obj +(\376\377\0007\000.\0001\0005\000\040\000p\000s\000b\000\137\000n\000r\000m\0002\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\0002\000-\000n\000o\000r\000m\000\040\000r\000e\000d\000u\000c\000t\000i\000o\000n) +% 484 0 obj +<< /S /GoTo /D (subsection.7.16) >> +% 487 0 obj +(\376\377\0007\000.\0001\0006\000\040\000p\000s\000b\000\137\000s\000n\000d\000\040\040\024\000\040\000S\000e\000n\000d\000\040\000d\000a\000t\000a) +% 488 0 obj +<< /S /GoTo /D (subsection.7.17) >> +% 491 0 obj +(\376\377\0007\000.\0001\0007\000\040\000p\000s\000b\000\137\000r\000c\000v\000\040\040\024\000\040\000R\000e\000c\000e\000i\000v\000e\000\040\000d\000a\000t\000a) +% 492 0 obj +<< /S /GoTo /D (section.8) >> +% 495 0 obj +(\376\377\0008\000\040\000E\000r\000r\000o\000r\000\040\000h\000a\000n\000d\000l\000i\000n\000g) +% 496 0 obj +<< /S /GoTo /D (subsection.8.1) >> +% 499 0 obj +(\376\377\0008\000.\0001\000\040\000p\000s\000b\000\137\000e\000r\000r\000p\000u\000s\000h\000\040\040\024\000\040\000P\000u\000s\000h\000e\000s\000\040\000a\000n\000\040\000e\000r\000r\000o\000r\000\040\000c\000o\000d\000e\000\040\000o\000n\000t\000o\000\040\000t\000h\000e\000\040\000e\000r\000r\000o\000r\000\040\000s\000t\000a\000c\000k) +% 500 0 obj +<< /S /GoTo /D (subsection.8.2) >> +% 503 0 obj +(\376\377\0008\000.\0002\000\040\000p\000s\000b\000\137\000e\000r\000r\000o\000r\000\040\040\024\000\040\000P\000r\000i\000n\000t\000s\000\040\000t\000h\000e\000\040\000e\000r\000r\000o\000r\000\040\000s\000t\000a\000c\000k\000\040\000c\000o\000n\000t\000e\000n\000t\000\040\000a\000n\000d\000\040\000a\000b\000o\000r\000t\000s\000\040\000e\000x\000e\000c\000u\000t\000i\000o\000n) +% 504 0 obj +<< /S /GoTo /D (subsection.8.3) >> +% 507 0 obj +(\376\377\0008\000.\0003\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000e\000r\000r\000v\000e\000r\000b\000o\000s\000i\000t\000y\000\040\040\024\000\040\000S\000e\000t\000s\000\040\000t\000h\000e\000\040\000v\000e\000r\000b\000o\000s\000i\000t\000y\000\040\000o\000f\000\040\000e\000r\000r\000o\000r\000\040\000m\000e\000s\000s\000a\000g\000e\000s) +% 508 0 obj +<< /S /GoTo /D (subsection.8.4) >> +% 511 0 obj +(\376\377\0008\000.\0004\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000e\000r\000r\000a\000c\000t\000i\000o\000n\000\040\040\024\000\040\000S\000e\000t\000\040\000t\000h\000e\000\040\000t\000y\000p\000e\000\040\000o\000f\000\040\000a\000c\000t\000i\000o\000n\000\040\000t\000o\000\040\000b\000e\000\040\000t\000a\000k\000e\000n\000\040\000u\000p\000o\000n\000\040\000e\000r\000r\000o\000r\000\040\000c\000o\000n\000d\000i\000t\000i\000o\000n) +% 512 0 obj +<< /S /GoTo /D (section.9) >> +% 515 0 obj +(\376\377\0009\000\040\000U\000t\000i\000l\000i\000t\000i\000e\000s) +% 516 0 obj +<< /S /GoTo /D (subsection.9.1) >> +% 519 0 obj +(\376\377\0009\000.\0001\000\040\000\040\000h\000b\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000H\000a\000r\000w\000e\000l\000l\040\023\000B\000o\000e\000i\000n\000g\000\040\000f\000o\000r\000m\000a\000t) +% 520 0 obj +<< /S /GoTo /D (subsection.9.2) >> +% 523 0 obj +(\376\377\0009\000.\0002\000\040\000h\000b\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000t\000o\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000H\000a\000r\000w\000e\000l\000l\040\023\000B\000o\000e\000i\000n\000g\000\040\000f\000o\000r\000m\000a\000t) +% 524 0 obj +<< /S /GoTo /D (subsection.9.3) >> +% 527 0 obj +(\376\377\0009\000.\0003\000\040\000m\000m\000\137\000m\000a\000t\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 528 0 obj +<< /S /GoTo /D (subsection.9.4) >> +% 531 0 obj +(\376\377\0009\000.\0004\000\040\000m\000m\000\137\000a\000r\000r\000a\000y\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000a\000r\000r\000a\000y\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 532 0 obj +<< /S /GoTo /D (subsection.9.5) >> +% 535 0 obj +(\376\377\0009\000.\0005\000\040\000m\000m\000\137\000m\000a\000t\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000t\000o\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 536 0 obj +<< /S /GoTo /D (subsection.9.6) >> +% 539 0 obj +(\376\377\0009\000.\0006\000\040\000m\000m\000\137\000a\000r\000r\000a\000y\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000a\000r\000r\000a\000y\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 540 0 obj +<< /S /GoTo /D (section.10) >> +% 543 0 obj +(\376\377\0001\0000\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000r\000o\000u\000t\000i\000n\000e\000s) +% 544 0 obj +<< /S /GoTo /D (subsection.10.1) >> +% 547 0 obj +(\376\377\0001\0000\000.\0001\000\040\000i\000n\000i\000t\000\040\040\024\000\040\000I\000n\000i\000t\000i\000a\000l\000i\000z\000e\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 548 0 obj +<< /S /GoTo /D (subsection.10.2) >> +% 551 0 obj +(\376\377\0001\0000\000.\0002\000\040\000b\000u\000i\000l\000d\000\040\040\024\000\040\000B\000u\000i\000l\000d\000s\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 552 0 obj +<< /S /GoTo /D (subsection.10.3) >> +% 555 0 obj +(\376\377\0001\0000\000.\0003\000\040\000a\000p\000p\000l\000y\000\040\040\024\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e) +% 556 0 obj +<< /S /GoTo /D (subsection.10.4) >> +% 559 0 obj +(\376\377\0001\0000\000.\0004\000\040\000d\000e\000s\000c\000r\000\040\040\024\000\040\000P\000r\000i\000n\000t\000s\000\040\000a\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000i\000o\000n\000\040\000o\000f\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 560 0 obj +<< /S /GoTo /D (subsection.10.5) >> +% 563 0 obj +(\376\377\0001\0000\000.\0005\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000c\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 564 0 obj +<< /S /GoTo /D (subsection.10.6) >> +% 567 0 obj +(\376\377\0001\0000\000.\0006\000\040\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 568 0 obj +<< /S /GoTo /D (section.11) >> +% 571 0 obj +(\376\377\0001\0001\000\040\000I\000t\000e\000r\000a\000t\000i\000v\000e\000\040\000M\000e\000t\000h\000o\000d\000s) +% 572 0 obj +<< /S /GoTo /D (subsection.11.1) >> +% 575 0 obj +(\376\377\0001\0001\000.\0001\000\040\000p\000s\000b\000\137\000k\000r\000y\000l\000o\000v\000\040\000\040\040\024\000\040\000K\000r\000y\000l\000o\000v\000\040\000M\000e\000t\000h\000o\000d\000s\000\040\000D\000r\000i\000v\000e\000r\000\040\000R\000o\000u\000t\000i\000n\000e) +% 576 0 obj +<< /S /GoTo /D (section.12) >> +% 579 0 obj +(\376\377\0001\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 580 0 obj +<< /S /GoTo /D (subsection.12.1) >> +% 583 0 obj +(\376\377\0001\0002\000.\0001\000\040\000U\000s\000i\000n\000g\000\040\000t\000h\000e\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 584 0 obj +<< /S /GoTo /D (subsection.12.2) >> +% 587 0 obj +(\376\377\0001\0002\000.\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s\000'\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000s) +% 588 0 obj +<< /S /GoTo /D (subsection.12.3) >> +% 591 0 obj +(\376\377\0001\0002\000.\0003\000\040\000C\000P\000U\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 592 0 obj +<< /S /GoTo /D (subsection.12.4) >> +% 595 0 obj +(\376\377\0001\0002\000.\0004\000\040\000C\000U\000D\000A\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 596 0 obj +<< /S /GoTo /D (section.13) >> +% 599 0 obj +(\376\377\0001\0003\000\040\000C\000U\000D\000A\000\040\000E\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000\040\000R\000o\000u\000t\000i\000n\000e\000s) +% 600 0 obj +<< /S /GoTo /D (section*.6) >> +% 603 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000i\000n\000i\000t) endstream endobj -581 0 obj +662 0 obj << -/Length 728 +/Length 729 >> stream 0 g 0 G @@ -453,14 +674,14 @@ stream 0 g 0 G 0 g 0 G BT -/F51 24.7871 Tf 169.511 626.367 Td [(PSBLAS)-250(3.8.0)-250(User)-55(')55(s)-250(guide)]TJ +/F59 24.7871 Tf 169.511 626.367 Td [(PSBLAS)-250(3.9.0)-250(User)-55(')55(s)-250(guide)]TJ ET q 1 0 0 1 125.3 609.739 cm 0 0 343.711 4.981 re f Q BT -/F52 14.3462 Tf 156.541 586.546 Td [(A)-250(r)18(efer)18(ence)-250(guide)-250(for)-250(the)-250(Parallel)-250(Sparse)-250(BLAS)-250(library)]TJ +/F60 14.3462 Tf 156.541 586.546 Td [(A)-250(r)18(efer)18(ence)-250(guide)-250(for)-250(the)-250(Parallel)-250(Sparse)-250(BLAS)-250(library)]TJ 0 g 0 G 0 g 0 G 0 g 0 G @@ -480,14 +701,14 @@ Q 0 g 0 G 1 0 0 1 -168.637 -345.042 cm BT -/F51 9.9626 Tf 365.51 263.977 Td [(by)-250(Salvatore)-250(Filippone)]TJ 14.396 -11.956 Td [(and)-250(Alfredo)-250(Buttari)]TJ/F54 9.9626 Tf 29.31 -11.955 Td [(May)-250(1st,)-250(2022)]TJ +/F59 9.9626 Tf 365.51 263.977 Td [(by)-250(Salvatore)-250(Filippone)]TJ 14.396 -11.956 Td [(and)-250(Alfredo)-250(Buttari)]TJ/F62 9.9626 Tf 29.957 -11.955 Td [(Aug)-250(1st,)-250(2024)]TJ 0 g 0 G 0 g 0 G ET endstream endobj -578 0 obj +659 0 obj << /Type /XObject /Subtype /Image @@ -495,14 +716,14 @@ endobj /Height 480 /BitsPerComponent 8 /ColorSpace /DeviceRGB -/SMask 588 0 R +/SMask 669 0 R /Length 921600 >> stream ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýýûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷öööõõõõõõððððððððððððððððððíííùùùþþþÿÿÿÿÿÿþþþùùùæææäääááááááááááááááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½ººº¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôþþþÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿþþþûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···üüüûûýüüþýýþýýþüüþüüþüüþüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿûûýòòøòòøòòøòòøòñøòñøéèóäãñäãñäãñãâðâáïàßîÓÑçÓÑçÓÑçÓÑçÑÏæÑÏæÉÇâ¿Þ¿Þ¿Þûûýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄýýþÑÎ棟ͣŸÍ£ŸÍ£ŸÍ£ŸÍ™”ȔŔŔŔŔŒŽÄ…€½…€½…€½…€½…€½…€½~x¹vpµvpµvpµvpµvpµvpµhb­f`¬f`¬f`¬f`¬f`¬b[ªWP¤WP¤WP¤WP¤WP¤WP¤LDžH@œH@œH@œH@œH@œF>›90”90”90”90”90”90”0') ‹) ‹) ‹) ‹) ‹) ‹„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•Æÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõùùü7.“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúîíö'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ûûüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿üüþ½ºÛƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþŸ›ËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùóòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿ|»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüãâ𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üûýÌÊッƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ïïïþþÿJBƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆþýþ«§Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³öö÷ùøû5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ †àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³úúûêéô$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿpj²ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¾¼ÜýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûýÖÔ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2•ûúýÿÿÿÿÿÿþþþììì³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀýüþ·³Øƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ôôôüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌþþÿš•ÈƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùòñø+"ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüÞÝí…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççÿÿÿ_X¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½üüýÅÃàƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ñññþþþG?›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆþþþ¥¡Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š$Š,#+"$Š†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³÷÷÷ööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$ŠJC¤fa·~Ê—•Ù¨§äµ´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é«ªæ›ÜŠ‡ÐxtÃgb¸UO«B;Ÿ) ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿˆƒ¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ2*”gb¸š˜Ú··î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æÓvrÂYT¯=5›†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúûèçò#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIB£‘ŽÔ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³²ë’Õlh¼H@¡#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ãããÿÿÿkd¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;3™”’׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶îlg»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüýÐÎ僃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„id¹¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtõ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííÿÿÿPH ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆŒÒ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š—•Ù¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtˆÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõûûý91”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ŒÒ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtà †àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎþþÿ”Ńƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒql¾¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ<5›¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtþ¼ÜýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿwq¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ:2•ûúýÿÿÿÿÿÿþþþîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ûúüÜÚì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC<Ÿ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÕÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒʸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï£¢áŽ‹ÒyuÄni¼gb¸d_¶d_¶hc¸rm¿|É‘ŽÔª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿ýüþ¾»Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“×UO«*!Žƒƒƒƒƒƒƒƒƒƒƒƒ) G?¡d_¶~Ê©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþB9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIB£¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨åF?¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„;3šid¹—•Ø··î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃÇÅáüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþ œËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi½¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç/&‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) \V°›™Û¸¸ï¸¸ï¸¸ïxtÃB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùõõú/&ŽƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠˆÐ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïTN«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;4š{wÅ°°êxtàœËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿƒ~¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ žÞ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­è†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰91˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüäã𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»ûûýÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ ‡¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ïïïþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì$Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆýýþ¬¨Òƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïrnÀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³öööùøû5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïd_¶„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ’Õ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•ØMF¦…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúíìõ&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsoÀ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨åzvÄLE¥&‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒNH§¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïœÝ{wÅ[U°<5›†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íš˜Û{wÅ[U¯:2™„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€}ɸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ëŽ‹Òid¹D= †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀüüý¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;4š··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê}yÇE>¡„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ †àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³óóóüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„€Ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨årnÀ1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌþþÿš–Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ*!Ž¬«æ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··îƒÊ1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¾¼ÜýýþÿÿÿÿÿÿþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùòñø+"ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@¡µµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ìoj½†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2•ûúýÿÿÿÿÿÿþþþîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿ~x¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒUO¬¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï›Ü1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúüâàƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒLE¥°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççÿÿÿ`Y©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*“š˜Ú¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç4+”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½üüýÈÅჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…`Z²­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ñññþþþH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰hc¸¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆýýþ¦¢Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„HA¢…‚̵´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³÷÷÷ööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&ŒQJ¨{wÅ¥¤â¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿ‰„¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“YS®}yÆŸžß··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï«ªæ„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúûéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$ŠH@¡jeºŠÑ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï?7œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿmf°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…;3šb]´‘ŽÔ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_µƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒPH¡ÿÿÿþþþþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷õõõõõõõõõððððððððððððððððððíííëëëëëëëëëëëëëëëîîîÿÿÿÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµ½½½½½½½½½½½½½½½½½½ÇÇÇüüýÑÏ惃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(`Z²¢¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„€Ëƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¦¢ÏÿÿÿçççááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½»»»¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþØØØÎÎÎÒÒÒÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÜÜÜÝÝÝÝÝÝÝÝÝÝÝÝÝÝÝßßßããããããããããããããããããèèèééééééééééééééééééïïïïïïïïïïïïïïïïïïñññóóóóóóóóóóóóóóóóóóööö÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷úúúúúúúúúúúúúúúúúúûûûüüüûûûúùûúùûúùûúúûüûüüûüûûýúúüúúüúúüûûüýüýýüýüûýüûýüûýüûýþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿaZ©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„`Z²²²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïš˜Úƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-#üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿýüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýþóòøóòøóòøóòøòñøòñøêéóåäñåäñåäñãâðâáïâáïÕÓèÕÓèÕÓèÕÓèÒÐæÑÏæÌÊã¿Þ¿Þ¿Þ¿Þ¿Þ¿޶³Ø³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö°­Ô£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍš–ɔŔŔŔŔŔŅ€½…€½…€½…€½…€½…€½y¹vpµŠ„¿ýýþ²¯ÕƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒA:ž³²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©åƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿ‡¾WP¤WP¤WP¤WP¤WP¤WP¤OG H@œH@œH@œH@œH@œH@œ91”90”90”90”90”90”3*‘) ‹) ‹) ‹) ‹) ‹) ‹…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÄàÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒid¹¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ìƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿüüýÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Œ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßïþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿþþÿIAœƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–’Æÿÿÿ~x¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒœ›Ý¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿýýþª¦Ðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠÑ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯®éƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷ùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅþþþÿÿÿþþþùøü4+‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMF¦MF¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¥¤âƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ˜”ÇÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï›™ÛLE¥„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰úùüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýþéèó$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¡ŸßZT¯ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKD¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¹ÚÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«çrm¿5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰£¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïb]´ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿüüþÕÓ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷ðð÷+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï–”Ø^X±+"ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ)“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï?7œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒØÖéþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿÿÿÿVO¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï–”Øid¹A9ž†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†^X±©©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U¦ÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿýýþ¶³ØƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙØëýýþD<šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³²ë—”ØxtÃ\V°G?¡5,•#‰ƒƒƒƒƒ(>6›XR®ƒʯ®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëôüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿûûý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë®­è±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG?¡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿþþþ˜“ǃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …÷÷úöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿüüþññ÷*!Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿÿÿÿ{u¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒwqµÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‹ˆÐƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿüüýßÝî…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä*!ŽƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊäÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾Ýƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë;3šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKCžþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿüüýÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¼ºÛÿÿÿ^W¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êC<Ÿƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒäãðýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿþþÿG?›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰÷÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\V°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞ6.–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþÿ¤ Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„¼ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“d_¶—•Ø··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ítpÁ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒóòøùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿþþþööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-$`[³“‘Ö¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµµí„€Ë2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŸ›ËÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰G?¡lg»‘Ô²²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞjeº+"ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(‹ûûüñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿýýþéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…<4š`Z²wsÊѣ¢á¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î¢ ß|È\V°2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÁ¾ÝÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿÿÿÿjc®ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†2*”HA¢WQ­b]µmi½xtÃxtÃ}yÆ~ÊxtÃxtÃql¾fa·YS®KD¤6-•†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜þþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿüüýÏÍ僃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÛÚìþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿþþþPH ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb[ªÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅþþþÿÿÿÿÿÿýýþ°­Ôƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒîíõûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿûúý91”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŽ‰ÂÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡ùùüõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿþþÿîíö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¤ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ´±ÖÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷îíö(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ýýýììì³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿüüþÛÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜ÊƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚØëýýþD<šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿýýþ¾»Ûƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ6.–:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™80˜0'’)„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶î©©å•“×zvÄ`[³<4š…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzt·ÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþÿŸ›Ëƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êˆ…ÏSLª#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õõùøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿþþÿôôù/&Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì~{È3*“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒwqµÿÿÿ‘ăƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡ÎÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿ‚}»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î~{È'Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûûüñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿýýþãâ𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æJC£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾ÝƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïni¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ½ºÛÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿüüþÎÌ䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|É…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰÷÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàÞîþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþJBƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï{wŃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„¼ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿýýþ«§Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒéèóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒðï÷úúú¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿùøü5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì5-–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘ÆÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿŽ‰Âƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰úùüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿüüþêéô$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG@¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒº·ÚÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿqk²ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï}yÇNH§NH§NH§NH§NH§NH§NH§NH§NH§UO«_Y²lh¼ŠÑ¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ80“ýýþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿüüýÖÔ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒ†UO«œÝ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï0'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒììõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÖÔéþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†rm¿¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ[T¦ÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿýýþ¸µÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŒ‰ÑƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒëêôüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿþþþš•Èƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒA9¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï/&‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ø÷û÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿþþÿòòø+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïC<Ÿƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷îíö(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¬©ÒÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïSLªƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&üüýïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿüüýßÝî…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒlh¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚØëýýþC;™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒËÉãÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿÿÿÿ_X¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ`[³¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIAþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿüüýÆÄჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒgb¸¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï]W±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒãâðýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþÿH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒtpÁ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïVP¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …õõúèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒrl³ÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿýýþ¥¡Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒxr¶ÿÿÿ‘ăƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒóòøùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿþþþööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï7/—ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ˜ÊÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿÿÿÿˆƒ¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒni¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾Ýƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Šúúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿýýþéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ6.–³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ½ºÛÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÀ½ÝÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿÿÿÿle¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ$‰ø÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜þþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿüüýÐÎ惃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…XR®¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïHA¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„½ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚÙëþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿÿÿÿPH ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒ"ˆC< jeº¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒéèóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ_X¨ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿýýþ³¯Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à©¨å³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïmi¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿúúü:2•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë'ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÈÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŒ‡ÁÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿþþþ”Ńƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïc^µƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …ùùüõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿþþÿïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•Ø…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ²®ÕÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿwq¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ/'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒììõõôù0&ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ýýýîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿüüþÜÚì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ¦¢ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÎæÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿÿÿÿ[S¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢þþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýþ¾¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•Ø/'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒæåñýýý»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿýýþB9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ëgb¸…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¤ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3)7/“7/“7/“7/“7/“>6–SL¢SL¢–‘ÆÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççþþþÿÿÿÿÿÿþþÿ¡Ìƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì{wÅ,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòñ÷ñð÷†¾„¼„¼„¼„¼„¼ˆÁ™•È™•È™•È™•È™•È™•Èª¦Ñ­©Ó­©Ó­©Ó­©Ó­©Ó±­Õ¾»Ü¾»Ü¾»Ü¾»Ü¾»Ü¾»ÜÈÆáÍËäÍËäÍËäÍËäÍËäÎÌåÚÙëÚÙëÚÙëÚÙëÚÙëÚÙëàÞîæäñæäñæäñæäñæäñæäñíìõïîöïîöïîöïîöïîöñð÷öõúöõúõôúôôùôôùôôù÷÷ûùùüùøûøøüøøüøøü÷÷ûüüýüüýûûýûûýûûýûûýýýþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýý···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÿÿÿþþÿôôù.%Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßhc¸)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–’ÆþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿöõúóòøóòøóòøòñøòñøïïöåäñåäñåäñåäñâáïâáïÛÙëÕÓèÕÓèÕÓèÓÑçÑÏæÑÏæ¿Þ¿Þ¿Þ¿Þ¿Þ¿޽ºÛ³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö§£Ï£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ¡Ì”ŔŔŔŔŔŋ†À…€½…€½…€½…€½…€½…€½vpµvpµvpµvpµ½ºÛÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©å‡„Î`[³5-–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëôôóù\U§WP¤WP¤WP¤TM£H@œH@œH@œH@œH@œH@œ@7—90”90”90”90”90”90”*!Œ) ‹) ‹) ‹) ‹) ‹$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòò÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿ@7—ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï›™Û„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€ËƒÊxtÃrnÀc^µUO«A9&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…½ÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …ööúäãð!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ®«Ôÿÿÿf`¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââþþþÿÿÿÿÿÿÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÍËäþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒQI¡ýýþ­©ÒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ìììÿÿÿÿÿÿÿÿÿýýþ®«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒåãñùøü6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷íìõ'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œÌÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ)‹ùùü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÂ¿ÞÿÿÿXQ¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜üüý¹¶ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÜÛìüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒf`­þþÿ›—Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññþþþÿÿÿÿÿÿüüþíìõ(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒííöòñø+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþš•Èƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠÂÿÿÿy¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùÿÿÿÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡÷öúáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ÿÿÿaZ©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ7/“ûûýÈÅჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÒÐçþþþH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÃÃÃþþþÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTL£ýýþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòööú1(ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ}w¹ÿÿÿŠ…ÀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµöööÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùéèó$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ§£Ïÿÿÿmf°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûûÿÿÿÿÿÿÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+!ŒúùüÕÓ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿQI¡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿýýþ®«ÓƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œýýþ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßîûûý:2•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿ–‘ƃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒðð÷ïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿys·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰øøûÜÛì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸Úÿÿÿ\T¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”üûýÅÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³æææþþþÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ×ÕéýýþB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿüüþíìõ(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U¦þþþ¢žÌƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿþþþš•Èƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒëêôõõú0&ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …ööúåãñ!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓÿÿÿga­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûþþþÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(ûûýÎÌ䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊäþþÿOG ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁþþþÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKCžýýþ¯«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëþþþÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒäãðùùü6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿ‘ŒÃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòñ÷ííõ'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒž™Êÿÿÿtn´ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(‹ùùü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞþþþÿÿÿÿÿÿýýþ®«ÓƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÁ¾ÝÿÿÿXQ¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜üüýº·ÚƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÛÙìüüþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ_X¨þþþœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõòòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ0'’91˜A:žA9:2™3*“(ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŽ‰ÂÿÿÿzºƒƒƒƒƒtpÁxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃni¼lg»d_¶]W±G?¡0(“…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&ŒxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃ[U°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ>6›xtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃMF¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š\V°ƒÊ£¢á¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì£¡àŽ‹ÒsoÀXR®;4š†ƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡÷÷ûâáï †ƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ìŠÑ`[³,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„̸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+"~{ȳ²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ì‘Õlg» ‡ƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ³°Öÿÿÿb[ªƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ìvrÂ) ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï;3™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ­¯®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ûûýÇÅჃƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä80—ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\V°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïtpÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ`Z²¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿüüþíìõ'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþIAœƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç*!Žƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTMª··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿþþþ™”ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢ýýþ©¥Ðƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï†ƒÍƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*“¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰ª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒçæòùøû4+‘ƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï0(“ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï†ƒÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjeº¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î«ªæ¢¡à®­è··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿ‹†Àƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã0'’0'’0'’0'’0'’2*”C< `[³¥¤â¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïb]µƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì&ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒžÞ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïƒÊG@¢'Œƒƒƒƒƒ+"IB£fa·Œ‰Ñ³²ë¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõÿÿÿÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùéèó$ˆƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒ#‰š˜Û¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„̃ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï^X±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&‘¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïOI¨ƒƒƒƒƒƒƒƒƒƒƒƒ!‡KD¤~Ê´³ì¸¸ï*!Žƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡Îÿÿÿnh±ƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¢ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒG@¢¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œa\´$Šƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûûÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹úùüÕÓ脃ƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ†··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA:ž³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒUO¬¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïfa·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿRJ¡ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ„¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–Ùƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVP¬¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¡Ÿßƒ„€Ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk½ƒƒƒƒƒƒƒƒƒƒƒƒƒ`[³¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïmi½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿýýþ¹¶ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œýýþµ²×ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ4+”¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï~ʃƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‘ŽÔ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïkfºƒNH§¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä„ƒƒƒƒƒƒƒƒƒƒƒƒXR®¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¥¤â"ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒßÞîûûý;3•ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ~zǸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïWQ­ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ.%‘··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï4+”ƒ†°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG?¡ƒƒƒƒƒƒƒƒƒƒƒƒKD¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜJC¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒib­þþÿ—’ǃƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒ4+”~{ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï«ªæ"ˆƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒgb¸¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï~ʃƒƒƒƒƒƒƒƒƒƒƒ) ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã}yÇTMª1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒïî÷ñð÷*!Œƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì˜–Ù˜–Ù˜–Ù˜–Ù˜–Ù žÞ´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïRK©ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¢ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_¶ƒƒƒIB£¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ì#‰ƒƒƒƒƒƒƒƒƒƒƒƒœ›Ý¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯®éÓpk½LE¥&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘Æÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïa\´ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ?7œ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï0'’ƒƒƒ†­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïXR®ƒƒƒƒƒƒƒƒƒƒƒƒWQ­¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïœ›Ýkfº2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿýýþ®«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰øøûÝÜì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–ÙA9ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒxtø¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï–“׃ƒƒƒƒ{wŸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒ…š˜Ú¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘ÕA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßïþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¹¹¹½½½½½½½½½½½½½½½ÞÞÞÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¹·Ùÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íxtÃ6.–ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†®­è¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`Z²ƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï/'’ƒƒƒƒƒƒƒƒƒƒƒƒ.%‘«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„€Ë#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC;™þþþëëëÇÇÇÇÇÇÇÇÇÉÉÉÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÓÓÓÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÙÙÙÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜááááááááááááááááááãããææææææææææææææææææêêêëëëëëëëëëëëëëëëìììððððððñññðððððððððôôôõõõööö÷÷÷÷÷÷÷÷÷öööúúúúúúûûûûûûûûûûûûýýýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”üüý¿Þƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··îws†ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒPJ¨¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î*!Žƒƒƒƒƒ„©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïid¹ƒƒƒƒƒƒƒƒƒƒƒƒƒ/&‘›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¡Ÿß) ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊãÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþþþþþþþþþþÿÿÿþþþýýýýýýýýýýýýÿÿÿÿÿÿþþþþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&7/“7/“7/“7/“7/“;2•SL¢SL¢SL¢SL¢SL¢SL¢`Y©mf°mf°mf°mf°mf°mf°‚}¼„¼„¼„¼„¼„¼‹…¿™•È™•È™•È™•È™•È™•È§£Ï­©Ó­©Ó­©Ó­©Ó­©Ó®ªÓ¾»ÜðïöýýþC;™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘Ô†ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠˆÐ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘ŽÔƒƒƒƒƒƒƒvr¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï£¢áƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†f`¶°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“׆ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+"ŒòòøþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüüþøøûÍËäÙØêÚÙëÚÙëÚÙëÚÙëÚÙëÞÝíæäñæäñæäñæäñæäñæäñììõïîöïîöïîöïîöïîöðï÷öõúöõúöõùööúööúööúøøûúúüúúüûûýûûýûûýûûýýýþýýþýýþýýýýýýýýýþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿúúü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµµí¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à©¨å¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï}yǃƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) µµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒ?7œ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&‹`[³œÝ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ™Êþþþÿÿÿÿÿÿÿÿÿûûûûûûûûûûûûúúúúúú÷÷÷÷÷÷÷÷÷÷÷÷õõõõõõóóóñññññññññðððððððððìììëëëëëëëëëëëëëëëêêêææææææææææææææææææâââáááááááááááááááàààÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÙÙÙÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÒÒÒÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÐÐÐÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÉÉÉÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÀÀÀ½½½½½½½½½½½½½½½½½½¹¹¹¸¸¸¸¸¸ÎÎÎÿÿÿµ±×ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒ ‡F?¡Œ‰Ð¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb]´¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í&Œƒƒƒƒƒƒƒ„¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï{wŃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“YT¯„€Ë¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5–ûûýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒoj½¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‡„΃ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŠˆÐƒƒƒƒƒƒƒƒƒql¾¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ,#OI¨tpÁ¡Ÿß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¼¹Úýýþÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒ…£¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í ‡ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ80˜¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŠ‡Ðd_¶d_¶d_¶d_¶d_¶d_¶d_¶d_¶d_¶|xƸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïSL©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’c^µ©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒUN£ÿÿÿÿÿÿÿÿÿÿÿÿÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒrm¿¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï>6›ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsoÀ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŒ‰Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š•“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïjeºƒƒƒƒƒƒƒƒƒƒƒƒ„ÔÒèüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒ_Y²¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïPI¨ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶î*!Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+”¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïvrƒƒƒƒƒƒƒƒƒƒƒƒuo´ÿÿÿÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ¨¤Ðƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒJC¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïd_¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïql¾ƒƒƒƒƒƒƒƒƒƒƒ%‰ëêôüüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒ€}ɸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïOI¨ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„̸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒ) b]´"ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¬«ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_µƒƒƒƒƒƒƒƒƒƒƒ‘ÄÿÿÿÿÿÿÿÿÿÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒ) °°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9žƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï;3šƒƒƒƒƒƒ0'’¸¸ï­¬çjeº&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2™¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïWQ­ƒƒƒƒƒƒƒƒƒƒ3*‘øøûÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ)–“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î) ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ[U°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïvrƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï´³ì†ƒÍMF¦†ƒƒƒƒƒƒƒƒƒƒƒ)›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï4+”ƒƒƒƒƒƒƒƒƒƒ¯¬Óýýþÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã0'’0'’0'’0'’0'’1)“;3™NH§vr¯®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à›Üƒƒƒƒ—”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­èxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtâ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç†ƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­èˆ…Îf`¶C<Ÿ'Œƒƒƒƒ…5-–hc¸«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¦¥ãƒƒƒƒƒƒƒƒƒƒKDžþþþÿÿÿÿÿÿþþþááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–Ƀƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒƒ3*“¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃZT¯¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïMF¦ƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï¬«æ¢¡à§¦ãµ´ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïsoÀƒƒƒƒƒƒƒƒƒƒËÉãüüýÿÿÿþþþüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–Ù„ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œµµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‡„΃ƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë-$ƒƒƒƒƒƒƒƒƒib®ÿÿÿÿÿÿÿÿÿÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©å5,•ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒ„§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¦¥ã„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹‰Ñ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í'Œƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï_Y²ƒƒƒƒƒƒƒƒƒ†âáïýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£á2*”ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒUO«¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï^X±ƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïwsÆ€½ÿÿÿÿÿÿÿÿÿÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êgb¸ ‡ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï91˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰³²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï™—Úƒƒƒƒ&‹Ó¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê^X±ƒƒƒƒƒƒƒƒƒƒ.%Žóóøþþÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­è†ƒÍSL©#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒ"ˆ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†ƒÍ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï6-•ƒƒƒƒƒ'ŒYS®Š‡Ð¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë}yÆ,#ƒƒƒƒƒƒƒƒƒƒƒ¢žÍþþÿÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü'Šƒƒƒƒƒƒjeºni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼d_¶^X±WQ­C< ) ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼jeºƒ3*“ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼KD¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5›ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼A9ƒƒƒƒƒƒƒƒ„80—^X±}yÆ”’ת©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì™—Ú|xÆTN«"ˆƒƒƒƒƒƒƒƒƒƒƒƒ@7—üüýÿÿÿÿÿÿÿÿÿæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ0'’7/—>6›D= ;4š4+”+"…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÀ½Ýüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U§ÿÿÿÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÛÚìüüýÿÿÿÿÿÿúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzt·ÿÿÿÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Šíìõüüþÿÿÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’ÇþþþÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ8/“ûúýÿÿÿÿÿÿÿÿÿééé³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ´±ÖýýþÿÿÿÿÿÿÿÿÿÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæüüþÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒnh±ÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡çæòýýþÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žõõùþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨¤ÐýýþÿÿÿÿÿÿþþþÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–ɃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒE=šþþÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÄÁßüüýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb[ªÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…àÞîüüþÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzºÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ñð÷þþÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒœ˜ÊþþþÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5–ûûýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸Úýýþÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTM£ÿÿÿÿÿÿÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÓÑçüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒtn´ÿÿÿÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰ëêôüüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‘ŒÃÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*‘øøûÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓþþÿÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒJCþþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÊÈâüüþÿÿÿþþþüüü»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒga­ÿÿÿÿÿÿÿÿÿÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†âáïýýþÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…€½ÿÿÿÿÿÿÿÿÿÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-$óóøþþÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¢žÌþþþÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ?6—üüýÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¿½Üýýþÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\T¦ÿÿÿÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÙØêüüþÿÿÿÿÿÿúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒys·ÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&‰íìõüüþÿÿÿÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘ÆÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ8/“ûúýÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ³¯ÕýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÎæüüýÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmf°ÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡çæòýýþÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠ…ÀÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žõõúþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ§£ÏþþÿÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒD<šýýþÿÿÿÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³îîîýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÂ¿Þüüýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿd]«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰) ‹) ‹) ‹) ‹) ‹+"Œ90”90”90”90”90”90”A8˜H@œH@œH@œH@œH@œH@œUN£WP¤WP¤WP¤WP¤WP¤\T¦f`¬f`¬f`¬f`¬f`¬f`¬qk²vpµvpµvpµvpµvpµwq¶…€½…€½…€½…€½àßîüüýÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹úúúõôù£ŸÍ£ŸÍ£ŸÍ£ŸÍ¨¤Ð³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö½»Û¿Þ¿Þ¿Þ¿Þ¿ÞÃÀÞÑÏæÑÏæÓÑçÓÑçÓÑçÓÑçÛÚìâáïâáïäãñäãñäãñäãññð÷òñøòòøòòøòòøòòøööúÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþýýþýýþýýþùùüùùüùùüùùüüüýýýþûûýûûþûûþûûþûûýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿþþþôôôµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµ÷÷÷þþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³×××ÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿÿÿÿÿÿÿþþþÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´óóóþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççþþþÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ãããþþþÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ìììþþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸ûûûÿÿÿÿÿÿþþþÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññþþþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùÿÿÿÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÃÃÃþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³îîîþþþÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµöööÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³×××ÿÿÿÿÿÿÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´óóóþþþÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿÿÿÿþþþÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³æææþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûþþþÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëþþþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞþþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»ýýýÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿþþþ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸»»»½½½½½½½½½½½½½½½½½½ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÄÄÄÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇËËËÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÍÍÍÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÔÔÔÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÞÞÞááááááááááááááááááäääæææææææææææææææçççëëëëëëëëëëëëëëëëëëîîîððððððþþþÿÿÿÿÿÿþþþüüüúúúúúúûûûûûûûûûûûûþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ endstream endobj -588 0 obj +669 0 obj << /Type /XObject /Subtype /Image @@ -538,7 +759,7 @@ stream ãýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüËXÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ<Áüÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿý¨+üÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿú#’þÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþÿ‰íþýýüüüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýññññððéâââáßßÑÏÏÏÍÌÇ»»»»µN™™™™™ˆˆˆˆˆˆxwwwwwqffffffZUUUUUSDDDDDD;333333$""""" endstream endobj -591 0 obj +672 0 obj << /Length 78 >> @@ -547,13 +768,13 @@ stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 320.07 90.438 Td [(2)]TJ +/F62 9.9626 Tf 320.07 90.438 Td [(2)]TJ 0 g 0 G ET endstream endobj -637 0 obj +718 0 obj << /Length 15343 >> @@ -561,9 +782,9 @@ stream 0 g 0 G 0 g 0 G BT -/F51 14.3462 Tf 99.895 706.042 Td [(Contents)]TJ +/F59 14.3462 Tf 99.895 706.042 Td [(Contents)]TJ 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf 0 -22.894 Td [(1)-1000(Introduction)]TJ +/F59 9.9626 Tf 0 -22.894 Td [(1)-1000(Introduction)]TJ 0 g 0 G [-26723(1)]TJ 0 0 1 rg 0 0 1 RG @@ -571,7 +792,7 @@ BT 0 g 0 G [-24361(2)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.074 Td [(2.1)-1050(Basic)-250(Nomenclatur)18(e)]TJ +/F62 9.9626 Tf 14.944 -12.074 Td [(2.1)-1050(Basic)-250(Nomenclatur)18(e)]TJ 0 g 0 G [-339(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -606,11 +827,11 @@ BT [-2000(8)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.116 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ +/F59 9.9626 Tf -14.944 -22.116 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ 0 g 0 G [-19810(9)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.074 Td [(3.1)-1050(Descriptor)-250(data)-250(str)8(uctur)18(e)]TJ +/F62 9.9626 Tf 14.944 -12.074 Td [(3.1)-1050(Descriptor)-250(data)-250(str)8(uctur)18(e)]TJ 0 g 0 G [-369(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -631,14 +852,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 542.324 Td [(local)]TJ +/F62 9.9626 Tf 186.779 542.324 Td [(local)]TJ ET q 1 0 0 1 208.019 542.523 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 211.008 542.324 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(r)18(ows)]TJ +/F62 9.9626 Tf 211.008 542.324 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(r)18(ows)]TJ 0 g 0 G [-471(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -652,14 +873,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 530.25 Td [(local)]TJ +/F62 9.9626 Tf 186.779 530.25 Td [(local)]TJ ET q 1 0 0 1 208.019 530.449 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 211.008 530.25 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ +/F62 9.9626 Tf 211.008 530.25 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ 0 g 0 G [-673(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -673,14 +894,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 518.176 Td [(global)]TJ +/F62 9.9626 Tf 186.779 518.176 Td [(global)]TJ ET q 1 0 0 1 214.644 518.375 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 518.176 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(r)18(ows)]TJ +/F62 9.9626 Tf 217.633 518.176 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(r)18(ows)]TJ 0 g 0 G [-641(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -694,14 +915,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 506.102 Td [(global)]TJ +/F62 9.9626 Tf 186.779 506.102 Td [(global)]TJ ET q 1 0 0 1 214.644 506.301 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 506.102 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ +/F62 9.9626 Tf 217.633 506.102 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ 0 g 0 G [-843(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -715,14 +936,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 494.027 Td [(global)]TJ +/F62 9.9626 Tf 186.779 494.027 Td [(global)]TJ ET q 1 0 0 1 214.644 494.227 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 494.027 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ +/F62 9.9626 Tf 217.633 494.027 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ 0 g 0 G [-999(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -736,7 +957,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 481.953 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ +/F62 9.9626 Tf 186.779 481.953 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ 0 g 0 G [-852(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -764,28 +985,28 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.941 445.731 Td [(cd)]TJ +/F62 9.9626 Tf 188.941 445.731 Td [(cd)]TJ ET q 1 0 0 1 200.049 445.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 203.038 445.731 Td [(get)]TJ +/F62 9.9626 Tf 203.038 445.731 Td [(get)]TJ ET q 1 0 0 1 217.195 445.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 220.184 445.731 Td [(lar)18(ge)]TJ +/F62 9.9626 Tf 220.184 445.731 Td [(lar)18(ge)]TJ ET q 1 0 0 1 242.729 445.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 245.718 445.731 Td [(thr)18(eshold)-190(\227)-190(Get)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -76.084 -11.955 Td [(ping)-250(switch)]TJ +/F62 9.9626 Tf 245.718 445.731 Td [(thr)18(eshold)-190(\227)-190(Get)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -76.084 -11.955 Td [(ping)-250(switch)]TJ 0 g 0 G [-819(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -799,28 +1020,28 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.941 421.702 Td [(cd)]TJ +/F62 9.9626 Tf 188.941 421.702 Td [(cd)]TJ ET q 1 0 0 1 200.049 421.901 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 203.038 421.702 Td [(set)]TJ +/F62 9.9626 Tf 203.038 421.702 Td [(set)]TJ ET q 1 0 0 1 215.88 421.901 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 218.869 421.702 Td [(lar)18(ge)]TJ +/F62 9.9626 Tf 218.869 421.702 Td [(lar)18(ge)]TJ ET q 1 0 0 1 241.414 421.901 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 244.403 421.702 Td [(thr)18(eshold)-190(\227)-190(Set)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -74.769 -11.955 Td [(ping)-250(switch)]TJ +/F62 9.9626 Tf 244.403 421.702 Td [(thr)18(eshold)-190(\227)-190(Set)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -74.769 -11.955 Td [(ping)-250(switch)]TJ 0 g 0 G [-819(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -834,14 +1055,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 397.673 Td [(p)]TJ +/F62 9.9626 Tf 186.779 397.673 Td [(p)]TJ ET q 1 0 0 1 193.364 397.872 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 196.353 397.673 Td [(adjcncy)-250(\227)-250(Get)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ +/F62 9.9626 Tf 196.353 397.673 Td [(adjcncy)-250(\227)-250(Get)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ 0 g 0 G [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -855,14 +1076,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 185.464 385.599 Td [(p)]TJ +/F62 9.9626 Tf 185.464 385.599 Td [(p)]TJ ET q 1 0 0 1 192.049 385.798 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 195.038 385.599 Td [(adjcncy)-250(\227)-250(Set)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ +/F62 9.9626 Tf 195.038 385.599 Td [(adjcncy)-250(\227)-250(Set)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ 0 g 0 G [-272(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -876,7 +1097,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.423 373.524 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(pr)18(ocess)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ +/F62 9.9626 Tf 188.423 373.524 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(pr)18(ocess)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ 0 g 0 G [-361(.)]TJ 0 g 0 G @@ -911,7 +1132,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 325.228 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 186.779 325.228 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-286(.)-500(.)-500(.)]TJ 0 g 0 G @@ -925,7 +1146,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 313.154 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 186.779 313.154 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-670(.)]TJ 0 g 0 G @@ -939,7 +1160,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 301.08 Td [(nnzer)18(os)-190(\227)-190(Get)-190(number)-190(of)-190(nonzer)18(o)-190(elements)-190(in)-190(a)-190(sparse)]TJ -17.145 -11.955 Td [(matrix)]TJ +/F62 9.9626 Tf 186.779 301.08 Td [(nnzer)18(os)-190(\227)-190(Get)-190(number)-190(of)-190(nonzer)18(o)-190(elements)-190(in)-190(a)-190(sparse)]TJ -17.145 -11.955 Td [(matrix)]TJ 0 g 0 G [-839(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -953,7 +1174,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 277.051 Td [(size)-354(\227)-354(Get)-355(maximum)-354(number)-354(of)-354(nonzer)18(o)-354(elements)]TJ -17.145 -11.955 Td [(in)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 186.779 277.051 Td [(size)-354(\227)-354(Get)-355(maximum)-354(number)-354(of)-354(nonzer)18(o)-354(elements)]TJ -17.145 -11.955 Td [(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-393(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -974,7 +1195,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 228.992 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ +/F62 9.9626 Tf 186.779 228.992 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ 0 g 0 G [-278(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -988,21 +1209,21 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 180.344 216.918 Td [(bld,)-250(is)]TJ +/F62 9.9626 Tf 180.344 216.918 Td [(bld,)-250(is)]TJ ET q 1 0 0 1 207.541 217.117 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 210.53 216.918 Td [(upd,)-250(is)]TJ +/F62 9.9626 Tf 210.53 216.918 Td [(upd,)-250(is)]TJ ET q 1 0 0 1 241.314 217.117 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 244.303 216.918 Td [(asb)-250(\227)-250(Status)-250(check)]TJ +/F62 9.9626 Tf 244.303 216.918 Td [(asb)-250(\227)-250(Status)-250(check)]TJ 0 g 0 G [-569(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1016,28 +1237,28 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 180.344 204.844 Td [(lower)74(,)-250(is)]TJ +/F62 9.9626 Tf 180.344 204.844 Td [(lower)74(,)-250(is)]TJ ET q 1 0 0 1 217.663 205.043 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 220.652 204.844 Td [(upper)74(,)-250(is)]TJ +/F62 9.9626 Tf 220.652 204.844 Td [(upper)74(,)-250(is)]TJ ET q 1 0 0 1 259.306 205.043 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 262.295 204.844 Td [(triangle,)-250(is)]TJ +/F62 9.9626 Tf 262.295 204.844 Td [(triangle,)-250(is)]TJ ET q 1 0 0 1 309.069 205.043 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 312.058 204.844 Td [(unit)-250(\227)-250(Format)-250(check)]TJ +/F62 9.9626 Tf 312.058 204.844 Td [(unit)-250(\227)-250(Format)-250(check)]TJ 0 g 0 G [-441(.)-500(.)]TJ 0 g 0 G @@ -1065,7 +1286,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 196.094 168.622 Td [(zer)18(os)-250(\227)-250(Eliminate)-250(zer)18(o)-250(coef)18(\002cients)]TJ +/F62 9.9626 Tf 196.094 168.622 Td [(zer)18(os)-250(\227)-250(Eliminate)-250(zer)18(o)-250(coef)18(\002cients)]TJ 0 g 0 G [-677(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1079,7 +1300,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 156.548 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ +/F62 9.9626 Tf 186.779 156.548 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ 0 g 0 G [-870(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1093,7 +1314,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 189.429 144.474 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ +/F62 9.9626 Tf 189.429 144.474 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ 0 g 0 G [-309(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1120,282 +1341,7 @@ ET endstream endobj -404 0 obj -<< -/Type /ObjStm -/N 100 -/First 877 -/Length 7532 ->> -stream -403 0 407 48 408 127 411 175 412 257 415 305 416 361 419 409 420 452 423 494 -424 540 427 587 428 662 431 709 432 797 435 844 436 917 439 964 440 1038 443 1085 -444 1151 447 1198 448 1252 451 1299 452 1380 455 1427 456 1483 459 1530 460 1581 463 1629 -464 1675 467 1723 468 1773 471 1821 472 1871 475 1919 476 1984 479 2032 480 2097 483 2145 -484 2205 487 2253 488 2298 491 2346 492 2394 495 2436 496 2467 499 2514 500 2594 503 2641 -504 2729 507 2776 508 2863 511 2910 512 3013 515 3055 516 3081 519 3128 520 3231 523 3278 -524 3377 527 3424 528 3525 531 3572 532 3673 535 3720 536 3821 539 3868 540 3971 543 4014 -544 4055 547 4103 548 4160 551 4208 552 4262 555 4310 556 4375 559 4423 560 4500 563 4548 -564 4607 567 4655 568 4706 571 4749 572 4784 575 4832 576 4904 577 4949 579 5078 582 5184 -583 5240 3 5296 580 5350 590 5479 592 5593 589 5650 636 5717 593 6211 594 6357 595 6503 -% 403 0 obj -<< /S /GoTo /D (subsection.6.24) >> -% 407 0 obj -(6.24 psb\137get\137overlap \204 Extract list of overlap elements) -% 408 0 obj -<< /S /GoTo /D (subsection.6.25) >> -% 411 0 obj -(6.25 psb\137sp\137getrow \204 Extract row\(s\) from a sparse matrix) -% 412 0 obj -<< /S /GoTo /D (subsection.6.26) >> -% 415 0 obj -(6.26 psb\137sizeof \204 Memory occupation) -% 416 0 obj -<< /S /GoTo /D (subsection.6.27) >> -% 419 0 obj -(6.27 Sorting utilities \204 ) -% 420 0 obj -<< /S /GoTo /D (section.7) >> -% 423 0 obj -(7 Parallel environment routines) -% 424 0 obj -<< /S /GoTo /D (subsection.7.1) >> -% 427 0 obj -(7.1 psb\137init \204 Initializes PSBLAS parallel environment) -% 428 0 obj -<< /S /GoTo /D (subsection.7.2) >> -% 431 0 obj -(7.2 psb\137info \204 Return information about PSBLAS parallel environment) -% 432 0 obj -<< /S /GoTo /D (subsection.7.3) >> -% 435 0 obj -(7.3 psb\137exit \204 Exit from PSBLAS parallel environment) -% 436 0 obj -<< /S /GoTo /D (subsection.7.4) >> -% 439 0 obj -(7.4 psb\137get\137mpi\137comm \204 Get the MPI communicator) -% 440 0 obj -<< /S /GoTo /D (subsection.7.5) >> -% 443 0 obj -(7.5 psb\137get\137mpi\137rank \204 Get the MPI rank) -% 444 0 obj -<< /S /GoTo /D (subsection.7.6) >> -% 447 0 obj -(7.6 psb\137wtime \204 Wall clock timing) -% 448 0 obj -<< /S /GoTo /D (subsection.7.7) >> -% 451 0 obj -(7.7 psb\137barrier \204 Sinchronization point parallel environment) -% 452 0 obj -<< /S /GoTo /D (subsection.7.8) >> -% 455 0 obj -(7.8 psb\137abort \204 Abort a computation) -% 456 0 obj -<< /S /GoTo /D (subsection.7.9) >> -% 459 0 obj -(7.9 psb\137bcast \204 Broadcast data) -% 460 0 obj -<< /S /GoTo /D (subsection.7.10) >> -% 463 0 obj -(7.10 psb\137sum \204 Global sum) -% 464 0 obj -<< /S /GoTo /D (subsection.7.11) >> -% 467 0 obj -(7.11 psb\137max \204 Global maximum) -% 468 0 obj -<< /S /GoTo /D (subsection.7.12) >> -% 471 0 obj -(7.12 psb\137min \204 Global minimum) -% 472 0 obj -<< /S /GoTo /D (subsection.7.13) >> -% 475 0 obj -(7.13 psb\137amx \204 Global maximum absolute value) -% 476 0 obj -<< /S /GoTo /D (subsection.7.14) >> -% 479 0 obj -(7.14 psb\137amn \204 Global minimum absolute value) -% 480 0 obj -<< /S /GoTo /D (subsection.7.15) >> -% 483 0 obj -(7.15 psb\137nrm2 \204 Global 2-norm reduction) -% 484 0 obj -<< /S /GoTo /D (subsection.7.16) >> -% 487 0 obj -(7.16 psb\137snd \204 Send data) -% 488 0 obj -<< /S /GoTo /D (subsection.7.17) >> -% 491 0 obj -(7.17 psb\137rcv \204 Receive data) -% 492 0 obj -<< /S /GoTo /D (section.8) >> -% 495 0 obj -(8 Error handling) -% 496 0 obj -<< /S /GoTo /D (subsection.8.1) >> -% 499 0 obj -(8.1 psb\137errpush \204 Pushes an error code onto the error stack) -% 500 0 obj -<< /S /GoTo /D (subsection.8.2) >> -% 503 0 obj -(8.2 psb\137error \204 Prints the error stack content and aborts execution) -% 504 0 obj -<< /S /GoTo /D (subsection.8.3) >> -% 507 0 obj -(8.3 psb\137set\137errverbosity \204 Sets the verbosity of error messages) -% 508 0 obj -<< /S /GoTo /D (subsection.8.4) >> -% 511 0 obj -(8.4 psb\137set\137erraction \204 Set the type of action to be taken upon error condition) -% 512 0 obj -<< /S /GoTo /D (section.9) >> -% 515 0 obj -(9 Utilities) -% 516 0 obj -<< /S /GoTo /D (subsection.9.1) >> -% 519 0 obj -(9.1 \040hb\137read \204 Read a sparse matrix from a file in the Harwell\205Boeing format) -% 520 0 obj -<< /S /GoTo /D (subsection.9.2) >> -% 523 0 obj -(9.2 hb\137write \204 Write a sparse matrix to a file in the Harwell\205Boeing format) -% 524 0 obj -<< /S /GoTo /D (subsection.9.3) >> -% 527 0 obj -(9.3 mm\137mat\137read \204 Read a sparse matrix from a file in the MatrixMarket format) -% 528 0 obj -<< /S /GoTo /D (subsection.9.4) >> -% 531 0 obj -(9.4 mm\137array\137read \204 Read a dense array from a file in the MatrixMarket format) -% 532 0 obj -<< /S /GoTo /D (subsection.9.5) >> -% 535 0 obj -(9.5 mm\137mat\137write \204 Write a sparse matrix to a file in the MatrixMarket format) -% 536 0 obj -<< /S /GoTo /D (subsection.9.6) >> -% 539 0 obj -(9.6 mm\137array\137write \204 Write a dense array from a file in the MatrixMarket format) -% 540 0 obj -<< /S /GoTo /D (section.10) >> -% 543 0 obj -(10 Preconditioner routines) -% 544 0 obj -<< /S /GoTo /D (subsection.10.1) >> -% 547 0 obj -(10.1 init \204 Initialize a preconditioner) -% 548 0 obj -<< /S /GoTo /D (subsection.10.2) >> -% 551 0 obj -(10.2 build \204 Builds a preconditioner) -% 552 0 obj -<< /S /GoTo /D (subsection.10.3) >> -% 555 0 obj -(10.3 apply \204 Preconditioner application routine) -% 556 0 obj -<< /S /GoTo /D (subsection.10.4) >> -% 559 0 obj -(10.4 descr \204 Prints a description of current preconditioner) -% 560 0 obj -<< /S /GoTo /D (subsection.10.5) >> -% 563 0 obj -(10.5 clone \204 clone current preconditioner) -% 564 0 obj -<< /S /GoTo /D (subsection.10.6) >> -% 567 0 obj -(10.6 free \204 Free a preconditioner) -% 568 0 obj -<< /S /GoTo /D (section.11) >> -% 571 0 obj -(11 Iterative Methods) -% 572 0 obj -<< /S /GoTo /D (subsection.11.1) >> -% 575 0 obj -(11.1 psb\137krylov \040\204 Krylov Methods Driver Routine) -% 576 0 obj -<< /S /GoTo /D [577 0 R /Fit] >> -% 577 0 obj -<< -/Type /Page -/Contents 581 0 R -/Resources 580 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Group 579 0 R ->> -% 579 0 obj -%PTEX Group needed for transparent pngs -<> -% 582 0 obj -<< -/D [577 0 R /XYZ 98.895 753.953 null] ->> -% 583 0 obj -<< -/D [577 0 R /XYZ 99.895 716.092 null] ->> -% 3 0 obj -<< -/D [577 0 R /XYZ 99.895 716.092 null] ->> -% 580 0 obj -<< -/Font << /F51 584 0 R /F52 585 0 R /F54 586 0 R >> -/XObject << /Im1 578 0 R >> -/ProcSet [ /PDF /Text /ImageC ] ->> -% 590 0 obj -<< -/Type /Page -/Contents 591 0 R -/Resources 589 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R ->> -% 592 0 obj -<< -/D [590 0 R /XYZ 149.705 753.953 null] ->> -% 589 0 obj -<< -/Font << /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 636 0 obj -<< -/Type /Page -/Contents 637 0 R -/Resources 635 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 593 0 R 594 0 R 595 0 R 596 0 R 597 0 R 598 0 R 599 0 R 600 0 R 601 0 R 602 0 R 603 0 R 604 0 R 605 0 R 606 0 R 607 0 R 608 0 R 609 0 R 610 0 R 611 0 R 640 0 R 612 0 R 641 0 R 613 0 R 614 0 R 615 0 R 616 0 R 617 0 R 618 0 R 619 0 R 620 0 R 621 0 R 642 0 R 622 0 R 643 0 R 623 0 R 644 0 R 624 0 R 625 0 R 626 0 R 627 0 R 628 0 R 629 0 R 630 0 R 631 0 R 632 0 R 633 0 R ] ->> -% 593 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 681.973 173.389 691.303] -/A << /S /GoTo /D (section.1) >> ->> -% 594 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 659.857 196.921 669.187] -/A << /S /GoTo /D (section.2) >> ->> -% 595 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 647.803 227.028 657.212] -/A << /S /GoTo /D (subsection.2.1) >> ->> - -endstream -endobj -695 0 obj +775 0 obj << /Length 15896 >> @@ -1404,28 +1350,28 @@ stream 0 g 0 G 0 0 1 rg 0 0 1 RG BT -/F54 9.9626 Tf 188.563 706.129 Td [(3.2.17)-700(psb)]TJ +/F62 9.9626 Tf 188.563 706.129 Td [(3.2.17)-700(psb)]TJ ET q 1 0 0 1 236.762 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 239.751 706.129 Td [(set)]TJ +/F62 9.9626 Tf 239.751 706.129 Td [(set)]TJ ET q 1 0 0 1 252.592 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 255.581 706.129 Td [(mat)]TJ +/F62 9.9626 Tf 255.581 706.129 Td [(mat)]TJ ET q 1 0 0 1 273.205 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 276.194 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ +/F62 9.9626 Tf 276.194 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ 0 g 0 G [-829(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1467,7 +1413,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 237.589 645.73 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(dense)-250(vector)]TJ +/F62 9.9626 Tf 237.589 645.73 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(dense)-250(vector)]TJ 0 g 0 G [-690(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1494,7 +1440,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 237.589 609.491 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ +/F62 9.9626 Tf 237.589 609.491 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ 0 g 0 G [-770(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1522,18 +1468,18 @@ BT [-1500(28)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.125 Td [(4)-1000(Computational)-250(routines)]TJ +/F59 9.9626 Tf -14.944 -22.125 Td [(4)-1000(Computational)-250(routines)]TJ 0 g 0 G [-21085(29)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.08 Td [(4.1)-1050(psb)]TJ +/F62 9.9626 Tf 14.944 -12.08 Td [(4.1)-1050(psb)]TJ ET q 1 0 0 1 204.881 539.246 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 539.047 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ +/F62 9.9626 Tf 207.87 539.047 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ 0 g 0 G [-539(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1547,7 +1493,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 526.967 Td [(gedot)-250(\227)-250(Dot)-250(Pr)18(oduct)]TJ +/F62 9.9626 Tf 207.87 526.967 Td [(gedot)-250(\227)-250(Dot)-250(Pr)18(oduct)]TJ 0 g 0 G [-837(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1561,7 +1507,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 514.888 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Pr)18(oduct)]TJ +/F62 9.9626 Tf 207.87 514.888 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Pr)18(oduct)]TJ 0 g 0 G [-793(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1575,7 +1521,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 502.808 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 502.808 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-868(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1589,7 +1535,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 490.728 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ +/F62 9.9626 Tf 207.87 490.728 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ 0 g 0 G [-600(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1603,7 +1549,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 478.649 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 478.649 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-438(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1617,7 +1563,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 466.569 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 466.569 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-605(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1631,7 +1577,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 454.489 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 454.489 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-438(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1645,7 +1591,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 442.409 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 442.409 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-655(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1659,7 +1605,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 430.33 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ +/F62 9.9626 Tf 207.87 430.33 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ 0 g 0 G [-841(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1673,7 +1619,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 418.25 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ +/F62 9.9626 Tf 207.87 418.25 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ 0 g 0 G [-604(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1687,7 +1633,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 406.17 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct)]TJ +/F62 9.9626 Tf 207.87 406.17 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct)]TJ 0 g 0 G [-491(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1701,7 +1647,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 394.091 Td [(spsm)-250(\227)-250(T)90(riangular)-250(System)-250(Solve)]TJ +/F62 9.9626 Tf 207.87 394.091 Td [(spsm)-250(\227)-250(T)90(riangular)-250(System)-250(Solve)]TJ 0 g 0 G [-945(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1715,7 +1661,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 382.011 Td [(gemlt)-250(\227)-250(Entrywise)-250(Pr)18(oduct)]TJ +/F62 9.9626 Tf 207.87 382.011 Td [(gemlt)-250(\227)-250(Entrywise)-250(Pr)18(oduct)]TJ 0 g 0 G [-968(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1729,7 +1675,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 369.931 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ +/F62 9.9626 Tf 207.87 369.931 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ 0 g 0 G [-748(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1743,25 +1689,25 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 357.852 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ +/F62 9.9626 Tf 207.87 357.852 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ 0 g 0 G [-340(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(57)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.165 -22.126 Td [(5)-1000(Communication)-250(routines)]TJ +/F59 9.9626 Tf -57.165 -22.126 Td [(5)-1000(Communication)-250(routines)]TJ 0 g 0 G [-20585(58)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.079 Td [(5.1)-1050(psb)]TJ +/F62 9.9626 Tf 14.944 -12.079 Td [(5.1)-1050(psb)]TJ ET q 1 0 0 1 204.881 323.846 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 323.647 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ +/F62 9.9626 Tf 207.87 323.647 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ 0 g 0 G [-888(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1775,7 +1721,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 311.567 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ +/F62 9.9626 Tf 207.87 311.567 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ 0 g 0 G [-553(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1789,7 +1735,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 299.487 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ +/F62 9.9626 Tf 207.87 299.487 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ 0 g 0 G [-973(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1803,25 +1749,25 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 287.407 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ +/F62 9.9626 Tf 207.87 287.407 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ 0 g 0 G [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(68)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.165 -22.125 Td [(6)-1000(Data)-250(management)-250(routines)]TJ +/F59 9.9626 Tf -57.165 -22.125 Td [(6)-1000(Data)-250(management)-250(routines)]TJ 0 g 0 G [-19668(70)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.08 Td [(6.1)-1050(psb)]TJ +/F62 9.9626 Tf 14.944 -12.08 Td [(6.1)-1050(psb)]TJ ET q 1 0 0 1 204.881 253.402 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 253.202 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ +/F62 9.9626 Tf 207.87 253.202 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G [-363(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1835,7 +1781,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 241.123 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(r)18(outine)]TJ +/F62 9.9626 Tf 207.87 241.123 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(r)18(outine)]TJ 0 g 0 G [-261(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1849,7 +1795,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 229.043 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(r)18(outine)]TJ +/F62 9.9626 Tf 207.87 229.043 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(r)18(outine)]TJ 0 g 0 G [-718(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1863,7 +1809,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 216.963 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ +/F62 9.9626 Tf 207.87 216.963 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G [-873(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1877,7 +1823,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 204.884 Td [(cdfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(communication)-250(descriptor)]TJ +/F62 9.9626 Tf 207.87 204.884 Td [(cdfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G [-791(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1891,7 +1837,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 192.804 Td [(cdbldext)-250(\227)-250(Build)-250(an)-250(extended)-250(communication)-250(descriptor)]TJ +/F62 9.9626 Tf 207.87 192.804 Td [(cdbldext)-250(\227)-250(Build)-250(an)-250(extended)-250(communication)-250(descriptor)]TJ 0 g 0 G [-676(.)]TJ 0 g 0 G @@ -1905,7 +1851,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 180.724 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 207.87 180.724 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-842(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1919,7 +1865,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 168.644 Td [(spins)-250(\227)-250(Insert)-250(a)-250(set)-250(of)-250(coef)18(\002cients)-250(into)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 207.87 168.644 Td [(spins)-250(\227)-250(Insert)-250(a)-250(set)-250(of)-250(coef)18(\002cients)-250(into)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-625(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1933,7 +1879,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 156.565 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(r)18(outine)]TJ +/F62 9.9626 Tf 207.87 156.565 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(r)18(outine)]TJ 0 g 0 G [-611(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1947,7 +1893,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 144.485 Td [(spfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 207.87 144.485 Td [(spfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-520(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1961,7 +1907,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 132.405 Td [(sprn)-250(\227)-250(Reinit)-250(sparse)-250(matrix)-250(str)8(uctur)18(e)-250(for)-250(psblas)-250(r)18(outines.)]TJ +/F62 9.9626 Tf 207.87 132.405 Td [(sprn)-250(\227)-250(Reinit)-250(sparse)-250(matrix)-250(str)8(uctur)18(e)-250(for)-250(psblas)-250(r)18(outines.)]TJ 0 g 0 G [-725(.)]TJ 0 g 0 G @@ -1975,7 +1921,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 120.326 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ +/F62 9.9626 Tf 207.87 120.326 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G [-330(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1988,716 +1934,159 @@ ET endstream endobj -741 0 obj +605 0 obj << -/Length 18214 +/Type /ObjStm +/N 100 +/First 904 +/Length 14415 >> stream -0 g 0 G -0 g 0 G -0 0 1 rg 0 0 1 RG -BT -/F54 9.9626 Tf 114.839 706.129 Td [(6.13)-550(psb)]TJ -ET -q -1 0 0 1 154.072 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(r)18(outine)]TJ -0 g 0 G - [-411(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(92)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.14)-550(psb)]TJ -ET -q -1 0 0 1 154.072 694.234 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 694.035 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G - [-376(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(94)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.15)-550(psb)]TJ -ET -q -1 0 0 1 154.072 682.14 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 681.941 Td [(gefr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G - [-758(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(95)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(6.16)-550(psb)]TJ -ET -q -1 0 0 1 154.072 670.046 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 669.846 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G - [-801(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(96)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.17)-550(psb)]TJ -ET -q -1 0 0 1 154.072 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 657.752 Td [(glob)]TJ -ET -q -1 0 0 1 177.046 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 180.034 657.752 Td [(to)]TJ -ET -q -1 0 0 1 189.319 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 192.308 657.752 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ -0 g 0 G - [-427(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(97)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -77.469 -12.094 Td [(6.18)-550(psb)]TJ -ET -q -1 0 0 1 154.072 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 645.658 Td [(loc)]TJ -ET -q -1 0 0 1 170.42 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 173.409 645.658 Td [(to)]TJ -ET -q -1 0 0 1 182.694 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 185.683 645.658 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ -0 g 0 G - [-966(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(99)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -70.844 -12.094 Td [(6.19)-550(psb)]TJ -ET -q -1 0 0 1 154.072 633.763 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 633.564 Td [(is)]TJ -ET -q -1 0 0 1 164.782 633.763 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 167.771 633.564 Td [(owned)-250(\227)]TJ -0 g 0 G - [-1135(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(100)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -52.932 -12.094 Td [(6.20)-550(psb)]TJ -ET -q -1 0 0 1 154.072 621.669 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 621.47 Td [(owned)]TJ -ET -q -1 0 0 1 188.064 621.669 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 191.053 621.47 Td [(index)-250(\227)]TJ -0 g 0 G - [-871(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(101)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -76.214 -12.095 Td [(6.21)-550(psb)]TJ -ET -q -1 0 0 1 154.072 609.575 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 609.375 Td [(is)]TJ -ET -q -1 0 0 1 164.782 609.575 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 167.771 609.375 Td [(local)-250(\227)]TJ -0 g 0 G - [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(102)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -52.932 -12.094 Td [(6.22)-550(psb)]TJ -ET -q -1 0 0 1 154.072 597.481 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 597.281 Td [(local)]TJ -ET -q -1 0 0 1 178.301 597.481 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 181.29 597.281 Td [(index)-250(\227)]TJ -0 g 0 G - [-1101(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(103)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -66.451 -12.094 Td [(6.23)-550(psb)]TJ -ET -q -1 0 0 1 154.072 585.386 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 585.187 Td [(get)]TJ -ET -q -1 0 0 1 171.217 585.386 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 585.187 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ -0 g 0 G - [-827(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(104)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -59.367 -12.094 Td [(6.24)-550(psb)]TJ -ET -q -1 0 0 1 154.072 573.292 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 573.093 Td [(get)]TJ -ET -q -1 0 0 1 171.217 573.292 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 573.093 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(105)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -59.367 -12.094 Td [(6.25)-550(psb)]TJ -ET -q -1 0 0 1 154.072 561.198 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 560.999 Td [(sp)]TJ -ET -q -1 0 0 1 167.87 561.198 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 170.859 560.999 Td [(getr)18(ow)-250(\227)-250(Extract)-250(r)18(ow\050s\051)-250(fr)18(om)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G - [-671(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(106)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -56.02 -12.094 Td [(6.26)-550(psb)]TJ -ET -q -1 0 0 1 154.072 549.104 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 548.905 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ -0 g 0 G - [-251(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(108)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(6.27)-550(Sorting)-250(utilities)-250(\227)]TJ -0 g 0 G - [-1157(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(109)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.149 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ -0 g 0 G - [-17835(111)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.094 Td [(7.1)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 502.766 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 502.567 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ -0 g 0 G - [-766(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(112)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.2)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 490.672 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 490.472 Td [(info)-264(\227)-264(Return)-264(information)-264(abou)1(t)-264(PSBLAS)-264(parallel)-264(envir)18(on-)]TJ -19.308 -11.955 Td [(ment)]TJ -0 g 0 G - [-930(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(113)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(7.3)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 466.622 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 466.423 Td [(exit)-250(\227)-250(Exit)-250(fr)18(om)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ -0 g 0 G - [-823(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(114)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.4)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 454.329 Td [(get)]TJ -ET -q -1 0 0 1 171.217 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 454.329 Td [(mpi)]TJ -ET -q -1 0 0 1 192.487 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 195.476 454.329 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ -0 g 0 G - [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(115)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -80.637 -12.094 Td [(7.5)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 442.235 Td [(get)]TJ -ET -q -1 0 0 1 171.217 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 442.235 Td [(mpi)]TJ -ET -q -1 0 0 1 192.487 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 195.476 442.235 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ -0 g 0 G - [-498(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(116)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -80.637 -12.094 Td [(7.6)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 430.34 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 430.141 Td [(wtime)-250(\227)-250(W)92(all)-250(clock)-250(timing)]TJ -0 g 0 G - [-499(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(117)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.7)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 418.246 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 418.046 Td [(barrier)-250(\227)-250(Sinchr)18(onization)-250(point)-250(parallel)-250(envir)18(onment)]TJ -0 g 0 G - [-903(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(118)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.8)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 406.151 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 405.952 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ -0 g 0 G - [-946(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(119)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.9)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 394.057 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 393.858 Td [(bcast)-250(\227)-250(Br)18(oadcast)-250(data)]TJ -0 g 0 G - [-739(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(120)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.10)-550(psb)]TJ -ET -q -1 0 0 1 154.072 381.963 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 381.764 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ -0 g 0 G - [-998(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(122)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.11)-550(psb)]TJ -ET -q -1 0 0 1 154.072 369.869 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 369.67 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ -0 g 0 G - [-610(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(124)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.12)-550(psb)]TJ -ET -q -1 0 0 1 154.072 357.775 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 357.575 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ -0 g 0 G - [-896(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(126)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.13)-550(psb)]TJ -ET -q -1 0 0 1 154.072 345.681 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 345.481 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ -0 g 0 G - [-700(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(128)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.14)-550(psb)]TJ -ET -q -1 0 0 1 154.072 333.586 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 333.387 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ -0 g 0 G - [-777(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(130)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.15)-550(psb)]TJ -ET -q -1 0 0 1 154.072 321.492 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 321.293 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(r)18(eduction)]TJ -0 g 0 G - [-710(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(132)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.16)-550(psb)]TJ -ET -q -1 0 0 1 154.072 309.398 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 309.199 Td [(snd)-250(\227)-250(Send)-250(data)]TJ -0 g 0 G - [-511(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(134)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.17)-550(psb)]TJ -ET -q -1 0 0 1 154.072 297.304 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 297.105 Td [(r)18(cv)-250(\227)-250(Receive)-250(data)]TJ -0 g 0 G - [-284(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(135)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.166 -22.15 Td [(8)-1000(Error)-250(handling)]TJ -0 g 0 G - [-24750(136)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.094 Td [(8.1)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 263.06 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 262.861 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(err)18(or)-250(code)-250(onto)-250(the)-250(err)18(or)-250(stack)]TJ -0 g 0 G - [-734(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(138)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(8.2)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 250.966 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 250.767 Td [(err)18(or)-250(\227)-250(Prints)-250(the)-250(err)18(or)-250(stack)-250(content)-250(and)-250(aborts)-250(execution)]TJ -0 g 0 G -0 g 0 G - [-1381(139)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(8.3)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 238.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 238.673 Td [(set)]TJ -ET -q -1 0 0 1 169.902 238.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 172.891 238.673 Td [(errverbosity)-250(\227)-250(Sets)-250(the)-250(verbosity)-250(of)-250(err)18(or)-250(messages)]TJ -0 g 0 G - [-253(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(140)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -58.052 -12.095 Td [(8.4)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 226.778 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 226.578 Td [(set)]TJ -ET -q -1 0 0 1 169.902 226.778 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 172.891 226.578 Td [(erraction)-232(\227)-231(Set)-232(the)-231(type)-232(of)-231(action)-232(to)-232(b)1(e)-232(taken)-232(upon)-231(err)18(or)]TJ -35.138 -11.955 Td [(condition)]TJ -0 g 0 G - [-481(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(141)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -37.858 -22.149 Td [(9)-1000(Utilities)]TJ -0 g 0 G - [-27780(142)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.095 Td [(9.1)-1450(hb)]TJ -ET -q -1 0 0 1 153.644 180.579 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 156.633 180.38 Td [(r)18(ead)-400(\227)-400(Read)-400(a)-400(sparse)-400(matrix)-400(fr)18(om)-400(a)-400(\002le)-400(in)-400(the)-400(Harwell\226)]TJ -18.88 -11.956 Td [(Boeing)-250(format)]TJ -0 g 0 G - [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(143)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(9.2)-1050(hb)]TJ -ET -q -1 0 0 1 149.659 156.529 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 152.647 156.33 Td [(write)-226(\227)-226(W)74(rite)-226(a)-226(sparse)-226(matrix)-226(to)-226(a)-225(\002le)-226(in)-226(the)-226(Harwell\226Boeing)]TJ -14.894 -11.955 Td [(format)]TJ -0 g 0 G - [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(144)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(9.3)-1050(mm)]TJ -ET -q -1 0 0 1 155.945 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 158.934 132.281 Td [(mat)]TJ -ET -q -1 0 0 1 176.558 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 179.546 132.281 Td [(r)18(ead)-265(\227)-265(Read)-265(a)-265(sparse)-265(matrix)-265(fr)18(om)-265(a)-265(\002le)-265(in)-265(the)-265(Matrix-)]TJ -41.793 -11.955 Td [(Market)-250(format)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(145)]TJ -0 g 0 G -0 g 0 G - 129.649 -29.888 Td [(iii)]TJ -0 g 0 G -ET - -endstream -endobj -645 0 obj +604 0 608 43 609 137 612 180 613 304 616 347 617 491 620 535 621 654 624 698 +625 817 628 861 629 995 632 1039 633 1153 636 1197 637 1346 640 1390 641 1539 644 1583 +645 1752 648 1796 649 1945 652 1989 653 2133 656 2177 657 2346 658 2391 660 2520 663 2626 +664 2682 3 2738 661 2792 671 2921 673 3035 670 3092 717 3159 674 3653 675 3799 676 3945 +677 4097 678 4249 679 4401 680 4556 681 4708 682 4851 683 5003 684 5160 685 5317 686 5474 +687 5629 688 5786 689 5943 690 6100 691 6257 692 6410 721 6568 693 6723 722 6881 694 7038 +695 7196 696 7354 697 7512 698 7670 699 7821 700 7978 701 8134 702 8291 723 8448 703 8604 +724 8761 704 8916 725 9073 705 9229 706 9386 707 9543 708 9700 709 9858 710 10014 711 10172 +712 10330 713 10488 714 10646 719 10801 720 10857 716 10913 774 10993 715 11495 726 11653 727 11811 +728 11969 729 12120 730 12277 731 12434 732 12590 733 12747 734 12904 735 13061 736 13213 737 13365 +% 604 0 obj +<< /S /GoTo /D (section*.7) >> +% 608 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000e\000x\000i\000t) +% 609 0 obj +<< /S /GoTo /D (section*.8) >> +% 612 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000D\000e\000v\000i\000c\000e\000S\000y\000n\000c) +% 613 0 obj +<< /S /GoTo /D (section*.9) >> +% 616 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000g\000e\000t\000D\000e\000v\000i\000c\000e\000C\000o\000u\000n\000t) +% 617 0 obj +<< /S /GoTo /D (section*.10) >> +% 620 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000g\000e\000t\000D\000e\000v\000i\000c\000e) +% 621 0 obj +<< /S /GoTo /D (section*.11) >> +% 624 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000s\000e\000t\000D\000e\000v\000i\000c\000e) +% 625 0 obj +<< /S /GoTo /D (section*.12) >> +% 628 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000D\000e\000v\000i\000c\000e\000H\000a\000s\000U\000V\000A) +% 629 0 obj +<< /S /GoTo /D (section*.13) >> +% 632 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000W\000a\000r\000p\000S\000i\000z\000e) +% 633 0 obj +<< /S /GoTo /D (section*.14) >> +% 636 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000u\000l\000t\000i\000P\000r\000o\000c\000e\000s\000s\000o\000r\000s) +% 637 0 obj +<< /S /GoTo /D (section*.15) >> +% 640 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000a\000x\000T\000h\000r\000e\000a\000d\000s\000P\000e\000r\000M\000P) +% 641 0 obj +<< /S /GoTo /D (section*.16) >> +% 644 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000a\000x\000R\000e\000g\000i\000s\000t\000e\000r\000P\000e\000r\000B\000l\000o\000c\000k) +% 645 0 obj +<< /S /GoTo /D (section*.17) >> +% 648 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000C\000l\000o\000c\000k\000R\000a\000t\000e) +% 649 0 obj +<< /S /GoTo /D (section*.18) >> +% 652 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000B\000u\000s\000W\000i\000d\000t\000h) +% 653 0 obj +<< /S /GoTo /D (section*.19) >> +% 656 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000P\000e\000a\000k\000B\000a\000n\000d\000w\000i\000d\000t\000h) +% 657 0 obj +<< /S /GoTo /D [658 0 R /Fit] >> +% 658 0 obj << -/Type /ObjStm -/N 100 -/First 927 -/Length 16599 ->> -stream -596 0 597 152 598 304 599 459 600 611 601 754 602 906 603 1063 604 1220 605 1377 -606 1532 607 1689 608 1846 609 2003 610 2160 611 2313 640 2471 612 2626 641 2784 613 2941 -614 3099 615 3257 616 3415 617 3573 618 3724 619 3881 620 4037 621 4194 642 4351 622 4507 -643 4664 623 4819 644 4976 624 5132 625 5289 626 5446 627 5603 628 5761 629 5917 630 6075 -631 6233 632 6391 633 6549 638 6704 639 6760 635 6816 694 6896 634 7398 646 7556 647 7714 -648 7872 649 8023 650 8180 651 8337 652 8493 653 8650 654 8807 655 8964 656 9116 657 9268 -658 9414 659 9566 660 9718 661 9870 662 10022 663 10174 664 10326 665 10478 666 10630 667 10781 -668 10934 669 11087 670 11240 671 11393 672 11546 673 11699 674 11852 675 11999 676 12149 677 12301 -678 12453 679 12605 680 12752 681 12904 682 13056 683 13208 684 13360 685 13512 686 13664 687 13816 -688 13968 689 14120 690 14273 691 14424 696 14575 693 14632 740 14712 692 15214 697 15367 698 15519 -% 596 0 obj +/Type /Page +/Contents 662 0 R +/Resources 661 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Group 660 0 R +>> +% 660 0 obj +%PTEX Group needed for transparent pngs +<> +% 663 0 obj +<< +/D [658 0 R /XYZ 98.895 753.953 null] +>> +% 664 0 obj +<< +/D [658 0 R /XYZ 99.895 716.092 null] +>> +% 3 0 obj +<< +/D [658 0 R /XYZ 99.895 716.092 null] +>> +% 661 0 obj +<< +/Font << /F59 665 0 R /F60 666 0 R /F62 667 0 R >> +/XObject << /Im1 659 0 R >> +/ProcSet [ /PDF /Text /ImageC ] +>> +% 671 0 obj +<< +/Type /Page +/Contents 672 0 R +/Resources 670 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +>> +% 673 0 obj +<< +/D [671 0 R /XYZ 149.705 753.953 null] +>> +% 670 0 obj +<< +/Font << /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 717 0 obj +<< +/Type /Page +/Contents 718 0 R +/Resources 716 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Annots [ 674 0 R 675 0 R 676 0 R 677 0 R 678 0 R 679 0 R 680 0 R 681 0 R 682 0 R 683 0 R 684 0 R 685 0 R 686 0 R 687 0 R 688 0 R 689 0 R 690 0 R 691 0 R 692 0 R 721 0 R 693 0 R 722 0 R 694 0 R 695 0 R 696 0 R 697 0 R 698 0 R 699 0 R 700 0 R 701 0 R 702 0 R 723 0 R 703 0 R 724 0 R 704 0 R 725 0 R 705 0 R 706 0 R 707 0 R 708 0 R 709 0 R 710 0 R 711 0 R 712 0 R 713 0 R 714 0 R ] +>> +% 674 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 681.973 173.389 691.303] +/A << /S /GoTo /D (section.1) >> +>> +% 675 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 659.857 196.921 669.187] +/A << /S /GoTo /D (section.2) >> +>> +% 676 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 647.803 227.028 657.212] +/A << /S /GoTo /D (subsection.2.1) >> +>> +% 677 0 obj << /Type /Annot /Subtype /Link @@ -2705,7 +2094,7 @@ stream /Rect [113.843 633.079 211.078 645.138] /A << /S /GoTo /D (subsection.2.2) >> >> -% 597 0 obj +% 678 0 obj << /Type /Annot /Subtype /Link @@ -2713,7 +2102,7 @@ stream /Rect [113.843 621.004 233.094 633.064] /A << /S /GoTo /D (subsection.2.3) >> >> -% 598 0 obj +% 679 0 obj << /Type /Annot /Subtype /Link @@ -2721,7 +2110,7 @@ stream /Rect [136.757 608.93 301.886 620.99] /A << /S /GoTo /D (subsubsection.2.3.1) >> >> -% 599 0 obj +% 680 0 obj << /Type /Annot /Subtype /Link @@ -2729,7 +2118,7 @@ stream /Rect [113.843 596.856 230.734 608.916] /A << /S /GoTo /D (subsection.2.4) >> >> -% 600 0 obj +% 681 0 obj << /Type /Annot /Subtype /Link @@ -2737,7 +2126,7 @@ stream /Rect [98.899 577.37 242.261 586.7] /A << /S /GoTo /D (section.3) >> >> -% 601 0 obj +% 682 0 obj << /Type /Annot /Subtype /Link @@ -2745,7 +2134,7 @@ stream /Rect [113.843 562.666 249.144 574.726] /A << /S /GoTo /D (subsection.3.1) >> >> -% 602 0 obj +% 683 0 obj << /Type /Annot /Subtype /Link @@ -2753,7 +2142,7 @@ stream /Rect [136.757 550.592 258.689 562.652] /A << /S /GoTo /D (subsubsection.3.1.1) >> >> -% 603 0 obj +% 684 0 obj << /Type /Annot /Subtype /Link @@ -2761,7 +2150,7 @@ stream /Rect [136.757 538.518 360.207 550.578] /A << /S /GoTo /D (subsubsection.3.1.2) >> >> -% 604 0 obj +% 685 0 obj << /Type /Annot /Subtype /Link @@ -2769,7 +2158,7 @@ stream /Rect [136.757 526.444 350.723 538.504] /A << /S /GoTo /D (subsubsection.3.1.3) >> >> -% 605 0 obj +% 686 0 obj << /Type /Annot /Subtype /Link @@ -2777,7 +2166,7 @@ stream /Rect [136.757 514.37 373.457 526.43] /A << /S /GoTo /D (subsubsection.3.1.4) >> >> -% 606 0 obj +% 687 0 obj << /Type /Annot /Subtype /Link @@ -2785,7 +2174,7 @@ stream /Rect [136.757 502.296 363.973 514.355] /A << /S /GoTo /D (subsubsection.3.1.5) >> >> -% 607 0 obj +% 688 0 obj << /Type /Annot /Subtype /Link @@ -2793,7 +2182,7 @@ stream /Rect [136.757 490.222 384.834 502.281] /A << /S /GoTo /D (subsubsection.3.1.6) >> >> -% 608 0 obj +% 689 0 obj << /Type /Annot /Subtype /Link @@ -2801,7 +2190,7 @@ stream /Rect [136.757 478.148 356.411 490.013] /A << /S /GoTo /D (subsubsection.3.1.7) >> >> -% 609 0 obj +% 690 0 obj << /Type /Annot /Subtype /Link @@ -2809,7 +2198,7 @@ stream /Rect [136.757 466.074 297.523 478.133] /A << /S /GoTo /D (subsubsection.3.1.8) >> >> -% 610 0 obj +% 691 0 obj << /Type /Annot /Subtype /Link @@ -2817,7 +2206,7 @@ stream /Rect [136.757 454 345.014 466.059] /A << /S /GoTo /D (subsubsection.3.1.9) >> >> -% 611 0 obj +% 692 0 obj << /Type /Annot /Subtype /Link @@ -2825,7 +2214,7 @@ stream /Rect [136.757 441.925 444.603 453.985] /A << /S /GoTo /D (subsubsection.3.1.10) >> >> -% 640 0 obj +% 721 0 obj << /Type /Annot /Subtype /Link @@ -2833,7 +2222,7 @@ stream /Rect [98.899 429.97 222.246 442.03] /A << /S /GoTo /D (subsubsection.3.1.10) >> >> -% 612 0 obj +% 693 0 obj << /Type /Annot /Subtype /Link @@ -2841,7 +2230,7 @@ stream /Rect [136.757 417.896 444.603 429.956] /A << /S /GoTo /D (subsubsection.3.1.11) >> >> -% 641 0 obj +% 722 0 obj << /Type /Annot /Subtype /Link @@ -2849,7 +2238,7 @@ stream /Rect [98.899 405.941 222.246 418.001] /A << /S /GoTo /D (subsubsection.3.1.11) >> >> -% 613 0 obj +% 694 0 obj << /Type /Annot /Subtype /Link @@ -2857,7 +2246,7 @@ stream /Rect [136.757 393.867 358.404 405.927] /A << /S /GoTo /D (subsubsection.3.1.12) >> >> -% 614 0 obj +% 695 0 obj << /Type /Annot /Subtype /Link @@ -2865,7 +2254,7 @@ stream /Rect [136.757 381.793 354.718 393.853] /A << /S /GoTo /D (subsubsection.3.1.13) >> >> -% 615 0 obj +% 696 0 obj << /Type /Annot /Subtype /Link @@ -2873,7 +2262,7 @@ stream /Rect [136.757 369.719 413.607 381.778] /A << /S /GoTo /D (subsubsection.3.1.14) >> >> -% 616 0 obj +% 697 0 obj << /Type /Annot /Subtype /Link @@ -2881,7 +2270,7 @@ stream /Rect [136.757 360.295 250.062 369.704] /A << /S /GoTo /D (subsubsection.3.1.15) >> >> -% 617 0 obj +% 698 0 obj << /Type /Annot /Subtype /Link @@ -2889,7 +2278,7 @@ stream /Rect [113.843 345.571 223.242 357.63] /A << /S /GoTo /D (subsection.3.2) >> >> -% 618 0 obj +% 699 0 obj << /Type /Annot /Subtype /Link @@ -2897,7 +2286,7 @@ stream /Rect [136.757 333.497 273.364 345.556] /A << /S /GoTo /D (subsubsection.3.2.1) >> >> -% 619 0 obj +% 700 0 obj << /Type /Annot /Subtype /Link @@ -2905,7 +2294,7 @@ stream /Rect [136.757 321.423 399.41 333.482] /A << /S /GoTo /D (subsubsection.3.2.2) >> >> -% 620 0 obj +% 701 0 obj << /Type /Annot /Subtype /Link @@ -2913,7 +2302,7 @@ stream /Rect [136.757 309.348 410.528 321.408] /A << /S /GoTo /D (subsubsection.3.2.3) >> >> -% 621 0 obj +% 702 0 obj << /Type /Annot /Subtype /Link @@ -2921,7 +2310,7 @@ stream /Rect [136.757 297.274 444.603 309.334] /A << /S /GoTo /D (subsubsection.3.2.4) >> >> -% 642 0 obj +% 723 0 obj << /Type /Annot /Subtype /Link @@ -2929,7 +2318,7 @@ stream /Rect [98.899 287.969 199.631 297.075] /A << /S /GoTo /D (subsubsection.3.2.4) >> >> -% 622 0 obj +% 703 0 obj << /Type /Annot /Subtype /Link @@ -2937,7 +2326,7 @@ stream /Rect [136.757 273.245 444.603 285.305] /A << /S /GoTo /D (subsubsection.3.2.5) >> >> -% 643 0 obj +% 724 0 obj << /Type /Annot /Subtype /Link @@ -2945,7 +2334,7 @@ stream /Rect [98.899 261.29 248.906 273.046] /A << /S /GoTo /D (subsubsection.3.2.5) >> >> -% 623 0 obj +% 704 0 obj << /Type /Annot /Subtype /Link @@ -2953,7 +2342,7 @@ stream /Rect [136.757 249.216 444.603 261.276] /A << /S /GoTo /D (subsubsection.3.2.6) >> >> -% 644 0 obj +% 725 0 obj << /Type /Annot /Subtype /Link @@ -2961,7 +2350,7 @@ stream /Rect [98.899 239.911 185.853 249.016] /A << /S /GoTo /D (subsubsection.3.2.6) >> >> -% 624 0 obj +% 705 0 obj << /Type /Annot /Subtype /Link @@ -2969,7 +2358,7 @@ stream /Rect [136.757 225.187 384.545 237.246] /A << /S /GoTo /D (subsubsection.3.2.7) >> >> -% 625 0 obj +% 706 0 obj << /Type /Annot /Subtype /Link @@ -2977,1520 +2366,2954 @@ stream /Rect [136.757 213.113 329.343 225.172] /A << /S /GoTo /D (subsubsection.3.2.8) >> >> -% 626 0 obj +% 707 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 201.038 405.337 213.098] +/A << /S /GoTo /D (subsubsection.3.2.9) >> +>> +% 708 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 188.964 371.724 201.024] +/A << /S /GoTo /D (subsubsection.3.2.10) >> +>> +% 709 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 176.89 309.029 188.95] +/A << /S /GoTo /D (subsubsection.3.2.11) >> +>> +% 710 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 167.466 350.683 176.876] +/A << /S /GoTo /D (subsubsection.3.2.12) >> +>> +% 711 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 152.742 303.929 164.802] +/A << /S /GoTo /D (subsubsection.3.2.13) >> +>> +% 712 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 140.668 324.462 152.728] +/A << /S /GoTo /D (subsubsection.3.2.14) >> +>> +% 713 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 128.594 309.687 140.654] +/A << /S /GoTo /D (subsubsection.3.2.15) >> +>> +% 714 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 116.52 314.13 128.58] +/A << /S /GoTo /D (subsubsection.3.2.16) >> +>> +% 719 0 obj +<< +/D [717 0 R /XYZ 98.895 753.953 null] +>> +% 720 0 obj +<< +/D [717 0 R /XYZ 99.895 723.975 null] +>> +% 716 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 774 0 obj +<< +/Type /Page +/Contents 775 0 R +/Resources 773 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Annots [ 715 0 R 726 0 R 727 0 R 728 0 R 729 0 R 730 0 R 731 0 R 732 0 R 733 0 R 734 0 R 735 0 R 736 0 R 737 0 R 738 0 R 739 0 R 740 0 R 741 0 R 742 0 R 743 0 R 744 0 R 745 0 R 746 0 R 747 0 R 748 0 R 749 0 R 750 0 R 751 0 R 752 0 R 753 0 R 754 0 R 755 0 R 756 0 R 757 0 R 758 0 R 759 0 R 760 0 R 761 0 R 762 0 R 763 0 R 764 0 R 765 0 R 766 0 R 767 0 R 768 0 R 769 0 R 770 0 R 771 0 R ] +>> +% 715 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 702.323 437.338 714.383] +/A << /S /GoTo /D (subsubsection.3.2.17) >> +>> +% 726 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 690.243 348.332 702.303] +/A << /S /GoTo /D (subsubsection.3.2.18) >> +>> +% 727 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 680.814 300.871 690.223] +/A << /S /GoTo /D (subsubsection.3.2.19) >> +>> +% 728 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 668.734 313.682 677.95] +/A << /S /GoTo /D (subsection.3.3) >> +>> +% 729 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 656.654 290.829 666.064] +/A << /S /GoTo /D (subsubsection.3.3.1) >> +>> +% 730 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 201.038 405.337 213.098] -/A << /S /GoTo /D (subsubsection.3.2.9) >> +/Rect [187.567 641.925 446.194 653.984] +/A << /S /GoTo /D (subsubsection.3.3.2) >> >> -% 627 0 obj +% 731 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 188.964 371.724 201.024] -/A << /S /GoTo /D (subsubsection.3.2.10) >> +/Rect [187.567 629.845 479.97 641.905] +/A << /S /GoTo /D (subsubsection.3.3.3) >> >> -% 628 0 obj +% 732 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 176.89 309.029 188.95] -/A << /S /GoTo /D (subsubsection.3.2.11) >> +/Rect [187.567 620.415 358.813 629.825] +/A << /S /GoTo /D (subsubsection.3.3.4) >> >> -% 629 0 obj +% 733 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 167.466 350.683 176.876] -/A << /S /GoTo /D (subsubsection.3.2.12) >> +/Rect [187.567 605.686 415.509 617.745] +/A << /S /GoTo /D (subsubsection.3.3.5) >> >> -% 630 0 obj +% 734 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 152.742 303.929 164.802] -/A << /S /GoTo /D (subsubsection.3.2.13) >> +/Rect [187.567 593.606 348.332 605.666] +/A << /S /GoTo /D (subsubsection.3.3.6) >> >> -% 631 0 obj +% 735 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 140.668 324.462 152.728] -/A << /S /GoTo /D (subsubsection.3.2.14) >> +/Rect [164.653 584.176 318.663 593.586] +/A << /S /GoTo /D (subsection.3.4) >> >> -% 632 0 obj +% 736 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 128.594 309.687 140.654] -/A << /S /GoTo /D (subsubsection.3.2.15) >> +/Rect [164.653 569.446 277.409 581.506] +/A << /S /GoTo /D (subsection.3.5) >> >> -% 633 0 obj +% 737 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 116.52 314.13 128.58] -/A << /S /GoTo /D (subsubsection.3.2.16) >> +/Rect [149.709 547.56 275.386 559.281] +/A << /S /GoTo /D (section.4) >> >> -% 638 0 obj + +endstream +endobj +822 0 obj +<< +/Length 18214 +>> +stream +0 g 0 G +0 g 0 G +0 0 1 rg 0 0 1 RG +BT +/F62 9.9626 Tf 114.839 706.129 Td [(6.13)-550(psb)]TJ +ET +q +1 0 0 1 154.072 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(r)18(outine)]TJ +0 g 0 G + [-411(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(92)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(6.14)-550(psb)]TJ +ET +q +1 0 0 1 154.072 694.234 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 694.035 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-376(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(94)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(6.15)-550(psb)]TJ +ET +q +1 0 0 1 154.072 682.14 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 681.941 Td [(gefr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-758(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(95)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(6.16)-550(psb)]TJ +ET +q +1 0 0 1 154.072 670.046 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 669.846 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-801(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(96)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(6.17)-550(psb)]TJ +ET +q +1 0 0 1 154.072 657.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 657.752 Td [(glob)]TJ +ET +q +1 0 0 1 177.046 657.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 180.034 657.752 Td [(to)]TJ +ET +q +1 0 0 1 189.319 657.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 192.308 657.752 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ +0 g 0 G + [-427(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(97)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -77.469 -12.094 Td [(6.18)-550(psb)]TJ +ET +q +1 0 0 1 154.072 645.857 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 645.658 Td [(loc)]TJ +ET +q +1 0 0 1 170.42 645.857 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 173.409 645.658 Td [(to)]TJ +ET +q +1 0 0 1 182.694 645.857 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 185.683 645.658 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ +0 g 0 G + [-966(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(99)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -70.844 -12.094 Td [(6.19)-550(psb)]TJ +ET +q +1 0 0 1 154.072 633.763 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 633.564 Td [(is)]TJ +ET +q +1 0 0 1 164.782 633.763 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 167.771 633.564 Td [(owned)-250(\227)]TJ +0 g 0 G + [-1135(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(100)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -52.932 -12.094 Td [(6.20)-550(psb)]TJ +ET +q +1 0 0 1 154.072 621.669 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 621.47 Td [(owned)]TJ +ET +q +1 0 0 1 188.064 621.669 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 191.053 621.47 Td [(index)-250(\227)]TJ +0 g 0 G + [-871(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(101)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -76.214 -12.095 Td [(6.21)-550(psb)]TJ +ET +q +1 0 0 1 154.072 609.575 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 609.375 Td [(is)]TJ +ET +q +1 0 0 1 164.782 609.575 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 167.771 609.375 Td [(local)-250(\227)]TJ +0 g 0 G + [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(102)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -52.932 -12.094 Td [(6.22)-550(psb)]TJ +ET +q +1 0 0 1 154.072 597.481 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 597.281 Td [(local)]TJ +ET +q +1 0 0 1 178.301 597.481 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 181.29 597.281 Td [(index)-250(\227)]TJ +0 g 0 G + [-1101(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(103)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -66.451 -12.094 Td [(6.23)-550(psb)]TJ +ET +q +1 0 0 1 154.072 585.386 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 585.187 Td [(get)]TJ +ET +q +1 0 0 1 171.217 585.386 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.206 585.187 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ +0 g 0 G + [-827(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(104)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -59.367 -12.094 Td [(6.24)-550(psb)]TJ +ET +q +1 0 0 1 154.072 573.292 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 573.093 Td [(get)]TJ +ET +q +1 0 0 1 171.217 573.292 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.206 573.093 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(105)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -59.367 -12.094 Td [(6.25)-550(psb)]TJ +ET +q +1 0 0 1 154.072 561.198 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 560.999 Td [(sp)]TJ +ET +q +1 0 0 1 167.87 561.198 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 170.859 560.999 Td [(getr)18(ow)-250(\227)-250(Extract)-250(r)18(ow\050s\051)-250(fr)18(om)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G + [-671(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(106)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -56.02 -12.094 Td [(6.26)-550(psb)]TJ +ET +q +1 0 0 1 154.072 549.104 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 548.905 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ +0 g 0 G + [-251(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(108)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(6.27)-550(Sorting)-250(utilities)-250(\227)]TJ +0 g 0 G + [-1157(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(109)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -14.944 -22.149 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +0 g 0 G + [-17835(111)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -12.094 Td [(7.1)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 502.766 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 502.567 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-766(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(112)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(7.2)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 490.672 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 490.472 Td [(info)-264(\227)-264(Return)-264(information)-264(abou)1(t)-264(PSBLAS)-264(parallel)-264(envir)18(on-)]TJ -19.308 -11.955 Td [(ment)]TJ +0 g 0 G + [-930(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(113)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -12.094 Td [(7.3)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 466.622 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 466.423 Td [(exit)-250(\227)-250(Exit)-250(fr)18(om)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-823(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(114)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.4)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 454.528 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 454.329 Td [(get)]TJ +ET +q +1 0 0 1 171.217 454.528 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.206 454.329 Td [(mpi)]TJ +ET +q +1 0 0 1 192.487 454.528 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 195.476 454.329 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ +0 g 0 G + [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(115)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -80.637 -12.094 Td [(7.5)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 442.434 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 442.235 Td [(get)]TJ +ET +q +1 0 0 1 171.217 442.434 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.206 442.235 Td [(mpi)]TJ +ET +q +1 0 0 1 192.487 442.434 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 195.476 442.235 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ +0 g 0 G + [-498(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(116)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -80.637 -12.094 Td [(7.6)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 430.34 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 430.141 Td [(wtime)-250(\227)-250(W)92(all)-250(clock)-250(timing)]TJ +0 g 0 G + [-499(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(117)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(7.7)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 418.246 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 418.046 Td [(barrier)-250(\227)-250(Sinchr)18(onization)-250(point)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-903(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(118)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.8)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 406.151 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 405.952 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ +0 g 0 G + [-946(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(119)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.9)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 394.057 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 393.858 Td [(bcast)-250(\227)-250(Br)18(oadcast)-250(data)]TJ +0 g 0 G + [-739(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(120)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.10)-550(psb)]TJ +ET +q +1 0 0 1 154.072 381.963 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 381.764 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ +0 g 0 G + [-998(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(122)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.11)-550(psb)]TJ +ET +q +1 0 0 1 154.072 369.869 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 369.67 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ +0 g 0 G + [-610(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(124)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(7.12)-550(psb)]TJ +ET +q +1 0 0 1 154.072 357.775 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 357.575 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ +0 g 0 G + [-896(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(126)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.13)-550(psb)]TJ +ET +q +1 0 0 1 154.072 345.681 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 345.481 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +0 g 0 G + [-700(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(128)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.14)-550(psb)]TJ +ET +q +1 0 0 1 154.072 333.586 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 333.387 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ +0 g 0 G + [-777(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(130)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.15)-550(psb)]TJ +ET +q +1 0 0 1 154.072 321.492 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 321.293 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(r)18(eduction)]TJ +0 g 0 G + [-710(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(132)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.16)-550(psb)]TJ +ET +q +1 0 0 1 154.072 309.398 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 309.199 Td [(snd)-250(\227)-250(Send)-250(data)]TJ +0 g 0 G + [-511(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(134)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.17)-550(psb)]TJ +ET +q +1 0 0 1 154.072 297.304 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 297.105 Td [(r)18(cv)-250(\227)-250(Receive)-250(data)]TJ +0 g 0 G + [-284(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(135)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -57.166 -22.15 Td [(8)-1000(Error)-250(handling)]TJ +0 g 0 G + [-24750(136)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -12.094 Td [(8.1)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 263.06 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 262.861 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(err)18(or)-250(code)-250(onto)-250(the)-250(err)18(or)-250(stack)]TJ +0 g 0 G + [-734(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(138)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(8.2)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 250.966 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 250.767 Td [(err)18(or)-250(\227)-250(Prints)-250(the)-250(err)18(or)-250(stack)-250(content)-250(and)-250(aborts)-250(execution)]TJ +0 g 0 G +0 g 0 G + [-1381(139)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(8.3)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 238.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 238.673 Td [(set)]TJ +ET +q +1 0 0 1 169.902 238.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 172.891 238.673 Td [(errverbosity)-250(\227)-250(Sets)-250(the)-250(verbosity)-250(of)-250(err)18(or)-250(messages)]TJ +0 g 0 G + [-253(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(140)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -58.052 -12.095 Td [(8.4)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 226.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 226.578 Td [(set)]TJ +ET +q +1 0 0 1 169.902 226.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 172.891 226.578 Td [(erraction)-232(\227)-231(Set)-232(the)-231(type)-232(of)-231(action)-232(to)-232(b)1(e)-232(taken)-232(upon)-231(err)18(or)]TJ -35.138 -11.955 Td [(condition)]TJ +0 g 0 G + [-481(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(141)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -37.858 -22.149 Td [(9)-1000(Utilities)]TJ +0 g 0 G + [-27780(142)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -12.095 Td [(9.1)-1450(hb)]TJ +ET +q +1 0 0 1 153.644 180.579 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 156.633 180.38 Td [(r)18(ead)-400(\227)-400(Read)-400(a)-400(sparse)-400(matrix)-400(fr)18(om)-400(a)-400(\002le)-400(in)-400(the)-400(Harwell\226)]TJ -18.88 -11.956 Td [(Boeing)-250(format)]TJ +0 g 0 G + [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(143)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -12.094 Td [(9.2)-1050(hb)]TJ +ET +q +1 0 0 1 149.659 156.529 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 152.647 156.33 Td [(write)-226(\227)-226(W)74(rite)-226(a)-226(sparse)-226(matrix)-226(to)-226(a)-225(\002le)-226(in)-226(the)-226(Harwell\226Boeing)]TJ -14.894 -11.955 Td [(format)]TJ +0 g 0 G + [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(144)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -12.094 Td [(9.3)-1050(mm)]TJ +ET +q +1 0 0 1 155.945 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 158.934 132.281 Td [(mat)]TJ +ET +q +1 0 0 1 176.558 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 179.546 132.281 Td [(r)18(ead)-265(\227)-265(Read)-265(a)-265(sparse)-265(matrix)-265(fr)18(om)-265(a)-265(\002le)-265(in)-265(the)-265(Matrix-)]TJ -41.793 -11.955 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(145)]TJ +0 g 0 G +0 g 0 G + 129.649 -29.888 Td [(iii)]TJ +0 g 0 G +ET + +endstream +endobj +862 0 obj << -/D [636 0 R /XYZ 98.895 753.953 null] +/Length 13187 >> -% 639 0 obj +stream +0 g 0 G +0 g 0 G +0 0 1 rg 0 0 1 RG +BT +/F62 9.9626 Tf 165.649 706.129 Td [(9.4)-1050(mm)]TJ +ET +q +1 0 0 1 206.755 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 209.743 706.129 Td [(array)]TJ +ET +q +1 0 0 1 233.713 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 236.702 706.129 Td [(r)18(ead)-281(\227)-281(Read)-281(a)-281(dense)-281(array)-281(fr)18(om)-281(a)-281(\002le)-281(in)-281(the)-281(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(146)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -11.955 Td [(9.5)-1050(mm)]TJ +ET +q +1 0 0 1 206.755 682.418 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 209.743 682.219 Td [(mat)]TJ +ET +q +1 0 0 1 227.367 682.418 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 230.356 682.219 Td [(write)-333(\227)-333(W)74(rite)-334(a)-333(sparse)-333(matrix)-333(to)-333(a)-334(\002le)-333(in)-333(the)-333(Matrix-)]TJ -41.793 -11.956 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(147)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -11.955 Td [(9.6)-1050(mm)]TJ +ET +q +1 0 0 1 206.755 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 209.743 658.308 Td [(array)]TJ +ET +q +1 0 0 1 233.713 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 236.702 658.308 Td [(write)-234(\227)-234(W)74(rite)-234(a)-234(dense)-234(array)-234(fr)18(om)-234(a)-234(\002le)-234(in)-234(the)-234(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(148)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -37.858 -21.918 Td [(10)-500(Preconditioner)-250(routines)]TJ +0 g 0 G + [-20696(150)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -11.955 Td [(10.1)-550(init)-250(\227)-250(Initialize)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + [-772(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(151)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.2)-550(build)-250(\227)-250(Builds)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + [-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(152)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.3)-550(apply)-250(\227)-250(Pr)18(econditioner)-250(application)-250(r)18(outine)]TJ +0 g 0 G + [-421(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(154)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.4)-550(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(curr)18(ent)-250(pr)18(econditioner)]TJ +0 g 0 G + [-350(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(155)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.956 Td [(10.5)-550(clone)-250(\227)-250(clone)-250(curr)18(ent)-250(pr)18(econditioner)]TJ +0 g 0 G + [-260(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(156)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.6)-550(fr)18(ee)-250(\227)-250(Fr)18(ee)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + [-341(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(157)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -14.944 -21.918 Td [(11)-500(Iterative)-250(Methods)]TJ +0 g 0 G + [-23362(158)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -11.955 Td [(11.1)-550(psb)]TJ +ET +q +1 0 0 1 204.881 519.031 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 207.87 518.831 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ +0 g 0 G + [-716(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(159)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -57.165 -21.918 Td [(12)-500(Extensions)]TJ +0 g 0 G + [-26557(162)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -11.955 Td [(12.1)-550(Using)-250(the)-250(extensions)]TJ +0 g 0 G + [-783(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(162)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(12.2)-550(Extensions')-250(Data)-250(Str)8(uctur)18(es)]TJ +0 g 0 G + [-797(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(163)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(12.3)-550(CPU-class)-250(extensions)]TJ +0 g 0 G + [-544(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(163)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(12.4)-550(CUDA-class)-250(extensions)]TJ +0 g 0 G + [-346(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(170)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -14.944 -21.918 Td [(13)-500(CUDA)-250(Environment)-250(Routines)]TJ +0 g 0 G + [-17779(171)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 415.419 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 415.22 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 415.419 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 415.22 Td [(init)]TJ +0 g 0 G + [-304(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(171)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 403.464 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 403.265 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 403.464 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 403.265 Td [(exit)]TJ +0 g 0 G + [-932(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(171)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.956 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 391.509 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 391.309 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 391.509 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 391.309 Td [(DeviceSync)]TJ +0 g 0 G + [-405(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 379.554 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 379.354 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 379.554 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 379.354 Td [(getDeviceCount)]TJ +0 g 0 G + [-635(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 367.598 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 367.399 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 367.598 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 367.399 Td [(getDevice)]TJ +0 g 0 G + [-401(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 355.643 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 355.444 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 355.643 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 355.444 Td [(setDevice)]TJ +0 g 0 G + [-533(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 343.688 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 343.489 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 343.688 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 343.489 Td [(DeviceHasUV)111(A)]TJ +0 g 0 G + [-839(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 331.733 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 331.534 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 331.733 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 331.534 Td [(W)92(arpSize)]TJ +0 g 0 G + [-595(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.956 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 319.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 319.578 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 319.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 319.578 Td [(MultiPr)18(ocessors)]TJ +0 g 0 G + [-674(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 307.823 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 307.623 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 307.823 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 307.623 Td [(MaxThr)18(eadsPerMP)]TJ +0 g 0 G + [-718(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 295.867 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 295.668 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 295.867 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 295.668 Td [(MaxRegisterPerBlock)]TJ +0 g 0 G + [-538(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(173)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 283.912 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 283.713 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 283.912 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 283.713 Td [(MemoryClockRate)]TJ +0 g 0 G + [-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(173)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 271.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 271.758 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 271.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 271.758 Td [(MemoryBusW)55(idth)]TJ +0 g 0 G + [-346(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(173)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 260.002 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 259.803 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 260.002 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 259.803 Td [(MemoryPeakBandwidth)]TJ +0 g 0 G + [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(173)]TJ +0 g 0 G +0 g 0 G + 108.254 -169.365 Td [(iv)]TJ +0 g 0 G +ET + +endstream +endobj +777 0 obj << -/D [636 0 R /XYZ 99.895 723.975 null] +/Type /ObjStm +/N 100 +/First 926 +/Length 16374 >> -% 635 0 obj +stream +738 0 739 152 740 304 741 456 742 608 743 760 744 912 745 1064 746 1216 747 1367 +748 1520 749 1673 750 1826 751 1979 752 2132 753 2285 754 2438 755 2585 756 2735 757 2887 +758 3039 759 3191 760 3338 761 3490 762 3642 763 3794 764 3946 765 4098 766 4250 767 4402 +768 4554 769 4706 770 4859 771 5010 776 5161 773 5218 821 5298 772 5800 778 5953 779 6105 +780 6258 781 6409 782 6562 783 6715 784 6868 785 7020 786 7172 787 7325 788 7478 789 7631 +790 7784 791 7936 792 8089 793 8235 794 8387 824 8539 795 8690 796 8842 797 8994 798 9146 +799 9298 800 9448 801 9600 802 9752 803 9905 804 10058 805 10209 806 10361 807 10514 808 10667 +809 10820 810 10973 811 11119 812 11271 813 11423 814 11575 825 11727 815 11878 816 12024 826 12176 +817 12327 827 12479 818 12630 828 12782 823 12931 820 12987 861 13067 819 13473 864 13625 829 13777 +865 13929 830 14081 866 14233 831 14385 832 14530 833 14683 834 14836 835 14989 836 15142 837 15295 +% 738 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 535.241 380.451 547.301] +/A << /S /GoTo /D (subsection.4.1) >> >> -% 694 0 obj +% 739 0 obj << -/Type /Page -/Contents 695 0 R -/Resources 693 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 634 0 R 646 0 R 647 0 R 648 0 R 649 0 R 650 0 R 651 0 R 652 0 R 653 0 R 654 0 R 655 0 R 656 0 R 657 0 R 658 0 R 659 0 R 660 0 R 661 0 R 662 0 R 663 0 R 664 0 R 665 0 R 666 0 R 667 0 R 668 0 R 669 0 R 670 0 R 671 0 R 672 0 R 673 0 R 674 0 R 675 0 R 676 0 R 677 0 R 678 0 R 679 0 R 680 0 R 681 0 R 682 0 R 683 0 R 684 0 R 685 0 R 686 0 R 687 0 R 688 0 R 689 0 R 690 0 R 691 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 523.162 302.763 535.221] +/A << /S /GoTo /D (subsection.4.2) >> >> -% 634 0 obj +% 740 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 702.323 437.338 714.383] -/A << /S /GoTo /D (subsubsection.3.2.17) >> +/Rect [164.653 511.082 362.977 523.142] +/A << /S /GoTo /D (subsection.4.3) >> >> -% 646 0 obj +% 741 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 690.243 348.332 702.303] -/A << /S /GoTo /D (subsubsection.3.2.18) >> +/Rect [164.653 499.002 354.758 511.062] +/A << /S /GoTo /D (subsection.4.4) >> >> -% 647 0 obj +% 742 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 680.814 300.871 690.223] -/A << /S /GoTo /D (subsubsection.3.2.19) >> +/Rect [164.653 486.923 379.844 498.982] +/A << /S /GoTo /D (subsection.4.5) >> >> -% 648 0 obj +% 743 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 668.734 313.682 677.95] -/A << /S /GoTo /D (subsection.3.3) >> +/Rect [164.653 474.843 329.154 486.903] +/A << /S /GoTo /D (subsection.4.6) >> >> -% 649 0 obj +% 744 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 656.654 290.829 666.064] -/A << /S /GoTo /D (subsubsection.3.3.1) >> +/Rect [164.653 462.763 394.738 474.823] +/A << /S /GoTo /D (subsection.4.7) >> >> -% 650 0 obj +% 745 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 641.925 446.194 653.984] -/A << /S /GoTo /D (subsubsection.3.3.2) >> +/Rect [164.653 450.684 329.154 462.743] +/A << /S /GoTo /D (subsection.4.8) >> >> -% 651 0 obj +% 746 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 629.845 479.97 641.905] -/A << /S /GoTo /D (subsubsection.3.3.3) >> +/Rect [164.653 438.604 394.24 450.663] +/A << /S /GoTo /D (subsection.4.9) >> >> -% 652 0 obj +% 747 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 620.415 358.813 629.825] -/A << /S /GoTo /D (subsubsection.3.3.4) >> +/Rect [164.653 426.524 362.499 438.584] +/A << /S /GoTo /D (subsection.4.10) >> >> -% 653 0 obj +% 748 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 605.686 415.509 617.745] -/A << /S /GoTo /D (subsubsection.3.3.5) >> +/Rect [164.653 414.444 387.276 426.504] +/A << /S /GoTo /D (subsection.4.11) >> >> -% 654 0 obj +% 749 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 593.606 348.332 605.666] -/A << /S /GoTo /D (subsubsection.3.3.6) >> +/Rect [164.653 402.365 425.761 414.424] +/A << /S /GoTo /D (subsection.4.12) >> >> -% 655 0 obj +% 750 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 584.176 318.663 593.586] -/A << /S /GoTo /D (subsection.3.4) >> +/Rect [164.653 390.285 353.991 402.345] +/A << /S /GoTo /D (subsection.4.13) >> >> -% 656 0 obj +% 751 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 569.446 277.409 581.506] -/A << /S /GoTo /D (subsection.3.5) >> +/Rect [164.653 378.205 331.346 390.265] +/A << /S /GoTo /D (subsection.4.14) >> >> -% 657 0 obj +% 752 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 547.56 275.386 559.281] -/A << /S /GoTo /D (section.4) >> +/Rect [164.653 366.126 333.538 378.185] +/A << /S /GoTo /D (subsection.4.15) >> >> -% 658 0 obj +% 753 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 535.241 380.451 547.301] -/A << /S /GoTo /D (subsection.4.1) >> +/Rect [164.653 354.046 337.602 366.106] +/A << /S /GoTo /D (subsection.4.16) >> >> -% 659 0 obj +% 754 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 523.162 302.763 535.221] -/A << /S /GoTo /D (subsection.4.2) >> +/Rect [149.709 334.551 280.368 343.701] +/A << /S /GoTo /D (section.5) >> >> -% 660 0 obj +% 755 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 511.082 362.977 523.142] -/A << /S /GoTo /D (subsection.4.3) >> +/Rect [164.653 319.841 362.031 331.9] +/A << /S /GoTo /D (subsection.5.1) >> >> -% 661 0 obj +% 756 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 499.002 354.758 511.062] -/A << /S /GoTo /D (subsection.4.4) >> +/Rect [164.653 307.761 313.065 319.821] +/A << /S /GoTo /D (subsection.5.2) >> >> -% 662 0 obj +% 757 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 486.923 379.844 498.982] -/A << /S /GoTo /D (subsection.4.5) >> +/Rect [164.653 295.681 376.127 307.741] +/A << /S /GoTo /D (subsection.5.3) >> >> -% 663 0 obj +% 758 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 474.843 329.154 486.903] -/A << /S /GoTo /D (subsection.4.6) >> +/Rect [164.653 283.602 376.187 295.661] +/A << /S /GoTo /D (subsection.5.4) >> >> -% 664 0 obj +% 759 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 462.763 394.738 474.823] -/A << /S /GoTo /D (subsection.4.7) >> +/Rect [149.709 261.636 289.504 273.257] +/A << /S /GoTo /D (section.6) >> >> -% 665 0 obj +% 760 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 450.684 329.154 462.743] -/A << /S /GoTo /D (subsection.4.8) >> +/Rect [164.653 249.397 412.092 261.456] +/A << /S /GoTo /D (subsection.6.1) >> >> -% 666 0 obj +% 761 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 438.604 394.24 450.663] -/A << /S /GoTo /D (subsection.4.9) >> +/Rect [164.653 237.317 428.052 249.377] +/A << /S /GoTo /D (subsection.6.2) >> >> -% 667 0 obj +% 762 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 426.524 362.499 438.584] -/A << /S /GoTo /D (subsection.4.10) >> +/Rect [164.653 225.237 445.915 237.297] +/A << /S /GoTo /D (subsection.6.3) >> >> -% 668 0 obj +% 763 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 414.444 387.276 426.504] -/A << /S /GoTo /D (subsection.4.11) >> +/Rect [164.653 213.158 407.011 225.217] +/A << /S /GoTo /D (subsection.6.4) >> >> -% 669 0 obj +% 764 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 402.365 425.761 414.424] -/A << /S /GoTo /D (subsection.4.12) >> +/Rect [164.653 201.078 400.356 213.138] +/A << /S /GoTo /D (subsection.6.5) >> >> -% 670 0 obj +% 765 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 390.285 353.991 402.345] -/A << /S /GoTo /D (subsection.4.13) >> +/Rect [164.653 188.998 461.277 201.058] +/A << /S /GoTo /D (subsection.6.6) >> >> -% 671 0 obj +% 766 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 378.205 331.346 390.265] -/A << /S /GoTo /D (subsection.4.14) >> +/Rect [164.653 176.918 355.017 188.978] +/A << /S /GoTo /D (subsection.6.7) >> >> -% 672 0 obj +% 767 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 366.126 333.538 378.185] -/A << /S /GoTo /D (subsection.4.15) >> +/Rect [164.653 164.839 446.841 176.898] +/A << /S /GoTo /D (subsection.6.8) >> >> -% 673 0 obj +% 768 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 354.046 337.602 366.106] -/A << /S /GoTo /D (subsection.4.16) >> +/Rect [164.653 152.759 387.206 164.819] +/A << /S /GoTo /D (subsection.6.9) >> >> -% 674 0 obj +% 769 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 334.551 280.368 343.701] -/A << /S /GoTo /D (section.5) >> +/Rect [164.653 140.679 343.281 152.739] +/A << /S /GoTo /D (subsection.6.10) >> >> -% 675 0 obj +% 770 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 319.841 362.031 331.9] -/A << /S /GoTo /D (subsection.5.1) >> +/Rect [164.653 128.6 460.789 140.659] +/A << /S /GoTo /D (subsection.6.11) >> >> -% 676 0 obj +% 771 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 307.761 313.065 319.821] -/A << /S /GoTo /D (subsection.5.2) >> +/Rect [164.653 116.52 352.646 128.58] +/A << /S /GoTo /D (subsection.6.12) >> >> -% 677 0 obj +% 776 0 obj +<< +/D [774 0 R /XYZ 149.705 753.953 null] +>> +% 773 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 821 0 obj +<< +/Type /Page +/Contents 822 0 R +/Resources 820 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Annots [ 772 0 R 778 0 R 779 0 R 780 0 R 781 0 R 782 0 R 783 0 R 784 0 R 785 0 R 786 0 R 787 0 R 788 0 R 789 0 R 790 0 R 791 0 R 792 0 R 793 0 R 794 0 R 824 0 R 795 0 R 796 0 R 797 0 R 798 0 R 799 0 R 800 0 R 801 0 R 802 0 R 803 0 R 804 0 R 805 0 R 806 0 R 807 0 R 808 0 R 809 0 R 810 0 R 811 0 R 812 0 R 813 0 R 814 0 R 825 0 R 815 0 R 816 0 R 826 0 R 817 0 R 827 0 R 818 0 R 828 0 R ] +>> +% 772 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 295.681 376.127 307.741] -/A << /S /GoTo /D (subsection.5.3) >> +/Rect [113.843 702.323 330.917 714.383] +/A << /S /GoTo /D (subsection.6.13) >> >> -% 678 0 obj +% 778 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 283.602 376.187 295.661] -/A << /S /GoTo /D (subsection.5.4) >> +/Rect [113.843 690.229 308.85 702.289] +/A << /S /GoTo /D (subsection.6.14) >> >> -% 679 0 obj +% 779 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 261.636 289.504 273.257] -/A << /S /GoTo /D (section.6) >> +/Rect [113.843 678.135 290.101 690.194] +/A << /S /GoTo /D (subsection.6.15) >> >> -% 680 0 obj +% 780 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 249.397 412.092 261.456] -/A << /S /GoTo /D (subsection.6.1) >> +/Rect [113.843 666.041 386.806 678.1] +/A << /S /GoTo /D (subsection.6.16) >> >> -% 681 0 obj +% 781 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 237.317 428.052 249.377] -/A << /S /GoTo /D (subsection.6.2) >> +/Rect [113.843 653.946 368.116 666.006] +/A << /S /GoTo /D (subsection.6.17) >> >> -% 682 0 obj +% 782 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 225.237 445.915 237.297] -/A << /S /GoTo /D (subsection.6.3) >> +/Rect [113.843 641.852 370.219 653.912] +/A << /S /GoTo /D (subsection.6.18) >> >> -% 683 0 obj +% 783 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 213.158 407.011 225.217] -/A << /S /GoTo /D (subsection.6.4) >> +/Rect [113.843 629.758 214.116 641.818] +/A << /S /GoTo /D (subsection.6.19) >> >> -% 684 0 obj +% 784 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 201.078 400.356 213.138] -/A << /S /GoTo /D (subsection.6.5) >> +/Rect [113.843 617.664 231.69 629.724] +/A << /S /GoTo /D (subsection.6.20) >> >> -% 685 0 obj +% 785 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 188.998 461.277 201.058] -/A << /S /GoTo /D (subsection.6.6) >> +/Rect [113.843 605.57 204.353 617.629] +/A << /S /GoTo /D (subsection.6.21) >> >> -% 686 0 obj +% 786 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 176.918 355.017 188.978] -/A << /S /GoTo /D (subsection.6.7) >> +/Rect [113.843 593.476 221.927 605.535] +/A << /S /GoTo /D (subsection.6.22) >> >> -% 687 0 obj +% 787 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 164.839 446.841 176.898] -/A << /S /GoTo /D (subsection.6.8) >> +/Rect [113.843 581.381 379.076 593.441] +/A << /S /GoTo /D (subsection.6.23) >> >> -% 688 0 obj +% 788 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 152.759 387.206 164.819] -/A << /S /GoTo /D (subsection.6.9) >> +/Rect [113.843 569.287 359.768 581.347] +/A << /S /GoTo /D (subsection.6.24) >> >> -% 689 0 obj +% 789 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 140.679 343.281 152.739] -/A << /S /GoTo /D (subsection.6.10) >> +/Rect [113.843 557.193 373.158 569.253] +/A << /S /GoTo /D (subsection.6.25) >> >> -% 690 0 obj +% 790 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 128.6 460.789 140.659] -/A << /S /GoTo /D (subsection.6.11) >> +/Rect [113.843 545.099 287.68 557.158] +/A << /S /GoTo /D (subsection.6.26) >> >> -% 691 0 obj +% 791 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 116.52 352.646 128.58] -/A << /S /GoTo /D (subsection.6.12) >> ->> -% 696 0 obj -<< -/D [694 0 R /XYZ 149.705 753.953 null] ->> -% 693 0 obj -<< -/Font << /F54 586 0 R /F51 584 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 740 0 obj -<< -/Type /Page -/Contents 741 0 R -/Resources 739 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 692 0 R 697 0 R 698 0 R 699 0 R 700 0 R 701 0 R 702 0 R 703 0 R 704 0 R 705 0 R 706 0 R 707 0 R 708 0 R 709 0 R 710 0 R 711 0 R 712 0 R 713 0 R 743 0 R 714 0 R 715 0 R 716 0 R 717 0 R 718 0 R 719 0 R 720 0 R 721 0 R 722 0 R 723 0 R 724 0 R 725 0 R 726 0 R 727 0 R 728 0 R 729 0 R 730 0 R 731 0 R 732 0 R 733 0 R 744 0 R 734 0 R 735 0 R 745 0 R 736 0 R 746 0 R 737 0 R 747 0 R ] +/Rect [113.843 533.005 221.369 545.064] +/A << /S /GoTo /D (subsection.6.27) >> >> -% 692 0 obj +% 792 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 702.323 330.917 714.383] -/A << /S /GoTo /D (subsection.6.13) >> +/Rect [98.899 513.485 251.974 522.815] +/A << /S /GoTo /D (section.7) >> >> -% 697 0 obj +% 793 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 690.229 308.85 702.289] -/A << /S /GoTo /D (subsection.6.14) >> +/Rect [113.843 498.761 364.739 510.821] +/A << /S /GoTo /D (subsection.7.1) >> >> -% 698 0 obj +% 794 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 678.135 290.101 690.194] -/A << /S /GoTo /D (subsection.6.15) >> ->> - -endstream -endobj -762 0 obj -<< -/Length 4437 +/Rect [113.843 486.667 444.603 498.726] +/A << /S /GoTo /D (subsection.7.2) >> >> -stream -0 g 0 G -0 g 0 G -0 0 1 rg 0 0 1 RG -BT -/F54 9.9626 Tf 165.649 706.129 Td [(9.4)-1050(mm)]TJ -ET -q -1 0 0 1 206.755 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 209.743 706.129 Td [(array)]TJ -ET -q -1 0 0 1 233.713 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 236.702 706.129 Td [(r)18(ead)-281(\227)-281(Read)-281(a)-281(dense)-281(array)-281(fr)18(om)-281(a)-281(\002le)-281(in)-281(the)-281(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(146)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -11.955 Td [(9.5)-1050(mm)]TJ -ET -q -1 0 0 1 206.755 682.418 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 209.743 682.219 Td [(mat)]TJ -ET -q -1 0 0 1 227.367 682.418 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 230.356 682.219 Td [(write)-333(\227)-333(W)74(rite)-334(a)-333(sparse)-333(matrix)-333(to)-333(a)-334(\002le)-333(in)-333(the)-333(Matrix-)]TJ -41.793 -11.956 Td [(Market)-250(format)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(147)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -11.955 Td [(9.6)-1050(mm)]TJ -ET -q -1 0 0 1 206.755 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 209.743 658.308 Td [(array)]TJ -ET -q -1 0 0 1 233.713 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 236.702 658.308 Td [(write)-234(\227)-234(W)74(rite)-234(a)-234(dense)-234(array)-234(fr)18(om)-234(a)-234(\002le)-234(in)-234(the)-234(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(148)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -37.858 -21.918 Td [(10)-500(Preconditioner)-250(routines)]TJ -0 g 0 G - [-20696(150)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -11.955 Td [(10.1)-550(init)-250(\227)-250(Initialize)-250(a)-250(pr)18(econditioner)]TJ -0 g 0 G - [-772(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(151)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.2)-550(build)-250(\227)-250(Builds)-250(a)-250(pr)18(econditioner)]TJ -0 g 0 G - [-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(152)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.3)-550(apply)-250(\227)-250(Pr)18(econditioner)-250(application)-250(r)18(outine)]TJ -0 g 0 G - [-421(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(154)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.4)-550(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(curr)18(ent)-250(pr)18(econditioner)]TJ -0 g 0 G - [-350(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(155)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.956 Td [(10.5)-550(clone)-250(\227)-250(clone)-250(curr)18(ent)-250(pr)18(econditioner)]TJ -0 g 0 G - [-260(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(156)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.6)-550(fr)18(ee)-250(\227)-250(Fr)18(ee)-250(a)-250(pr)18(econditioner)]TJ -0 g 0 G - [-341(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(157)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -21.918 Td [(11)-500(Iterative)-250(Methods)]TJ -0 g 0 G - [-23362(158)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -11.955 Td [(11.1)-550(psb)]TJ -ET -q -1 0 0 1 204.881 519.031 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 207.87 518.831 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ -0 g 0 G - [-716(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(159)]TJ -0 g 0 G -0 g 0 G - 110.426 -428.393 Td [(iv)]TJ -0 g 0 G -ET - -endstream -endobj -779 0 obj +% 824 0 obj << -/Length 8044 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 477.362 161.365 486.467] +/A << /S /GoTo /D (subsection.7.2) >> >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 99.895 705.784 Td [(1)-1000(Introduction)]TJ/F54 9.9626 Tf 0 -22.913 Td [(The)-272(PSBLAS)-271(library)111(,)-277(developed)-272(with)-272(t)1(he)-272(aim)-272(to)-271(facilitate)-272(the)-272(parallelization)-271(of)]TJ 0 -11.955 Td [(computationally)-348(intensive)-347(scienti\002c)-348(applications,)-372(is)-347(designed)-348(to)-348(addr)18(ess)-347(par)18(-)]TJ 0 -11.955 Td [(allel)-282(implementation)-283(of)-282(iterative)-282(solvers)-283(for)-282(sparse)-282(linear)-283(systems)-282(thr)18(ough)-282(the)]TJ 0 -11.955 Td [(distributed)-232(memory)-232(paradigm.)-304(It)-232(includes)-233(r)18(outines)-232(for)-232(multiplying)-232(sparse)-232(ma-)]TJ 0 -11.955 Td [(trices)-211(by)-211(dense)-211(matrices,)-219(solving)-211(block)-211(diagonal)-211(systems)-211(with)-211(triangular)-211(diago-)]TJ 0 -11.956 Td [(nal)-229(entries,)-233(pr)18(epr)18(ocessing)-228(sparse)-229(matrices,)-233(and)-228(contains)-229(additional)-229(r)18(outines)-228(for)]TJ 0 -11.955 Td [(dense)-292(matrix)-292(operations.)-436(The)-292(curr)18(ent)-292(implementation)-292(of)-292(PSBLAS)-292(addr)18(esses)-292(a)]TJ 0 -11.955 Td [(distributed)-250(memory)-250(execution)-250(model)-250(operating)-250(with)-250(message)-250(passing.)]TJ 14.944 -12.064 Td [(The)-267(PSBLAS)-267(library)-268(version)-267(3)-267(is)-267(implemented)-267(in)-267(the)-268(Fortran)-267(2003)-267([)]TJ -1 0 0 rg 1 0 0 RG - [(17)]TJ -0 g 0 G - [(])-267(pr)18(o-)]TJ -14.944 -11.955 Td [(gramming)-278(language,)-284(with)-277(r)18(euse)-278(and/or)-278(adaptation)-277(of)-278(existing)-277(Fortran)-278(77)-277(and)]TJ 0 -11.955 Td [(Fortran)-250(95)-250(softwar)18(e,)-250(plus)-250(a)-250(handful)-250(of)-250(C)-250(r)18(outines.)]TJ 14.944 -12.064 Td [(The)-391(use)-392(of)-391(Fortran)-392(2003)-391(of)18(fers)-392(a)-391(number)-391(of)-392(advantages)-391(over)-392(Fortran)-391(95,)]TJ -14.944 -11.955 Td [(mostly)-385(in)-385(the)-385(handling)-385(of)-385(r)18(equir)18(ements)-385(for)-385(evolution)-385(and)-385(adaptation)-385(of)-385(the)]TJ 0 -11.956 Td [(library)-431(to)-432(new)-431(computing)-432(ar)18(chitectur)18(es)-431(and)-432(integration)-431(of)-432(new)-431(algorithms.)]TJ 0 -11.955 Td [(For)-365(a)-365(detail)1(ed)-365(discussion)-365(of)-365(our)-364(design)-365(see)-365([)]TJ -1 0 0 rg 1 0 0 RG - [(11)]TJ -0 g 0 G - [(];)-422(other)-365(works)-364(discussing)-365(ad-)]TJ 0 -11.955 Td [(vanced)-213(pr)18(ogramming)-214(in)-213(Fortran)-213(2003)-213(include)-214([)]TJ -1 0 0 rg 1 0 0 RG - [(1)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-213(18)]TJ -0 g 0 G - [(];)-225(suf)18(\002cient)-214(support)-213(for)-213(For)18(-)]TJ 0 -11.955 Td [(tran)-315(2003)-314(is)-315(now)-314(available)-315(fr)18(om)-314(many)-315(compilers,)-331(including)-314(the)-315(GNU)-314(Fortran)]TJ 0 -11.955 Td [(compiler)-250(fr)18(om)-250(the)-250(Fr)18(ee)-250(Softwar)18(e)-250(Foundation)-250(\050as)-250(of)-250(version)-250(4.8\051.)]TJ 14.944 -12.064 Td [(Pr)18(evious)-311(appr)18(oaches)-312(have)-311(been)-311(based)-311(on)-312(mixing)-311(Fortran)-311(95,)-327(with)-311(its)-311(sup-)]TJ -14.944 -11.955 Td [(port)-249(for)-249(object-based)-249(design,)-249(with)-249(other)-249(languages;)-249(these)-249(have)-249(been)-249(advocated)]TJ 0 -11.956 Td [(by)-346(a)-346(number)-346(of)-347(authors,)-370(e.g.)-346([)]TJ -1 0 0 rg 1 0 0 RG - [(16)]TJ -0 g 0 G - [(].)-598(Mor)18(eover)74(,)-371(the)-346(Fortran)-346(95)-346(facilities)-346(for)-346(dy-)]TJ 0 -11.955 Td [(namic)-411(memory)-410(management)-411(and)-410(interface)-411(overloading)-410(gr)18(eatly)-411(enhance)-410(the)]TJ 0 -11.955 Td [(usability)-397(of)-398(the)-397(PSBLAS)-398(subr)18(outines.)-752(In)-398(this)-397(way)111(,)-434(the)-398(library)-397(can)-398(take)-397(car)18(e)]TJ 0 -11.955 Td [(of)-267(r)8(untime)-266(memory)-267(r)18(equir)18(ements)-266(that)-267(ar)18(e)-266(quite)-267(dif)18(\002cult)-267(or)-266(even)-267(impossible)-266(to)]TJ 0 -11.955 Td [(pr)18(edict)-250(at)-250(implementation)-250(or)-250(compilation)-250(time.)]TJ 14.944 -12.064 Td [(The)-249(pr)18(esentation)-250(of)-249(the)-250(PSBLAS)-249(library)-249(follows)-250(the)-249(general)-249(str)8(uctur)18(e)-250(of)-249(the)]TJ -14.944 -11.955 Td [(pr)18(oposal)-207(for)-206(serial)-207(Sparse)-207(BLAS)-207([)]TJ -1 0 0 rg 1 0 0 RG - [(8)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-206(9)]TJ -0 g 0 G - [(],)-216(which)-206(in)-207(its)-207(turn)-206(is)-207(based)-207(on)-207(t)1(he)-207(pr)18(oposal)]TJ 0 -11.956 Td [(for)-250(BLAS)-250(on)-250(dense)-250(matrices)-250([)]TJ -1 0 0 rg 1 0 0 RG - [(15)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(5)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(6)]TJ -0 g 0 G - [(].)]TJ 14.944 -12.063 Td [(The)-297(applicability)-297(of)-298(sparse)-297(iterative)-297(solvers)-297(to)-297(many)-298(dif)18(fer)18(ent)-297(ar)18(eas)-297(causes)]TJ -14.944 -11.956 Td [(some)-190(terminology)-190(pr)18(oblems)-190(because)-190(the)-190(same)-190(concept)-190(may)-190(be)-190(denoted)-190(thr)18(ough)]TJ 0 -11.955 Td [(dif)18(fer)18(ent)-271(names)-271(depending)-272(on)-271(the)-271(application)-271(ar)18(ea.)-374(The)-271(PSBLAS)-271(featur)18(es)-271(pr)18(e-)]TJ 0 -11.955 Td [(sented)-332(in)-333(this)-332(document)-332(will)-333(be)-332(discussed)-332(r)18(eferring)-333(to)-332(a)-333(\002ni)1(te)-333(dif)18(fer)18(ence)-332(dis-)]TJ 0 -11.955 Td [(cr)18(etization)-284(of)-285(a)-284(Partial)-285(Dif)18(fer)18(ential)-284(Equation)-284(\050PDE\051.)-285(However)74(,)-293(the)-284(scope)-285(of)-284(the)]TJ 0 -11.955 Td [(library)-283(is)-283(wider)-283(than)-284(that:)-376(for)-283(example,)-291(it)-283(can)-284(be)-283(applied)-283(to)-283(\002nite)-283(element)-283(dis-)]TJ 0 -11.956 Td [(cr)18(etizations)-267(of)-267(PDEs,)-271(and)-267(even)-266(to)-267(dif)18(fer)18(ent)-267(classes)-267(of)-267(pr)18(oblems)-267(such)-266(as)-267(nonlin-)]TJ 0 -11.955 Td [(ear)-250(optimization,)-250(for)-250(example)-250(in)-250(optimal)-250(contr)18(ol)-250(pr)18(oblems.)]TJ 14.944 -12.064 Td [(The)-383(design)-383(of)-383(a)-383(solver)-383(for)-384(sparse)-383(linear)-383(systems)-383(is)-383(driven)-383(by)-383(many)-383(con-)]TJ -14.944 -11.955 Td [(\003icting)-271(objectives,)-277(such)-272(as)-271(limiting)-271(occupation)-272(of)-271(storage)-271(r)18(esour)18(ces,)-277(exploiting)]TJ 0 -11.955 Td [(r)18(egularities)-274(in)-274(the)-275(input)-274(data,)-280(exploiting)-274(har)18(dwar)18(e)-275(characteristi)1(cs)-275(of)-274(the)-274(paral-)]TJ 0 -11.955 Td [(lel)-350(platform.)-610(T)92(o)-350(achieve)-350(an)-350(optimal)-350(communication)-350(to)-350(computation)-350(ratio)-350(on)]TJ 0 -11.955 Td [(distributed)-379(memory)-378(machines)-379(it)-378(is)-379(essential)-379(to)-378(keep)-379(the)]TJ/F52 9.9626 Tf 256.501 0 Td [(data)-379(locality)]TJ/F54 9.9626 Tf 54.198 0 Td [(as)-379(high)]TJ -310.699 -11.956 Td [(as)-315(possible;)-346(this)-315(can)-314(be)-315(done)-315(thr)18(ough)-314(an)-315(appr)18(opriate)-314(data)-315(allocation)-314(strategy)111(.)]TJ 0 -11.955 Td [(The)-323(choice)-323(of)-324(the)-323(pr)18(econditioner)-323(is)-323(another)-323(very)-324(important)-323(factor)-323(that)-323(af)18(fects)]TJ 0 -11.955 Td [(ef)18(\002ciency)-300(of)-300(the)-300(im)1(plemented)-300(application.)-460(Optimal)-300(data)-299(distribution)-300(r)18(equir)18(e-)]TJ 0 -11.955 Td [(ments)-300(for)-299(a)-300(given)-300(pr)18(econditioner)-299(may)-300(con\003ict)-300(with)-300(distribution)-299(r)18(equir)18(ements)]TJ 0 -11.955 Td [(of)-356(the)-356(r)18(est)-356(of)-357(the)-356(solver)74(.)-628(Finding)-356(the)-357(o)1(ptimal)-357(trade-of)18(f)-356(may)-356(be)-356(very)-356(dif)18(\002cult)]TJ 0 -11.955 Td [(because)-292(it)-291(is)-292(application)-291(dependent.)-435(Possible)-292(solutions)-291(to)-292(these)-292(pr)18(oblems)-291(and)]TJ 0 -11.956 Td [(other)-342(important)-342(inputs)-342(to)-342(the)-342(development)-342(of)-341(the)-342(PSBLAS)-342(softwar)18(e)-342(package)]TJ -0 g 0 G - 169.365 -29.887 Td [(1)]TJ -0 g 0 G -ET - -endstream -endobj -798 0 obj +% 795 0 obj << -/Length 5269 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 462.617 364.172 474.677] +/A << /S /GoTo /D (subsection.7.3) >> >> -stream -0 g 0 G -0 g 0 G -BT -/F54 9.9626 Tf 150.705 706.129 Td [(have)-292(come)-291(fr)18(om)-292(an)-292(established)-291(experience)-292(in)-292(applying)-291(the)-292(PSBLAS)-291(solvers)-292(to)]TJ 0 -11.955 Td [(computational)-250(\003uid)-250(dynamics)-250(applications.)]TJ/F51 14.3462 Tf 0 -33.474 Td [(2)-1000(General)-250(overview)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-190(PSBLAS)-190(library)-190(is)-190(designed)-190(to)-190(handle)-190(the)-190(implementation)-190(of)-190(iterative)-190(solvers)]TJ 0 -11.955 Td [(for)-275(sparse)-275(linear)-275(systems)-275(on)-275(distributed)-275(memory)-274(parallel)-275(computers.)-385(The)-275(sys-)]TJ 0 -11.955 Td [(tem)-307(coef)18(\002cient)-308(matrix)]TJ/F52 9.9626 Tf 100.571 0 Td [(A)]TJ/F54 9.9626 Tf 10.381 0 Td [(must)-307(be)-308(squar)18(e;)-336(it)-308(may)-307(be)-308(r)18(eal)-307(or)-307(complex,)-322(nonsym-)]TJ -110.952 -11.955 Td [(metric,)-301(and)-291(its)-291(sparsity)-291(pattern)-291(needs)-291(not)-291(to)-291(be)-291(symmetric.)-433(The)-291(serial)-291(compu-)]TJ 0 -11.955 Td [(tation)-240(parts)-239(ar)18(e)-240(based)-240(on)-239(the)-240(serial)-240(sparse)-239(BLAS,)-240(so)-240(that)-239(any)-240(extension)-239(made)-240(to)]TJ 0 -11.956 Td [(the)-258(data)-258(str)8(uctur)18(es)-259(of)-258(the)-258(serial)-258(kernels)-258(is)-259(available)-258(to)-258(the)-258(parallel)-258(version.)-335(The)]TJ 0 -11.955 Td [(overall)-294(design)-294(and)-294(parallelization)-294(strategy)-294(have)-294(been)-294(in\003uenced)-294(by)-294(the)-294(str)8(uc-)]TJ 0 -11.955 Td [(tur)18(e)-306(of)-307(the)-306(ScaLAP)92(ACK)-306(parallel)-307(library)111(.)-479(The)-306(layer)18(ed)-306(str)8(uctur)18(e)-306(of)-307(the)-306(PSBLAS)]TJ 0 -11.955 Td [(library)-349(is)-349(shown)-348(in)-349(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-349(1)]TJ -0 g 0 G - [(;)-398(lower)-349(layers)-349(of)-349(the)-349(library)-349(in)1(dicate)-349(an)-349(encapsu-)]TJ 0 -11.955 Td [(lation)-314(r)18(elationship)-314(with)-313(upper)-314(layers.)-502(The)-314(ongoing)-314(discussion)-313(focuses)-314(on)-314(the)]TJ 0 -11.955 Td [(Fortran)-244(2003)-244(layer)-245(immediately)-244(below)-244(the)-244(application)-244(layer)74(.)-308(The)-245(serial)-244(parts)-244(of)]TJ 0 -11.956 Td [(the)-230(computation)-230(on)-230(each)-230(pr)18(ocess)-230(ar)18(e)-230(executed)-230(thr)18(ough)-230(calls)-230(to)-230(the)-230(serial)-230(sparse)]TJ 0 -11.955 Td [(BLAS)-307(subr)18(outines.)-482(In)-307(a)-307(similar)-308(way)111(,)-321(the)-307(inter)18(-pr)18(ocess)-308(message)-307(exchanges)-307(ar)18(e)]TJ 0 -11.955 Td [(encapsulated)-244(in)-243(an)-244(applicaiton)-244(layer)-243(that)-244(has)-244(been)-243(str)18(ongly)-244(inspir)18(ed)-244(by)-243(the)-244(Ba-)]TJ 0 -11.955 Td [(sic)-314(Linear)-313(Algebra)-314(Communication)-313(Subr)18(outines)-314(\050BLACS\051)-314(library)-313([)]TJ -1 0 0 rg 1 0 0 RG - [(7)]TJ -0 g 0 G - [(].)-501(Usually)]TJ 0 -11.955 Td [(ther)18(e)-315(is)-314(no)-315(need)-315(to)-314(deal)-315(dir)18(ectly)-314(with)-315(MPI;)-315(however)74(,)-330(in)-315(some)-315(cases,)-331(MPI)-314(r)18(ou-)]TJ 0 -11.955 Td [(tines)-219(ar)18(e)-219(used)-218(dir)18(ectly)-219(to)-219(impr)18(ove)-219(ef)18(\002ciency)111(.)-299(For)-219(further)-219(details)-219(on)-218(our)-219(commu-)]TJ 0 -11.956 Td [(nication)-250(layer)-250(see)-250(Sec.)]TJ -0 0 1 rg 0 0 1 RG - [-250(7)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -1 0 0 1 258.536 281.98 cm -q -.65 0 0 .65 0 0 cm -q -1 0 0 1 0 0 cm -/Im2 Do -Q -Q -0 g 0 G -1 0 0 1 -258.536 -281.98 cm -BT -/F54 9.9626 Tf 216.385 250.1 Td [(Figur)18(e)-250(1:)-310(PSBLAS)-250(library)-250(components)-250(hierar)18(chy)111(.)]TJ -0 g 0 G -0 g 0 G - -50.736 -22.178 Td [(The)-370(type)-369(of)-370(linear)-369(system)-370(matrices)-370(that)-369(we)-370(addr)18(ess)-369(typically)-370(arise)-370(in)-369(the)]TJ -14.944 -11.955 Td [(numerical)-260(solution)-261(of)-260(PDEs;)-266(in)-260(such)-260(a)-261(context,)-263(it)-260(is)-261(necessary)-260(to)-260(pay)-261(special)-260(at-)]TJ 0 -11.955 Td [(tention)-297(to)-298(the)-297(str)8(uctur)18(e)-298(of)-297(the)-298(pr)18(oblem)-297(fr)18(om)-298(which)-297(the)-298(application)-297(originates.)]TJ 0 -11.955 Td [(The)-277(nonzer)18(o)-276(pattern)-277(of)-277(a)-276(matrix)-277(arising)-277(fr)18(om)-276(the)-277(discr)18(etization)-276(of)-277(a)-277(PDE)-276(is)-277(in-)]TJ 0 -11.956 Td [(\003uenced)-232(by)-232(various)-231(factors,)-236(such)-232(as)-232(the)-231(shape)-232(of)-232(the)-232(domain,)-235(the)-232(discr)18(etization)]TJ 0 -11.955 Td [(strategy)111(,)-313(and)-301(the)-300(equation/unknown)-301(or)18(dering.)-461(The)-301(matrix)-301(it)1(self)-301(can)-301(be)-300(inter)18(-)]TJ 0 -11.955 Td [(pr)18(eted)-291(as)-291(the)-291(adjacency)-291(matrix)-292(of)-291(the)-291(graph)-291(associated)-291(with)-291(the)-291(discr)18(etization)]TJ 0 -11.955 Td [(mesh.)]TJ 14.944 -11.955 Td [(The)-308(distribution)-308(of)-308(the)-309(coef)18(\002cient)-308(matrix)-308(for)-308(the)-308(linear)-309(system)-308(is)-308(based)-308(on)]TJ -14.944 -11.955 Td [(the)-314(\223owner)-314(computes\224)-314(r)8(ule:)-438(the)-314(variable)-314(associated)-314(to)-314(each)-314(mesh)-314(point)-314(is)-314(as-)]TJ -0 g 0 G - 169.365 -29.888 Td [(2)]TJ -0 g 0 G -ET - -endstream -endobj -794 0 obj +% 796 0 obj << -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/psblas.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 800 0 R -/BBox [0 0 197 215] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 801 0 R ->>/Font << /R8 802 0 R>> +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 450.523 358.772 462.583] +/A << /S /GoTo /D (subsection.7.4) >> >> -/Length 898 -/Filter /FlateDecode +% 797 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 438.429 307.635 450.489] +/A << /S /GoTo /D (subsection.7.5) >> >> -stream -xœµVM7 ½ëWèÖ4€Y‘ú>&@[HMl ‡¢câu·;{þý’#QvvÝKÐÂÏ{#=RüÐð«u€ÖɯÿOóÓÇl÷g³°öã¯ýá´7_ ¶çþ7ìÛ // ¢Ãl7÷¦É E„ŒŽ,E(%’ÝÌ«õîô°íúq{:ï~Üüe0ƒsµØÀíæ³yõöý›µ¼ûyc>Ÿ(Ú¿M, -¯ ¼”ìá3›"dQžÙ7_Û³ËXm2‚0— -¼î(:HhU3vܪÅ“Aª f6è=A΃AŸÄ!9`³Ç -PwæAt_UOÏ¡OæO³æl8f¢ãPHÄY¥æ«‡åÕÌ+k(*«¦XÁI䓉uyQ¥ -Åá ¢ï¹Éø*Þcv ( ñÌÿôÚÇJÊBt«FpÍ©vvË×VþÕ‘” -„rÓ‘û×&@^ -éÿ2!)Õš\‘Œ÷¼ža®8Î7L¤ÂõŸÇ )]aôà¥`;¡vç ãp½ªba=WY³¨Hj.8‰µ2Rt%ùÁ` R=EÝ 4¢{4_póXÕFwt{Š[Íf.óÈv¾†Ê‘yÎÌÆGÞ§Áxq€Èúš‹ú‚c—++SJëgÅ¡Br5XUäÛFZYMv8™\øH”˜M!‚¢%rl9ª§¸Ù›Œbõh¾0ÝgÔ3©EÅ-:º‹¼i¾>9ÎßÑò…/aëUËv±Òu‹ß`ú®›Lk„¡ÀtÕ—ßö¼E÷õ¶Ž¿áæsù—¼0ȽýßI -~·œíÕÏãÎh§ÀP‰ó+‰I‰Ë`ä8Äwsâ~L¥¼–U9‰–2ó¢ß”ÇM®F‘ -ÁóÕÞ²þ-ÃÉu1 ƒéuÄóŠ -}¦öqëXË|Vb´A—Ó6QƒŠY¡8Õ†% -Ûæ“Ž=…åËE²A5) -í}óiV<\îrãDÝ âÖß7x¤U -«Ͻ'ƒÇ›ÇÇùaÚ>=|9Êh±Büuæ¹£$îËLËàq·–ÑÃÊŸ5×k^Þ½;>íN÷Ûi™ZŠ\V+9D£­8îNËLÓG™÷×»~0+¾’”àŠ'¢ˆ±íúmw>o÷;{·=ŸŽûEý—»a¥ÃѲîîÝâ8SË4Â%ÕÇ¥_¾œžNÛ#OαéƒùüÐ -endstream -endobj -804 0 obj +% 798 0 obj << -/Filter /FlateDecode -/Subtype /Type1C -/Length 2887 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 426.335 277.737 438.395] +/A << /S /GoTo /D (subsection.7.6) >> >> -stream -xœe–iXSWÇo É=Z¤-iʦ µ­m]‘Ö*ÕVYÔZQU (! KBI ,A8,!, Ö„M‚T¥*(¶ÖQ¬ ÖœÇn:Jg9—¹|˜‹vž~˜/÷¹÷žåyß÷ÿ?¿÷Ð0‡%Fs ¤ðÓ׋SbE‹ßë±b ±’IÑü?æw0VbôåБšV82ÑÔ«Èò2*x£Óhr}›¿X’•&HHÌð|÷ppø{k×®ûóÏ&ÏãYÿñ ৠDž«©_(–¤ðEÛ<ý©ÙB¡à„g‚0K’˜îÇ[\+ä'{î‰Xæù®ÿ{žÞ^^›ÖSÍ)Ç¥éžÏ#÷< öñ ô æ'H…±iÿ?‚a˜›¯ÈO,IÛž±76ðÄ~~|P‚ Dèéåý>†­ÂbobAØ!,{ Åck0?l=€mÀvc{°MاØgØØ~Œ†½Š±°×0wªx˜ÆÇîѲiW—ì]ÒA~Êá‡}¿2ü&&ÎŒg^Æ·g YúêÒK-=½ ,Ó'æã¡E oXhóafö±rMMⲌ¸îšÉ$ãrö‘«s²£‹Ýä(ÊŒGgm†ò:‘{-e sp­A¢O…*7˜¦È+rT…/¨Q áȆic8'¡‰p¿DûvÌÐë‰UìÊòšÒZlÕÊ(.YŠÃà\UDP¢DQ¡4@;@§ðÏGš =œkîå’|ª‘‹“âLx”.ÇÏ”…ÿ»ì{T¨Ø·‡ó/(‹Êä†f»9Ío„fy1Û'7;#ÆÒÎ}„pVRÃü6Ë$Çg´†\è ‚pè«Vo= XIrôW|N5õ²w[Fàçp8k@Ø‘b•>«36Õ´6–°­ÙÔ7莼'H_Žù˜ý7xVkWRËÇÓ­'ÝáaY܉Œä¬ãù{!ˆRUwsÑ e“¾ÚÄf²„ŸŒöþ8Œ·žKUEc#¼m´ŽPÝtTNÄ°É×½Ö’ÒãçÕÈ9?ýâ —OÈ׸j1{æÊZr%É8¶gg\|뀌+˾§À“ÑkSj/2¥Aј…R4Ý„ÓUç»3HúÈ…¥­&V²«Kõ°‚SzÍîB"Ãssƒ -kµiqÖ±F|g©º~ Ð&¼7©U3Zör@o£Wv=^ÂK;ÌãÞÇ ËËÕ¡ -7´f'»ó²ýìs;H@Ò£vú ¶wrœÐ#Jã-´{3t=áή+¯.5@ÐVÍã’å8Œ,(Ø—CiW`Â÷”+Lp )Þ}æ¼µ²Z«©åÔ«j ´šl=2‹ V”Åý ×–…IÔAr7'â hF“ƒ¨ºŸRôïs.¬D"e³‘2ÏÙ–¥Üù6JcÚt•VZÁD جlØÚ˜'Ñ›Œ“u_Žß)4ªà.°° ‡[sÕ;´”U{Ìø'ª:x z”†€á¦iα,êfç ×Ï-Ö:ôtƒ]„V0r™ÅÅy%%EЭjÊru€ÕP%•V¤zlŽðé>1ÍHêÍlH‡B7^²8B 46Ë9²¶¼õ8È&½ŒL}MEUV`5¬+n¡Äh(´Y‹=î5që|zÏA;—ľ×+Û Õm Ëvv¼SàÝBÕ¸K3F8v;‘–Ü?ãšEUD›us5Ìš’òBNA^a^†]X³¹…Zwx²´¸¼°n~A^dn錹r®¿u¬‡£®•¥)4R说~ÉE§qÖì3梌æy3 ástêÓØp§:gËIªTç͸OyvüÔ©ù¦Î8SÌäy9>ûâ,9Ašñà µÕùò÷è݇.¬âÀ×l…*G«„ £ n˜‹Âq8¦íP¶Jí1æ ÖûFfX2[Z›,Í%º=·¸º¤ -êµ»yàœUt˜s'7ìWäÇð¥™*L¬ˆÀaÞäð`ÓØ$‡e ×5Ê=NÁ&Co?5Dnüž …ÚÕ7 'a«Øøx‡wºQ¬öû ]£\K¾L™vÂÜÕßT§5rêS šZÌ-v^ûÁýa’£©ÜT^~BÉ6à“üyƒ ýrsüño4QFÌ­…wºœðhh@çaÿÛÊF4 -´ÇÐ0»½¸¡¤NË5gZ¯vÛ'á×pHÕ/é81üm=…Œ&«Ò7¿FýŸsaK¾fGÁ‹ân/W.š%$dV½ÞŠ<®ykMP$‰gªJË2ž3W4ŽÞ· —6”tZg£Ùn¡ìûVH"\Ù§?Á3¡²G»‡þšÜÝ‹ºhLø^]~Í"KÎ""Œ-ÉÈ‹›3l-ͶŒÕô¾PÙ¥‚Xœ§!Õ3Ö -"d~+[Æ,ÊÈÍU@ TT(ª«§.>¾*Ù#òI¢DaV, ‡Û†CCÐmþØñ6~µZ'ƒéàÓÈ£~Þ—S$'ƒÙI&2š™¬@s©¡ÎäQ ŠÌÔNÊþü„?~ðûw¡×I×)Øax\ê¿62$Oèåt%×¥ÖQíâyú´_gèæy:»’Y[j(­‚ »FEõÎTV©µT®¹&<°"§^Ä/xQY¸‚¯Ù­psBå”+TíóÎí´Ö§Èø”ŽôTj¡0VŸx(„Oâ\ -IZ¹t4¤w!}^€gì½—ú€h9@±^è Ò…s2•=3B2H>ÉçynÞÌû7ŠC‰#ˆþhñÚï-whèÇGtBŒV²Õºø`uÞ1èFæ3ÑvðøçÓ÷à´ÛoÝ34\z"ž“,P&+š´®¿Ú»oB03¼å㣶xsÉÝä!FáŽ/†m'~¼êŒn?àßvaÝDëÑ÷ì+°¥¸5ïšôt0Ü ¶æ+ÉÔ·ÆsRz…t¥J–xFzëþƒöÁQîè`û¼/dŠº2›åž™j@7GÛú.º?Üwyc$O‘œÀI‘(S¥ME®ƒßŒô| Áõ^`‚2!-›’"VîIN™"Ì‚B-ÌÎe³/uV9:Î6:.Ç°ÿMɪH -endstream -endobj -810 0 obj +% 799 0 obj << -/Length 8252 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 414.241 393.262 426.3] +/A << /S /GoTo /D (subsection.7.7) >> >> -stream -0 g 0 G -0 g 0 G -BT -/F54 9.9626 Tf 99.895 706.129 Td [(signed)-263(to)-264(a)-263(pr)18(ocess)-263(that)-263(will)-264(own)-263(the)-263(corr)18(esponding)-263(r)18(ow)-263(in)-264(the)-263(coef)18(\002cient)-263(ma-)]TJ 0 -11.955 Td [(trix)-406(and)-406(will)-406(carry)-405(out)-406(all)-406(r)18(elated)-406(computations.)-778(This)-406(all)1(ocation)-406(strategy)-406(is)]TJ 0 -11.955 Td [(equivalent)-353(to)-353(a)-353(partition)-353(of)-353(the)-353(discr)18(etization)-353(mesh)-353(in)1(to)]TJ/F52 9.9626 Tf 253.543 0 Td [(sub-domains)]TJ/F54 9.9626 Tf 51.107 0 Td [(.)-619(Our)-353(li-)]TJ -304.65 -11.956 Td [(brary)-220(supports)-220(any)-220(distribution)-220(that)-220(keeps)-220(together)-220(the)-220(coef)18(\002cients)-220(of)-220(each)-220(ma-)]TJ 0 -11.955 Td [(trix)-244(r)18(ow;)-245(ther)18(e)-244(ar)18(e)-243(no)-244(other)-243(constraints)-244(on)-243(the)-243(variable)-244(assignment.)-308(This)-243(choice)]TJ 0 -11.955 Td [(is)-324(consistent)-324(with)-324(simple)-324(data)-325(distributions)-324(such)-324(as)]TJ/F59 9.9626 Tf 232.237 0 Td [(CYCLIC\050N\051)]TJ/F54 9.9626 Tf 50.302 0 Td [(and)]TJ/F59 9.9626 Tf 20.095 0 Td [(BLOCK)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)-324(as)]TJ -328.786 -11.955 Td [(well)-310(as)-309(completely)-310(arbitrary)-310(assignments)-309(of)-310(equation)-310(indices)-309(to)-310(pr)18(ocesses.)-489(In)]TJ 0 -11.955 Td [(particular)-250(it)-250(is)-251(consistent)-250(with)-250(the)-250(usage)-250(of)-251(graph)-250(partitioning)-250(tools)-250(commonly)]TJ 0 -11.956 Td [(available)-333(in)-332(the)-333(literatur)18(e,)-353(e.g.)-558(METIS)-332([)]TJ -1 0 0 rg 1 0 0 RG - [(14)]TJ -0 g 0 G - [(].)-558(Dense)-333(ve)1(ctors)-333(conform)-333(to)-332(sparse)]TJ 0 -11.955 Td [(matrices,)-257(that)-255(is,)-257(the)-255(entries)-255(of)-256(a)-255(vector)-255(follow)-256(the)-255(same)-255(distribution)-256(of)-255(the)-255(ma-)]TJ 0 -11.955 Td [(trix)-250(r)18(ows.)]TJ 14.944 -12.648 Td [(W)92(e)-343(assume)-344(that)-343(the)-344(sparse)-343(matrix)-343(is)-344(built)-343(in)-344(parallel,)-366(wher)18(e)-344(each)-343(pr)18(ocess)]TJ -14.944 -11.955 Td [(generates)-254(its)-254(own)-255(portion.)-322(W)92(e)-255(never)-254(r)18(equir)18(e)-254(that)-254(the)-255(entir)18(e)-254(matrix)-254(be)-254(available)]TJ 0 -11.955 Td [(on)-288(a)-288(single)-288(node.)-423(However)74(,)-298(it)-287(is)-288(possible)-288(to)-288(hold)-288(the)-288(entir)18(e)-288(matrix)-287(in)-288(one)-288(pr)18(o-)]TJ 0 -11.955 Td [(cess)-241(and)-242(distribute)-241(it)-241(explicitly)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 133.807 3.616 Td [(1)]TJ -0 g 0 G -/F54 9.9626 Tf 4.284 -3.616 Td [(,)-243(even)-241(though)-242(the)-241(r)18(esulting)-241(memory)-241(bottleneck)]TJ -138.091 -11.955 Td [(would)-250(make)-250(this)-250(option)-250(unattractive)-250(in)-250(most)-250(cases.)]TJ/F51 11.9552 Tf 0 -33.074 Td [(2.1)-1000(Basic)-250(Nomenclature)]TJ/F54 9.9626 Tf 0 -20.306 Td [(Our)-301(computational)-301(model)-301(implies)-301(that)-301(the)-301(data)-301(al)1(location)-301(on)-301(the)-301(parallel)-301(dis-)]TJ 0 -11.955 Td [(tributed)-370(memory)-369(machine)-370(is)-370(guided)-370(by)-369(the)-370(str)8(uctur)18(e)-370(of)-370(the)-369(physical)-370(model,)]TJ 0 -11.955 Td [(and)-250(speci\002cally)-250(by)-250(the)-250(discr)18(etization)-250(mesh)-250(of)-250(the)-250(PDE.)]TJ 14.944 -12.648 Td [(Each)-400(point)-400(of)-400(the)-399(discr)18(etization)-400(mesh)-400(will)-400(have)-400(\050at)-400(least)1(\051)-400(one)-400(associated)]TJ -14.944 -11.955 Td [(equation/variable,)-416(and)-384(ther)18(efor)18(e)-383(one)-383(index.)-710(W)92(e)-383(say)-383(that)-384(point)]TJ/F52 9.9626 Tf 289.765 0 Td [(i)-403(depends)]TJ/F54 9.9626 Tf 42.709 0 Td [(on)]TJ -332.474 -11.955 Td [(point)]TJ/F52 9.9626 Tf 26.955 0 Td [(j)]TJ/F54 9.9626 Tf 6.004 0 Td [(if)-312(the)-312(equation)-312(for)-312(a)-312(variable)-313(associated)-312(with)]TJ/F52 9.9626 Tf 202.502 0 Td [(i)]TJ/F54 9.9626 Tf 6.074 0 Td [(contains)-312(a)-312(term)-312(in)]TJ/F52 9.9626 Tf 84.153 0 Td [(j)]TJ/F54 9.9626 Tf 2.894 0 Td [(,)-328(or)]TJ -328.582 -11.955 Td [(equivalently)-291(if)]TJ/F52 9.9626 Tf 67.321 0 Td [(a)]TJ/F52 7.5716 Tf 4.59 -1.96 Td [(i)-67(j)]TJ/F83 10.3811 Tf 8.967 1.96 Td [(6)]TJ/F85 10.3811 Tf 0.249 0 Td [(=)]TJ/F54 9.9626 Tf 11.726 0 Td [(0.)-434(After)-292(the)-291(partition)-292(of)-291(the)-292(discr)18(etization)-291(mesh)-292(into)]TJ/F52 9.9626 Tf 233.514 0 Td [(sub-)]TJ -326.367 -11.955 Td [(domains)]TJ/F54 9.9626 Tf 37.559 0 Td [(assigned)-381(to)-381(the)-381(parallel)-381(pr)18(ocesses,)-413(we)-381(classify)-381(the)-381(points)-381(of)-381(a)-381(given)]TJ -37.559 -11.955 Td [(sub-domain)-250(as)-250(following.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.003 Td [(Internal.)]TJ -0 g 0 G -/F54 9.9626 Tf 43.995 0 Td [(An)-359(internal)-359(poi)1(nt)-359(of)-359(a)-359(given)-359(domain)]TJ/F52 9.9626 Tf 168.65 0 Td [(depends)]TJ/F54 9.9626 Tf 35.684 0 Td [(only)-359(on)-359(points)-358(of)-359(the)]TJ -223.422 -11.955 Td [(same)-264(domain.)-351(If)-264(all)-264(points)-264(of)-264(a)-264(domain)-263(ar)18(e)-264(assigned)-264(to)-264(one)-264(pr)18(ocess,)-267(then)]TJ 0 -11.956 Td [(a)-196(computational)-196(step)-195(\050e.g.,)-207(a)-196(matrix-vector)-196(pr)18(oduct\051)-196(of)-195(the)-196(equations)-196(asso-)]TJ 0 -11.955 Td [(ciated)-214(with)-213(the)-214(internal)-214(points)-214(r)18(equir)18(es)-213(no)-214(data)-214(items)-214(fr)18(om)-213(other)-214(domains)]TJ 0 -11.955 Td [(and)-250(no)-250(communications.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.695 Td [(Boundary)92(.)]TJ -0 g 0 G -/F54 9.9626 Tf 51.397 0 Td [(A)-192(point)-191(of)-192(a)-192(given)-191(domain)-192(is)-192(a)-191(boundary)-192(point)-192(if)-191(it)]TJ/F52 9.9626 Tf 217.552 0 Td [(depends)]TJ/F54 9.9626 Tf 34.019 0 Td [(on)-192(points)]TJ -278.061 -11.955 Td [(belonging)-250(to)-250(other)-250(domains.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.696 Td [(Halo.)]TJ -0 g 0 G -/F54 9.9626 Tf 29.609 0 Td [(A)-389(halo)-389(point)-389(for)-389(a)-389(given)-389(domain)-389(i)1(s)-389(a)-389(point)-389(belonging)-389(to)-389(another)-389(do-)]TJ -4.702 -11.955 Td [(main)-267(such)-267(that)-266(ther)18(e)-267(is)-267(a)-267(boundary)-267(point)-266(which)]TJ/F52 9.9626 Tf 212.474 0 Td [(depends)]TJ/F54 9.9626 Tf 34.767 0 Td [(on)-267(it.)-360(Whenever)]TJ -247.241 -11.955 Td [(performing)-360(a)-361(computational)-360(step,)-388(such)-361(as)-360(a)-361(matrix-vector)-360(pr)18(oduct,)-388(the)]TJ 0 -11.955 Td [(values)-274(associated)-273(with)-274(halo)-274(points)-274(ar)18(e)-274(r)18(equested)-273(fr)18(om)-274(other)-274(domains.)-381(A)]TJ 0 -11.955 Td [(boundary)-259(point)-258(of)-259(a)-258(given)-259(domain)-258(is)-259(usually)-258(a)-259(halo)-258(point)-259(for)-259(some)-258(other)]TJ 0 -11.956 Td [(domain)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 34.002 3.617 Td [(2)]TJ -0 g 0 G -/F54 9.9626 Tf 4.284 -3.617 Td [(;)-238(ther)18(efor)18(e)-232(the)-233(car)18(dinality)-232(of)-232(the)-232(boundary)-233(p)1(oints)-233(set)-232(denotes)-232(the)]TJ -38.286 -11.955 Td [(amount)-250(of)-250(data)-250(sent)-250(to)-250(other)-250(domains.)]TJ -0 g 0 G -ET -q -1 0 0 1 99.895 168.389 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S -Q -BT -/F54 5.9776 Tf 110.755 161.427 Td [(1)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(In)-250(our)-250(pr)18(ototype)-250(implementation)-250(we)-250(pr)18(ovide)-250(sample)-250(scatter/gather)-250(r)18(outines.)]TJ/F54 5.9776 Tf -3.487 -6.922 Td [(2)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(This)-401(is)-402(the)-401(normal)-402(situation)-401(when)-402(the)-401(pattern)-402(of)-401(the)-402(sparse)-401(matrix)-402(is)-401(symmetric,)-440(which)-401(is)]TJ -14.347 -9.464 Td [(equivalent)-358(to)-358(say)-358(that)-358(the)-357(interaction)-358(between)-358(two)-358(variables)-358(is)-358(r)18(ecipr)18(ocal.)-634(If)-357(the)-358(matrix)-358(pattern)]TJ 0 -9.465 Td [(is)-241(non-symmetric)-241(we)-242(may)-241(have)-241(one-way)-241(interactions,)-243(and)-241(these)-241(could)-241(cause)-242(a)-241(situation)-241(in)-241(which)-241(a)]TJ 0 -9.464 Td [(boundary)-250(point)-250(is)-250(not)-250(a)-250(halo)-250(point)-250(for)-250(its)-250(neighbour)74(.)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 169.365 -29.888 Td [(3)]TJ -0 g 0 G -ET - -endstream -endobj -823 0 obj +% 800 0 obj << -/Length 4830 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 402.147 288.227 414.206] +/A << /S /GoTo /D (subsection.7.8) >> >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(Overlap.)]TJ -0 g 0 G -/F54 9.9626 Tf 44.553 0 Td [(An)-245(overlap)-244(point)-245(is)-245(a)-245(boundary)-244(point)-245(assigned)-245(to)-244(multiple)-245(domains.)]TJ -19.647 -11.955 Td [(Any)-204(operation)-204(that)-204(involves)-204(an)-204(overlap)-204(point)-204(has)-204(to)-204(be)-204(r)18(eplicated)-204(for)-204(each)]TJ 0 -11.955 Td [(assignment.)]TJ -24.906 -18.943 Td [(Overlap)-358(points)-359(do)-358(not)-358(usually)-359(exist)-358(in)-359(the)-358(basic)-358(data)-359(distributions;)-412(however)]TJ 0 -11.955 Td [(they)-325(ar)18(e)-326(a)-325(featur)18(e)-326(of)-325(Domain)-326(Decomposition)-325(Schwarz)-326(pr)18(econditioners)-325(which)]TJ 0 -11.956 Td [(ar)18(e)-250(the)-250(subject)-250(of)-250(r)18(elated)-250(r)18(esear)18(ch)-250(work)-250([)]TJ -1 0 0 rg 1 0 0 RG - [(4)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(3)]TJ -0 g 0 G - [(].)]TJ 14.944 -11.955 Td [(W)92(e)-225(denote)-225(the)-225(sets)-225(of)-225(internal,)-230(boundary)-225(and)-225(halo)-225(points)-225(for)-225(a)-225(given)-225(subdo-)]TJ -14.944 -11.955 Td [(main)-251(by)]TJ/F83 10.3811 Tf 38.66 0 Td [(I)]TJ/F54 9.9626 Tf 6.53 0 Td [(,)]TJ/F83 10.3811 Tf 5.125 0 Td [(B)]TJ/F54 9.9626 Tf 9.753 0 Td [(and)]TJ/F83 10.3811 Tf 19.497 0 Td [(H)]TJ/F54 9.9626 Tf 8.972 0 Td [(.)-314(Each)-252(subdomain)-251(is)-252(assigned)-251(to)-252(one)-251(pr)18(ocess;)-253(each)-251(pr)18(ocess)]TJ -88.537 -11.955 Td [(usually)-346(owns)-346(one)-346(su)1(bdomain,)-370(although)-346(the)-346(user)-346(may)-346(choose)-345(to)-346(assign)-346(mor)18(e)]TJ 0 -11.955 Td [(than)-302(one)-301(subdomain)-302(to)-301(a)-302(pr)18(ocess.)-465(If)-302(each)-301(pr)18(ocess)]TJ/F52 9.9626 Tf 222.767 0 Td [(i)]TJ/F54 9.9626 Tf 5.968 0 Td [(owns)-302(one)-301(subdomain,)-315(the)]TJ -228.735 -11.956 Td [(number)-221(of)-221(r)18(ows)-221(in)-221(the)-221(local)-221(sparse)-221(matrix)-221(is)]TJ/F83 10.3811 Tf 192.655 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 4.799 0 Td [(+)]TJ/F83 10.3811 Tf 9.989 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.003 0 Td [(,)-227(and)-221(the)-221(number)-221(of)-221(local)]TJ -235.248 -11.955 Td [(columns)-207(\050i.e.)-296(those)-207(for)-207(which)-207(ther)18(e)-208(exists)-207(at)-207(least)-207(one)-207(non-zer)18(o)-208(entry)-207(in)-207(the)-207(local)]TJ 0 -11.955 Td [(r)18(ows\051)-250(is)]TJ/F83 10.3811 Tf 37.275 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.255 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(j)-24(H)]TJ/F52 7.5716 Tf 12.051 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.004 0 Td [(.)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -1 0 0 1 222.462 541.675 cm -q -0 -1 1 0 0 0 cm -q -.65 0 0 .65 0 0 cm -q -1 0 0 1 0 0 cm -/Im3 Do -Q -Q -Q -0 g 0 G -1 0 0 1 -222.462 -541.675 cm -BT -/F54 9.9626 Tf 260.803 335.398 Td [(Figur)18(e)-250(2:)-310(Point)-250(class\002cation.)]TJ -0 g 0 G -0 g 0 G - -95.154 -23.688 Td [(This)-190(classi\002cation)-190(of)-190(mesh)-190(points)-190(guides)-190(the)-190(naming)-190(scheme)-190(that)-190(we)-190(adopted)]TJ -14.944 -11.956 Td [(in)-190(the)-190(library)-190(internals)-190(and)-190(in)-190(the)-190(data)-190(str)8(uctur)18(es.)-290(W)92(e)-190(explicitly)-190(note)-190(that)-190(\223Halo\224)]TJ 0 -11.955 Td [(points)-250(ar)18(e)-250(also)-250(often)-250(called)-250(\223ghost\224)-250(points)-250(in)-250(the)-250(literatur)18(e.)]TJ/F51 11.9552 Tf 0 -28.902 Td [(2.2)-1000(Library)-250(contents)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)-250(PSBLAS)-250(library)-250(consists)-250(of)-250(various)-250(classes)-250(of)-250(subr)18(outines:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -18.943 Td [(Computational)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 113.723 0 Td [(comprising:)]TJ -0 g 0 G - -77.917 -19.434 Td [(\225)]TJ -0 g 0 G - [-500(Sparse)-250(matrix)-250(by)-250(dense)-250(matrix)-250(pr)18(oduct;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ -0 g 0 G - [-500(Sparse)-250(triangular)-250(systems)-250(solution)-250(for)-250(block)-250(diagonal)-250(matrices;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ -0 g 0 G - [-500(V)111(ector)-250(and)-250(matrix)-250(norms;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ -0 g 0 G - [-500(Dense)-250(matrix)-250(sums;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ -0 g 0 G - [-500(Dot)-250(pr)18(oducts.)]TJ -0 g 0 G -/F51 9.9626 Tf -35.806 -19.434 Td [(Communication)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 118.704 0 Td [(handling)-250(halo)-250(and)-250(overlap)-250(communications;)]TJ -0 g 0 G - 50.661 -29.888 Td [(4)]TJ -0 g 0 G -ET - -endstream -endobj -820 0 obj +% 801 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 390.052 260.403 402.112] +/A << /S /GoTo /D (subsection.7.9) >> +>> +% 802 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 377.958 242.878 390.018] +/A << /S /GoTo /D (subsection.7.10) >> +>> +% 803 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 365.864 269.159 377.924] +/A << /S /GoTo /D (subsection.7.11) >> +>> +% 804 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 353.77 266.31 365.829] +/A << /S /GoTo /D (subsection.7.12) >> +>> +% 805 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 341.676 335.51 353.735] +/A << /S /GoTo /D (subsection.7.13) >> +>> +% 806 0 obj << -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/points.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 826 0 R -/BBox [0 0 274 308] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 827 0 R ->>/Font << /R8 828 0 R>> +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 329.581 334.742 341.641] +/A << /S /GoTo /D (subsection.7.14) >> >> -/Length 1397 -/Filter /FlateDecode +% 807 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 317.487 305.523 329.547] +/A << /S /GoTo /D (subsection.7.15) >> >> -stream -xœÝYËŽ5Ý÷Wô’ q±Ën»ï6 -– 󣄠¹3ÊBü=®§«æ±bA‡HŒ}Ï©c·í²»?­é”ׄÿäïÍeùö]_?ü¾¤Ó©d êwßGüðaù´d"®òçæ²¾¾ä}ÍíëÕûe4­ß ,äýÔ×sÿ»º,_ýx÷Ç/w×·¯®~[¾»ZÞ.ø›Œ1¸ð™âuóâ¯ïÿ¼ûùúáoO*žþx/þÃõí½Î22Tø<ᜇd†&Âoî/×ïV˜âÿõèCê1V^õd¨æõãR ¬Û9ŸÎç¶^–ºµÓ¾ÍšÚýÝz¦zõ¯7‹!€S®ûj짔êJÚR¿–ðWZSöN•m˜´ ide«3çûfyÿõROÛú×|J_F¿~]~z2ò–}×òVÐÕämë¦Î€sQ<I<³¦uiüd¸r͵9.Ö¤¢ÆR’ÉÑãY~ОÐCÑÝ¥Ÿ}öçÙ^â<3LA ‰c‹YÒ¶®ôçY¯qž&mCÙØâÌû懣ç—Ñ#|H–_rƧšÇÒ³,wš0s>}yüÇ5ÒNóË p%U¤ –ðW@E’§$§•|¡pxõE`&ÆøåU ™¤ó«›%AÝIUÍ0Gš]ý‘&ûÖM’ î Jšx÷¬…T.ù)~¼C²8˜}~‚­ÛÍWÛ¢íÁvKÑö¶K,8ÛÍ—&†`[C*—ü¨ONÔÇs­ƒ ½m‚ê ò9؆Áu¶!×`{P9¦m‚êKI7oÛB*—ü¨O샹~ñ̳·Ç'­¡Á^ÝIaÏvRy!œzw'ó¤`Íx"0.Ѥb'…iÄù|ùÌs¼žP:-%X/[´^º“#Àa°há…dÞPÓY/)Z‡Ýqˆ&-VŠÖ½ON¬Çtnƒ®G±À¹ÍY–& é›Ë’וB¿Ìœ¤¡¹M…ÁnngäŽ%¤Ò#ØœÃÉÙÇ‚"d;’Àô)ùÃ(˜\X‹³Ž¥²£0}Z¡pø#`Ó†Sò‹%Hvt§Ð̧f£`ú`-Î+”ÐŽQ4ó9ƒ…Ç,x›O/,îf,z»âißn«ªÝìv«$½úæ-ÜŒå`?›“禩™|,ˆ7cïó™;Ìñº@!osõé]Š¦?ݲta0€yýÒ¥¤Zdy›«OïRÜ<%9­äƒ€[}拇ú6m8uõIPžþhǃf>m))…YÞæê“ Ò<%9­äƒ€[}ækçÿÜæ“WO’rõ= A} £ Ñ0'Ë 9‘S,irêÕ÷+\_ã­uâÝ¿›ÑÆE?æóé{¦ƒÙÇá'È‹ÎB#4_²$&†`[–’qq‘‘&/> Mõ5^_'†`[Bý˜OõºÖÁ–%©¡ ª/]07o[šqq ’&/M Íõ5^_'nÞ¶†4.ú1Ÿ6ØsýÜ¥%]Š!ƒCÞgVe@Ù–‹’…$)š5-ƒÃØ5}‡ä²?ÖLg+‡ |>{é>hO‘jøX5~,ê>–0àxÕ},1’š¬ác ”ø±ŠûX€5‹ûXb$3òø³ Ú…t¡í¡=Å>tpº8Õ‡’Ô$iÎ>´-ö¡Ç%ÀšTÔXJR#ÞgL¼í“-J/0®jãȶw.Þâªick£Z,”Ô¤š^”Ñk·ì«éUÝ ‹¯WjÇ‚µÛçƒ.ÁºUE³zÉgýãPˆ,é"›Ñe±ûÌ‹:t˜!*%~ Ö *«QÊÒ@emPMÓ1:¾Þ’àX¼÷(˜®4æ ¤Nƒ¾]þÎJ¦' -endstream -endobj -748 0 obj +% 808 0 obj << -/Type /ObjStm -/N 100 -/First 919 -/Length 15283 ->> -stream -699 0 700 151 701 304 702 457 703 610 704 762 705 914 706 1067 707 1220 708 1373 -709 1526 710 1678 711 1831 712 1977 713 2129 743 2281 714 2432 715 2584 716 2736 717 2888 -718 3040 719 3190 720 3342 721 3494 722 3647 723 3800 724 3951 725 4103 726 4256 727 4409 -728 4562 729 4715 730 4861 731 5013 732 5165 733 5317 744 5469 734 5620 735 5766 745 5918 -736 6069 746 6221 737 6372 747 6524 742 6673 739 6729 761 6809 738 7055 764 7207 749 7359 -765 7511 750 7663 766 7815 751 7967 752 8112 753 8265 754 8418 755 8571 756 8724 757 8877 -758 9030 759 9177 763 9330 760 9387 778 9467 767 9673 768 9823 769 9974 770 10125 771 10279 -772 10429 773 10579 774 10729 775 10877 776 11025 7 11173 777 11227 797 11320 800 11470 801 11711 -802 11753 803 12139 791 12439 792 12585 793 12732 11 12879 799 12935 796 12992 809 13113 795 13263 -806 13411 807 13560 811 13709 15 13765 815 13820 816 13877 808 13934 822 14066 826 14208 827 14322 -% 699 0 obj +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 305.393 232.786 317.453] +/A << /S /GoTo /D (subsection.7.16) >> +>> +% 809 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 666.041 386.806 678.1] -/A << /S /GoTo /D (subsection.6.16) >> +/Rect [113.843 293.299 242.519 305.359] +/A << /S /GoTo /D (subsection.7.17) >> >> -% 700 0 obj +% 810 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 653.946 368.116 666.006] -/A << /S /GoTo /D (subsection.6.17) >> +/Rect [98.899 271.309 183.083 283.109] +/A << /S /GoTo /D (section.8) >> >> -% 701 0 obj +% 811 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 641.852 370.219 653.912] -/A << /S /GoTo /D (subsection.6.18) >> +/Rect [113.843 259.055 387.474 271.115] +/A << /S /GoTo /D (subsection.8.1) >> >> -% 702 0 obj +% 812 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 629.758 214.116 641.818] -/A << /S /GoTo /D (subsection.6.19) >> +/Rect [113.843 246.961 415.897 259.021] +/A << /S /GoTo /D (subsection.8.2) >> >> -% 703 0 obj +% 813 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 617.664 231.69 629.724] -/A << /S /GoTo /D (subsection.6.20) >> +/Rect [113.843 234.867 399.738 246.926] +/A << /S /GoTo /D (subsection.8.3) >> >> -% 704 0 obj +% 814 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 605.57 204.353 617.629] -/A << /S /GoTo /D (subsection.6.21) >> +/Rect [113.843 222.773 444.603 234.832] +/A << /S /GoTo /D (subsection.8.4) >> >> -% 705 0 obj +% 825 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 593.476 221.927 605.535] -/A << /S /GoTo /D (subsection.6.22) >> +/Rect [98.899 213.468 180.781 222.877] +/A << /S /GoTo /D (subsection.8.4) >> >> -% 706 0 obj +% 815 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 581.381 379.076 593.441] -/A << /S /GoTo /D (subsection.6.23) >> +/Rect [98.899 191.298 152.896 200.628] +/A << /S /GoTo /D (section.9) >> >> -% 707 0 obj +% 816 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 569.287 359.768 581.347] -/A << /S /GoTo /D (subsection.6.24) >> +/Rect [113.843 176.574 444.603 188.633] +/A << /S /GoTo /D (subsection.9.1) >> >> -% 708 0 obj +% 826 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 557.193 373.158 569.253] -/A << /S /GoTo /D (subsection.6.25) >> +/Rect [98.899 164.619 201.494 176.678] +/A << /S /GoTo /D (subsection.9.1) >> >> -% 709 0 obj +% 817 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 545.099 287.68 557.158] -/A << /S /GoTo /D (subsection.6.26) >> +/Rect [113.843 152.524 444.603 164.584] +/A << /S /GoTo /D (subsection.9.2) >> >> -% 710 0 obj +% 827 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 533.005 221.369 545.064] -/A << /S /GoTo /D (subsection.6.27) >> +/Rect [98.899 143.219 168.468 152.629] +/A << /S /GoTo /D (subsection.9.2) >> >> -% 711 0 obj +% 818 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 513.485 251.974 522.815] -/A << /S /GoTo /D (section.7) >> +/Rect [113.843 128.475 444.603 140.535] +/A << /S /GoTo /D (subsection.9.3) >> >> -% 712 0 obj +% 828 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 498.761 364.739 510.821] -/A << /S /GoTo /D (subsection.7.1) >> +/Rect [98.899 119.17 202.859 128.58] +/A << /S /GoTo /D (subsection.9.3) >> >> -% 713 0 obj +% 823 0 obj +<< +/D [821 0 R /XYZ 98.895 753.953 null] +>> +% 820 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 861 0 obj +<< +/Type /Page +/Contents 862 0 R +/Resources 860 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Annots [ 819 0 R 864 0 R 829 0 R 865 0 R 830 0 R 866 0 R 831 0 R 832 0 R 833 0 R 834 0 R 835 0 R 836 0 R 837 0 R 838 0 R 839 0 R 840 0 R 841 0 R 842 0 R 843 0 R 844 0 R 845 0 R 846 0 R 847 0 R 848 0 R 849 0 R 850 0 R 851 0 R 852 0 R 853 0 R 854 0 R 855 0 R 856 0 R 857 0 R 858 0 R 859 0 R ] +>> +% 819 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 486.667 444.603 498.726] -/A << /S /GoTo /D (subsection.7.2) >> +/Rect [164.653 702.323 495.412 714.383] +/A << /S /GoTo /D (subsection.9.4) >> >> -% 743 0 obj +% 864 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 477.362 161.365 486.467] -/A << /S /GoTo /D (subsection.7.2) >> +/Rect [149.709 693.018 253.668 702.428] +/A << /S /GoTo /D (subsection.9.4) >> >> -% 714 0 obj +% 829 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 462.617 364.172 474.677] -/A << /S /GoTo /D (subsection.7.3) >> +/Rect [164.653 678.413 495.412 690.472] +/A << /S /GoTo /D (subsection.9.5) >> >> -% 715 0 obj +% 865 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 450.523 358.772 462.583] -/A << /S /GoTo /D (subsection.7.4) >> +/Rect [149.709 669.108 253.668 678.517] +/A << /S /GoTo /D (subsection.9.5) >> >> -% 716 0 obj +% 830 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 438.429 307.635 450.489] -/A << /S /GoTo /D (subsection.7.5) >> +/Rect [164.653 654.503 495.412 666.562] +/A << /S /GoTo /D (subsection.9.6) >> >> -% 717 0 obj +% 866 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 426.335 277.737 438.395] -/A << /S /GoTo /D (subsection.7.6) >> +/Rect [149.709 645.197 253.668 654.607] +/A << /S /GoTo /D (subsection.9.6) >> >> -% 718 0 obj +% 831 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 414.241 393.262 426.3] -/A << /S /GoTo /D (subsection.7.7) >> +/Rect [149.709 623.26 274.28 632.59] +/A << /S /GoTo /D (section.10) >> >> -% 719 0 obj +% 832 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 402.147 288.227 414.206] -/A << /S /GoTo /D (subsection.7.8) >> +/Rect [164.653 608.674 333.298 620.734] +/A << /S /GoTo /D (subsection.10.1) >> >> -% 720 0 obj +% 833 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 390.052 260.403 402.112] -/A << /S /GoTo /D (subsection.7.9) >> +/Rect [164.653 596.719 331.326 608.779] +/A << /S /GoTo /D (subsection.10.2) >> >> -% 721 0 obj +% 834 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 377.958 242.878 390.018] -/A << /S /GoTo /D (subsection.7.10) >> +/Rect [164.653 584.764 381.626 596.824] +/A << /S /GoTo /D (subsection.10.3) >> >> -% 722 0 obj +% 835 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 365.864 269.159 377.924] -/A << /S /GoTo /D (subsection.7.11) >> +/Rect [164.653 572.809 427.165 584.869] +/A << /S /GoTo /D (subsection.10.4) >> >> -% 723 0 obj +% 836 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 353.77 266.31 365.829] -/A << /S /GoTo /D (subsection.7.12) >> +/Rect [164.653 560.854 353.343 572.913] +/A << /S /GoTo /D (subsection.10.5) >> >> -% 724 0 obj +% 837 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 548.899 315.177 560.958] +/A << /S /GoTo /D (subsection.10.6) >> +>> + +endstream +endobj +880 0 obj +<< +/Length 8045 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 14.3462 Tf 99.895 705.784 Td [(1)-1000(Introduction)]TJ/F62 9.9626 Tf 0 -22.913 Td [(The)-272(PSBLAS)-271(library)111(,)-277(developed)-272(with)-272(t)1(he)-272(aim)-272(to)-271(facilitate)-272(the)-272(parallelization)-271(of)]TJ 0 -11.955 Td [(computationally)-348(intensive)-347(scienti\002c)-348(applications,)-372(is)-347(designed)-348(to)-348(addr)18(ess)-347(par)18(-)]TJ 0 -11.955 Td [(allel)-282(implementation)-283(of)-282(iterative)-282(solvers)-283(for)-282(sparse)-282(linear)-283(systems)-282(thr)18(ough)-282(the)]TJ 0 -11.955 Td [(distributed)-232(memory)-232(paradigm.)-304(It)-232(includes)-233(r)18(outines)-232(for)-232(multiplying)-232(sparse)-232(ma-)]TJ 0 -11.955 Td [(trices)-211(by)-211(dense)-211(matrices,)-219(solving)-211(block)-211(diagonal)-211(systems)-211(with)-211(triangular)-211(diago-)]TJ 0 -11.956 Td [(nal)-229(entries,)-233(pr)18(epr)18(ocessing)-228(sparse)-229(matrices,)-233(and)-228(contains)-229(additional)-229(r)18(outines)-228(for)]TJ 0 -11.955 Td [(dense)-292(matrix)-292(operations.)-436(The)-292(curr)18(ent)-292(implementation)-292(of)-292(PSBLAS)-292(addr)18(esses)-292(a)]TJ 0 -11.955 Td [(distributed)-250(memory)-250(execution)-250(model)-250(operating)-250(with)-250(message)-250(passing.)]TJ 14.944 -12.064 Td [(The)-267(PSBLAS)-267(library)-268(version)-267(3)-267(is)-267(implemented)-267(in)-267(the)-268(Fortran)-267(2003)-267([)]TJ +1 0 0 rg 1 0 0 RG + [(16)]TJ +0 g 0 G + [(])-267(pr)18(o-)]TJ -14.944 -11.955 Td [(gramming)-278(language,)-284(with)-277(r)18(euse)-278(and/or)-278(adaptation)-277(of)-278(existing)-277(Fortran)-278(77)-277(and)]TJ 0 -11.955 Td [(Fortran)-250(95)-250(softwar)18(e,)-250(plus)-250(a)-250(handful)-250(of)-250(C)-250(r)18(outines.)]TJ 14.944 -12.064 Td [(The)-391(use)-392(of)-391(Fortran)-392(2003)-391(of)18(fers)-392(a)-391(number)-391(of)-392(advantages)-391(over)-392(Fortran)-391(95,)]TJ -14.944 -11.955 Td [(mostly)-385(in)-385(the)-385(handling)-385(of)-385(r)18(equir)18(ements)-385(for)-385(evolution)-385(and)-385(adaptation)-385(of)-385(the)]TJ 0 -11.956 Td [(library)-431(to)-432(new)-431(computing)-432(ar)18(chitectur)18(es)-431(and)-432(integration)-431(of)-432(new)-431(algorithms.)]TJ 0 -11.955 Td [(For)-365(a)-365(detail)1(ed)-365(discussion)-365(of)-365(our)-364(design)-365(see)-365([)]TJ +1 0 0 rg 1 0 0 RG + [(10)]TJ +0 g 0 G + [(];)-422(other)-365(works)-364(discussing)-365(ad-)]TJ 0 -11.955 Td [(vanced)-361(pr)18(ogramming)-362(in)-361(Fortran)-362(2003)-361(include)-362([)]TJ +1 0 0 rg 1 0 0 RG + [(20)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-361(18)]TJ +0 g 0 G + [(];)-417(suf)18(\002cient)-362(support)-361(for)]TJ 0 -11.955 Td [(Fortran)-314(2003)-314(is)-313(now)-314(available)-314(fr)18(om)-314(many)-313(compilers,)-330(including)-314(the)-313(GNU)-314(For)18(-)]TJ 0 -11.955 Td [(tran)-250(compiler)-250(fr)18(om)-250(the)-250(Fr)18(ee)-250(Softwar)18(e)-250(Foundation)-250(\050as)-250(of)-250(version)-250(4.8\051.)]TJ 14.944 -12.064 Td [(Pr)18(evious)-311(appr)18(oaches)-312(have)-311(been)-311(based)-311(on)-312(mixing)-311(Fortran)-311(95,)-327(with)-311(its)-311(sup-)]TJ -14.944 -11.955 Td [(port)-249(for)-249(object-based)-249(design,)-249(with)-249(other)-249(languages;)-249(these)-249(have)-249(been)-249(advocated)]TJ 0 -11.956 Td [(by)-346(a)-346(number)-346(of)-347(authors,)-370(e.g.)-346([)]TJ +1 0 0 rg 1 0 0 RG + [(15)]TJ +0 g 0 G + [(].)-598(Mor)18(eover)74(,)-371(the)-346(Fortran)-346(95)-346(facilities)-346(for)-346(dy-)]TJ 0 -11.955 Td [(namic)-411(memory)-410(management)-411(and)-410(interface)-411(overloading)-410(gr)18(eatly)-411(enhance)-410(the)]TJ 0 -11.955 Td [(usability)-397(of)-398(the)-397(PSBLAS)-398(subr)18(outines.)-752(In)-398(this)-397(way)111(,)-434(the)-398(library)-397(can)-398(take)-397(car)18(e)]TJ 0 -11.955 Td [(of)-267(r)8(untime)-266(memory)-267(r)18(equir)18(ements)-266(that)-267(ar)18(e)-266(quite)-267(dif)18(\002cult)-267(or)-266(even)-267(impossible)-266(to)]TJ 0 -11.955 Td [(pr)18(edict)-250(at)-250(implementation)-250(or)-250(compilation)-250(time.)]TJ 14.944 -12.064 Td [(The)-249(pr)18(esentation)-250(of)-249(the)-250(PSBLAS)-249(library)-249(follows)-250(the)-249(general)-249(str)8(uctur)18(e)-250(of)-249(the)]TJ -14.944 -11.955 Td [(pr)18(oposal)-207(for)-206(serial)-207(Sparse)-207(BLAS)-207([)]TJ +1 0 0 rg 1 0 0 RG + [(7)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-206(8)]TJ +0 g 0 G + [(],)-216(which)-206(in)-207(its)-207(turn)-206(is)-207(based)-207(on)-207(t)1(he)-207(pr)18(oposal)]TJ 0 -11.956 Td [(for)-250(BLAS)-250(on)-250(dense)-250(matrices)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(14)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(4)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(5)]TJ +0 g 0 G + [(].)]TJ 14.944 -12.063 Td [(The)-297(applicability)-297(of)-298(sparse)-297(iterative)-297(solvers)-297(to)-297(many)-298(dif)18(fer)18(ent)-297(ar)18(eas)-297(causes)]TJ -14.944 -11.956 Td [(some)-190(terminology)-190(pr)18(oblems)-190(because)-190(the)-190(same)-190(concept)-190(may)-190(be)-190(denoted)-190(thr)18(ough)]TJ 0 -11.955 Td [(dif)18(fer)18(ent)-271(names)-271(depending)-272(on)-271(the)-271(application)-271(ar)18(ea.)-374(The)-271(PSBLAS)-271(featur)18(es)-271(pr)18(e-)]TJ 0 -11.955 Td [(sented)-332(in)-333(this)-332(document)-332(will)-333(be)-332(discussed)-332(r)18(eferring)-333(to)-332(a)-333(\002ni)1(te)-333(dif)18(fer)18(ence)-332(dis-)]TJ 0 -11.955 Td [(cr)18(etization)-284(of)-285(a)-284(Partial)-285(Dif)18(fer)18(ential)-284(Equation)-284(\050PDE\051.)-285(However)74(,)-293(the)-284(scope)-285(of)-284(the)]TJ 0 -11.955 Td [(library)-283(is)-283(wider)-283(than)-284(that:)-376(for)-283(example,)-291(it)-283(can)-284(be)-283(applied)-283(to)-283(\002nite)-283(element)-283(dis-)]TJ 0 -11.956 Td [(cr)18(etizations)-267(of)-267(PDEs,)-271(and)-267(even)-266(to)-267(dif)18(fer)18(ent)-267(classes)-267(of)-267(pr)18(oblems)-267(such)-266(as)-267(nonlin-)]TJ 0 -11.955 Td [(ear)-250(optimization,)-250(for)-250(example)-250(in)-250(optimal)-250(contr)18(ol)-250(pr)18(oblems.)]TJ 14.944 -12.064 Td [(The)-383(design)-383(of)-383(a)-383(solver)-383(for)-384(sparse)-383(linear)-383(systems)-383(is)-383(driven)-383(by)-383(many)-383(con-)]TJ -14.944 -11.955 Td [(\003icting)-271(objectives,)-277(such)-272(as)-271(limiting)-271(occupation)-272(of)-271(storage)-271(r)18(esour)18(ces,)-277(exploiting)]TJ 0 -11.955 Td [(r)18(egularities)-274(in)-274(the)-275(input)-274(data,)-280(exploiting)-274(har)18(dwar)18(e)-275(characteristi)1(cs)-275(of)-274(the)-274(paral-)]TJ 0 -11.955 Td [(lel)-350(platform.)-610(T)92(o)-350(achieve)-350(an)-350(optimal)-350(communication)-350(to)-350(computation)-350(ratio)-350(on)]TJ 0 -11.955 Td [(distributed)-379(memory)-378(machines)-379(it)-378(is)-379(essential)-379(to)-378(keep)-379(the)]TJ/F60 9.9626 Tf 256.501 0 Td [(data)-379(locality)]TJ/F62 9.9626 Tf 54.198 0 Td [(as)-379(high)]TJ -310.699 -11.956 Td [(as)-315(possible;)-346(this)-315(can)-314(be)-315(done)-315(thr)18(ough)-314(an)-315(appr)18(opriate)-314(data)-315(allocation)-314(strategy)111(.)]TJ 0 -11.955 Td [(The)-323(choice)-323(of)-324(the)-323(pr)18(econditioner)-323(is)-323(another)-323(very)-324(important)-323(factor)-323(that)-323(af)18(fects)]TJ 0 -11.955 Td [(ef)18(\002ciency)-300(of)-300(the)-300(im)1(plemented)-300(application.)-460(Optimal)-300(data)-299(distribution)-300(r)18(equir)18(e-)]TJ 0 -11.955 Td [(ments)-300(for)-299(a)-300(given)-300(pr)18(econditioner)-299(may)-300(con\003ict)-300(with)-300(distribution)-299(r)18(equir)18(ements)]TJ 0 -11.955 Td [(of)-356(the)-356(r)18(est)-356(of)-357(the)-356(solver)74(.)-628(Finding)-356(the)-357(o)1(ptimal)-357(trade-of)18(f)-356(may)-356(be)-356(very)-356(dif)18(\002cult)]TJ 0 -11.955 Td [(because)-292(it)-291(is)-292(application)-291(dependent.)-435(Possible)-292(solutions)-291(to)-292(these)-292(pr)18(oblems)-291(and)]TJ 0 -11.956 Td [(other)-342(important)-342(inputs)-342(to)-342(the)-342(development)-342(of)-341(the)-342(PSBLAS)-342(softwar)18(e)-342(package)]TJ +0 g 0 G + 169.365 -29.887 Td [(1)]TJ +0 g 0 G +ET + +endstream +endobj +899 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 341.676 335.51 353.735] -/A << /S /GoTo /D (subsection.7.13) >> +/Length 5269 >> -% 725 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 150.705 706.129 Td [(have)-292(come)-291(fr)18(om)-292(an)-292(established)-291(experience)-292(in)-292(applying)-291(the)-292(PSBLAS)-291(solvers)-292(to)]TJ 0 -11.955 Td [(computational)-250(\003uid)-250(dynamics)-250(applications.)]TJ/F59 14.3462 Tf 0 -33.474 Td [(2)-1000(General)-250(overview)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-190(PSBLAS)-190(library)-190(is)-190(designed)-190(to)-190(handle)-190(the)-190(implementation)-190(of)-190(iterative)-190(solvers)]TJ 0 -11.955 Td [(for)-275(sparse)-275(linear)-275(systems)-275(on)-275(distributed)-275(memory)-274(parallel)-275(computers.)-385(The)-275(sys-)]TJ 0 -11.955 Td [(tem)-307(coef)18(\002cient)-308(matrix)]TJ/F60 9.9626 Tf 100.571 0 Td [(A)]TJ/F62 9.9626 Tf 10.381 0 Td [(must)-307(be)-308(squar)18(e;)-336(it)-308(may)-307(be)-308(r)18(eal)-307(or)-307(complex,)-322(nonsym-)]TJ -110.952 -11.955 Td [(metric,)-301(and)-291(its)-291(sparsity)-291(pattern)-291(needs)-291(not)-291(to)-291(be)-291(symmetric.)-433(The)-291(serial)-291(compu-)]TJ 0 -11.955 Td [(tation)-240(parts)-239(ar)18(e)-240(based)-240(on)-239(the)-240(serial)-240(sparse)-239(BLAS,)-240(so)-240(that)-239(any)-240(extension)-239(made)-240(to)]TJ 0 -11.956 Td [(the)-258(data)-258(str)8(uctur)18(es)-259(of)-258(the)-258(serial)-258(kernels)-258(is)-259(available)-258(to)-258(the)-258(parallel)-258(version.)-335(The)]TJ 0 -11.955 Td [(overall)-294(design)-294(and)-294(parallelization)-294(strategy)-294(have)-294(been)-294(in\003uenced)-294(by)-294(the)-294(str)8(uc-)]TJ 0 -11.955 Td [(tur)18(e)-306(of)-307(the)-306(ScaLAP)92(ACK)-306(parallel)-307(library)111(.)-479(The)-306(layer)18(ed)-306(str)8(uctur)18(e)-306(of)-307(the)-306(PSBLAS)]TJ 0 -11.955 Td [(library)-349(is)-349(shown)-348(in)-349(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-349(1)]TJ +0 g 0 G + [(;)-398(lower)-349(layers)-349(of)-349(the)-349(library)-349(in)1(dicate)-349(an)-349(encapsu-)]TJ 0 -11.955 Td [(lation)-314(r)18(elationship)-314(with)-313(upper)-314(layers.)-502(The)-314(ongoing)-314(discussion)-313(focuses)-314(on)-314(the)]TJ 0 -11.955 Td [(Fortran)-244(2003)-244(layer)-245(immediately)-244(below)-244(the)-244(application)-244(layer)74(.)-308(The)-245(serial)-244(parts)-244(of)]TJ 0 -11.956 Td [(the)-230(computation)-230(on)-230(each)-230(pr)18(ocess)-230(ar)18(e)-230(executed)-230(thr)18(ough)-230(calls)-230(to)-230(the)-230(serial)-230(sparse)]TJ 0 -11.955 Td [(BLAS)-307(subr)18(outines.)-482(In)-307(a)-307(similar)-308(way)111(,)-321(the)-307(inter)18(-pr)18(ocess)-308(message)-307(exchanges)-307(ar)18(e)]TJ 0 -11.955 Td [(encapsulated)-244(in)-243(an)-244(applicaiton)-244(layer)-243(that)-244(has)-244(been)-243(str)18(ongly)-244(inspir)18(ed)-244(by)-243(the)-244(Ba-)]TJ 0 -11.955 Td [(sic)-314(Linear)-313(Algebra)-314(Communication)-313(Subr)18(outines)-314(\050BLACS\051)-314(library)-313([)]TJ +1 0 0 rg 1 0 0 RG + [(6)]TJ +0 g 0 G + [(].)-501(Usually)]TJ 0 -11.955 Td [(ther)18(e)-315(is)-314(no)-315(need)-315(to)-314(deal)-315(dir)18(ectly)-314(with)-315(MPI;)-315(however)74(,)-330(in)-315(some)-315(cases,)-331(MPI)-314(r)18(ou-)]TJ 0 -11.955 Td [(tines)-219(ar)18(e)-219(used)-218(dir)18(ectly)-219(to)-219(impr)18(ove)-219(ef)18(\002ciency)111(.)-299(For)-219(further)-219(details)-219(on)-218(our)-219(commu-)]TJ 0 -11.956 Td [(nication)-250(layer)-250(see)-250(Sec.)]TJ +0 0 1 rg 0 0 1 RG + [-250(7)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +1 0 0 1 258.536 281.98 cm +q +.65 0 0 .65 0 0 cm +q +1 0 0 1 0 0 cm +/Im2 Do +Q +Q +0 g 0 G +1 0 0 1 -258.536 -281.98 cm +BT +/F62 9.9626 Tf 216.385 250.1 Td [(Figur)18(e)-250(1:)-310(PSBLAS)-250(library)-250(components)-250(hierar)18(chy)111(.)]TJ +0 g 0 G +0 g 0 G + -50.736 -22.178 Td [(The)-370(type)-369(of)-370(linear)-369(system)-370(matrices)-370(that)-369(we)-370(addr)18(ess)-369(typically)-370(arise)-370(in)-369(the)]TJ -14.944 -11.955 Td [(numerical)-260(solution)-261(of)-260(PDEs;)-266(in)-260(such)-260(a)-261(context,)-263(it)-260(is)-261(necessary)-260(to)-260(pay)-261(special)-260(at-)]TJ 0 -11.955 Td [(tention)-297(to)-298(the)-297(str)8(uctur)18(e)-298(of)-297(the)-298(pr)18(oblem)-297(fr)18(om)-298(which)-297(the)-298(application)-297(originates.)]TJ 0 -11.955 Td [(The)-277(nonzer)18(o)-276(pattern)-277(of)-277(a)-276(matrix)-277(arising)-277(fr)18(om)-276(the)-277(discr)18(etization)-276(of)-277(a)-277(PDE)-276(is)-277(in-)]TJ 0 -11.956 Td [(\003uenced)-232(by)-232(various)-231(factors,)-236(such)-232(as)-232(the)-231(shape)-232(of)-232(the)-232(domain,)-235(the)-232(discr)18(etization)]TJ 0 -11.955 Td [(strategy)111(,)-313(and)-301(the)-300(equation/unknown)-301(or)18(dering.)-461(The)-301(matrix)-301(it)1(self)-301(can)-301(be)-300(inter)18(-)]TJ 0 -11.955 Td [(pr)18(eted)-291(as)-291(the)-291(adjacency)-291(matrix)-292(of)-291(the)-291(graph)-291(associated)-291(with)-291(the)-291(discr)18(etization)]TJ 0 -11.955 Td [(mesh.)]TJ 14.944 -11.955 Td [(The)-308(distribution)-308(of)-308(the)-309(coef)18(\002cient)-308(matrix)-308(for)-308(the)-308(linear)-309(system)-308(is)-308(based)-308(on)]TJ -14.944 -11.955 Td [(the)-314(\223owner)-314(computes\224)-314(r)8(ule:)-438(the)-314(variable)-314(associated)-314(to)-314(each)-314(mesh)-314(point)-314(is)-314(as-)]TJ +0 g 0 G + 169.365 -29.888 Td [(2)]TJ +0 g 0 G +ET + +endstream +endobj +895 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 329.581 334.742 341.641] -/A << /S /GoTo /D (subsection.7.14) >> +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/psblas.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 901 0 R +/BBox [0 0 197 215] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 902 0 R +>>/Font << /R8 903 0 R>> >> -% 726 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 317.487 305.523 329.547] -/A << /S /GoTo /D (subsection.7.15) >> +/Length 898 +/Filter /FlateDecode >> -% 727 0 obj +stream +xœµVM7 ½ëWèÖ4€Y‘ú>&@[HMl ‡¢câu·;{þý’#QvvÝKÐÂÏ{#=RüÐð«u€ÖɯÿOóÓÇl÷g³°öã¯ýá´7_ ¶çþ7ìÛ // ¢Ãl7÷¦É E„ŒŽ,E(%’ÝÌ«õîô°íúq{:ï~Üüe0ƒsµØÀíæ³yõöý›µ¼ûyc>Ÿ(Ú¿M, +¯ ¼”ìá3›"dQžÙ7_Û³ËXm2‚0— +¼î(:HhU3vܪÅ“Aª f6è=A΃AŸÄ!9`³Ç +PwæAt_UOÏ¡OæO³æl8f¢ãPHÄY¥æ«‡åÕÌ+k(*«¦XÁI䓉uyQ¥ +Åá ¢ï¹Éø*Þcv ( ñÌÿôÚÇJÊBt«FpÍ©vvË×VþÕ‘” +„rÓ‘û×&@^ +éÿ2!)Õš\‘Œ÷¼ža®8Î7L¤ÂõŸÇ )]aôà¥`;¡vç ãp½ªba=WY³¨Hj.8‰µ2Rt%ùÁ` R=EÝ 4¢{4_póXÕFwt{Š[Íf.óÈv¾†Ê‘yÎÌÆGÞ§Áxq€Èúš‹ú‚c—++SJëgÅ¡Br5XUäÛFZYMv8™\øH”˜M!‚¢%rl9ª§¸Ù›Œbõh¾0ÝgÔ3©EÅ-:º‹¼i¾>9ÎßÑò…/aëUËv±Òu‹ß`ú®›Lk„¡ÀtÕ—ßö¼E÷õ¶Ž¿áæsù—¼0ȽýßI +~·œíÕÏãÎh§ÀP‰ó+‰I‰Ë`ä8Äwsâ~L¥¼–U9‰–2ó¢ß”ÇM®F‘ +ÁóÕÞ²þ-ÃÉu1 ƒéuÄóŠ +}¦öqëXË|Vb´A—Ó6QƒŠY¡8Õ†% +Ûæ“Ž=…åËE²A5) +í}óiV<\îrãDÝ âÖß7x¤U +«Ͻ'ƒÇ›ÇÇùaÚ>=|9Êh±Büuæ¹£$îËLËàq·–ÑÃÊŸ5×k^Þ½;>íN÷Ûi™ZŠ\V+9D£­8îNËLÓG™÷×»~0+¾’”àŠ'¢ˆ±íúmw>o÷;{·=ŸŽûEý—»a¥ÃѲîîÝâ8SË4Â%ÕÇ¥_¾œžNÛ#OαéƒùüÐ +endstream +endobj +905 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 305.393 232.786 317.453] -/A << /S /GoTo /D (subsection.7.16) >> +/Filter /FlateDecode +/Subtype /Type1C +/Length 2887 >> -% 728 0 obj +stream +xœe–iXSWÇo É=Z¤-iʦ µ­m]‘Ö*ÕVYÔZQU (! KBI ,A8,!, Ö„M‚T¥*(¶ÖQ¬ ÖœÇn:Jg9—¹|˜‹vž~˜/÷¹÷žåyß÷ÿ?¿÷Ð0‡%Fs ¤ðÓ׋SbE‹ßë±b ±’IÑü?æw0VbôåБšV82ÑÔ«Èò2*x£Óhr}›¿X’•&HHÌð|÷ppø{k×®ûóÏ&ÏãYÿñ ৠDž«©_(–¤ðEÛ<ý©ÙB¡à„g‚0K’˜îÇ[\+ä'{î‰Xæù®ÿ{žÞ^^›ÖSÍ)Ç¥éžÏ#÷< öñ ô æ'H…±iÿ?‚a˜›¯ÈO,IÛž±76ðÄ~~|P‚ Dèéåý>†­ÂbobAØ!,{ Åck0?l=€mÀvc{°MاØgØØ~Œ†½Š±°×0wªx˜ÆÇîѲiW—ì]ÒA~Êá‡}¿2ü&&ÎŒg^Æ·g YúêÒK-=½ ,Ó'æã¡E oXhóafö±rMMⲌ¸îšÉ$ãrö‘«s²£‹Ýä(ÊŒGgm†ò:‘{-e sp­A¢O…*7˜¦È+rT…/¨Q áȆic8'¡‰p¿DûvÌÐë‰UìÊòšÒZlÕÊ(.YŠÃà\UDP¢DQ¡4@;@§ðÏGš =œkîå’|ª‘‹“âLx”.ÇÏ”…ÿ»ì{T¨Ø·‡ó/(‹Êä†f»9Ío„fy1Û'7;#ÆÒÎ}„pVRÃü6Ë$Çg´†\è ‚pè«Vo= XIrôW|N5õ²w[Fàçp8k@Ø‘b•>«36Õ´6–°­ÙÔ7莼'H_Žù˜ý7xVkWRËÇÓ­'ÝáaY܉Œä¬ãù{!ˆRUwsÑ e“¾ÚÄf²„ŸŒöþ8Œ·žKUEc#¼m´ŽPÝtTNÄ°É×½Ö’ÒãçÕÈ9?ýâ —OÈ׸j1{æÊZr%É8¶gg\|뀌+˾§À“ÑkSj/2¥Aј…R4Ý„ÓUç»3HúÈ…¥­&V²«Kõ°‚SzÍîB"Ãssƒ +kµiqÖ±F|g©º~ Ð&¼7©U3Zör@o£Wv=^ÂK;ÌãÞÇ ËËÕ¡ +7´f'»ó²ýìs;H@Ò£vú ¶wrœÐ#Jã-´{3t=áή+¯.5@ÐVÍã’å8Œ,(Ø—CiW`Â÷”+Lp )Þ}æ¼µ²Z«©åÔ«j ´šl=2‹ V”Åý ×–…IÔAr7'â hF“ƒ¨ºŸRôïs.¬D"e³‘2ÏÙ–¥Üù6JcÚt•VZÁD جlØÚ˜'Ñ›Œ“u_Žß)4ªà.°° ‡[sÕ;´”U{Ìø'ª:x z”†€á¦iα,êfç ×Ï-Ö:ôtƒ]„V0r™ÅÅy%%EЭjÊru€ÕP%•V¤zlŽðé>1ÍHêÍlH‡B7^²8B 46Ë9²¶¼õ8È&½ŒL}MEUV`5¬+n¡Äh(´Y‹=î5që|zÏA;—ľ×+Û Õm Ëvv¼SàÝBÕ¸K3F8v;‘–Ü?ãšEUD›us5Ìš’òBNA^a^†]X³¹…Zwx²´¸¼°n~A^dn錹r®¿u¬‡£®•¥)4R说~ÉE§qÖì3梌æy3 ástêÓØp§:gËIªTç͸OyvüÔ©ù¦Î8SÌäy9>ûâ,9Ašñà µÕùò÷è݇.¬âÀ×l…*G«„ £ n˜‹Âq8¦íP¶Jí1æ ÖûFfX2[Z›,Í%º=·¸º¤ +êµ»yàœUt˜s'7ìWäÇð¥™*L¬ˆÀaÞäð`ÓØ$‡e ×5Ê=NÁ&Co?5Dnüž …ÚÕ7 'a«Øøx‡wºQ¬öû ]£\K¾L™vÂÜÕßT§5rêS šZÌ-v^ûÁýa’£©ÜT^~BÉ6à“üyƒ ýrsüño4QFÌ­…wºœðhh@çaÿÛÊF4 +´ÇÐ0»½¸¡¤NË5gZ¯vÛ'á×pHÕ/é81üm=…Œ&«Ò7¿FýŸsaK¾fGÁ‹ân/W.š%$dV½ÞŠ<®ykMP$‰gªJË2ž3W4ŽÞ· —6”tZg£Ùn¡ìûVH"\Ù§?Á3¡²G»‡þšÜÝ‹ºhLø^]~Í"KÎ""Œ-ÉÈ‹›3l-ͶŒÕô¾PÙ¥‚Xœ§!Õ3Ö +"d~+[Æ,ÊÈÍU@ TT(ª«§.>¾*Ù#òI¢DaV, ‡Û†CCÐmþØñ6~µZ'ƒéàÓÈ£~Þ—S$'ƒÙI&2š™¬@s©¡ÎäQ ŠÌÔNÊþü„?~ðûw¡×I×)Øax\ê¿62$Oèåt%×¥ÖQíâyú´_gèæy:»’Y[j(­‚ »FEõÎTV©µT®¹&<°"§^Ä/xQY¸‚¯Ù­psBå”+TíóÎí´Ö§Èø”ŽôTj¡0VŸx(„Oâ\ +IZ¹t4¤w!}^€gì½—ú€h9@±^è Ò…s2•=3B2H>ÉçynÞÌû7ŠC‰#ˆþhñÚï-whèÇGtBŒV²Õºø`uÞ1èFæ3ÑvðøçÓ÷à´ÛoÝ34\z"ž“,P&+š´®¿Ú»oB03¼å㣶xsÉÝä!FáŽ/†m'~¼êŒn?àßvaÝDëÑ÷ì+°¥¸5ïšôt0Ü ¶æ+ÉÔ·ÆsRz…t¥J–xFzëþƒöÁQîè`û¼/dŠº2›åž™j@7GÛú.º?Üwyc$O‘œÀI‘(S¥ME®ƒßŒô| Áõ^`‚2!-›’"VîIN™"Ì‚B-ÌÎe³/uV9:Î6:.Ç°ÿMɪH +endstream +endobj +911 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 293.299 242.519 305.359] -/A << /S /GoTo /D (subsection.7.17) >> +/Length 8252 >> -% 729 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 99.895 706.129 Td [(signed)-263(to)-264(a)-263(pr)18(ocess)-263(that)-263(will)-264(own)-263(the)-263(corr)18(esponding)-263(r)18(ow)-263(in)-264(the)-263(coef)18(\002cient)-263(ma-)]TJ 0 -11.955 Td [(trix)-406(and)-406(will)-406(carry)-405(out)-406(all)-406(r)18(elated)-406(computations.)-778(This)-406(all)1(ocation)-406(strategy)-406(is)]TJ 0 -11.955 Td [(equivalent)-353(to)-353(a)-353(partition)-353(of)-353(the)-353(discr)18(etization)-353(mesh)-353(in)1(to)]TJ/F60 9.9626 Tf 253.543 0 Td [(sub-domains)]TJ/F62 9.9626 Tf 51.107 0 Td [(.)-619(Our)-353(li-)]TJ -304.65 -11.956 Td [(brary)-220(supports)-220(any)-220(distribution)-220(that)-220(keeps)-220(together)-220(the)-220(coef)18(\002cients)-220(of)-220(each)-220(ma-)]TJ 0 -11.955 Td [(trix)-244(r)18(ow;)-245(ther)18(e)-244(ar)18(e)-243(no)-244(other)-243(constraints)-244(on)-243(the)-243(variable)-244(assignment.)-308(This)-243(choice)]TJ 0 -11.955 Td [(is)-324(consistent)-324(with)-324(simple)-324(data)-325(distributions)-324(such)-324(as)]TJ/F67 9.9626 Tf 232.237 0 Td [(CYCLIC\050N\051)]TJ/F62 9.9626 Tf 50.302 0 Td [(and)]TJ/F67 9.9626 Tf 20.095 0 Td [(BLOCK)]TJ/F62 9.9626 Tf 26.152 0 Td [(,)-324(as)]TJ -328.786 -11.955 Td [(well)-310(as)-309(completely)-310(arbitrary)-310(assignments)-309(of)-310(equation)-310(indices)-309(to)-310(pr)18(ocesses.)-489(In)]TJ 0 -11.955 Td [(particular)-250(it)-250(is)-251(consistent)-250(with)-250(the)-250(usage)-250(of)-251(graph)-250(partitioning)-250(tools)-250(commonly)]TJ 0 -11.956 Td [(available)-333(in)-332(the)-333(literatur)18(e,)-353(e.g.)-558(METIS)-332([)]TJ +1 0 0 rg 1 0 0 RG + [(13)]TJ +0 g 0 G + [(].)-558(Dense)-333(ve)1(ctors)-333(conform)-333(to)-332(sparse)]TJ 0 -11.955 Td [(matrices,)-257(that)-255(is,)-257(the)-255(entries)-255(of)-256(a)-255(vector)-255(follow)-256(the)-255(same)-255(distribution)-256(of)-255(the)-255(ma-)]TJ 0 -11.955 Td [(trix)-250(r)18(ows.)]TJ 14.944 -12.648 Td [(W)92(e)-343(assume)-344(that)-343(the)-344(sparse)-343(matrix)-343(is)-344(built)-343(in)-344(parallel,)-366(wher)18(e)-344(each)-343(pr)18(ocess)]TJ -14.944 -11.955 Td [(generates)-254(its)-254(own)-255(portion.)-322(W)92(e)-255(never)-254(r)18(equir)18(e)-254(that)-254(the)-255(entir)18(e)-254(matrix)-254(be)-254(available)]TJ 0 -11.955 Td [(on)-288(a)-288(single)-288(node.)-423(However)74(,)-298(it)-287(is)-288(possible)-288(to)-288(hold)-288(the)-288(entir)18(e)-288(matrix)-287(in)-288(one)-288(pr)18(o-)]TJ 0 -11.955 Td [(cess)-241(and)-242(distribute)-241(it)-241(explicitly)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 133.807 3.616 Td [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 4.284 -3.616 Td [(,)-243(even)-241(though)-242(the)-241(r)18(esulting)-241(memory)-241(bottleneck)]TJ -138.091 -11.955 Td [(would)-250(make)-250(this)-250(option)-250(unattractive)-250(in)-250(most)-250(cases.)]TJ/F59 11.9552 Tf 0 -33.074 Td [(2.1)-1000(Basic)-250(Nomenclature)]TJ/F62 9.9626 Tf 0 -20.306 Td [(Our)-301(computational)-301(model)-301(implies)-301(that)-301(the)-301(data)-301(al)1(location)-301(on)-301(the)-301(parallel)-301(dis-)]TJ 0 -11.955 Td [(tributed)-370(memory)-369(machine)-370(is)-370(guided)-370(by)-369(the)-370(str)8(uctur)18(e)-370(of)-370(the)-369(physical)-370(model,)]TJ 0 -11.955 Td [(and)-250(speci\002cally)-250(by)-250(the)-250(discr)18(etization)-250(mesh)-250(of)-250(the)-250(PDE.)]TJ 14.944 -12.648 Td [(Each)-400(point)-400(of)-400(the)-399(discr)18(etization)-400(mesh)-400(will)-400(have)-400(\050at)-400(least)1(\051)-400(one)-400(associated)]TJ -14.944 -11.955 Td [(equation/variable,)-416(and)-384(ther)18(efor)18(e)-383(one)-383(index.)-710(W)92(e)-383(say)-383(that)-384(point)]TJ/F60 9.9626 Tf 289.765 0 Td [(i)-403(depends)]TJ/F62 9.9626 Tf 42.709 0 Td [(on)]TJ -332.474 -11.955 Td [(point)]TJ/F60 9.9626 Tf 26.955 0 Td [(j)]TJ/F62 9.9626 Tf 6.004 0 Td [(if)-312(the)-312(equation)-312(for)-312(a)-312(variable)-313(associated)-312(with)]TJ/F60 9.9626 Tf 202.502 0 Td [(i)]TJ/F62 9.9626 Tf 6.074 0 Td [(contains)-312(a)-312(term)-312(in)]TJ/F60 9.9626 Tf 84.153 0 Td [(j)]TJ/F62 9.9626 Tf 2.894 0 Td [(,)-328(or)]TJ -328.582 -11.955 Td [(equivalently)-291(if)]TJ/F60 9.9626 Tf 67.321 0 Td [(a)]TJ/F60 7.5716 Tf 4.59 -1.96 Td [(i)-67(j)]TJ/F91 10.3811 Tf 8.967 1.96 Td [(6)]TJ/F93 10.3811 Tf 0.249 0 Td [(=)]TJ/F62 9.9626 Tf 11.726 0 Td [(0.)-434(After)-292(the)-291(partition)-292(of)-291(the)-292(discr)18(etization)-291(mesh)-292(into)]TJ/F60 9.9626 Tf 233.514 0 Td [(sub-)]TJ -326.367 -11.955 Td [(domains)]TJ/F62 9.9626 Tf 37.559 0 Td [(assigned)-381(to)-381(the)-381(parallel)-381(pr)18(ocesses,)-413(we)-381(classify)-381(the)-381(points)-381(of)-381(a)-381(given)]TJ -37.559 -11.955 Td [(sub-domain)-250(as)-250(following.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.003 Td [(Internal.)]TJ +0 g 0 G +/F62 9.9626 Tf 43.995 0 Td [(An)-359(internal)-359(poi)1(nt)-359(of)-359(a)-359(given)-359(domain)]TJ/F60 9.9626 Tf 168.65 0 Td [(depends)]TJ/F62 9.9626 Tf 35.684 0 Td [(only)-359(on)-359(points)-358(of)-359(the)]TJ -223.422 -11.955 Td [(same)-264(domain.)-351(If)-264(all)-264(points)-264(of)-264(a)-264(domain)-263(ar)18(e)-264(assigned)-264(to)-264(one)-264(pr)18(ocess,)-267(then)]TJ 0 -11.956 Td [(a)-196(computational)-196(step)-195(\050e.g.,)-207(a)-196(matrix-vector)-196(pr)18(oduct\051)-196(of)-195(the)-196(equations)-196(asso-)]TJ 0 -11.955 Td [(ciated)-214(with)-213(the)-214(internal)-214(points)-214(r)18(equir)18(es)-213(no)-214(data)-214(items)-214(fr)18(om)-213(other)-214(domains)]TJ 0 -11.955 Td [(and)-250(no)-250(communications.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.695 Td [(Boundary)92(.)]TJ +0 g 0 G +/F62 9.9626 Tf 51.397 0 Td [(A)-192(point)-191(of)-192(a)-192(given)-191(domain)-192(is)-192(a)-191(boundary)-192(point)-192(if)-191(it)]TJ/F60 9.9626 Tf 217.552 0 Td [(depends)]TJ/F62 9.9626 Tf 34.019 0 Td [(on)-192(points)]TJ -278.061 -11.955 Td [(belonging)-250(to)-250(other)-250(domains.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.696 Td [(Halo.)]TJ +0 g 0 G +/F62 9.9626 Tf 29.609 0 Td [(A)-389(halo)-389(point)-389(for)-389(a)-389(given)-389(domain)-389(i)1(s)-389(a)-389(point)-389(belonging)-389(to)-389(another)-389(do-)]TJ -4.702 -11.955 Td [(main)-267(such)-267(that)-266(ther)18(e)-267(is)-267(a)-267(boundary)-267(point)-266(which)]TJ/F60 9.9626 Tf 212.474 0 Td [(depends)]TJ/F62 9.9626 Tf 34.767 0 Td [(on)-267(it.)-360(Whenever)]TJ -247.241 -11.955 Td [(performing)-360(a)-361(computational)-360(step,)-388(such)-361(as)-360(a)-361(matrix-vector)-360(pr)18(oduct,)-388(the)]TJ 0 -11.955 Td [(values)-274(associated)-273(with)-274(halo)-274(points)-274(ar)18(e)-274(r)18(equested)-273(fr)18(om)-274(other)-274(domains.)-381(A)]TJ 0 -11.955 Td [(boundary)-259(point)-258(of)-259(a)-258(given)-259(domain)-258(is)-259(usually)-258(a)-259(halo)-258(point)-259(for)-259(some)-258(other)]TJ 0 -11.956 Td [(domain)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 34.002 3.617 Td [(2)]TJ +0 g 0 G +/F62 9.9626 Tf 4.284 -3.617 Td [(;)-238(ther)18(efor)18(e)-232(the)-233(car)18(dinality)-232(of)-232(the)-232(boundary)-233(p)1(oints)-233(set)-232(denotes)-232(the)]TJ -38.286 -11.955 Td [(amount)-250(of)-250(data)-250(sent)-250(to)-250(other)-250(domains.)]TJ +0 g 0 G +ET +q +1 0 0 1 99.895 168.389 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +Q +BT +/F62 5.9776 Tf 110.755 161.427 Td [(1)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(In)-250(our)-250(pr)18(ototype)-250(implementation)-250(we)-250(pr)18(ovide)-250(sample)-250(scatter/gather)-250(r)18(outines.)]TJ/F62 5.9776 Tf -3.487 -6.922 Td [(2)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(This)-401(is)-402(the)-401(normal)-402(situation)-401(when)-402(the)-401(pattern)-402(of)-401(the)-402(sparse)-401(matrix)-402(is)-401(symmetric,)-440(which)-401(is)]TJ -14.347 -9.464 Td [(equivalent)-358(to)-358(say)-358(that)-358(the)-357(interaction)-358(between)-358(two)-358(variables)-358(is)-358(r)18(ecipr)18(ocal.)-634(If)-357(the)-358(matrix)-358(pattern)]TJ 0 -9.465 Td [(is)-241(non-symmetric)-241(we)-242(may)-241(have)-241(one-way)-241(interactions,)-243(and)-241(these)-241(could)-241(cause)-242(a)-241(situation)-241(in)-241(which)-241(a)]TJ 0 -9.464 Td [(boundary)-250(point)-250(is)-250(not)-250(a)-250(halo)-250(point)-250(for)-250(its)-250(neighbour)74(.)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 169.365 -29.888 Td [(3)]TJ +0 g 0 G +ET + +endstream +endobj +924 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 271.309 183.083 283.109] -/A << /S /GoTo /D (section.8) >> +/Length 4830 >> -% 730 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(Overlap.)]TJ +0 g 0 G +/F62 9.9626 Tf 44.553 0 Td [(An)-245(overlap)-244(point)-245(is)-245(a)-245(boundary)-244(point)-245(assigned)-245(to)-244(multiple)-245(domains.)]TJ -19.647 -11.955 Td [(Any)-204(operation)-204(that)-204(involves)-204(an)-204(overlap)-204(point)-204(has)-204(to)-204(be)-204(r)18(eplicated)-204(for)-204(each)]TJ 0 -11.955 Td [(assignment.)]TJ -24.906 -18.943 Td [(Overlap)-358(points)-359(do)-358(not)-358(usually)-359(exist)-358(in)-359(the)-358(basic)-358(data)-359(distributions;)-412(however)]TJ 0 -11.955 Td [(they)-325(ar)18(e)-326(a)-325(featur)18(e)-326(of)-325(Domain)-326(Decomposition)-325(Schwarz)-326(pr)18(econditioners)-325(which)]TJ 0 -11.956 Td [(ar)18(e)-250(the)-250(subject)-250(of)-250(r)18(elated)-250(r)18(esear)18(ch)-250(work)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(3)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(2)]TJ +0 g 0 G + [(].)]TJ 14.944 -11.955 Td [(W)92(e)-225(denote)-225(the)-225(sets)-225(of)-225(internal,)-230(boundary)-225(and)-225(halo)-225(points)-225(for)-225(a)-225(given)-225(subdo-)]TJ -14.944 -11.955 Td [(main)-251(by)]TJ/F91 10.3811 Tf 38.66 0 Td [(I)]TJ/F62 9.9626 Tf 6.53 0 Td [(,)]TJ/F91 10.3811 Tf 5.125 0 Td [(B)]TJ/F62 9.9626 Tf 9.753 0 Td [(and)]TJ/F91 10.3811 Tf 19.497 0 Td [(H)]TJ/F62 9.9626 Tf 8.972 0 Td [(.)-314(Each)-252(subdomain)-251(is)-252(assigned)-251(to)-252(one)-251(pr)18(ocess;)-253(each)-251(pr)18(ocess)]TJ -88.537 -11.955 Td [(usually)-346(owns)-346(one)-346(su)1(bdomain,)-370(although)-346(the)-346(user)-346(may)-346(choose)-345(to)-346(assign)-346(mor)18(e)]TJ 0 -11.955 Td [(than)-302(one)-301(subdomain)-302(to)-301(a)-302(pr)18(ocess.)-465(If)-302(each)-301(pr)18(ocess)]TJ/F60 9.9626 Tf 222.767 0 Td [(i)]TJ/F62 9.9626 Tf 5.968 0 Td [(owns)-302(one)-301(subdomain,)-315(the)]TJ -228.735 -11.956 Td [(number)-221(of)-221(r)18(ows)-221(in)-221(the)-221(local)-221(sparse)-221(matrix)-221(is)]TJ/F91 10.3811 Tf 192.655 0 Td [(j)-24(I)]TJ/F60 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 4.799 0 Td [(+)]TJ/F91 10.3811 Tf 9.989 0 Td [(j)-24(B)]TJ/F60 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F62 9.9626 Tf 3.003 0 Td [(,)-227(and)-221(the)-221(number)-221(of)-221(local)]TJ -235.248 -11.955 Td [(columns)-207(\050i.e.)-296(those)-207(for)-207(which)-207(ther)18(e)-208(exists)-207(at)-207(least)-207(one)-207(non-zer)18(o)-208(entry)-207(in)-207(the)-207(local)]TJ 0 -11.955 Td [(r)18(ows\051)-250(is)]TJ/F91 10.3811 Tf 37.275 0 Td [(j)-24(I)]TJ/F60 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.066 0 Td [(+)]TJ/F91 10.3811 Tf 10.255 0 Td [(j)-24(B)]TJ/F60 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.066 0 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(j)-24(H)]TJ/F60 7.5716 Tf 12.051 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F62 9.9626 Tf 3.004 0 Td [(.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +1 0 0 1 222.462 541.675 cm +q +0 -1 1 0 0 0 cm +q +.65 0 0 .65 0 0 cm +q +1 0 0 1 0 0 cm +/Im3 Do +Q +Q +Q +0 g 0 G +1 0 0 1 -222.462 -541.675 cm +BT +/F62 9.9626 Tf 260.803 335.398 Td [(Figur)18(e)-250(2:)-310(Point)-250(class\002cation.)]TJ +0 g 0 G +0 g 0 G + -95.154 -23.688 Td [(This)-190(classi\002cation)-190(of)-190(mesh)-190(points)-190(guides)-190(the)-190(naming)-190(scheme)-190(that)-190(we)-190(adopted)]TJ -14.944 -11.956 Td [(in)-190(the)-190(library)-190(internals)-190(and)-190(in)-190(the)-190(data)-190(str)8(uctur)18(es.)-290(W)92(e)-190(explicitly)-190(note)-190(that)-190(\223Halo\224)]TJ 0 -11.955 Td [(points)-250(ar)18(e)-250(also)-250(often)-250(called)-250(\223ghost\224)-250(points)-250(in)-250(the)-250(literatur)18(e.)]TJ/F59 11.9552 Tf 0 -28.902 Td [(2.2)-1000(Library)-250(contents)]TJ/F62 9.9626 Tf 0 -18.964 Td [(The)-250(PSBLAS)-250(library)-250(consists)-250(of)-250(various)-250(classes)-250(of)-250(subr)18(outines:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -18.943 Td [(Computational)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 113.723 0 Td [(comprising:)]TJ +0 g 0 G + -77.917 -19.434 Td [(\225)]TJ +0 g 0 G + [-500(Sparse)-250(matrix)-250(by)-250(dense)-250(matrix)-250(pr)18(oduct;)]TJ +0 g 0 G + 0 -15.449 Td [(\225)]TJ +0 g 0 G + [-500(Sparse)-250(triangular)-250(systems)-250(solution)-250(for)-250(block)-250(diagonal)-250(matrices;)]TJ +0 g 0 G + 0 -15.449 Td [(\225)]TJ +0 g 0 G + [-500(V)111(ector)-250(and)-250(matrix)-250(norms;)]TJ +0 g 0 G + 0 -15.449 Td [(\225)]TJ +0 g 0 G + [-500(Dense)-250(matrix)-250(sums;)]TJ +0 g 0 G + 0 -15.449 Td [(\225)]TJ +0 g 0 G + [-500(Dot)-250(pr)18(oducts.)]TJ +0 g 0 G +/F59 9.9626 Tf -35.806 -19.434 Td [(Communication)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 118.704 0 Td [(handling)-250(halo)-250(and)-250(overlap)-250(communications;)]TJ +0 g 0 G + 50.661 -29.888 Td [(4)]TJ +0 g 0 G +ET + +endstream +endobj +921 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 259.055 387.474 271.115] -/A << /S /GoTo /D (subsection.8.1) >> +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/points.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 927 0 R +/BBox [0 0 274 308] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 928 0 R +>>/Font << /R8 929 0 R>> >> -% 731 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 246.961 415.897 259.021] -/A << /S /GoTo /D (subsection.8.2) >> +/Length 1397 +/Filter /FlateDecode +>> +stream +xœÝYËŽ5Ý÷Wô’ q±Ën»ï6 +– 󣄠¹3ÊBü=®§«æ±bA‡HŒ}Ï©c·í²»?­é”ׄÿäïÍeùö]_?ü¾¤Ó©d êwßGüðaù´d"®òçæ²¾¾ä}ÍíëÕûe4­ß ,äýÔ×sÿ»º,_ýx÷Ç/w×·¯®~[¾»ZÞ.ø›Œ1¸ð™âuóâ¯ïÿ¼ûùúáoO*žþx/þÃõí½Î22Tø<ᜇd†&Âoî/×ïV˜âÿõèCê1V^õd¨æõãR ¬Û9ŸÎç¶^–ºµÓ¾ÍšÚýÝz¦zõ¯7‹!€S®ûj짔êJÚR¿–ðWZSöN•m˜´ ide«3çûfyÿõROÛú×|J_F¿~]~z2ò–}×òVÐÕämë¦Î€sQ<I<³¦uiüd¸r͵9.Ö¤¢ÆR’ÉÑãY~ОÐCÑÝ¥Ÿ}öçÙ^â<3LA ‰c‹YÒ¶®ôçY¯qž&mCÙØâÌû懣ç—Ñ#|H–_rƧšÇÒ³,wš0s>}yüÇ5ÒNóË p%U¤ –ðW@E’§$§•|¡pxõE`&ÆøåU ™¤ó«›%AÝIUÍ0Gš]ý‘&ûÖM’ î Jšx÷¬…T.ù)~¼C²8˜}~‚­ÛÍWÛ¢íÁvKÑö¶K,8ÛÍ—&†`[C*—ü¨ONÔÇs­ƒ ½m‚ê ò9؆Áu¶!×`{P9¦m‚êKI7oÛB*—ü¨O샹~ñ̳·Ç'­¡Á^ÝIaÏvRy!œzw'ó¤`Íx"0.Ѥb'…iÄù|ùÌs¼žP:-%X/[´^º“#Àa°há…dÞPÓY/)Z‡Ýqˆ&-VŠÖ½ON¬Çtnƒ®G±À¹ÍY–& é›Ë’וB¿Ìœ¤¡¹M…ÁnngäŽ%¤Ò#ØœÃÉÙÇ‚"d;’Àô)ùÃ(˜\X‹³Ž¥²£0}Z¡pø#`Ó†Sò‹%Hvt§Ð̧f£`ú`-Î+”ÐŽQ4ó9ƒ…Ç,x›O/,îf,z»âißn«ªÝìv«$½úæ-ÜŒå`?›“禩™|,ˆ7cïó™;Ìñº@!osõé]Š¦?ݲta0€yýÒ¥¤Zdy›«OïRÜ<%9­äƒ€[}拇ú6m8uõIPžþhǃf>m))…YÞæê“ Ò<%9­äƒ€[}ækçÿÜæ“WO’rõ= A} £ Ñ0'Ë 9‘S,irêÕ÷+\_ã­uâÝ¿›ÑÆE?æóé{¦ƒÙÇá'È‹ÎB#4_²$&†`[–’qq‘‘&/> Mõ5^_'†`[Bý˜OõºÖÁ–%©¡ ª/]07o[šqq ’&/M Íõ5^_'nÞ¶†4.ú1Ÿ6ØsýÜ¥%]Š!ƒCÞgVe@Ù–‹’…$)š5-ƒÃØ5}‡ä²?ÖLg+‡ |>{é>hO‘jøX5~,ê>–0àxÕ},1’š¬ác ”ø±ŠûX€5‹ûXb$3òø³ Ú…t¡í¡=Å>tpº8Õ‡’Ô$iÎ>´-ö¡Ç%ÀšTÔXJR#ÞgL¼í“-J/0®jãȶw.Þâªick£Z,”Ô¤š^”Ñk·ì«éUÝ ‹¯WjÇ‚µÛçƒ.ÁºUE³zÉgýãPˆ,é"›Ñe±ûÌ‹:t˜!*%~ Ö *«QÊÒ@emPMÓ1:¾Þ’àX¼÷(˜®4æ ¤Nƒ¾]þÎJ¦' +endstream +endobj +936 0 obj +<< +/Length 4927 >> -% 732 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(Data)-250(management)-250(and)-250(auxiliary)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 190.375 0 Td [(including:)]TJ +0 g 0 G + -154.569 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(Parallel)-250(envir)18(onment)-250(management)]TJ +0 g 0 G + 0 -18.081 Td [(\225)]TJ +0 g 0 G + [-500(Communication)-250(descriptors)-250(allocation;)]TJ +0 g 0 G + 0 -18.082 Td [(\225)]TJ +0 g 0 G + [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(allocation;)]TJ +0 g 0 G + 0 -18.081 Td [(\225)]TJ +0 g 0 G + [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(build)-250(and)-250(update;)]TJ +0 g 0 G + 0 -18.082 Td [(\225)]TJ +0 g 0 G + [-500(Sparse)-250(matrix)-250(and)-250(data)-250(distribution)-250(pr)18(epr)18(ocessing.)]TJ +0 g 0 G +/F59 9.9626 Tf -35.806 -24.207 Td [(Preconditioner)-250(routines)]TJ +0 g 0 G +0 g 0 G + 0 -24.208 Td [(Iterative)-250(methods)]TJ +0 g 0 G +/F62 9.9626 Tf 84.951 0 Td [(a)-250(subset)-250(of)-250(Krylov)-250(subspace)-250(iterative)-250(methods)]TJ -84.951 -23.137 Td [(The)-262(following)-263(naming)-262(scheme)-262(has)-262(been)-263(adopted)-262(for)-262(all)-263(the)-262(symbols)-262(internally)]TJ 0 -11.955 Td [(de\002ned)-250(in)-250(the)-250(PSBLAS)-250(softwar)18(e)-250(package:)]TJ +0 g 0 G + 13.888 -23.137 Td [(\225)]TJ +0 g 0 G + [-500(all)-250(symbols)-250(\050i.e.)-310(subr)18(outine)-250(names,)-250(data)-250(types...\051)-310(ar)18(e)-250(pr)18(e\002xed)-250(by)]TJ/F67 9.9626 Tf 294.184 0 Td [(psb_)]TJ +0 g 0 G +/F62 9.9626 Tf -294.184 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(all)-250(data)-250(type)-250(names)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F67 9.9626 Tf 166.604 0 Td [(_type)]TJ +0 g 0 G +/F62 9.9626 Tf -166.604 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(all)-250(constants)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F67 9.9626 Tf 135.59 0 Td [(_)]TJ +0 g 0 G +/F62 9.9626 Tf -135.59 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(all)-279(top-level)-279(subr)18(outine)-279(names)-279(follow)-279(the)-279(r)8(ule)]TJ/F67 9.9626 Tf 216.11 0 Td [(psb_xxname)]TJ/F62 9.9626 Tf 55.083 0 Td [(wher)18(e)]TJ/F67 9.9626 Tf 30.187 0 Td [(xx)]TJ/F62 9.9626 Tf 13.241 0 Td [(can)]TJ -303.602 -11.955 Td [(be)-250(either:)]TJ +0 g 0 G +/F59 9.9626 Tf 11.955 -24.208 Td [(\226)]TJ +0 g 0 G +/F67 9.9626 Tf 9.963 0 Td [(ge)]TJ/F62 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(dense)-250(data,)]TJ +0 g 0 G +/F59 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +0 g 0 G +/F67 9.9626 Tf 9.963 0 Td [(sp)]TJ/F62 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(sparse)-250(data,)]TJ +0 g 0 G +/F59 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +0 g 0 G +/F67 9.9626 Tf 9.963 0 Td [(cd)]TJ/F62 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(communication)-250(descriptor)-250(\050see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3)]TJ +0 g 0 G + [(\051.)]TJ -32.378 -24.208 Td [(For)-215(example)-215(the)]TJ/F67 9.9626 Tf 72.515 0 Td [(psb_geins)]TJ/F62 9.9626 Tf 47.073 0 Td [(,)]TJ/F67 9.9626 Tf 4.704 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 49.218 0 Td [(and)]TJ/F67 9.9626 Tf 19.011 0 Td [(psb_cdins)]TJ/F62 9.9626 Tf 49.218 0 Td [(perform)-215(the)-215(same)]TJ -241.739 -11.955 Td [(action)-247(\050see)]TJ +0 0 1 rg 0 0 1 RG + [-246(6)]TJ +0 g 0 G + [(\051)-247(on)-246(dense)-247(matrices,)-247(sparse)-247(matric)1(es)-247(and)-247(communication)-246(de-)]TJ 0 -11.956 Td [(scriptors)-222(r)18(espectively)111(.)-301(Interface)-222(overloading)-223(allows)-222(the)-222(usage)-222(of)-223(the)-222(same)]TJ 0 -11.955 Td [(subr)18(outine)-250(names)-250(for)-250(both)-250(r)18(eal)-250(and)-250(complex)-250(data.)]TJ -24.907 -23.137 Td [(In)-288(the)-288(description)-288(of)-289(the)-288(subr)18(outines,)-297(ar)18(guments)-289(or)-288(ar)18(gument)-288(entries)-288(ar)18(e)-288(clas-)]TJ 0 -11.955 Td [(si\002ed)-250(as:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -23.137 Td [(global)]TJ +0 g 0 G +/F62 9.9626 Tf 33.763 0 Td [(For)-270(input)-270(ar)18(guments,)-275(the)-270(value)-271(must)-270(be)-270(the)-270(same)-270(on)-270(all)-270(pr)18(ocesses)-270(par)18(-)]TJ -8.856 -11.955 Td [(ticipating)-276(in)-277(the)-276(subr)18(outine)-277(call;)-289(for)-277(output)-276(ar)18(guments)-277(the)-276(value)-277(is)-276(guar)18(-)]TJ 0 -11.955 Td [(anteed)-250(to)-250(be)-250(the)-250(same.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -24.208 Td [(local)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(Each)-250(pr)18(ocess)-250(has)-250(its)-250(own)-250(value\050s\051)-250(independently)111(.)]TJ -26.56 -23.137 Td [(T)92(o)-250(\002nish)-250(our)-250(general)-250(description,)-250(we)-250(de\002ne)-250(a)-250(version)-250(string)-250(with)-250(the)-250(constant)]TJ/F67 9.9626 Tf 122.168 -24.059 Td [(psb_version_string_)]TJ/F62 9.9626 Tf -122.168 -24.059 Td [(whose)-250(curr)18(ent)-250(value)-250(is)]TJ/F67 9.9626 Tf 101.857 0 Td [(3.8.0)]TJ +0 g 0 G +/F62 9.9626 Tf 67.508 -29.888 Td [(5)]TJ +0 g 0 G +ET + +endstream +endobj +941 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 234.867 399.738 246.926] -/A << /S /GoTo /D (subsection.8.3) >> +/Length 8378 >> -% 733 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(2.3)-1000(Application)-250(structure)]TJ/F62 9.9626 Tf 0 -19.381 Td [(The)-244(main)-244(underlyi)1(ng)-244(principle)-244(of)-244(the)-244(PSBLAS)-243(library)-244(is)-244(that)-244(the)-243(library)-244(objects)]TJ 0 -11.956 Td [(ar)18(e)-236(cr)18(eated)-235(and)-236(exist)-235(with)-236(r)18(efer)18(ence)-235(to)-236(a)-235(discr)18(etized)-236(space)-236(t)1(o)-236(which)-236(ther)18(e)-235(corr)18(e-)]TJ 0 -11.955 Td [(sponds)-258(an)-257(index)-258(space)-257(and)-258(a)-258(matrix)-257(sparsity)-258(pattern.)-332(As)-258(an)-258(example,)-259(consider)]TJ 0 -11.955 Td [(a)-310(cell-center)18(ed)-309(\002nite-volume)-310(discr)18(etization)-310(of)-309(the)-310(Navier)18(-Stokes)-310(e)1(quations)-310(on)]TJ 0 -11.955 Td [(a)-234(simulation)-235(domain;)-239(the)-234(index)-235(space)-234(1)-179(.)-192(.)-192(.)]TJ/F60 9.9626 Tf 185.595 0 Td [(n)]TJ/F62 9.9626 Tf 7.998 0 Td [(is)-234(isomorphic)-235(to)-234(the)-234(set)-235(of)-234(cell)-234(cen-)]TJ -193.593 -11.955 Td [(ters,)-210(wher)18(eas)-200(the)-201(pattern)-200(of)-200(the)-201(associated)-200(linear)-200(system)-200(matrix)-201(is)-200(isomorphic)-200(to)]TJ 0 -11.956 Td [(the)-294(adjacency)-294(graph)-294(imposed)-294(on)-294(the)-294(discr)18(et)1(ization)-294(mesh)-294(by)-294(the)-294(discr)18(etization)]TJ 0 -11.955 Td [(stencil.)]TJ 14.944 -12.17 Td [(Thus)-343(the)-343(\002rst)-343(or)18(der)-344(of)-343(business)-343(is)-343(to)-343(establish)-343(an)-343(index)-343(space,)-367(and)-343(this)-343(is)]TJ -14.944 -11.955 Td [(done)-287(with)-287(a)-287(call)-287(to)]TJ/F67 9.9626 Tf 85.52 0 Td [(psb_cdall)]TJ/F62 9.9626 Tf 49.932 0 Td [(in)-287(which)-287(we)-287(specify)-287(the)-287(size)-287(of)-287(the)-287(index)-287(space)]TJ/F60 9.9626 Tf -135.328 -11.956 Td [(n)]TJ/F62 9.9626 Tf 8.041 0 Td [(and)-238(the)-239(allocation)-238(of)-239(the)-239(ele)1(ments)-239(of)-239(the)-238(index)-239(space)-238(to)-239(the)-238(various)-239(pr)18(ocesses)]TJ -8.165 -11.955 Td [(making)-250(up)-250(the)-250(MPI)-250(\050virtual\051)-250(parallel)-250(machine.)]TJ 14.944 -12.17 Td [(The)-366(index)-367(space)-366(is)-366(partitioned)-367(among)-366(pr)18(ocesses,)-396(and)-366(this)-366(cr)18(eates)-367(a)-366(map-)]TJ -14.944 -11.956 Td [(ping)-301(fr)18(om)-300(the)-301(\223global\224)-301(numbering)-300(1)-180(.)-191(.)-192(.)]TJ/F60 9.9626 Tf 176.584 0 Td [(n)]TJ/F62 9.9626 Tf 8.659 0 Td [(to)-301(a)-300(numbering)-301(\223local\224)-301(to)-301(each)-300(pr)18(o-)]TJ -185.243 -11.955 Td [(cess;)-230(each)-221(pr)18(ocess)]TJ/F60 9.9626 Tf 79.682 0 Td [(i)]TJ/F62 9.9626 Tf 5.162 0 Td [(will)-221(own)-220(a)-221(certain)-220(subset)-221(1)-179(.)-192(.)-192(.)]TJ/F60 9.9626 Tf 130.532 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F60 5.9776 Tf 17.537 -1.649 Td [(i)]TJ/F62 9.9626 Tf 2.775 3.143 Td [(,)-226(each)-221(element)-221(of)-220(which)]TJ -241.352 -11.955 Td [(corr)18(esponds)-258(to)-259(a)-258(certain)-258(element)-258(of)-259(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 177.035 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 0 Td [(.)-335(The)-258(user)-259(doe)1(s)-259(not)-258(set)-258(explicitly)-259(this)]TJ -182.699 -11.955 Td [(mapping;)-225(when)-212(the)-212(application)-212(needs)-213(to)-212(indicate)-212(to)-212(which)-213(el)1(ement)-213(of)-212(the)-212(index)]TJ 0 -11.955 Td [(space)-305(a)-306(certain)-305(item)-306(is)-305(r)18(elated,)-320(such)-305(as)-306(the)-305(r)18(ow)-306(and)-305(column)-306(index)-305(of)-306(a)-305(matrix)]TJ 0 -11.956 Td [(coef)18(\002cient,)-283(it)-276(does)-277(so)-276(in)-277(the)-276(\223global\224)-277(numb)1(ering,)-284(and)-276(the)-276(library)-277(will)-276(translate)]TJ 0 -11.955 Td [(into)-250(the)-250(appr)18(opriate)-250(\223local\224)-250(numbering.)]TJ 14.944 -12.17 Td [(For)-324(a)-325(given)-324(index)-324(space)-325(1)-179(.)-191(.)-192(.)]TJ/F60 9.9626 Tf 129.74 0 Td [(n)]TJ/F62 9.9626 Tf 8.895 0 Td [(ther)18(e)-324(ar)18(e)-325(many)-324(possible)-324(associated)-325(topolo-)]TJ -153.579 -11.956 Td [(gies,)-213(i.e.)-295(many)-204(dif)18(fer)18(ent)-204(discr)18(etization)-204(stencils;)-220(thus)-204(the)-204(description)-204(of)-204(the)-204(index)]TJ 0 -11.955 Td [(space)-277(is)-278(not)-277(completed)-278(until)-277(the)-277(user)-278(has)-277(de\002ned)-278(a)-277(sparsity)-278(p)1(attern,)-285(either)-277(ex-)]TJ 0 -11.955 Td [(plicitly)-263(thr)18(ough)]TJ/F67 9.9626 Tf 71.63 0 Td [(psb_cdins)]TJ/F62 9.9626 Tf 49.698 0 Td [(or)-264(im)1(plicitly)-264(thr)18(ough)]TJ/F67 9.9626 Tf 95.326 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)-351(T)1(he)-264(descriptor)-263(is)]TJ -263.727 -11.955 Td [(\002nalized)-225(with)-225(a)-225(call)-226(to)]TJ/F67 9.9626 Tf 98.787 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 49.316 0 Td [(and)-225(a)-225(sparse)-225(matrix)-226(with)-225(a)-225(call)-225(to)]TJ/F67 9.9626 Tf 146.044 0 Td [(psb_spasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)]TJ -341.22 -11.955 Td [(After)]TJ/F67 9.9626 Tf 26.16 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 50.21 0 Td [(each)-315(pr)18(ocess)]TJ/F60 9.9626 Tf 59.13 0 Td [(i)]TJ/F62 9.9626 Tf 6.101 0 Td [(will)-315(have)-315(de\002ned)-315(a)-314(set)-315(of)-315(\223halo\224)-315(\050or)-315(\223ghost\224\051)]TJ -141.601 -11.955 Td [(indices)]TJ/F60 9.9626 Tf 34.731 0 Td [(n)]TJ/F62 9.9626 Tf 5.663 -1.495 Td [(r)18(ow)]TJ/F60 5.9776 Tf 17.538 -1.648 Td [(i)]TJ/F93 10.3811 Tf 5.211 3.143 Td [(+)]TJ/F62 9.9626 Tf 10.506 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -3.831 Td [(col)]TJ/F60 5.9776 Tf 12.794 -1.648 Td [(i)]TJ/F62 9.9626 Tf 2.775 5.479 Td [(,)-377(denoting)-352(elements)-351(of)-352(the)-352(index)-351(space)-352(that)-351(ar)18(e)]TJ/F60 9.9626 Tf 215.582 0 Td [(not)]TJ/F62 9.9626 Tf -330.431 -13.79 Td [(assigned)-289(to)-290(pr)18(ocess)]TJ/F60 9.9626 Tf 88.744 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(;)-309(however)-290(t)1(he)-290(variables)-289(associated)-290(with)-289(them)-290(ar)18(e)-289(needed)]TJ -91.708 -11.955 Td [(to)-289(complete)-289(computations)-289(associated)-289(with)-290(the)-289(sparse)-289(matrix)]TJ/F60 9.9626 Tf 269.662 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(,)-299(and)-289(thus)-289(they)]TJ -276.98 -11.955 Td [(have)-266(to)-266(be)-266(fetched)-265(fr)18(om)-266(\050neighbouring\051)-266(pr)18(ocesses.)-358(The)-266(descriptor)-265(of)-266(the)-266(index)]TJ 0 -11.956 Td [(space)-294(is)-293(built)-294(exactly)-294(for)-293(the)-294(purpose)-294(of)-293(pr)18(operly)-294(sequencing)-294(the)-293(communica-)]TJ 0 -11.955 Td [(tion)-250(steps)-250(r)18(equir)18(ed)-250(to)-250(achieve)-250(this)-250(objective.)]TJ 14.944 -12.17 Td [(A)-197(simple)-197(application)-197(str)8(uctur)18(e)-197(will)-197(walk)-197(thr)18(ough)-197(the)-197(index)-197(space)-197(allocation,)]TJ -14.944 -11.956 Td [(matrix/vector)-250(cr)18(eation)-250(and)-250(linear)-250(system)-250(solution)-250(as)-250(follows:)]TJ +0 g 0 G + 12.453 -20.571 Td [(1.)]TJ +0 g 0 G + [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F67 9.9626 Tf 171.465 0 Td [(psb_init)]TJ +0 g 0 G +/F62 9.9626 Tf -171.465 -20.787 Td [(2.)]TJ +0 g 0 G + [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F67 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ +0 g 0 G +/F62 9.9626 Tf -130.489 -20.788 Td [(3.)]TJ +0 g 0 G + [-500(Allocate)-221(sparse)-221(matrix)-221(and)-221(dense)-221(vectors)-220(with)]TJ/F67 9.9626 Tf 215.843 0 Td [(psb_spall)]TJ/F62 9.9626 Tf 49.274 0 Td [(and)]TJ/F67 9.9626 Tf 19.068 0 Td [(psb_geall)]TJ +0 g 0 G +/F62 9.9626 Tf -284.185 -20.787 Td [(4.)]TJ +0 g 0 G + [-500(Loop)-320(over)-320(all)-320(local)-320(r)18(ows,)-338(generate)-320(matrix)-320(and)-320(vector)-320(entries,)-337(and)-320(insert)]TJ 12.454 -11.955 Td [(them)-250(with)]TJ/F67 9.9626 Tf 47.849 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 49.564 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ +0 g 0 G +/F62 9.9626 Tf -129.225 -20.787 Td [(5.)]TJ +0 g 0 G + [-500(Assemble)-250(the)-250(various)-250(entities:)]TJ +0 g 0 G + 17.774 -20.787 Td [(\050a\051)]TJ +0 g 0 G +/F67 9.9626 Tf 16.597 0 Td [(psb_cdasb)]TJ +0 g 0 G +/F62 9.9626 Tf -17.125 -16.371 Td [(\050b\051)]TJ +0 g 0 G +/F67 9.9626 Tf 17.125 0 Td [(psb_spasb)]TJ +0 g 0 G +/F62 9.9626 Tf -16.039 -16.371 Td [(\050c\051)]TJ +0 g 0 G +/F67 9.9626 Tf 16.039 0 Td [(psb_geasb)]TJ +0 g 0 G +/F62 9.9626 Tf 122.541 -29.888 Td [(6)]TJ +0 g 0 G +ET + +endstream +endobj +954 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 222.773 444.603 234.832] -/A << /S /GoTo /D (subsection.8.4) >> +/Length 7484 >> -% 744 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 112.349 706.129 Td [(6.)]TJ +0 g 0 G + [-500(Choose)-301(the)-300(pr)18(econditioner)-301(to)-300(be)-301(used)-300(with)]TJ/F67 9.9626 Tf 205.537 0 Td [(prec%init)]TJ/F62 9.9626 Tf 50.068 0 Td [(and)-301(build)-300(it)-301(with)]TJ/F67 9.9626 Tf -243.152 -11.955 Td [(prec%build)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 52.303 3.616 Td [(3)]TJ +0 g 0 G +/F62 9.9626 Tf 4.284 -3.616 Td [(.)]TJ +0 g 0 G + -69.04 -22.307 Td [(7.)]TJ +0 g 0 G + [-500(Call)-190(the)-190(iterative)-190(driver)]TJ/F67 9.9626 Tf 115.326 0 Td [(psb_krylov)]TJ/F62 9.9626 Tf 54.196 0 Td [(with)-190(the)-190(method)-190(of)-190(choice,)-202(e.g.)]TJ/F67 9.9626 Tf 134.982 0 Td [(bicgstab)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ -358.801 -21.712 Td [(This)-250(is)-250(the)-250(str)8(uctur)18(e)-250(of)-250(the)-250(sample)-250(pr)18(ograms)-250(in)-250(the)-250(dir)18(ectory)]TJ/F67 9.9626 Tf 266.418 0 Td [(test/pargen/)]TJ/F62 9.9626 Tf 62.764 0 Td [(.)]TJ -314.238 -12.551 Td [(For)-257(a)-258(simulation)-257(in)-257(which)-257(the)-258(same)-257(discr)18(etization)-257(mesh)-257(is)-258(used)-257(over)-257(multi-)]TJ -14.944 -11.955 Td [(ple)-250(time)-250(steps,)-250(the)-250(following)-250(str)8(uctur)18(e)-250(may)-250(be)-250(mor)18(e)-250(appr)18(opriate:)]TJ +0 g 0 G + 12.454 -21.712 Td [(1.)]TJ +0 g 0 G + [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F67 9.9626 Tf 171.464 0 Td [(psb_init)]TJ +0 g 0 G +/F62 9.9626 Tf -171.464 -22.307 Td [(2.)]TJ +0 g 0 G + [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F67 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ +0 g 0 G +/F62 9.9626 Tf -130.489 -22.307 Td [(3.)]TJ +0 g 0 G + [-500(Loop)-248(over)-248(the)-248(topology)-248(of)-248(the)-248(discr)18(eti)1(zation)-248(mesh)-248(and)-248(build)-248(the)-248(descrip-)]TJ 12.453 -11.955 Td [(tor)-250(with)]TJ/F67 9.9626 Tf 37.857 0 Td [(psb_cdins)]TJ +0 g 0 G +/F62 9.9626 Tf -50.31 -22.307 Td [(4.)]TJ +0 g 0 G + [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F67 9.9626 Tf 144.386 0 Td [(psb_cdasb)]TJ +0 g 0 G +/F62 9.9626 Tf -144.386 -22.307 Td [(5.)]TJ +0 g 0 G + [-500(Allocate)-190(the)-190(sparse)-190(matrices)-190(and)-190(dense)-190(vectors)-190(with)]TJ/F67 9.9626 Tf 237.676 0 Td [(psb_spall)]TJ/F62 9.9626 Tf 48.966 0 Td [(and)]TJ/F67 9.9626 Tf 18.759 0 Td [(psb_geall)]TJ +0 g 0 G +/F62 9.9626 Tf -305.401 -22.308 Td [(6.)]TJ +0 g 0 G + [-500(Loop)-250(over)-250(the)-250(time)-250(steps:)]TJ +0 g 0 G + 17.773 -22.307 Td [(\050a\051)]TJ +0 g 0 G + [-500(If)-297(after)-298(\002rst)-297(time)-298(step,)-309(r)18(einitialize)-297(the)-298(sparse)-297(matrix)-298(with)]TJ/F67 9.9626 Tf 269.151 0 Td [(psb_sprn)]TJ/F62 9.9626 Tf 41.843 0 Td [(;)]TJ -294.396 -11.955 Td [(also)-250(zer)18(o)-250(out)-250(the)-250(dense)-250(vectors;)]TJ +0 g 0 G + -17.126 -17.131 Td [(\050b\051)]TJ +0 g 0 G + [-500(Loop)-428(over)-429(the)-428(mesh,)-473(generate)-429(the)-428(coef)18(\002cients)-429(and)-428(insert/update)]TJ 17.126 -11.955 Td [(them)-250(with)]TJ/F67 9.9626 Tf 47.85 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 49.563 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ +0 g 0 G +/F62 9.9626 Tf -132.811 -17.131 Td [(\050c\051)]TJ +0 g 0 G + [-500(Assemble)-250(with)]TJ/F67 9.9626 Tf 84.223 0 Td [(psb_spasb)]TJ/F62 9.9626 Tf 49.564 0 Td [(and)]TJ/F67 9.9626 Tf 19.357 0 Td [(psb_geasb)]TJ +0 g 0 G +/F62 9.9626 Tf -154.808 -17.132 Td [(\050d\051)]TJ +0 g 0 G + [-500(Choose)-250(and)-250(build)-250(pr)18(econditioner)-250(with)]TJ/F67 9.9626 Tf 188.671 0 Td [(prec%init)]TJ/F62 9.9626 Tf 49.563 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(prec%build)]TJ +0 g 0 G +/F62 9.9626 Tf -256.277 -17.131 Td [(\050e\051)]TJ +0 g 0 G + [-500(Call)-250(the)-250(iterative)-250(method)-250(of)-250(choice,)-250(e.g.)]TJ/F67 9.9626 Tf 190.902 0 Td [(psb_bicgstab)]TJ/F62 9.9626 Tf -221.338 -22.307 Td [(The)-276(insertion)-275(r)18(outines)-276(will)-275(be)-276(called)-275(as)-276(many)-276(times)-275(as)-276(needed;)-288(they)-276(only)-275(need)]TJ 0 -11.955 Td [(to)-214(be)-213(called)-214(on)-213(the)-214(data)-214(t)1(hat)-214(is)-214(actually)-213(allocated)-214(to)-213(the)-214(curr)18(ent)-213(pr)18(ocess,)-221(i.e.)-298(each)]TJ 0 -11.955 Td [(pr)18(ocess)-250(generates)-250(its)-250(own)-250(data.)]TJ 14.944 -12.551 Td [(In)-219(principle)-218(ther)18(e)-219(is)-219(no)-218(speci\002c)-219(or)18(der)-219(in)-218(the)-219(calls)-219(to)]TJ/F67 9.9626 Tf 220.804 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 47.073 0 Td [(,)-225(nor)-219(is)-218(ther)18(e)-219(a)]TJ -282.821 -11.955 Td [(r)18(equir)18(ement)-243(to)-243(build)-243(a)-242(matrix)-243(r)18(ow)-243(in)-243(its)-243(entir)18(ety)-243(befor)18(e)-242(calling)-243(the)-243(r)18(outine;)-245(this)]TJ 0 -11.955 Td [(allows)-364(t)1(he)-364(application)-363(pr)18(ogrammer)-364(to)-363(walk)-364(thr)18(ough)-363(the)-364(discr)18(etization)-363(mesh)]TJ 0 -11.955 Td [(element)-316(by)-317(element,)-333(generating)-316(the)-316(main)-317(part)-316(of)-316(a)-317(given)-316(matrix)-316(r)18(ow)-317(but)-316(also)]TJ 0 -11.956 Td [(contributions)-250(to)-250(the)-250(r)18(ows)-250(corr)18(esponding)-250(to)-250(neighbouring)-250(elements.)]TJ 14.944 -12.55 Td [(Fr)18(om)-328(a)-329(func)1(tional)-329(point)-328(of)-328(view)-328(it)-329(is)-328(even)-328(possible)-328(to)-329(exe)1(cute)-329(one)-328(call)-328(for)]TJ -14.944 -11.955 Td [(each)-204(nonzer)18(o)-204(coef)18(\002cient;)-219(however)-203(this)-204(would)-204(have)-204(a)-204(subst)1(antial)-204(computational)]TJ 0 -11.955 Td [(over)18(head.)-457(It)-299(is)-299(ther)18(efor)18(e)-299(advisable)-299(to)-299(pack)-299(a)-299(certain)-299(amount)-299(of)-299(data)-299(into)-299(each)]TJ 0 -11.956 Td [(call)-303(to)-303(the)-302(insertion)-303(r)18(outine,)-316(say)-303(touching)-303(on)-302(a)-303(few)-303(tens)-303(of)-302(r)18(ows;)-330(the)-302(best)-303(per)18(-)]TJ 0 -11.955 Td [(formng)-342(value)-343(would)-342(depend)-342(on)-342(both)-343(the)-342(ar)18(chitectur)18(e)-342(of)-343(the)-342(computer)-342(being)]TJ 0 -11.955 Td [(used)-223(and)-223(on)-222(the)-223(pr)18(oblem)-223(str)8(uctur)18(e.)-301(At)-222(the)-223(opposite)-223(extr)18(eme,)-228(it)-223(would)-222(be)-223(possi-)]TJ 0 -11.955 Td [(ble)-267(to)-267(generate)-267(the)-267(entir)18(e)-267(part)-267(of)-267(a)-267(coef)18(\002cient)-267(matrix)-267(r)18(esiding)-267(on)-267(a)-267(pr)18(ocess)-267(and)]TJ 0 -11.955 Td [(pass)-275(it)-274(in)-275(a)-275(single)-274(call)-275(to)]TJ/F67 9.9626 Tf 108.421 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 47.073 0 Td [(;)-287(this,)-281(however)74(,)-281(would)-274(entail)-275(a)-275(doubling)-274(of)]TJ -155.494 -11.956 Td [(memory)-250(occupation,)-250(and)-250(thus)-250(would)-250(be)-250(almost)-250(always)-250(far)-250(fr)18(om)-250(optimal.)]TJ +0 g 0 G +ET +q +1 0 0 1 99.895 139.555 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +Q +BT +/F62 5.9776 Tf 110.755 132.683 Td [(3)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(The)-260(subr)18(outine)-260(style)]TJ/F97 7.9701 Tf 74.235 0 Td [(psb)]TJ +ET +q +1 0 0 1 201.687 129.989 cm +[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +Q +BT +/F97 7.9701 Tf 204.228 129.79 Td [(precinit)]TJ/F62 7.9701 Tf 35.946 0 Td [(and)]TJ/F97 7.9701 Tf 15.567 0 Td [(psb)]TJ +ET +q +1 0 0 1 268.951 129.989 cm +[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +Q +BT +/F97 7.9701 Tf 271.492 129.79 Td [(precbl)]TJ/F62 7.9701 Tf 27.478 0 Td [(ar)18(e)-260(still)-260(supported)-260(for)-260(backwar)18(d)-260(compat-)]TJ -199.075 -9.464 Td [(ibility)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 169.365 -29.888 Td [(7)]TJ +0 g 0 G +ET + +endstream +endobj +867 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 213.468 180.781 222.877] -/A << /S /GoTo /D (subsection.8.4) >> +/Type /ObjStm +/N 100 +/First 906 +/Length 12362 >> -% 734 0 obj +stream +838 0 839 147 840 300 841 447 842 600 843 753 844 906 845 1059 846 1206 847 1354 +848 1502 849 1650 850 1797 851 1946 852 2095 853 2244 854 2393 855 2542 856 2691 857 2840 +858 2989 859 3138 863 3286 860 3343 879 3423 868 3629 869 3779 870 3930 871 4082 872 4236 +873 4386 874 4536 875 4686 876 4834 877 4982 7 5130 878 5184 898 5277 901 5427 902 5668 +903 5710 904 6096 892 6396 893 6542 894 6689 11 6836 900 6892 897 6949 910 7070 896 7220 +907 7368 908 7517 912 7666 15 7722 916 7777 917 7834 909 7891 923 8023 927 8165 928 8279 +929 8321 919 8390 920 8537 925 8685 926 8742 19 8799 922 8855 935 9002 932 9144 933 9291 +937 9438 934 9494 940 9587 942 9701 23 9758 943 9814 944 9871 945 9928 946 9985 947 10042 +948 10099 949 10156 950 10213 939 10270 953 10389 938 10523 955 10672 956 10728 957 10784 958 10840 +959 10896 960 10952 961 11008 962 11064 963 11120 964 11176 965 11232 966 11288 967 11344 968 11400 +% 838 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 191.298 152.896 200.628] -/A << /S /GoTo /D (section.9) >> +/Rect [149.709 529.611 247.72 538.941] +/A << /S /GoTo /D (section.11) >> >> -% 735 0 obj +% 839 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 176.574 444.603 188.633] -/A << /S /GoTo /D (subsection.9.1) >> +/Rect [164.653 515.026 393.631 527.085] +/A << /S /GoTo /D (subsection.11.1) >> >> -% 745 0 obj +% 840 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 164.619 201.494 176.678] -/A << /S /GoTo /D (subsection.9.1) >> +/Rect [149.709 495.738 215.89 504.888] +/A << /S /GoTo /D (section.12) >> >> -% 736 0 obj +% 841 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 152.524 444.603 164.584] -/A << /S /GoTo /D (subsection.9.2) >> +/Rect [164.653 481.153 280.885 493.212] +/A << /S /GoTo /D (subsection.12.1) >> >> -% 746 0 obj +% 842 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 143.219 168.468 152.629] -/A << /S /GoTo /D (subsection.9.2) >> +/Rect [164.653 471.847 310.634 481.063] +/A << /S /GoTo /D (subsection.12.2) >> >> -% 737 0 obj +% 843 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 128.475 444.603 140.535] -/A << /S /GoTo /D (subsection.9.3) >> +/Rect [164.653 459.892 283.267 469.302] +/A << /S /GoTo /D (subsection.12.3) >> >> -% 747 0 obj +% 844 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 119.17 202.859 128.58] -/A << /S /GoTo /D (subsection.9.3) >> ->> -% 742 0 obj -<< -/D [740 0 R /XYZ 98.895 753.953 null] ->> -% 739 0 obj -<< -/Font << /F54 586 0 R /F51 584 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 761 0 obj -<< -/Type /Page -/Contents 762 0 R -/Resources 760 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 738 0 R 764 0 R 749 0 R 765 0 R 750 0 R 766 0 R 751 0 R 752 0 R 753 0 R 754 0 R 755 0 R 756 0 R 757 0 R 758 0 R 759 0 R ] +/Rect [164.653 447.937 292.711 457.347] +/A << /S /GoTo /D (subsection.12.4) >> >> -% 738 0 obj +% 845 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 702.323 495.412 714.383] -/A << /S /GoTo /D (subsection.9.4) >> +/Rect [149.709 425.999 303.341 435.15] +/A << /S /GoTo /D (section.13) >> >> -% 764 0 obj +% 846 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 693.018 253.668 702.428] -/A << /S /GoTo /D (subsection.9.4) >> +/Rect [164.653 411.414 225.882 423.474] +/A << /S /GoTo /D (section*.6) >> >> -% 749 0 obj +% 847 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 678.413 495.412 690.472] -/A << /S /GoTo /D (subsection.9.5) >> +/Rect [164.653 399.459 227.098 411.519] +/A << /S /GoTo /D (section*.7) >> >> -% 765 0 obj +% 848 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 669.108 253.668 678.517] -/A << /S /GoTo /D (subsection.9.5) >> +/Rect [164.653 387.504 262.236 399.563] +/A << /S /GoTo /D (section*.8) >> >> -% 750 0 obj +% 849 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 654.503 495.412 666.562] -/A << /S /GoTo /D (subsection.9.6) >> +/Rect [164.653 375.549 282.36 387.608] +/A << /S /GoTo /D (section*.9) >> >> -% 766 0 obj +% 850 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 645.197 253.668 654.607] -/A << /S /GoTo /D (subsection.9.6) >> +/Rect [164.653 363.593 254.803 375.653] +/A << /S /GoTo /D (section*.10) >> >> -% 751 0 obj +% 851 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 623.26 274.28 632.59] -/A << /S /GoTo /D (section.10) >> +/Rect [164.653 351.638 253.488 363.698] +/A << /S /GoTo /D (section*.11) >> >> -% 752 0 obj +% 852 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 608.674 333.298 620.734] -/A << /S /GoTo /D (subsection.10.1) >> +/Rect [164.653 339.683 280.328 351.743] +/A << /S /GoTo /D (section*.12) >> >> -% 753 0 obj +% 853 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 596.719 331.326 608.779] -/A << /S /GoTo /D (subsection.10.2) >> +/Rect [164.653 327.728 252.871 339.788] +/A << /S /GoTo /D (section*.13) >> >> -% 754 0 obj +% 854 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 584.764 381.626 596.824] -/A << /S /GoTo /D (subsection.10.3) >> +/Rect [164.653 315.773 281.971 327.832] +/A << /S /GoTo /D (section*.14) >> >> -% 755 0 obj +% 855 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 572.809 427.165 584.869] -/A << /S /GoTo /D (subsection.10.4) >> +/Rect [164.653 303.818 296.477 315.877] +/A << /S /GoTo /D (section*.15) >> >> -% 756 0 obj +% 856 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 560.854 353.343 572.913] -/A << /S /GoTo /D (subsection.10.5) >> +/Rect [164.653 291.862 305.742 303.922] +/A << /S /GoTo /D (section*.16) >> >> -% 757 0 obj +% 857 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 548.899 315.177 560.958] -/A << /S /GoTo /D (subsection.10.6) >> +/Rect [164.653 279.907 293.966 291.967] +/A << /S /GoTo /D (section*.17) >> >> -% 758 0 obj +% 858 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 529.611 247.72 538.941] -/A << /S /GoTo /D (section.11) >> +/Rect [164.653 267.952 292.711 280.012] +/A << /S /GoTo /D (section*.18) >> >> -% 759 0 obj +% 859 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 515.026 393.631 527.085] -/A << /S /GoTo /D (subsection.11.1) >> +/Rect [164.653 255.997 319.55 268.057] +/A << /S /GoTo /D (section*.19) >> >> -% 763 0 obj +% 863 0 obj << -/D [761 0 R /XYZ 149.705 753.953 null] +/D [861 0 R /XYZ 149.705 753.953 null] >> -% 760 0 obj +% 860 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F62 667 0 R /F59 665 0 R >> /ProcSet [ /PDF /Text ] >> -% 778 0 obj +% 879 0 obj << /Type /Page -/Contents 779 0 R -/Resources 777 0 R +/Contents 880 0 R +/Resources 878 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 767 0 R 768 0 R 769 0 R 770 0 R 771 0 R 772 0 R 773 0 R 774 0 R 775 0 R 776 0 R ] +/Parent 881 0 R +/Annots [ 868 0 R 869 0 R 870 0 R 871 0 R 872 0 R 873 0 R 874 0 R 875 0 R 876 0 R 877 0 R ] >> -% 767 0 obj +% 868 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [408.168 586.065 420.123 595.071] +/Rect [408.168 585.966 420.123 595.071] /A << /S /GoTo /D (cite.metcalf) >> >> -% 768 0 obj +% 869 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [300.381 514.225 312.336 523.231] +/Rect [300.381 514.126 312.336 523.231] /A << /S /GoTo /D (cite.Sparse03) >> >> -% 769 0 obj +% 870 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [302.511 502.27 309.484 511.276] +/Rect [311.368 502.171 323.323 511.127] /A << /S /GoTo /D (cite.DesPat:11) >> >> -% 770 0 obj +% 871 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [312.107 502.171 324.063 511.276] +/Rect [327.422 502.171 339.378 511.276] /A << /S /GoTo /D (cite.RouXiaXu:11) >> >> -% 771 0 obj +% 872 0 obj << /Type /Annot /Subtype /Link @@ -4498,15 +5321,15 @@ stream /Rect [234.17 442.286 246.125 451.392] /A << /S /GoTo /D (cite.machiels) >> >> -% 772 0 obj +% 873 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [241.917 370.446 248.891 379.402] +/Rect [241.917 370.546 248.891 379.402] /A << /S /GoTo /D (cite.sblas97) >> >> -% 773 0 obj +% 874 0 obj << /Type /Annot /Subtype /Link @@ -4514,23 +5337,23 @@ stream /Rect [251.448 370.446 258.422 379.402] /A << /S /GoTo /D (cite.sblas02) >> >> -% 774 0 obj +% 875 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [226.689 358.491 238.644 367.597] +/Rect [226.689 358.591 238.644 367.597] /A << /S /GoTo /D (cite.BLAS1) >> >> -% 775 0 obj +% 876 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [241.633 358.491 248.606 367.447] +/Rect [241.633 358.591 248.606 367.597] /A << /S /GoTo /D (cite.BLAS2) >> >> -% 776 0 obj +% 877 0 obj << /Type /Annot /Subtype /Link @@ -4540,23 +5363,23 @@ stream >> % 7 0 obj << -/D [778 0 R /XYZ 99.895 716.092 null] +/D [879 0 R /XYZ 99.895 716.092 null] >> -% 777 0 obj +% 878 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 797 0 obj +% 898 0 obj << /Type /Page -/Contents 798 0 R -/Resources 796 0 R +/Contents 899 0 R +/Resources 897 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 791 0 R 792 0 R 793 0 R ] +/Parent 881 0 R +/Annots [ 892 0 R 893 0 R 894 0 R ] >> -% 800 0 obj +% 901 0 obj << /Producer (GPL Ghostscript 9.04) /CreationDate (D:20111215145523+01'00') @@ -4565,15 +5388,15 @@ stream /Creator (fig2dev Version 3.2 Patchlevel 5d) /Author (sfilippo@donald \(Salvatore Filippone\)) >> -% 801 0 obj +% 902 0 obj << /Type /ExtGState /OPM 1 >> -% 802 0 obj +% 903 0 obj << /BaseFont /JEJNJE+Times-Roman -/FontDescriptor 803 0 R +/FontDescriptor 904 0 R /Type /Font /FirstChar 32 /LastChar 116 @@ -4581,7 +5404,7 @@ stream /Encoding /WinAnsiEncoding /Subtype /Type1 >> -% 803 0 obj +% 904 0 obj << /Type /FontDescriptor /FontName /JEJNJE+Times-Roman @@ -4595,9 +5418,9 @@ stream /MissingWidth 500 /XHeight 460 /CharSet (/A/B/F/I/L/M/P/S/a/c/e/f/g/i/l/n/o/p/r/s/space/t/three/two/zero) -/FontFile3 804 0 R +/FontFile3 905 0 R >> -% 791 0 obj +% 892 0 obj << /Type /Annot /Subtype /Link @@ -4605,15 +5428,15 @@ stream /Rect [268.275 538.551 275.249 550.611] /A << /S /GoTo /D (figure.1) >> >> -% 792 0 obj +% 893 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [443.339 469.57 450.312 478.427] +/Rect [443.339 469.47 450.312 478.427] /A << /S /GoTo /D (cite.BLACS) >> >> -% 793 0 obj +% 894 0 obj << /Type /Annot /Subtype /Link @@ -4623,36 +5446,36 @@ stream >> % 11 0 obj << -/D [797 0 R /XYZ 150.705 675.823 null] +/D [898 0 R /XYZ 150.705 675.823 null] >> -% 799 0 obj +% 900 0 obj << -/D [797 0 R /XYZ 150.705 272.018 null] +/D [898 0 R /XYZ 150.705 272.018 null] >> -% 796 0 obj +% 897 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F52 585 0 R >> -/XObject << /Im2 794 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F60 666 0 R >> +/XObject << /Im2 895 0 R >> /ProcSet [ /PDF /Text ] >> -% 809 0 obj +% 910 0 obj << /Type /Page -/Contents 810 0 R -/Resources 808 0 R +/Contents 911 0 R +/Resources 909 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 795 0 R 806 0 R 807 0 R ] +/Parent 881 0 R +/Annots [ 896 0 R 907 0 R 908 0 R ] >> -% 795 0 obj +% 896 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [275.119 609.432 287.074 618.438] +/Rect [275.119 609.332 287.074 618.438] /A << /S /GoTo /D (cite.METIS) >> >> -% 806 0 obj +% 907 0 obj << /Type /Annot /Subtype /Link @@ -4660,21616 +5483,25844 @@ stream /Rect [232.706 534.258 238.983 547.962] /A << /S /GoTo /D (Hfootnote.1) >> >> -% 807 0 obj +% 908 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [157.808 185.645 164.084 199.235] +/A << /S /GoTo /D (Hfootnote.2) >> +>> +% 912 0 obj +<< +/D [910 0 R /XYZ 98.895 753.953 null] +>> +% 15 0 obj +<< +/D [910 0 R /XYZ 99.895 504.866 null] +>> +% 916 0 obj +<< +/D [910 0 R /XYZ 114.242 167.999 null] +>> +% 917 0 obj +<< +/D [910 0 R /XYZ 114.242 158.184 null] +>> +% 909 0 obj +<< +/Font << /F62 667 0 R /F60 666 0 R /F67 913 0 R /F59 665 0 R /F91 914 0 R /F93 915 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 923 0 obj +<< +/Type /Page +/Contents 924 0 R +/Resources 922 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 881 0 R +/Annots [ 919 0 R 920 0 R ] +>> +% 927 0 obj +<< +/Producer (ESP Ghostscript 815.03) +/CreationDate (D:20070123225315) +/ModDate (D:20070123225315) +>> +% 928 0 obj +<< +/Type /ExtGState +/OPM 1 +>> +% 929 0 obj +<< +/BaseFont /Times-Roman +/Type /Font +/Subtype /Type1 +>> +% 919 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [327.281 638.21 334.255 647.166] +/A << /S /GoTo /D (cite.2007c) >> +>> +% 920 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [337.243 638.309 344.217 647.166] +/A << /S /GoTo /D (cite.2007d) >> +>> +% 925 0 obj +<< +/D [923 0 R /XYZ 149.705 753.953 null] +>> +% 926 0 obj +<< +/D [923 0 R /XYZ 150.705 353.614 null] +>> +% 19 0 obj +<< +/D [923 0 R /XYZ 150.705 270.035 null] +>> +% 922 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F91 914 0 R /F60 666 0 R /F93 915 0 R >> +/XObject << /Im3 921 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 935 0 obj +<< +/Type /Page +/Contents 936 0 R +/Resources 934 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 881 0 R +/Annots [ 932 0 R 933 0 R ] +>> +% 932 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [404.739 354.196 411.713 366.255] +/A << /S /GoTo /D (section.3) >> +>> +% 933 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [172.593 318.033 179.567 330.093] +/A << /S /GoTo /D (section.6) >> +>> +% 937 0 obj +<< +/D [935 0 R /XYZ 98.895 753.953 null] +>> +% 934 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 940 0 obj +<< +/Type /Page +/Contents 941 0 R +/Resources 939 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 881 0 R +>> +% 942 0 obj +<< +/D [940 0 R /XYZ 149.705 753.953 null] +>> +% 23 0 obj +<< +/D [940 0 R /XYZ 150.705 716.092 null] +>> +% 943 0 obj +<< +/D [940 0 R /XYZ 150.705 282.521 null] +>> +% 944 0 obj +<< +/D [940 0 R /XYZ 150.705 261.733 null] +>> +% 945 0 obj +<< +/D [940 0 R /XYZ 150.705 240.946 null] +>> +% 946 0 obj +<< +/D [940 0 R /XYZ 150.705 220.159 null] +>> +% 947 0 obj +<< +/D [940 0 R /XYZ 150.705 188.012 null] +>> +% 948 0 obj +<< +/D [940 0 R /XYZ 150.705 167.072 null] +>> +% 949 0 obj +<< +/D [940 0 R /XYZ 150.705 148.646 null] +>> +% 950 0 obj +<< +/D [940 0 R /XYZ 150.705 132.275 null] +>> +% 939 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R /F93 915 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 953 0 obj +<< +/Type /Page +/Contents 954 0 R +/Resources 952 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 971 0 R +/Annots [ 938 0 R ] +>> +% 938 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [176.109 690.964 182.386 703.958] +/A << /S /GoTo /D (Hfootnote.3) >> +>> +% 955 0 obj +<< +/D [953 0 R /XYZ 98.895 753.953 null] +>> +% 956 0 obj +<< +/D [953 0 R /XYZ 99.895 716.092 null] +>> +% 957 0 obj +<< +/D [953 0 R /XYZ 99.895 686.784 null] +>> +% 958 0 obj +<< +/D [953 0 R /XYZ 99.895 618.259 null] +>> +% 959 0 obj +<< +/D [953 0 R /XYZ 99.895 595.952 null] +>> +% 960 0 obj +<< +/D [953 0 R /XYZ 99.895 573.645 null] +>> +% 961 0 obj +<< +/D [953 0 R /XYZ 99.895 539.978 null] +>> +% 962 0 obj +<< +/D [953 0 R /XYZ 99.895 517.075 null] +>> +% 963 0 obj +<< +/D [953 0 R /XYZ 99.895 494.768 null] +>> +% 964 0 obj +<< +/D [953 0 R /XYZ 99.895 469.873 null] +>> +% 965 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [157.808 185.645 164.084 199.235] -/A << /S /GoTo /D (Hfootnote.2) >> +/D [953 0 R /XYZ 99.895 442.062 null] >> -% 811 0 obj +% 966 0 obj << -/D [809 0 R /XYZ 98.895 753.953 null] +/D [953 0 R /XYZ 99.895 412.296 null] >> -% 15 0 obj +% 967 0 obj << -/D [809 0 R /XYZ 99.895 504.866 null] +/D [953 0 R /XYZ 99.895 395.165 null] >> -% 815 0 obj +% 968 0 obj << -/D [809 0 R /XYZ 114.242 167.999 null] +/D [953 0 R /XYZ 99.895 377.438 null] >> -% 816 0 obj + +endstream +endobj +976 0 obj << -/D [809 0 R /XYZ 114.242 158.184 null] +/Length 7173 >> -% 808 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(2.3.1)-1000(User)18(-de\002ned)-250(index)-250(mappings)]TJ/F62 9.9626 Tf 0 -18.964 Td [(PSBLAS)-316(supports)-315(user)18(-de\002ned)-316(global)-316(to)-315(local)-316(index)-316(mappings,)-332(subject)-315(to)-316(the)]TJ 0 -11.955 Td [(constraints)-250(outlined)-250(in)-250(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-250(2.3)]TJ +0 g 0 G + [(:)]TJ +0 g 0 G + 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(The)-250(set)-250(of)-250(indices)-250(owned)-250(locally)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)-250(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 294.494 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -1.495 Td [(r)18(ow)]TJ/F60 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F62 9.9626 Tf 2.775 3.143 Td [(;)]TJ +0 g 0 G + -320.47 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(The)-250(set)-250(of)-250(halo)-250(points)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)]TJ/F60 9.9626 Tf 228.286 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F60 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F93 10.3811 Tf 4.836 3.142 Td [(+)]TJ/F62 9.9626 Tf 10.132 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.966 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -3.83 Td [(col)]TJ/F60 5.9776 Tf 12.795 -1.649 Td [(i)]TJ/F62 9.9626 Tf 2.774 5.479 Td [(;)]TJ -320.107 -21.759 Td [(but)-289(otherwise)-289(the)-288(mapping)-289(is)-289(arbitrary)111(.)-426(The)-289(user)-289(application)-289(is)-288(r)18(esponsible)-289(to)]TJ 0 -11.956 Td [(ensur)18(e)-262(consistency)-261(of)-262(this)-262(mapping;)-267(some)-262(err)18(ors)-262(may)-261(be)-262(caught)-262(by)-261(the)-262(library)111(,)]TJ 0 -11.955 Td [(but)-236(this)-236(is)-236(not)-236(guaranteed.)-305(The)-236(application)-236(str)8(uctur)18(e)-236(to)-236(support)-236(this)-236(usage)-236(is)-236(as)]TJ 0 -11.955 Td [(follows:)]TJ +0 g 0 G + 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(Initialize)-190(index)-190(space)-190(with)]TJ/F67 9.9626 Tf 128.098 0 Td [(psb_cdall\050ictx,desc,info,vl=vl,lidx=lidx\051)]TJ/F62 9.9626 Tf -115.645 -11.955 Td [(passing)-292(the)-293(vectors)]TJ/F67 9.9626 Tf 87.882 0 Td [(vl\050:\051)]TJ/F62 9.9626 Tf 29.064 0 Td [(containing)-292(the)-293(set)-292(of)-293(global)-292(indices)-292(owned)-293(by)]TJ -116.946 -11.956 Td [(the)-327(curr)18(ent)-328(pr)18(ocess)-327(and)]TJ/F67 9.9626 Tf 108.493 0 Td [(lidx\050:\051)]TJ/F62 9.9626 Tf 39.873 0 Td [(containing)-327(the)-327(corr)18(esponding)-328(local)-327(in-)]TJ -148.366 -11.955 Td [(dices;)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + [-500(Add)-412(the)-412(halo)-412(points)]TJ/F67 9.9626 Tf 109.326 0 Td [(ja\050:\051)]TJ/F62 9.9626 Tf 30.256 0 Td [(and)-412(their)-412(associated)-412(local)-412(indices)]TJ/F67 9.9626 Tf 155.064 0 Td [(lidx\050:\051)]TJ/F62 9.9626 Tf -282.193 -11.955 Td [(with)-250(a\050some\051)-250(call\050s\051)-250(to)]TJ/F67 9.9626 Tf 99.815 0 Td [(psb_cdins\050nz,ja,desc,info,lidx=lidx\051)]TJ/F62 9.9626 Tf 188.292 0 Td [(;)]TJ +0 g 0 G + -300.56 -19.926 Td [(3.)]TJ +0 g 0 G + [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F67 9.9626 Tf 144.387 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(;)]TJ +0 g 0 G + -191.46 -19.925 Td [(4.)]TJ +0 g 0 G + [-500(Build)-190(the)-190(sparse)-190(matrices)-190(and)-190(vectors,)-202(optionally)-190(making)-190(use)-190(in)]TJ/F67 9.9626 Tf 288.117 0 Td [(psb_spins)]TJ/F62 9.9626 Tf -275.664 -11.955 Td [(and)]TJ/F67 9.9626 Tf 19.958 0 Td [(psb_geins)]TJ/F62 9.9626 Tf 50.163 0 Td [(of)-310(the)]TJ/F67 9.9626 Tf 28.756 0 Td [(local)]TJ/F62 9.9626 Tf 29.243 0 Td [(ar)18(gument)-310(specifying)-310(that)-310(the)-311(indices)-310(in)]TJ/F67 9.9626 Tf 177.734 0 Td [(ia)]TJ/F62 9.9626 Tf 10.46 0 Td [(,)]TJ/F67 9.9626 Tf -316.314 -11.955 Td [(ja)]TJ/F62 9.9626 Tf 12.952 0 Td [(and)]TJ/F67 9.9626 Tf 19.357 0 Td [(irw)]TJ/F62 9.9626 Tf 15.691 0 Td [(,)-250(r)18(espectively)111(,)-250(ar)18(e)-250(alr)18(eady)-250(local)-250(indices.)]TJ/F59 11.9552 Tf -72.906 -29.133 Td [(2.4)-1000(Programming)-250(model)]TJ/F62 9.9626 Tf 0 -18.964 Td [(The)-316(PSBLAS)-315(librarary)-316(is)-315(based)-316(on)-315(the)-316(Single)-315(Pr)18(ogram)-316(Multiple)-316(Data)-315(\050SPMD\051)]TJ 0 -11.955 Td [(pr)18(ogramming)-277(model:)-364(each)-277(pr)18(ocess)-277(participatin)1(g)-277(in)-277(the)-277(computation)-277(performs)]TJ 0 -11.955 Td [(the)-250(same)-250(actions)-250(on)-250(a)-250(chunk)-250(of)-250(data.)-310(Parallelism)-250(is)-250(thus)-250(data-driven.)]TJ 14.944 -11.956 Td [(Because)-313(of)-313(this)-312(str)8(uctur)18(e,)-329(many)-313(subr)18(outines)-313(coor)18(dinate)-312(their)-313(action)-313(acr)18(oss)]TJ -14.944 -11.955 Td [(the)-336(various)-336(pr)18(ocesses,)-358(thus)-336(pr)18(oviding)-336(an)-336(implicit)-336(synchr)18(onization)-336(point,)-358(and)]TJ 0 -11.955 Td [(ther)18(efor)18(e)]TJ/F60 9.9626 Tf 43.283 0 Td [(must)]TJ/F62 9.9626 Tf 24.136 0 Td [(be)-367(called)-366(simultaneously)-367(by)-366(all)-367(pr)18(ocesses)-367(participating)-366(in)-367(the)]TJ -67.419 -11.955 Td [(computation.)-525(This)-321(is)-322(certainly)-322(tr)8(ue)-321(for)-322(the)-322(data)-321(allocation)-322(and)-322(assembl)1(y)-322(r)18(ou-)]TJ 0 -11.955 Td [(tines,)-250(for)-250(all)-250(the)-250(computational)-250(r)18(outines)-250(and)-250(for)-250(some)-250(of)-250(the)-250(tools)-250(r)18(outines.)]TJ 14.944 -11.955 Td [(However)-333(ther)18(e)-332(ar)18(e)-333(many)-333(cases)-332(wher)18(e)-333(no)-333(synchr)18(onizati)1(on,)-354(and)-332(indeed)-333(no)]TJ -14.944 -11.956 Td [(communication)-344(among)-343(pr)18(ocesses,)-367(is)-344(implied;)-390(for)-344(instance,)-367(all)-344(the)-343(r)18(outines)-344(in)]TJ 0 -11.955 Td [(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-246(3)]TJ +0 g 0 G + [-247(ar)18(e)-246(only)-246(acting)-246(on)-247(the)-246(local)-246(data)-247(str)8(uctur)18(es,)-247(and)-246(thus)-246(may)-247(be)-246(called)-246(inde-)]TJ 0 -11.955 Td [(pendently)111(.)-306(The)-238(most)-238(important)-237(case)-238(is)-238(that)-238(of)-238(the)-238(coef)18(\002cient)-237(insertion)-238(r)18(outines:)]TJ 0 -11.955 Td [(since)-231(the)-231(number)-231(of)-230(coef)18(\002cients)-231(in)-231(the)-231(sparse)-231(and)-231(dense)-230(matrices)-231(varies)-231(among)]TJ 0 -11.955 Td [(the)-248(pr)18(ocessors,)-249(and)-249(since)-248(the)-249(user)-248(is)-249(fr)18(ee)-248(to)-249(choose)-248(an)-249(arbitrary)-248(or)18(der)-249(in)-248(builid-)]TJ 0 -11.955 Td [(ing)-250(the)-250(matrix)-250(entries,)-250(these)-250(r)18(outines)-250(cannot)-250(imply)-250(a)-250(synchr)18(onization.)]TJ 14.944 -11.956 Td [(Thr)18(oughout)-250(this)-250(user)-74('s)-250(guide)-250(each)-250(subr)18(outine)-250(will)-250(be)-250(clearly)-250(indicated)-250(as:)]TJ +0 g 0 G +/F59 9.9626 Tf -14.944 -19.925 Td [(Synchronous:)]TJ +0 g 0 G +/F62 9.9626 Tf 67.247 0 Td [(must)-307(be)-307(called)-308(simultaneously)-307(by)-307(all)-307(the)-308(pr)18(ocesses)-307(in)-307(the)-307(r)18(ele-)]TJ -42.341 -11.955 Td [(vant)-250(communication)-250(context;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(Asynchronous:)]TJ +0 g 0 G +/F62 9.9626 Tf 73.334 0 Td [(may)-250(be)-250(called)-250(in)-250(a)-250(totally)-250(independent)-250(manner)74(.)]TJ +0 g 0 G + 96.031 -56.634 Td [(8)]TJ +0 g 0 G +ET + +endstream +endobj +987 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R /F51 584 0 R /F83 813 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/Length 8187 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 14.3462 Tf 99.895 705.784 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ/F62 9.9626 Tf 0 -23.091 Td [(In)-289(this)-288(chapter)-289(we)-289(illustrate)-288(the)-289(data)-289(str)8(uctur)18(es)-288(used)-289(for)-289(de\002nition)-289(of)-288(r)18(outines)]TJ 0 -11.956 Td [(interfaces.)-622(They)-354(include)-354(data)-354(str)8(uctur)18(es)-354(for)-354(sparse)-354(matrices,)-380(communication)]TJ 0 -11.955 Td [(descriptors)-250(and)-250(pr)18(econditioners.)]TJ 14.944 -12.156 Td [(All)-248(the)-248(data)-249(types)-248(and)-248(the)-248(basic)-248(subr)18(outine)-249(interfaces)-248(r)18(elated)-248(to)-248(descriptors)]TJ -14.944 -11.955 Td [(and)-345(sparse)-345(matrices)-344(ar)18(e)-345(de\002ned)-345(in)-345(the)-345(module)]TJ/F67 9.9626 Tf 213.323 0 Td [(psb_base_mod)]TJ/F62 9.9626 Tf 62.764 0 Td [(;)-392(this)-345(will)-345(have)]TJ -276.087 -11.955 Td [(to)-381(be)-381(included)-381(by)-381(every)-381(user)-381(subr)18(outine)-381(that)-381(makes)-381(u)1(se)-381(of)-381(the)-381(library)111(.)-703(The)]TJ 0 -11.956 Td [(pr)18(econditioners)-250(ar)18(e)-250(de\002ned)-250(in)-250(the)-250(module)]TJ/F67 9.9626 Tf 187.993 0 Td [(psb_prec_mod)]TJ/F62 9.9626 Tf -173.049 -12.156 Td [(Integer)74(,)-433(r)18(eal)-396(and)-397(complex)-396(data)-396(types)-397(ar)18(e)-396(parametrized)-396(with)-397(a)-396(kind)-396(type)]TJ -14.944 -11.955 Td [(de\002ned)-250(in)-250(the)-250(library)-250(as)-250(follows:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.528 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 566.32 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 566.121 Td [(spk)]TJ +ET +q +1 0 0 1 137.275 566.32 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 145.245 566.121 Td [(Kind)-407(parameter)-406(for)-407(short)-406(pr)18(ecision)-407(r)18(eal)-406(and)-407(complex)-406(data;)-485(corr)18(e-)]TJ -20.443 -11.955 Td [(sponds)-250(to)-250(a)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F67 9.9626 Tf 52.901 0 Td [(REAL)]TJ +0 g 0 G +/F62 9.9626 Tf 23.412 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(4)-250(bytes;)]TJ +0 g 0 G +/F59 9.9626 Tf -101.22 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 533.635 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 533.436 Td [(dpk)]TJ +ET +q +1 0 0 1 138.939 533.635 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 146.909 533.436 Td [(Kind)-420(parameter)-421(for)-420(long)-420(pr)18(ecision)-421(r)18(eal)-420(and)-420(complex)-421(data;)-505(corr)18(e-)]TJ -22.107 -11.955 Td [(sponds)-250(to)-250(a)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F67 9.9626 Tf 52.901 0 Td [(DOUBLE)-525(PRECISION)]TJ +0 g 0 G +/F62 9.9626 Tf 86.176 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(8)-250(bytes;)]TJ +0 g 0 G +/F59 9.9626 Tf -163.984 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 500.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 500.751 Td [(mpk)]TJ +ET +q +1 0 0 1 141.708 500.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 149.678 500.751 Td [(Kind)-250(parameter)-250(for)-250(4-bytes)-250(integer)-250(data,)-250(as)-250(is)-250(always)-250(used)-250(by)-250(MPI;)]TJ +0 g 0 G +/F59 9.9626 Tf -49.783 -20.729 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 480.221 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 480.022 Td [(epk)]TJ +ET +q +1 0 0 1 137.833 480.221 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 145.803 480.022 Td [(Kind)-364(parameter)-363(for)-364(8-bytes)-363(integer)-364(data,)-391(a)-1(s)-363(is)-364(always)-363(used)-364(by)-363(the)]TJ/F67 9.9626 Tf -21.001 -11.955 Td [(sizeof)]TJ/F62 9.9626 Tf 33.873 0 Td [(methods;)]TJ +0 g 0 G +/F59 9.9626 Tf -58.78 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 447.537 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 447.337 Td [(ipk)]TJ +ET +q +1 0 0 1 136.169 447.537 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 144.139 447.337 Td [(Kind)-398(parameter)-399(for)-398(\223local\224)-398(integer)-399(indices)-398(and)-398(data;)-473(with)-398(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(a)-250(4)-250(bytes)-250(integer;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.729 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 414.852 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 414.653 Td [(lpk)]TJ +ET +q +1 0 0 1 136.169 414.852 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 144.139 414.653 Td [(Kind)-328(parameter)-329(for)-328(\223global\224)-328(integer)-329(indices)-328(and)-328(data;)-368(with)-328(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(an)-250(8)-250(bytes)-250(integer;)]TJ -24.907 -20.529 Td [(The)-205(integer)-205(kinds)-205(for)-206(local)-205(and)-205(global)-205(indices)-205(can)-205(be)-206(chosen)-205(at)-205(con\002gur)18(e)-205(time)-205(to)]TJ 0 -11.955 Td [(hold)-266(4)-267(or)-266(8)-267(bytes,)-270(with)-266(the)-267(global)-266(indices)-266(at)-267(least)-266(as)-267(lar)18(ge)-266(as)-266(the)-267(local)-266(ones.)-359(T)92(o-)]TJ 0 -11.955 Td [(gether)-219(with)-220(the)-219(classes)-219(attributes)-219(we)-219(also)-220(discuss)-219(their)-219(methods.)-300(Most)-219(methods)]TJ 0 -11.955 Td [(detailed)-272(her)18(e)-272(only)-273(act)-272(on)-272(the)-272(local)-272(variable,)-278(i.e.)-376(their)-273(action)-272(is)-272(pur)18(ely)-272(local)-272(and)]TJ 0 -11.956 Td [(asynchr)18(onous)-359(unless)-360(otherwise)-359(stated.)-638(The)-359(list)-360(of)-359(methods)-359(her)18(e)-360(is)-359(not)-359(com-)]TJ 0 -11.955 Td [(pletely)-336(exhaustive;)-380(many)-336(methods,)-358(especially)-336(those)-336(that)-336(alter)-337(the)-336(contents)-336(of)]TJ 0 -11.955 Td [(the)-299(various)-298(objects,)-311(ar)18(e)-299(usually)-299(not)-299(needed)-298(by)-299(the)-299(end-user)74(,)-311(and)-298(ther)18(efor)18(e)-299(ar)18(e)]TJ 0 -11.955 Td [(described)-250(in)-250(the)-250(developer)-74('s)-250(documentation.)]TJ/F59 11.9552 Tf 0 -30.277 Td [(3.1)-1000(Descriptor)-250(data)-250(structure)]TJ/F62 9.9626 Tf 0 -19.353 Td [(All)-241(the)-241(gener)1(a)-1(l)-240(matrix)-241(informations)-241(and)-240(elements)-241(to)-241(be)-241(exchanged)-240(among)-241(pr)18(o-)]TJ 0 -11.956 Td [(cesses)-402(ar)18(e)-401(stor)18(ed)-402(within)-401(a)-402(data)-401(str)8(uctur)18(e)-402(of)-401(the)-402(type)]TJ/F67 9.9626 Tf 242.575 0 Td [(psb)]TJ +ET +q +1 0 0 1 358.788 237.097 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 361.927 236.897 Td [(desc)]TJ +ET +q +1 0 0 1 383.476 237.097 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 386.614 236.897 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)-765(Every)]TJ -307.64 -11.955 Td [(str)8(uctur)18(e)-371(of)-370(this)-371(type)-370(is)-371(associated)-370(with)-371(a)-371(discr)18(etization)-370(pattern)-371(and)-370(enables)]TJ 0 -11.955 Td [(data)-301(communications)-302(and)-301(other)-301(operations)-302(that)-301(ar)18(e)-302(nece)1(ssa)-1(r)1(y)-302(for)-301(implement-)]TJ 0 -11.955 Td [(ing)-250(the)-250(various)-250(algorithms)-250(of)-250(inter)18(est)-250(to)-250(us.)]TJ 14.944 -12.156 Td [(The)-265(data)-266(str)8(uctur)18(e)-265(itself)]TJ/F67 9.9626 Tf 107.448 0 Td [(psb_desc_type)]TJ/F62 9.9626 Tf 70.638 0 Td [(can)-265(be)-266(tr)18(eated)-265(as)-265(an)-265(opaque)-266(object)]TJ -193.03 -11.955 Td [(handled)-321(via)-321(the)-321(tools)-321(r)18(outines)-321(of)-321(Sec.)]TJ +0 0 1 rg 0 0 1 RG + [-321(6)]TJ +0 g 0 G + [-321(or)-321(the)-321(query)-321(r)18(outines)-321(detailed)-321(below;)]TJ 0 -11.956 Td [(nevertheless)-250(we)-250(include)-250(her)18(e)-250(a)-250(description)-250(for)-250(the)-250(curious)-250(r)18(eader)74(.)]TJ 14.944 -12.156 Td [(First)-229(we)-228(describe)-228(the)]TJ/F67 9.9626 Tf 92.473 0 Td [(psb_indx_map)]TJ/F62 9.9626 Tf 65.04 0 Td [(type.)-303(This)-228(is)-229(a)-228(data)-229(str)8(uctur)18(e)-228(that)-229(keeps)]TJ -172.457 -11.955 Td [(track)-250(of)-250(a)-250(certain)-250(number)-250(of)-250(basic)-250(issues)-250(such)-250(as:)]TJ +0 g 0 G + 13.888 -20.528 Td [(\225)]TJ +0 g 0 G + [-500(The)-250(value)-250(of)-250(the)-250(communication)-250(context;)]TJ +0 g 0 G + 155.477 -29.888 Td [(9)]TJ +0 g 0 G +ET + +endstream +endobj +995 0 obj +<< +/Length 6070 >> -% 822 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 164.593 706.129 Td [(\225)]TJ +0 g 0 G + [-500(The)-236(number)-236(of)-235(indices)-236(in)-236(the)-236(index)-236(space,)-238(i.e.)-306(global)-236(number)-235(of)-236(r)18(ows)-236(and)]TJ 11.018 -11.955 Td [(columns)-250(of)-250(a)-250(sparse)-250(matrix;)]TJ +0 g 0 G + -11.018 -20.409 Td [(\225)]TJ +0 g 0 G + [-500(The)-250(local)-250(set)-250(of)-250(indices,)-250(including:)]TJ +0 g 0 G +/F59 9.9626 Tf 22.974 -20.408 Td [(\226)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(local)-250(indices)-250(\050and)-250(local)-250(r)18(ows\051;)]TJ +0 g 0 G +/F59 9.9626 Tf -9.962 -16.182 Td [(\226)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(halo)-250(indices)-250(\050and)-250(ther)18(efor)18(e)-250(local)-250(columns\051;)]TJ +0 g 0 G +/F59 9.9626 Tf -9.962 -16.181 Td [(\226)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(global)-250(indices)-250(corr)18(esponding)-250(to)-250(the)-250(local)-250(ones.)]TJ -46.824 -20.409 Td [(Ther)18(e)-301(ar)18(e)-301(many)-301(dif)18(fer)18(ent)-301(schemes)-301(for)-301(storing)-301(these)-301(data;)-326(ther)18(efor)18(e)-301(ther)18(e)-301(ar)18(e)-301(a)]TJ 0 -11.955 Td [(number)-299(of)-299(types)-300(extending)-299(the)-299(base)-299(one,)-312(and)-299(the)-299(descriptor)-300(str)8(uctur)18(e)-299(holds)-299(a)]TJ 0 -11.955 Td [(polymorphic)-212(object)-213(whose)-212(dynamic)-212(type)-213(can)-212(be)-212(any)-213(of)-212(the)-212(extended)-213(types.)-297(The)]TJ 0 -11.955 Td [(methods)-250(associated)-250(with)-250(this)-250(data)-250(type)-250(answer)-250(the)-250(following)-250(queries:)]TJ +0 g 0 G + 13.888 -20.288 Td [(\225)]TJ +0 g 0 G + [-500(For)-411(a)-412(given)-411(set)-412(of)-411(local)-411(indices,)-452(\002nd)-412(the)-411(corr)18(esponding)-411(indices)-412(in)-411(the)]TJ 11.018 -11.955 Td [(global)-250(numbering;)]TJ +0 g 0 G + -11.018 -20.408 Td [(\225)]TJ +0 g 0 G + [-500(For)-357(a)-357(given)-357(set)-357(of)-358(global)-357(indices,)-384(\002nd)-357(the)-357(corr)18(esponding)-357(indices)-357(in)-357(the)]TJ 11.018 -11.955 Td [(local)-250(numbering,)-250(if)-250(any)111(,)-250(or)-250(r)18(eturn)-250(an)-250(invalid)]TJ +0 g 0 G + -11.018 -20.409 Td [(\225)]TJ +0 g 0 G + [-500(Add)-250(a)-250(global)-250(index)-250(to)-250(the)-250(set)-250(of)-250(halo)-250(indices;)]TJ +0 g 0 G + 0 -20.408 Td [(\225)]TJ +0 g 0 G + [-500(Find)-250(the)-250(pr)18(ocess)-250(owner)-250(of)-250(each)-250(member)-250(of)-250(a)-250(set)-250(of)-250(global)-250(indices.)]TJ -13.888 -20.288 Td [(All)-295(methods)-295(but)-294(the)-295(last)-295(ar)18(e)-295(pur)18(ely)-295(local;)-317(the)-295(last)-295(method)-294(potentially)-295(r)18(equir)18(es)]TJ 0 -11.955 Td [(communication)-418(among)-419(pr)18(ocesses,)-460(and)-419(thus)-418(is)-418(a)-419(synchr)18(onous)-418(method.)-815(The)]TJ 0 -11.955 Td [(choice)-244(of)-244(a)-244(speci\002c)-244(dynamic)-244(type)-244(for)-244(the)-244(index)-244(map)-244(is)-244(made)-244(at)-244(the)-244(time)-244(the)-244(de-)]TJ 0 -11.955 Td [(scriptor)-210(is)-211(init)1(ially)-211(allocated,)-218(accor)18(ding)-210(to)-211(t)1(he)-211(mode)-210(of)-210(initialization)-211(\050see)-210(also)]TJ +0 0 1 rg 0 0 1 RG + [-210(6)]TJ +0 g 0 G + [(\051.)]TJ 14.944 -12.076 Td [(The)-250(descriptor)-250(contents)-250(ar)18(e)-250(as)-250(follows:)]TJ +0 g 0 G +/F59 9.9626 Tf -14.944 -20.288 Td [(indxmap)]TJ +0 g 0 G +/F62 9.9626 Tf 45.38 0 Td [(A)-190(polymorphic)-190(variable)-190(of)-190(a)-190(type)-190(that)-190(is)-190(any)-190(extension)-190(of)-190(the)-190(indx)]TJ +ET +q +1 0 0 1 478.491 370.98 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 481.48 370.78 Td [(map)]TJ -305.869 -11.955 Td [(type)-250(described)-250(above.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -32.363 Td [(halo)]TJ +ET +q +1 0 0 1 171.228 326.661 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.217 326.462 Td [(index)]TJ +0 g 0 G +/F62 9.9626 Tf 30.435 0 Td [(A)-331(list)-332(of)-331(the)-332(halo)-331(and)-332(boundary)-331(elements)-332(for)-331(the)-332(curr)18(ent)-331(pr)18(ocess)]TJ -29.041 -11.955 Td [(to)-247(be)-247(exchanged)-247(with)-246(other)-247(pr)18(ocesses;)-248(for)-247(each)-247(pr)18(ocesses)-247(with)-247(whic)1(h)-247(it)-247(is)]TJ 0 -11.956 Td [(necessary)-250(to)-250(communicate:)]TJ +0 g 0 G + 9.465 -20.408 Td [(1.)]TJ +0 g 0 G + [-500(Pr)18(ocess)-250(identi\002er;)]TJ +0 g 0 G + 0 -16.182 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.181 Td [(3.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.182 Td [(4.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ +0 g 0 G + 0 -16.182 Td [(5.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -20.408 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -20.409 Td [(ext)]TJ +ET +q +1 0 0 1 164.583 176.799 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 167.572 176.599 Td [(index)]TJ +0 g 0 G +/F62 9.9626 Tf 30.435 0 Td [(A)-216(list)-217(of)-216(element)-217(indices)-216(to)-217(be)-216(exchanged)-217(to)-216(implement)-217(the)-216(mapping)]TJ -22.396 -11.955 Td [(between)-250(a)-250(base)-250(descriptor)-250(and)-250(a)-250(descriptor)-250(with)-250(overlap.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -20.408 Td [(ovrlap)]TJ +ET +q +1 0 0 1 180.642 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 183.631 132.281 Td [(index)]TJ +0 g 0 G +/F62 9.9626 Tf 30.436 0 Td [(A)-259(list)-259(of)-258(the)-259(overlap)-259(elements)-259(for)-259(the)-258(curr)18(ent)-259(pr)18(ocess,)-261(or)18(ganized)]TJ -38.456 -11.955 Td [(in)-250(gr)18(oups)-250(like)-250(the)-250(pr)18(evious)-250(vector:)]TJ +0 g 0 G + 141.968 -29.888 Td [(10)]TJ +0 g 0 G +ET + +endstream +endobj +1006 0 obj << -/Type /Page -/Contents 823 0 R -/Resources 821 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 818 0 R 819 0 R ] +/Length 7045 >> -% 826 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 134.267 706.129 Td [(1.)]TJ +0 g 0 G + [-500(Pr)18(ocess)-250(identi\002er;)]TJ +0 g 0 G + 0 -16.693 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.694 Td [(3.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.693 Td [(4.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ +0 g 0 G + 0 -16.693 Td [(5.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.431 Td [(ovr)]TJ +ET +q +1 0 0 1 115.447 596.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 118.436 596.494 Td [(mst)]TJ +ET +q +1 0 0 1 135.631 596.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 138.62 596.494 Td [(idx)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(A)-331(list)-332(to)-331(r)18(etrieve)-331(the)-332(value)-331(of)-331(each)-332(overlap)-331(element)-331(fr)18(om)-332(the)-331(r)18(e-)]TJ -33.185 -11.956 Td [(spective)-250(master)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.431 Td [(ovrlap)]TJ +ET +q +1 0 0 1 129.833 551.351 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 132.822 551.152 Td [(elem)]TJ +0 g 0 G +/F62 9.9626 Tf 27.118 0 Td [(For)-250(all)-250(overlap)-250(points)-250(belonging)-250(to)-250(th)-250(ecurr)18(ent)-250(pr)18(ocess:)]TJ +0 g 0 G + -25.673 -21.431 Td [(1.)]TJ +0 g 0 G + [-500(Overlap)-250(point)-250(index;)]TJ +0 g 0 G + 0 -16.693 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(pr)18(ocesses)-250(sharing)-250(that)-250(overlap)-250(points;)]TJ +0 g 0 G + 0 -16.694 Td [(3.)]TJ +0 g 0 G + [-500(Index)-250(of)-250(a)-250(\223master)-74(\224)-250(pr)18(ocess:)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(an)-250(allocatable)-250(integer)-250(array)-250(of)-250(rank)-250(two.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.431 Td [(bnd)]TJ +ET +q +1 0 0 1 118.755 453.671 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 121.743 453.472 Td [(elem)]TJ +0 g 0 G +/F62 9.9626 Tf 27.119 0 Td [(A)-235(list)-235(of)-235(all)-235(boundary)-235(points,)-238(i.e.)-305(points)-235(that)-235(have)-235(a)-235(connection)-235(with)]TJ -24.06 -11.955 Td [(other)-250(pr)18(ocesses.)]TJ -24.907 -21.055 Td [(The)-393(Fortran)-394(2003)-393(declaration)-394(for)]TJ/F67 9.9626 Tf 151.232 0 Td [(psb_desc_type)]TJ/F62 9.9626 Tf 71.913 0 Td [(str)8(uctur)18(es)-393(is)-394(as)-393(follows:)-597(A)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 99.895 294.955 cm +0 0 343.711 104.608 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 102.884 388.902 Td [(type)]TJ +0 g 0 G + [-525(psb_desc_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 18.829 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_indx_map\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(indxmap)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_halo_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ext_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ovrlap_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ovr_mst_idx)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(ovrlap_elem\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(bnd_elem\050:\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -18.829 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_desc_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf -2.989 -41.43 Td [(Listing)-259(1:)-327(The)-259(PSBLAS)-259(de\002ned)-259(data)-258(type)-259(that)-259(contains)-259(the)-258(communication)-259(de-)]TJ 0 -11.955 Td [(scriptor)74(.)]TJ 0 -25.259 Td [(communication)-319(descriptor)-320(associated)-319(with)-319(a)-320(sparse)-319(matrix)-320(has)-319(a)-319(state,)-337(which)]TJ 0 -11.955 Td [(can)-250(take)-250(the)-250(following)-250(values:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.054 Td [(Build:)]TJ +0 g 0 G +/F62 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.956 Td [(this)-220(state)-220(it)-220(is)-220(possible)-220(to)-220(add)-220(communication)-220(r)18(equir)18(ements)-220(among)-220(dif)18(fer)18(-)]TJ 0 -11.955 Td [(ent)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.431 Td [(Assembled:)]TJ +0 g 0 G +/F62 9.9626 Tf 58.381 0 Td [(State)-308(enter)18(ed)-308(after)-308(the)-309(assembly;)-337(computations)-308(using)-308(the)-308(associ-)]TJ -33.474 -11.955 Td [(ated)-310(sparse)-310(matrix,)-325(such)-310(as)-310(matrix-vector)-309(pr)18(oducts,)-325(ar)18(e)-310(only)-310(possible)-310(in)]TJ 0 -11.955 Td [(this)-250(state.)]TJ +0 g 0 G + 141.968 -29.888 Td [(11)]TJ +0 g 0 G +ET + +endstream +endobj +1022 0 obj << -/Producer (ESP Ghostscript 815.03) -/CreationDate (D:20070123225315) -/ModDate (D:20070123225315) +/Length 4957 >> -% 827 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(3.1.1)-1000(Descriptor)-250(Methods)]TJ 0 -19 Td [(3.1.2)-1000(get)]TJ +ET +q +1 0 0 1 195.029 687.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 198.017 687.129 Td [(local)]TJ +ET +q +1 0 0 1 220.194 687.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 223.183 687.129 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(rows)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -72.478 -19 Td [(nr)-525(=)-525(desc%get_local_rows\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.001 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-399(number)-398(of)-399(local)-398(r)18(ows,)-436(i.e.)-756(the)-398(number)-399(of)-399(r)18(ows)-398(owned)]TJ -47.87 -11.956 Td [(by)-350(the)-349(curr)18(ent)-350(pr)18(ocess;)-399(as)-350(explained)-350(in)]TJ +0 0 1 rg 0 0 1 RG + [-349(1)]TJ +0 g 0 G + [(,)-375(it)-350(is)-349(equal)-350(to)]TJ/F91 10.3811 Tf 249.705 0 Td [(j)-24(I)]TJ/F60 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.433 0 Td [(+)]TJ/F91 10.3811 Tf 10.624 0 Td [(j)-23(B)]TJ/F60 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F62 9.9626 Tf 3.003 0 Td [(.)-609(The)]TJ -293.569 -11.955 Td [(r)18(eturned)-250(value)-250(is)-250(speci\002c)-250(to)-250(the)-250(calling)-250(pr)18(ocess.)]TJ/F59 9.9626 Tf -24.906 -27.247 Td [(3.1.3)-1000(get)]TJ +ET +q +1 0 0 1 195.029 489.311 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 198.017 489.112 Td [(local)]TJ +ET +q +1 0 0 1 220.194 489.311 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 223.183 489.112 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -72.478 -19 Td [(nc)-525(=)-525(desc%get_local_cols\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.001 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-320(number)-320(of)-321(local)-320(cols,)-338(i.e.)-521(the)-320(number)-320(of)-321(indices)-320(used)-320(by)]TJ -47.87 -11.955 Td [(the)-322(curr)18(ent)-322(pr)18(ocess,)-340(including)-322(both)-322(local)-322(and)-322(halo)-322(indices;)-358(as)-322(explained)]TJ 0 -11.956 Td [(in)]TJ +0 0 1 rg 0 0 1 RG + [-284(1)]TJ +0 g 0 G + [(,)-294(i)1(t)-285(is)-284(equal)-285(to)]TJ/F91 10.3811 Tf 79.58 0 Td [(j)-24(I)]TJ/F60 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.193 0 Td [(+)]TJ/F91 10.3811 Tf 10.383 0 Td [(j)-24(B)]TJ/F60 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.192 0 Td [(+)]TJ/F91 10.3811 Tf 10.383 0 Td [(j)-24(H)]TJ/F60 7.5716 Tf 12.052 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F62 9.9626 Tf 3.004 0 Td [(.)-413(The)-285(r)18(eturned)-284(value)-285(is)-284(speci\002c)-285(to)-284(the)]TJ -153.464 -11.955 Td [(calling)-250(pr)18(ocess.)]TJ/F59 9.9626 Tf -24.907 -27.247 Td [(3.1.4)-1000(get)]TJ +ET +q +1 0 0 1 195.029 279.339 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 198.017 279.14 Td [(global)]TJ +ET +q +1 0 0 1 227.397 279.339 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 230.386 279.14 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(rows)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -79.681 -19 Td [(nr)-525(=)-525(desc%get_global_rows\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.001 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-351(number)-351(of)-350(global)-351(r)18(ows,)-376(i.e.)-613(the)-351(size)-351(of)-350(the)-351(global)-351(index)]TJ -47.87 -11.955 Td [(space.)]TJ +0 g 0 G + 141.967 -29.888 Td [(12)]TJ +0 g 0 G +ET + +endstream +endobj +1026 0 obj << -/Type /ExtGState -/OPM 1 +/Length 4367 >> +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(3.1.5)-1000(get)]TJ +ET +q +1 0 0 1 144.219 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 706.129 Td [(global)]TJ +ET +q +1 0 0 1 176.587 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 179.576 706.129 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -79.681 -18.974 Td [(nr)-525(=)-525(desc%get_global_cols\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.935 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-242(number)-242(of)-241(global)-242(cols;)-245(usually)-241(this)-242(is)-242(equal)-242(to)-241(the)-242(number)]TJ -47.87 -11.955 Td [(of)-250(global)-250(r)18(ows.)]TJ/F59 9.9626 Tf -24.907 -27.172 Td [(3.1.6)-1000(get)]TJ +ET +q +1 0 0 1 144.219 520.607 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 520.408 Td [(global)]TJ +ET +q +1 0 0 1 176.587 520.607 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 179.576 520.408 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -79.681 -18.974 Td [(myidx)-525(=)-525(desc%get_global_indices\050[owned]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -91.287 -31.902 Td [(owned)]TJ +0 g 0 G +/F62 9.9626 Tf 35.975 0 Td [(Choose)-330(if)-329(you)-330(only)-329(want)-330(owned)-330(indices)-329(\050)]TJ/F67 9.9626 Tf 185.766 0 Td [(owned)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.true.)]TJ/F62 9.9626 Tf 62.764 0 Td [(\051)-330(or)-329(also)-330(halo)]TJ -259.598 -11.955 Td [(indices)-250(\050)]TJ/F67 9.9626 Tf 36.911 0 Td [(owned)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.false.)]TJ/F62 9.9626 Tf 67.995 0 Td [(\051.)-310(Scope:)]TJ/F59 9.9626 Tf 40.328 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -166.813 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(;)-250(default:)]TJ/F67 9.9626 Tf 41.872 0 Td [(.true.)]TJ/F62 9.9626 Tf 31.382 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -163.436 -33.89 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-277(global)-277(indices,)-284(r)18(eturned)-277(as)-277(an)-277(allocatable)-277(integer)-277(array)-277(of)]TJ -47.87 -11.955 Td [(kind)]TJ/F67 9.9626 Tf 22.814 0 Td [(psb_lpk_)]TJ/F62 9.9626 Tf 44.334 0 Td [(and)-250(rank)-250(1.)]TJ/F59 9.9626 Tf -92.055 -27.171 Td [(3.1.7)-1000(get)]TJ +ET +q +1 0 0 1 144.219 267.119 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 266.92 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -47.313 -18.975 Td [(ctxt)-525(=)-525(desc%get_context\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-250(communication)-250(context.)]TJ +0 g 0 G + 94.098 -29.888 Td [(13)]TJ +0 g 0 G +ET endstream endobj -836 0 obj +1031 0 obj << -/Length 4927 +/Length 4754 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(Data)-250(management)-250(and)-250(auxiliary)-250(routines)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(3.1.8)-1000(Clone)-250(\227)-250(clone)-250(current)-250(object)]TJ 0 g 0 G -/F54 9.9626 Tf 190.375 0 Td [(including:)]TJ 0 g 0 G - -154.569 -24.208 Td [(\225)]TJ +/F67 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%clone\050descout,info\051)]TJ 0 g 0 G - [-500(Parallel)-250(envir)18(onment)-250(management)]TJ +/F59 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -18.081 Td [(\225)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(Communication)-250(descriptors)-250(allocation;)]TJ +/F59 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -18.082 Td [(\225)]TJ 0 g 0 G - [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(allocation;)]TJ + 0 -20.598 Td [(desc)]TJ 0 g 0 G - 0 -18.081 Td [(\225)]TJ +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G - [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(build)-250(and)-250(update;)]TJ +/F59 9.9626 Tf -77.917 -34.377 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -18.082 Td [(\225)]TJ 0 g 0 G - [-500(Sparse)-250(matrix)-250(and)-250(data)-250(distribution)-250(pr)18(epr)18(ocessing.)]TJ + 0 -20.597 Td [(descout)]TJ 0 g 0 G -/F51 9.9626 Tf -35.806 -24.207 Td [(Preconditioner)-250(routines)]TJ +/F62 9.9626 Tf 39.84 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ 0 g 0 G +/F59 9.9626 Tf -39.84 -20.597 Td [(info)]TJ 0 g 0 G - 0 -24.208 Td [(Iterative)-250(methods)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.8 -28.097 Td [(3.1.9)-1000(CNV)-250(\227)-250(convert)-250(internal)-250(storage)-250(format)]TJ 0 g 0 G -/F54 9.9626 Tf 84.951 0 Td [(a)-250(subset)-250(of)-250(Krylov)-250(subspace)-250(iterative)-250(methods)]TJ -84.951 -23.137 Td [(The)-262(following)-263(naming)-262(scheme)-262(has)-262(been)-263(adopted)-262(for)-262(all)-263(the)-262(symbols)-262(internally)]TJ 0 -11.955 Td [(de\002ned)-250(in)-250(the)-250(PSBLAS)-250(softwar)18(e)-250(package:)]TJ 0 g 0 G - 13.888 -23.137 Td [(\225)]TJ +/F67 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%cnv\050mold\051)]TJ 0 g 0 G - [-500(all)-250(symbols)-250(\050i.e.)-310(subr)18(outine)-250(names,)-250(data)-250(types...\051)-310(ar)18(e)-250(pr)18(e\002xed)-250(by)]TJ/F59 9.9626 Tf 294.184 0 Td [(psb_)]TJ +/F59 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf -294.184 -24.208 Td [(\225)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.597 Td [(desc)]TJ 0 g 0 G - [-500(all)-250(data)-250(type)-250(names)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F59 9.9626 Tf 166.604 0 Td [(_type)]TJ +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf -166.604 -24.208 Td [(\225)]TJ +/F59 9.9626 Tf -77.917 -32.553 Td [(mold)]TJ 0 g 0 G - [-500(all)-250(constants)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F59 9.9626 Tf 135.59 0 Td [(_)]TJ +/F62 9.9626 Tf 28.782 0 Td [(the)-250(desir)18(ed)-250(integer)-250(storage)-250(format.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F67 9.9626 Tf 221.926 0 Td [(psb)]TJ +ET +q +1 0 0 1 413.855 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 416.994 356.277 Td [(T)]TJ +ET +q +1 0 0 1 422.851 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 425.99 356.277 Td [(base)]TJ +ET +q +1 0 0 1 447.539 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 450.677 356.277 Td [(vect)]TJ +ET +q +1 0 0 1 472.226 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 475.364 356.277 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ -345.58 -22.59 Td [(The)]TJ/F67 9.9626 Tf 19.583 0 Td [(mold)]TJ/F62 9.9626 Tf 23.828 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ/F59 9.9626 Tf 0 -28.096 Td [(3.1.10)-1000(psb)]TJ +ET +q +1 0 0 1 202.769 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 205.758 293.636 Td [(cd)]TJ +ET +q +1 0 0 1 216.867 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 219.855 293.636 Td [(get)]TJ +ET +q +1 0 0 1 234.291 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 237.28 293.636 Td [(large)]TJ +ET +q +1 0 0 1 260.572 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 263.561 293.636 Td [(threshold)-190(\227)-190(Get)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ 0 g 0 G -/F54 9.9626 Tf -135.59 -24.208 Td [(\225)]TJ 0 g 0 G - [-500(all)-279(top-level)-279(subr)18(outine)-279(names)-279(follow)-279(the)-279(r)8(ule)]TJ/F59 9.9626 Tf 216.11 0 Td [(psb_xxname)]TJ/F54 9.9626 Tf 55.083 0 Td [(wher)18(e)]TJ/F59 9.9626 Tf 30.187 0 Td [(xx)]TJ/F54 9.9626 Tf 13.241 0 Td [(can)]TJ -303.602 -11.955 Td [(be)-250(either:)]TJ +/F67 9.9626 Tf -112.856 -19.29 Td [(ith)-525(=)-525(psb_cd_get_large_threshold\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 11.955 -24.208 Td [(\226)]TJ +/F59 9.9626 Tf 0 -22.421 Td [(T)90(ype:)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(ge)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(dense)-250(data,)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +/F59 9.9626 Tf -29.828 -20.597 Td [(On)-250(Return)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(sp)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(sparse)-250(data,)]TJ 0 g 0 G -/F51 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ + 0 -20.598 Td [(Function)-250(value)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(cd)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(communication)-250(descriptor)-250(\050see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(curr)18(ent)-250(value)-250(for)-250(the)-250(size)-250(thr)18(eshold.)]TJ/F59 9.9626 Tf -72.777 -28.096 Td [(3.1.11)-1000(psb)]TJ +ET +q +1 0 0 1 202.769 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 205.758 182.634 Td [(cd)]TJ +ET +q +1 0 0 1 216.867 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 219.855 182.634 Td [(set)]TJ +ET +q +1 0 0 1 233.175 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 236.164 182.634 Td [(large)]TJ +ET +q +1 0 0 1 259.457 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 262.445 182.634 Td [(threshold)-190(\227)-190(Set)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ 0 g 0 G - [(\051.)]TJ -32.378 -24.208 Td [(For)-215(example)-215(the)]TJ/F59 9.9626 Tf 72.515 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)]TJ/F59 9.9626 Tf 4.704 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.218 0 Td [(and)]TJ/F59 9.9626 Tf 19.011 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 49.218 0 Td [(perform)-215(the)-215(same)]TJ -241.739 -11.955 Td [(action)-247(\050see)]TJ -0 0 1 rg 0 0 1 RG - [-246(6)]TJ 0 g 0 G - [(\051)-247(on)-246(dense)-247(matrices,)-247(sparse)-247(matric)1(es)-247(and)-247(communication)-246(de-)]TJ 0 -11.956 Td [(scriptors)-222(r)18(espectively)111(.)-301(Interface)-222(overloading)-223(allows)-222(the)-222(usage)-222(of)-223(the)-222(same)]TJ 0 -11.955 Td [(subr)18(outine)-250(names)-250(for)-250(both)-250(r)18(eal)-250(and)-250(complex)-250(data.)]TJ -24.907 -23.137 Td [(In)-288(the)-288(description)-288(of)-289(the)-288(subr)18(outines,)-297(ar)18(guments)-289(or)-288(ar)18(gument)-288(entries)-288(ar)18(e)-288(clas-)]TJ 0 -11.955 Td [(si\002ed)-250(as:)]TJ +/F67 9.9626 Tf -111.74 -19.289 Td [(call)-525(psb_cd_set_large_threshold\050ith\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -23.137 Td [(global)]TJ +/F59 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(For)-270(input)-270(ar)18(guments,)-275(the)-270(value)-271(must)-270(be)-270(the)-270(same)-270(on)-270(all)-270(pr)18(ocesses)-270(par)18(-)]TJ -8.856 -11.955 Td [(ticipating)-276(in)-277(the)-276(subr)18(outine)-277(call;)-289(for)-277(output)-276(ar)18(guments)-277(the)-276(value)-277(is)-276(guar)18(-)]TJ 0 -11.955 Td [(anteed)-250(to)-250(be)-250(the)-250(same.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.208 Td [(local)]TJ +/F59 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Each)-250(pr)18(ocess)-250(has)-250(its)-250(own)-250(value\050s\051)-250(independently)111(.)]TJ -26.56 -23.137 Td [(T)92(o)-250(\002nish)-250(our)-250(general)-250(description,)-250(we)-250(de\002ne)-250(a)-250(version)-250(string)-250(with)-250(the)-250(constant)]TJ/F59 9.9626 Tf 122.168 -24.059 Td [(psb_version_string_)]TJ/F54 9.9626 Tf -122.168 -24.059 Td [(whose)-250(curr)18(ent)-250(value)-250(is)]TJ/F59 9.9626 Tf 101.857 0 Td [(3.8.0)]TJ 0 g 0 G -/F54 9.9626 Tf 67.508 -29.888 Td [(5)]TJ +/F62 9.9626 Tf 166.874 -29.888 Td [(14)]TJ 0 g 0 G ET endstream endobj -841 0 obj +1036 0 obj << -/Length 8378 +/Length 5640 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(2.3)-1000(Application)-250(structure)]TJ/F54 9.9626 Tf 0 -19.381 Td [(The)-244(main)-244(underlyi)1(ng)-244(principle)-244(of)-244(the)-244(PSBLAS)-243(library)-244(is)-244(that)-244(the)-243(library)-244(objects)]TJ 0 -11.956 Td [(ar)18(e)-236(cr)18(eated)-235(and)-236(exist)-235(with)-236(r)18(efer)18(ence)-235(to)-236(a)-235(discr)18(etized)-236(space)-236(t)1(o)-236(which)-236(ther)18(e)-235(corr)18(e-)]TJ 0 -11.955 Td [(sponds)-258(an)-257(index)-258(space)-257(and)-258(a)-258(matrix)-257(sparsity)-258(pattern.)-332(As)-258(an)-258(example,)-259(consider)]TJ 0 -11.955 Td [(a)-310(cell-center)18(ed)-309(\002nite-volume)-310(discr)18(etization)-310(of)-309(the)-310(Navier)18(-Stokes)-310(e)1(quations)-310(on)]TJ 0 -11.955 Td [(a)-234(simulation)-235(domain;)-239(the)-234(index)-235(space)-234(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 185.595 0 Td [(n)]TJ/F54 9.9626 Tf 7.998 0 Td [(is)-234(isomorphic)-235(to)-234(the)-234(set)-235(of)-234(cell)-234(cen-)]TJ -193.593 -11.955 Td [(ters,)-210(wher)18(eas)-200(the)-201(pattern)-200(of)-200(the)-201(associated)-200(linear)-200(system)-200(matrix)-201(is)-200(isomorphic)-200(to)]TJ 0 -11.956 Td [(the)-294(adjacency)-294(graph)-294(imposed)-294(on)-294(the)-294(discr)18(et)1(ization)-294(mesh)-294(by)-294(the)-294(discr)18(etization)]TJ 0 -11.955 Td [(stencil.)]TJ 14.944 -12.17 Td [(Thus)-343(the)-343(\002rst)-343(or)18(der)-344(of)-343(business)-343(is)-343(to)-343(establish)-343(an)-343(index)-343(space,)-367(and)-343(this)-343(is)]TJ -14.944 -11.955 Td [(done)-287(with)-287(a)-287(call)-287(to)]TJ/F59 9.9626 Tf 85.52 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 49.932 0 Td [(in)-287(which)-287(we)-287(specify)-287(the)-287(size)-287(of)-287(the)-287(index)-287(space)]TJ/F52 9.9626 Tf -135.328 -11.956 Td [(n)]TJ/F54 9.9626 Tf 8.041 0 Td [(and)-238(the)-239(allocation)-238(of)-239(the)-239(ele)1(ments)-239(of)-239(the)-238(index)-239(space)-238(to)-239(the)-238(various)-239(pr)18(ocesses)]TJ -8.165 -11.955 Td [(making)-250(up)-250(the)-250(MPI)-250(\050virtual\051)-250(parallel)-250(machine.)]TJ 14.944 -12.17 Td [(The)-366(index)-367(space)-366(is)-366(partitioned)-367(among)-366(pr)18(ocesses,)-396(and)-366(this)-366(cr)18(eates)-367(a)-366(map-)]TJ -14.944 -11.956 Td [(ping)-301(fr)18(om)-300(the)-301(\223global\224)-301(numbering)-300(1)-180(.)-191(.)-192(.)]TJ/F52 9.9626 Tf 176.584 0 Td [(n)]TJ/F54 9.9626 Tf 8.659 0 Td [(to)-301(a)-300(numbering)-301(\223local\224)-301(to)-301(each)-300(pr)18(o-)]TJ -185.243 -11.955 Td [(cess;)-230(each)-221(pr)18(ocess)]TJ/F52 9.9626 Tf 79.682 0 Td [(i)]TJ/F54 9.9626 Tf 5.162 0 Td [(will)-221(own)-220(a)-221(certain)-220(subset)-221(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 130.532 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.649 Td [(i)]TJ/F54 9.9626 Tf 2.775 3.143 Td [(,)-226(each)-221(element)-221(of)-220(which)]TJ -241.352 -11.955 Td [(corr)18(esponds)-258(to)-259(a)-258(certain)-258(element)-258(of)-259(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 177.035 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 0 Td [(.)-335(The)-258(user)-259(doe)1(s)-259(not)-258(set)-258(explicitly)-259(this)]TJ -182.699 -11.955 Td [(mapping;)-225(when)-212(the)-212(application)-212(needs)-213(to)-212(indicate)-212(to)-212(which)-213(el)1(ement)-213(of)-212(the)-212(index)]TJ 0 -11.955 Td [(space)-305(a)-306(certain)-305(item)-306(is)-305(r)18(elated,)-320(such)-305(as)-306(the)-305(r)18(ow)-306(and)-305(column)-306(index)-305(of)-306(a)-305(matrix)]TJ 0 -11.956 Td [(coef)18(\002cient,)-283(it)-276(does)-277(so)-276(in)-277(the)-276(\223global\224)-277(numb)1(ering,)-284(and)-276(the)-276(library)-277(will)-276(translate)]TJ 0 -11.955 Td [(into)-250(the)-250(appr)18(opriate)-250(\223local\224)-250(numbering.)]TJ 14.944 -12.17 Td [(For)-324(a)-325(given)-324(index)-324(space)-325(1)-179(.)-191(.)-192(.)]TJ/F52 9.9626 Tf 129.74 0 Td [(n)]TJ/F54 9.9626 Tf 8.895 0 Td [(ther)18(e)-324(ar)18(e)-325(many)-324(possible)-324(associated)-325(topolo-)]TJ -153.579 -11.956 Td [(gies,)-213(i.e.)-295(many)-204(dif)18(fer)18(ent)-204(discr)18(etization)-204(stencils;)-220(thus)-204(the)-204(description)-204(of)-204(the)-204(index)]TJ 0 -11.955 Td [(space)-277(is)-278(not)-277(completed)-278(until)-277(the)-277(user)-278(has)-277(de\002ned)-278(a)-277(sparsity)-278(p)1(attern,)-285(either)-277(ex-)]TJ 0 -11.955 Td [(plicitly)-263(thr)18(ough)]TJ/F59 9.9626 Tf 71.63 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 49.698 0 Td [(or)-264(im)1(plicitly)-264(thr)18(ough)]TJ/F59 9.9626 Tf 95.326 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)-351(T)1(he)-264(descriptor)-263(is)]TJ -263.727 -11.955 Td [(\002nalized)-225(with)-225(a)-225(call)-226(to)]TJ/F59 9.9626 Tf 98.787 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.316 0 Td [(and)-225(a)-225(sparse)-225(matrix)-226(with)-225(a)-225(call)-225(to)]TJ/F59 9.9626 Tf 146.044 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ -341.22 -11.955 Td [(After)]TJ/F59 9.9626 Tf 26.16 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 50.21 0 Td [(each)-315(pr)18(ocess)]TJ/F52 9.9626 Tf 59.13 0 Td [(i)]TJ/F54 9.9626 Tf 6.101 0 Td [(will)-315(have)-315(de\002ned)-315(a)-314(set)-315(of)-315(\223halo\224)-315(\050or)-315(\223ghost\224\051)]TJ -141.601 -11.955 Td [(indices)]TJ/F52 9.9626 Tf 34.731 0 Td [(n)]TJ/F54 9.9626 Tf 5.663 -1.495 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.538 -1.648 Td [(i)]TJ/F85 10.3811 Tf 5.211 3.143 Td [(+)]TJ/F54 9.9626 Tf 10.506 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -3.831 Td [(col)]TJ/F52 5.9776 Tf 12.794 -1.648 Td [(i)]TJ/F54 9.9626 Tf 2.775 5.479 Td [(,)-377(denoting)-352(elements)-351(of)-352(the)-352(index)-351(space)-352(that)-351(ar)18(e)]TJ/F52 9.9626 Tf 215.582 0 Td [(not)]TJ/F54 9.9626 Tf -330.431 -13.79 Td [(assigned)-289(to)-290(pr)18(ocess)]TJ/F52 9.9626 Tf 88.744 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(;)-309(however)-290(t)1(he)-290(variables)-289(associated)-290(with)-289(them)-290(ar)18(e)-289(needed)]TJ -91.708 -11.955 Td [(to)-289(complete)-289(computations)-289(associated)-289(with)-290(the)-289(sparse)-289(matrix)]TJ/F52 9.9626 Tf 269.662 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(,)-299(and)-289(thus)-289(they)]TJ -276.98 -11.955 Td [(have)-266(to)-266(be)-266(fetched)-265(fr)18(om)-266(\050neighbouring\051)-266(pr)18(ocesses.)-358(The)-266(descriptor)-265(of)-266(the)-266(index)]TJ 0 -11.956 Td [(space)-294(is)-293(built)-294(exactly)-294(for)-293(the)-294(purpose)-294(of)-293(pr)18(operly)-294(sequencing)-294(the)-293(communica-)]TJ 0 -11.955 Td [(tion)-250(steps)-250(r)18(equir)18(ed)-250(to)-250(achieve)-250(this)-250(objective.)]TJ 14.944 -12.17 Td [(A)-197(simple)-197(application)-197(str)8(uctur)18(e)-197(will)-197(walk)-197(thr)18(ough)-197(the)-197(index)-197(space)-197(allocation,)]TJ -14.944 -11.956 Td [(matrix/vector)-250(cr)18(eation)-250(and)-250(linear)-250(system)-250(solution)-250(as)-250(follows:)]TJ -0 g 0 G - 12.453 -20.571 Td [(1.)]TJ -0 g 0 G - [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F59 9.9626 Tf 171.465 0 Td [(psb_init)]TJ -0 g 0 G -/F54 9.9626 Tf -171.465 -20.787 Td [(2.)]TJ -0 g 0 G - [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F59 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ -0 g 0 G -/F54 9.9626 Tf -130.489 -20.788 Td [(3.)]TJ -0 g 0 G - [-500(Allocate)-221(sparse)-221(matrix)-221(and)-221(dense)-221(vectors)-220(with)]TJ/F59 9.9626 Tf 215.843 0 Td [(psb_spall)]TJ/F54 9.9626 Tf 49.274 0 Td [(and)]TJ/F59 9.9626 Tf 19.068 0 Td [(psb_geall)]TJ 0 g 0 G -/F54 9.9626 Tf -284.185 -20.787 Td [(4.)]TJ -0 g 0 G - [-500(Loop)-320(over)-320(all)-320(local)-320(r)18(ows,)-338(generate)-320(matrix)-320(and)-320(vector)-320(entries,)-337(and)-320(insert)]TJ 12.454 -11.955 Td [(them)-250(with)]TJ/F59 9.9626 Tf 47.849 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ -0 g 0 G -/F54 9.9626 Tf -129.225 -20.787 Td [(5.)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(ith)]TJ 0 g 0 G - [-500(Assemble)-250(the)-250(various)-250(entities:)]TJ +/F62 9.9626 Tf 17.704 0 Td [(the)-250(new)-250(thr)18(eshold)-250(for)-250(communication)-250(descriptors.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(gr)18(eater)-250(than)-250(zer)18(o.)]TJ -24.907 -20.813 Td [(Note:)-665(the)-427(thr)18(eshold)-428(value)-427(is)-428(only)-427(queried)-428(by)-427(the)-428(library)-427(at)-428(the)-427(time)-428(a)-427(call)]TJ 0 -11.955 Td [(to)]TJ/F67 9.9626 Tf 12.451 0 Td [(psb_cdall)]TJ/F62 9.9626 Tf 50.837 0 Td [(is)-378(executed,)-409(ther)18(efor)18(e)-378(changing)-378(the)-378(thr)18(eshold)-377(has)-378(no)-378(ef)18(fect)-378(on)]TJ -63.288 -11.955 Td [(communication)-339(descriptors)-340(that)-339(have)-339(alr)18(eady)-340(been)-339(initialized.)-578(Mor)18(eover)-339(the)]TJ 0 -11.955 Td [(thr)18(eshold)-250(must)-250(have)-250(the)-250(same)-250(value)-250(on)-250(all)-250(pr)18(ocesses.)]TJ/F59 9.9626 Tf 0 -26.933 Td [(3.1.12)-1000(get)]TJ +ET +q +1 0 0 1 149.2 574.896 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 152.189 574.697 Td [(p)]TJ +ET +q +1 0 0 1 158.874 574.896 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 161.863 574.697 Td [(adjcncy)-250(\227)-250(Get)-250(process)-250(adjacency)-250(list)]TJ 0 g 0 G - 17.774 -20.787 Td [(\050a\051)]TJ 0 g 0 G -/F59 9.9626 Tf 16.597 0 Td [(psb_cdasb)]TJ +/F67 9.9626 Tf -61.968 -18.964 Td [(list)-525(=)-1050(desc%get_p_adjcncy\050\051)]TJ 0 g 0 G -/F54 9.9626 Tf -17.125 -16.371 Td [(\050b\051)]TJ +/F59 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ 0 g 0 G -/F59 9.9626 Tf 17.125 0 Td [(psb_spasb)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf -16.039 -16.371 Td [(\050c\051)]TJ +/F59 9.9626 Tf -29.828 -19.483 Td [(On)-250(Return)]TJ 0 g 0 G -/F59 9.9626 Tf 16.039 0 Td [(psb_geasb)]TJ 0 g 0 G -/F54 9.9626 Tf 122.541 -29.888 Td [(6)]TJ + 0 -19.484 Td [(Function)-250(value)]TJ 0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-190(curr)18(ent)-190(list)-190(of)-190(adjacent)-190(pr)18(ocesses,)-202(i.e.)-290(pr)18(ocesses)-190(with)-190(which)]TJ -47.87 -11.955 Td [(the)-250(curr)18(ent)-250(one)-250(has)-250(to)-250(exchange)-250(halo)-250(data.)]TJ/F59 9.9626 Tf -24.907 -26.933 Td [(3.1.13)-1000(set)]TJ ET - -endstream -endobj -854 0 obj -<< -/Length 7484 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 148.085 457.264 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F54 9.9626 Tf 112.349 706.129 Td [(6.)]TJ -0 g 0 G - [-500(Choose)-301(the)-300(pr)18(econditioner)-301(to)-300(be)-301(used)-300(with)]TJ/F59 9.9626 Tf 205.537 0 Td [(prec%init)]TJ/F54 9.9626 Tf 50.068 0 Td [(and)-301(build)-300(it)-301(with)]TJ/F59 9.9626 Tf -243.152 -11.955 Td [(prec%build)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 52.303 3.616 Td [(3)]TJ -0 g 0 G -/F54 9.9626 Tf 4.284 -3.616 Td [(.)]TJ +/F59 9.9626 Tf 151.073 457.065 Td [(p)]TJ +ET +q +1 0 0 1 157.758 457.264 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 160.747 457.065 Td [(adjcncy)-250(\227)-250(Set)-250(process)-250(adjacency)-250(list)]TJ 0 g 0 G - -69.04 -22.307 Td [(7.)]TJ 0 g 0 G - [-500(Call)-190(the)-190(iterative)-190(driver)]TJ/F59 9.9626 Tf 115.326 0 Td [(psb_krylov)]TJ/F54 9.9626 Tf 54.196 0 Td [(with)-190(the)-190(method)-190(of)-190(choice,)-202(e.g.)]TJ/F59 9.9626 Tf 134.982 0 Td [(bicgstab)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -358.801 -21.712 Td [(This)-250(is)-250(the)-250(str)8(uctur)18(e)-250(of)-250(the)-250(sample)-250(pr)18(ograms)-250(in)-250(the)-250(dir)18(ectory)]TJ/F59 9.9626 Tf 266.418 0 Td [(test/pargen/)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ -314.238 -12.551 Td [(For)-257(a)-258(simulation)-257(in)-257(which)-257(the)-258(same)-257(discr)18(etization)-257(mesh)-257(is)-258(used)-257(over)-257(multi-)]TJ -14.944 -11.955 Td [(ple)-250(time)-250(steps,)-250(the)-250(following)-250(str)8(uctur)18(e)-250(may)-250(be)-250(mor)18(e)-250(appr)18(opriate:)]TJ +/F67 9.9626 Tf -60.852 -18.964 Td [(call)-525(desc%set_p_adjcncy\050list\051)]TJ 0 g 0 G - 12.454 -21.712 Td [(1.)]TJ +/F59 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F59 9.9626 Tf 171.464 0 Td [(psb_init)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf -171.464 -22.307 Td [(2.)]TJ +/F59 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F59 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ 0 g 0 G -/F54 9.9626 Tf -130.489 -22.307 Td [(3.)]TJ + 0 -19.483 Td [(list)]TJ 0 g 0 G - [-500(Loop)-248(over)-248(the)-248(topology)-248(of)-248(the)-248(discr)18(eti)1(zation)-248(mesh)-248(and)-248(build)-248(the)-248(descrip-)]TJ 12.453 -11.955 Td [(tor)-250(with)]TJ/F59 9.9626 Tf 37.857 0 Td [(psb_cdins)]TJ +/F62 9.9626 Tf 19.358 0 Td [(the)-250(list)-250(of)-250(adjacent)-250(pr)18(ocesses.)]TJ 5.549 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F67 9.9626 Tf 250.209 0 Td [(psb_ipk_)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ -316.959 -20.813 Td [(Note:)-596(this)-392(method)-393(can)-393(be)-393(called)-393(after)-392(a)-393(call)-393(to)]TJ/F67 9.9626 Tf 216.367 0 Td [(psb_cdall)]TJ/F62 9.9626 Tf 50.987 0 Td [(and)-393(befor)18(e)-393(a)-392(call)]TJ -267.354 -11.955 Td [(to)]TJ/F67 9.9626 Tf 11.711 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)-470(The)-304(user)-303(is)-304(specifying)-303(her)18(e)-304(some)-303(knowledge)-304(about)-303(which)-304(pr)18(o-)]TJ -58.784 -11.955 Td [(cesses)-208(ar)18(e)-208(topol)1(ogical)-208(neighbours)-208(of)-208(the)-207(curr)18(ent)-208(pr)18(ocess.)-296(The)-208(availability)-207(of)-208(this)]TJ 0 -11.955 Td [(information)-250(may)-250(speed)-250(up)-250(the)-250(execution)-250(of)-250(the)-250(assembly)-250(call)]TJ/F67 9.9626 Tf 269.656 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)]TJ/F59 9.9626 Tf -316.729 -26.934 Td [(3.1.14)-1000(fnd)]TJ +ET +q +1 0 0 1 151.412 247.089 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 154.401 246.89 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(process)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ 0 g 0 G -/F54 9.9626 Tf -50.31 -22.307 Td [(4.)]TJ 0 g 0 G - [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F59 9.9626 Tf 144.386 0 Td [(psb_cdasb)]TJ +/F67 9.9626 Tf -54.506 -18.964 Td [(call)-525(desc%fnd_owner\050idx,iprc,info\051)]TJ 0 g 0 G -/F54 9.9626 Tf -144.386 -22.307 Td [(5.)]TJ +/F59 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Allocate)-190(the)-190(sparse)-190(matrices)-190(and)-190(dense)-190(vectors)-190(with)]TJ/F59 9.9626 Tf 237.676 0 Td [(psb_spall)]TJ/F54 9.9626 Tf 48.966 0 Td [(and)]TJ/F59 9.9626 Tf 18.759 0 Td [(psb_geall)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf -305.401 -22.308 Td [(6.)]TJ +/F59 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Loop)-250(over)-250(the)-250(time)-250(steps:)]TJ 0 g 0 G - 17.773 -22.307 Td [(\050a\051)]TJ + 0 -19.484 Td [(idx)]TJ 0 g 0 G - [-500(If)-297(after)-298(\002rst)-297(time)-298(step,)-309(r)18(einitialize)-297(the)-298(sparse)-297(matrix)-298(with)]TJ/F59 9.9626 Tf 269.151 0 Td [(psb_sprn)]TJ/F54 9.9626 Tf 41.843 0 Td [(;)]TJ -294.396 -11.955 Td [(also)-250(zer)18(o)-250(out)-250(the)-250(dense)-250(vectors;)]TJ +/F62 9.9626 Tf 19.368 0 Td [(the)-250(list)-250(of)-250(global)-250(indices)-250(for)-250(which)-250(we)-250(need)-250(the)-250(owning)-250(pr)18(ocesses.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F67 9.9626 Tf 250.209 0 Td [(psb_lpk_)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ 0 g 0 G - -17.126 -17.131 Td [(\050b\051)]TJ + -150.084 -29.888 Td [(15)]TJ 0 g 0 G - [-500(Loop)-428(over)-429(the)-428(mesh,)-473(generate)-429(the)-428(coef)18(\002cients)-429(and)-428(insert/update)]TJ 17.126 -11.955 Td [(them)-250(with)]TJ/F59 9.9626 Tf 47.85 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.563 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ +ET + +endstream +endobj +1043 0 obj +<< +/Length 7676 +>> +stream 0 g 0 G -/F54 9.9626 Tf -132.811 -17.131 Td [(\050c\051)]TJ 0 g 0 G - [-500(Assemble)-250(with)]TJ/F59 9.9626 Tf 84.223 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(psb_geasb)]TJ 0 g 0 G -/F54 9.9626 Tf -154.808 -17.132 Td [(\050d\051)]TJ +BT +/F59 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(Choose)-250(and)-250(build)-250(pr)18(econditioner)-250(with)]TJ/F59 9.9626 Tf 188.671 0 Td [(prec%init)]TJ/F54 9.9626 Tf 49.563 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(prec%build)]TJ 0 g 0 G -/F54 9.9626 Tf -256.277 -17.131 Td [(\050e\051)]TJ + 0 -21.934 Td [(iprc)]TJ 0 g 0 G - [-500(Call)-250(the)-250(iterative)-250(method)-250(of)-250(choice,)-250(e.g.)]TJ/F59 9.9626 Tf 190.902 0 Td [(psb_bicgstab)]TJ/F54 9.9626 Tf -221.338 -22.307 Td [(The)-276(insertion)-275(r)18(outines)-276(will)-275(be)-276(called)-275(as)-276(many)-276(times)-275(as)-276(needed;)-288(they)-276(only)-275(need)]TJ 0 -11.955 Td [(to)-214(be)-213(called)-214(on)-213(the)-214(data)-214(t)1(hat)-214(is)-214(actually)-213(allocated)-214(to)-213(the)-214(curr)18(ent)-213(pr)18(ocess,)-221(i.e.)-298(each)]TJ 0 -11.955 Td [(pr)18(ocess)-250(generates)-250(its)-250(own)-250(data.)]TJ 14.944 -12.551 Td [(In)-219(principle)-218(ther)18(e)-219(is)-219(no)-218(speci\002c)-219(or)18(der)-219(in)-218(the)-219(calls)-219(to)]TJ/F59 9.9626 Tf 220.804 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)-225(nor)-219(is)-218(ther)18(e)-219(a)]TJ -282.821 -11.955 Td [(r)18(equir)18(ement)-243(to)-243(build)-243(a)-242(matrix)-243(r)18(ow)-243(in)-243(its)-243(entir)18(ety)-243(befor)18(e)-242(calling)-243(the)-243(r)18(outine;)-245(this)]TJ 0 -11.955 Td [(allows)-364(t)1(he)-364(application)-363(pr)18(ogrammer)-364(to)-363(walk)-364(thr)18(ough)-363(the)-364(discr)18(etization)-363(mesh)]TJ 0 -11.955 Td [(element)-316(by)-317(element,)-333(generating)-316(the)-316(main)-317(part)-316(of)-316(a)-317(given)-316(matrix)-316(r)18(ow)-317(but)-316(also)]TJ 0 -11.956 Td [(contributions)-250(to)-250(the)-250(r)18(ows)-250(corr)18(esponding)-250(to)-250(neighbouring)-250(elements.)]TJ 14.944 -12.55 Td [(Fr)18(om)-328(a)-329(func)1(tional)-329(point)-328(of)-328(view)-328(it)-329(is)-328(even)-328(possible)-328(to)-329(exe)1(cute)-329(one)-328(call)-328(for)]TJ -14.944 -11.955 Td [(each)-204(nonzer)18(o)-204(coef)18(\002cient;)-219(however)-203(this)-204(would)-204(have)-204(a)-204(subst)1(antial)-204(computational)]TJ 0 -11.955 Td [(over)18(head.)-457(It)-299(is)-299(ther)18(efor)18(e)-299(advisable)-299(to)-299(pack)-299(a)-299(certain)-299(amount)-299(of)-299(data)-299(into)-299(each)]TJ 0 -11.956 Td [(call)-303(to)-303(the)-302(insertion)-303(r)18(outine,)-316(say)-303(touching)-303(on)-302(a)-303(few)-303(tens)-303(of)-302(r)18(ows;)-330(the)-302(best)-303(per)18(-)]TJ 0 -11.955 Td [(formng)-342(value)-343(would)-342(depend)-342(on)-342(both)-343(the)-342(ar)18(chitectur)18(e)-342(of)-343(the)-342(computer)-342(being)]TJ 0 -11.955 Td [(used)-223(and)-223(on)-222(the)-223(pr)18(oblem)-223(str)8(uctur)18(e.)-301(At)-222(the)-223(opposite)-223(extr)18(eme,)-228(it)-223(would)-222(be)-223(possi-)]TJ 0 -11.955 Td [(ble)-267(to)-267(generate)-267(the)-267(entir)18(e)-267(part)-267(of)-267(a)-267(coef)18(\002cient)-267(matrix)-267(r)18(esiding)-267(on)-267(a)-267(pr)18(ocess)-267(and)]TJ 0 -11.955 Td [(pass)-275(it)-274(in)-275(a)-275(single)-274(call)-275(to)]TJ/F59 9.9626 Tf 108.421 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)-287(this,)-281(however)74(,)-281(would)-274(entail)-275(a)-275(doubling)-274(of)]TJ -155.494 -11.956 Td [(memory)-250(occupation,)-250(and)-250(thus)-250(would)-250(be)-250(almost)-250(always)-250(far)-250(fr)18(om)-250(optimal.)]TJ +/F62 9.9626 Tf 22.685 0 Td [(the)-250(list)-250(of)-250(pr)18(ocesses)-250(owning)-250(the)-250(indices)-250(in)]TJ/F67 9.9626 Tf 184.993 0 Td [(idx)]TJ/F62 9.9626 Tf 15.691 0 Td [(.)]TJ -198.463 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-408(as:)-627(an)-408(allocatable)-408(one-dimensional)-408(array)-409(of)-408(integers)-408(of)-408(kind)]TJ/F67 9.9626 Tf 0 -11.955 Td [(psb_ipk_)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ -66.749 -23.926 Td [(Note:)-349(this)-269(method)-269(may)-270(or)-269(may)-269(not)-269(actually)-270(r)18(equir)18(e)-269(communications,)-274(depend-)]TJ 0 -11.956 Td [(ing)-283(on)-283(the)-283(ex)1(a)-1(c)1(t)-283(internal)-283(data)-283(storage;)-299(given)-283(that)-283(the)-283(choice)-283(of)-282(storage)-283(may)-283(be)]TJ 0 -11.955 Td [(alter)18(ed)-376(by)-375(r)8(untime)-376(parameters,)-407(it)-376(is)-375(necessary)-376(for)-376(safety)-375(that)-376(this)-375(method)-376(is)]TJ 0 -11.955 Td [(called)-250(by)-250(all)-250(pr)18(ocesses.)]TJ/F59 9.9626 Tf 0 -29.998 Td [(3.1.15)-1000(Named)-250(Constants)]TJ 0 g 0 G + 0 -19.937 Td [(psb)]TJ ET q -1 0 0 1 99.895 139.555 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +1 0 0 1 167.9 514.891 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 5.9776 Tf 110.755 132.683 Td [(3)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(The)-260(subr)18(outine)-260(style)]TJ/F89 7.9701 Tf 74.235 0 Td [(psb)]TJ +/F59 9.9626 Tf 170.889 514.692 Td [(none)]TJ ET q -1 0 0 1 201.687 129.989 cm -[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +1 0 0 1 194.182 514.891 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G BT -/F89 7.9701 Tf 204.228 129.79 Td [(precinit)]TJ/F54 7.9701 Tf 35.946 0 Td [(and)]TJ/F89 7.9701 Tf 15.567 0 Td [(psb)]TJ +/F62 9.9626 Tf 202.152 514.692 Td [(Generic)-250(no-op;)]TJ +0 g 0 G +/F59 9.9626 Tf -51.447 -21.934 Td [(psb)]TJ ET q -1 0 0 1 268.951 129.989 cm -[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +1 0 0 1 167.9 492.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F89 7.9701 Tf 271.492 129.79 Td [(precbl)]TJ/F54 7.9701 Tf 27.478 0 Td [(ar)18(e)-260(still)-260(supported)-260(for)-260(backwar)18(d)-260(compat-)]TJ -199.075 -9.464 Td [(ibility)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 169.365 -29.888 Td [(7)]TJ -0 g 0 G +/F59 9.9626 Tf 170.889 492.758 Td [(root)]TJ ET - -endstream -endobj -875 0 obj -<< -/Length 7173 ->> -stream -0 g 0 G +q +1 0 0 1 189.758 492.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(2.3.1)-1000(User)18(-de\002ned)-250(index)-250(mappings)]TJ/F54 9.9626 Tf 0 -18.964 Td [(PSBLAS)-316(supports)-315(user)18(-de\002ned)-316(global)-316(to)-315(local)-316(index)-316(mappings,)-332(subject)-315(to)-316(the)]TJ 0 -11.955 Td [(constraints)-250(outlined)-250(in)-250(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-250(2.3)]TJ -0 g 0 G - [(:)]TJ -0 g 0 G - 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-250(set)-250(of)-250(indices)-250(owned)-250(locally)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)-250(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 294.494 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.495 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F54 9.9626 Tf 2.775 3.143 Td [(;)]TJ -0 g 0 G - -320.47 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(The)-250(set)-250(of)-250(halo)-250(points)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)]TJ/F52 9.9626 Tf 228.286 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F85 10.3811 Tf 4.836 3.142 Td [(+)]TJ/F54 9.9626 Tf 10.132 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.966 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -3.83 Td [(col)]TJ/F52 5.9776 Tf 12.795 -1.649 Td [(i)]TJ/F54 9.9626 Tf 2.774 5.479 Td [(;)]TJ -320.107 -21.759 Td [(but)-289(otherwise)-289(the)-288(mapping)-289(is)-289(arbitrary)111(.)-426(The)-289(user)-289(application)-289(is)-288(r)18(esponsible)-289(to)]TJ 0 -11.956 Td [(ensur)18(e)-262(consistency)-261(of)-262(this)-262(mapping;)-267(some)-262(err)18(ors)-262(may)-261(be)-262(caught)-262(by)-261(the)-262(library)111(,)]TJ 0 -11.955 Td [(but)-236(this)-236(is)-236(not)-236(guaranteed.)-305(The)-236(application)-236(str)8(uctur)18(e)-236(to)-236(support)-236(this)-236(usage)-236(is)-236(as)]TJ 0 -11.955 Td [(follows:)]TJ -0 g 0 G - 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(Initialize)-190(index)-190(space)-190(with)]TJ/F59 9.9626 Tf 128.098 0 Td [(psb_cdall\050ictx,desc,info,vl=vl,lidx=lidx\051)]TJ/F54 9.9626 Tf -115.645 -11.955 Td [(passing)-292(the)-293(vectors)]TJ/F59 9.9626 Tf 87.882 0 Td [(vl\050:\051)]TJ/F54 9.9626 Tf 29.064 0 Td [(containing)-292(the)-293(set)-292(of)-293(global)-292(indices)-292(owned)-293(by)]TJ -116.946 -11.956 Td [(the)-327(curr)18(ent)-328(pr)18(ocess)-327(and)]TJ/F59 9.9626 Tf 108.493 0 Td [(lidx\050:\051)]TJ/F54 9.9626 Tf 39.873 0 Td [(containing)-327(the)-327(corr)18(esponding)-328(local)-327(in-)]TJ -148.366 -11.955 Td [(dices;)]TJ -0 g 0 G - -12.453 -19.925 Td [(2.)]TJ -0 g 0 G - [-500(Add)-412(the)-412(halo)-412(points)]TJ/F59 9.9626 Tf 109.326 0 Td [(ja\050:\051)]TJ/F54 9.9626 Tf 30.256 0 Td [(and)-412(their)-412(associated)-412(local)-412(indices)]TJ/F59 9.9626 Tf 155.064 0 Td [(lidx\050:\051)]TJ/F54 9.9626 Tf -282.193 -11.955 Td [(with)-250(a\050some\051)-250(call\050s\051)-250(to)]TJ/F59 9.9626 Tf 99.815 0 Td [(psb_cdins\050nz,ja,desc,info,lidx=lidx\051)]TJ/F54 9.9626 Tf 188.292 0 Td [(;)]TJ -0 g 0 G - -300.56 -19.926 Td [(3.)]TJ -0 g 0 G - [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F59 9.9626 Tf 144.387 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)]TJ -0 g 0 G - -191.46 -19.925 Td [(4.)]TJ -0 g 0 G - [-500(Build)-190(the)-190(sparse)-190(matrices)-190(and)-190(vectors,)-202(optionally)-190(making)-190(use)-190(in)]TJ/F59 9.9626 Tf 288.117 0 Td [(psb_spins)]TJ/F54 9.9626 Tf -275.664 -11.955 Td [(and)]TJ/F59 9.9626 Tf 19.958 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 50.163 0 Td [(of)-310(the)]TJ/F59 9.9626 Tf 28.756 0 Td [(local)]TJ/F54 9.9626 Tf 29.243 0 Td [(ar)18(gument)-310(specifying)-310(that)-310(the)-311(indices)-310(in)]TJ/F59 9.9626 Tf 177.734 0 Td [(ia)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(ja)]TJ/F54 9.9626 Tf 12.952 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(irw)]TJ/F54 9.9626 Tf 15.691 0 Td [(,)-250(r)18(espectively)111(,)-250(ar)18(e)-250(alr)18(eady)-250(local)-250(indices.)]TJ/F51 11.9552 Tf -72.906 -29.133 Td [(2.4)-1000(Programming)-250(model)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)-316(PSBLAS)-315(librarary)-316(is)-315(based)-316(on)-315(the)-316(Single)-315(Pr)18(ogram)-316(Multiple)-316(Data)-315(\050SPMD\051)]TJ 0 -11.955 Td [(pr)18(ogramming)-277(model:)-364(each)-277(pr)18(ocess)-277(participatin)1(g)-277(in)-277(the)-277(computation)-277(performs)]TJ 0 -11.955 Td [(the)-250(same)-250(actions)-250(on)-250(a)-250(chunk)-250(of)-250(data.)-310(Parallelism)-250(is)-250(thus)-250(data-driven.)]TJ 14.944 -11.956 Td [(Because)-313(of)-313(this)-312(str)8(uctur)18(e,)-329(many)-313(subr)18(outines)-313(coor)18(dinate)-312(their)-313(action)-313(acr)18(oss)]TJ -14.944 -11.955 Td [(the)-336(various)-336(pr)18(ocesses,)-358(thus)-336(pr)18(oviding)-336(an)-336(implicit)-336(synchr)18(onization)-336(point,)-358(and)]TJ 0 -11.955 Td [(ther)18(efor)18(e)]TJ/F52 9.9626 Tf 43.283 0 Td [(must)]TJ/F54 9.9626 Tf 24.136 0 Td [(be)-367(called)-366(simultaneously)-367(by)-366(all)-367(pr)18(ocesses)-367(participating)-366(in)-367(the)]TJ -67.419 -11.955 Td [(computation.)-525(This)-321(is)-322(certainly)-322(tr)8(ue)-321(for)-322(the)-322(data)-321(allocation)-322(and)-322(assembl)1(y)-322(r)18(ou-)]TJ 0 -11.955 Td [(tines,)-250(for)-250(all)-250(the)-250(computational)-250(r)18(outines)-250(and)-250(for)-250(some)-250(of)-250(the)-250(tools)-250(r)18(outines.)]TJ 14.944 -11.955 Td [(However)-333(ther)18(e)-332(ar)18(e)-333(many)-333(cases)-332(wher)18(e)-333(no)-333(synchr)18(onizati)1(on,)-354(and)-332(indeed)-333(no)]TJ -14.944 -11.956 Td [(communication)-344(among)-343(pr)18(ocesses,)-367(is)-344(implied;)-390(for)-344(instance,)-367(all)-344(the)-343(r)18(outines)-344(in)]TJ 0 -11.955 Td [(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-246(3)]TJ -0 g 0 G - [-247(ar)18(e)-246(only)-246(acting)-246(on)-247(the)-246(local)-246(data)-247(str)8(uctur)18(es,)-247(and)-246(thus)-246(may)-247(be)-246(called)-246(inde-)]TJ 0 -11.955 Td [(pendently)111(.)-306(The)-238(most)-238(important)-237(case)-238(is)-238(that)-238(of)-238(the)-238(coef)18(\002cient)-237(insertion)-238(r)18(outines:)]TJ 0 -11.955 Td [(since)-231(the)-231(number)-231(of)-230(coef)18(\002cients)-231(in)-231(the)-231(sparse)-231(and)-231(dense)-230(matrices)-231(varies)-231(among)]TJ 0 -11.955 Td [(the)-248(pr)18(ocessors,)-249(and)-249(since)-248(the)-249(user)-248(is)-249(fr)18(ee)-248(to)-249(choose)-248(an)-249(arbitrary)-248(or)18(der)-249(in)-248(builid-)]TJ 0 -11.955 Td [(ing)-250(the)-250(matrix)-250(entries,)-250(these)-250(r)18(outines)-250(cannot)-250(imply)-250(a)-250(synchr)18(onization.)]TJ 14.944 -11.956 Td [(Thr)18(oughout)-250(this)-250(user)-74('s)-250(guide)-250(each)-250(subr)18(outine)-250(will)-250(be)-250(clearly)-250(indicated)-250(as:)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(Synchronous:)]TJ -0 g 0 G -/F54 9.9626 Tf 67.247 0 Td [(must)-307(be)-307(called)-308(simultaneously)-307(by)-307(all)-307(the)-308(pr)18(ocesses)-307(in)-307(the)-307(r)18(ele-)]TJ -42.341 -11.955 Td [(vant)-250(communication)-250(context;)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(Asynchronous:)]TJ +/F62 9.9626 Tf 197.728 492.758 Td [(Default)-250(r)18(oot)-250(pr)18(ocess)-250(for)-250(br)18(oadcast)-250(and)-250(scatter)-250(operations;)]TJ 0 g 0 G -/F54 9.9626 Tf 73.334 0 Td [(may)-250(be)-250(called)-250(in)-250(a)-250(totally)-250(independent)-250(manner)74(.)]TJ +/F59 9.9626 Tf -47.023 -21.934 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 471.023 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 470.824 Td [(nohalo)]TJ +ET +q +1 0 0 1 203.038 471.023 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G - 96.031 -56.634 Td [(8)]TJ +BT +/F62 9.9626 Tf 211.008 470.824 Td [(Do)-250(not)-250(fetch)-250(halo)-250(elements;)]TJ 0 g 0 G +/F59 9.9626 Tf -60.303 -21.934 Td [(psb)]TJ ET - -endstream -endobj -886 0 obj -<< -/Length 8187 ->> -stream -0 g 0 G +q +1 0 0 1 167.9 449.089 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 448.89 Td [(halo)]TJ +ET +q +1 0 0 1 191.412 449.089 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G BT -/F51 14.3462 Tf 99.895 705.784 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ/F54 9.9626 Tf 0 -23.091 Td [(In)-289(this)-288(chapter)-289(we)-289(illustrate)-288(the)-289(data)-289(str)8(uctur)18(es)-288(used)-289(for)-289(de\002nition)-289(of)-288(r)18(outines)]TJ 0 -11.956 Td [(interfaces.)-622(They)-354(include)-354(data)-354(str)8(uctur)18(es)-354(for)-354(sparse)-354(matrices,)-380(communication)]TJ 0 -11.955 Td [(descriptors)-250(and)-250(pr)18(econditioners.)]TJ 14.944 -12.156 Td [(All)-248(the)-248(data)-249(types)-248(and)-248(the)-248(basic)-248(subr)18(outine)-249(interfaces)-248(r)18(elated)-248(to)-248(descriptors)]TJ -14.944 -11.955 Td [(and)-345(sparse)-345(matrices)-344(ar)18(e)-345(de\002ned)-345(in)-345(the)-345(module)]TJ/F59 9.9626 Tf 213.323 0 Td [(psb_base_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(;)-392(this)-345(will)-345(have)]TJ -276.087 -11.955 Td [(to)-381(be)-381(included)-381(by)-381(every)-381(user)-381(subr)18(outine)-381(that)-381(makes)-381(u)1(se)-381(of)-381(the)-381(library)111(.)-703(The)]TJ 0 -11.956 Td [(pr)18(econditioners)-250(ar)18(e)-250(de\002ned)-250(in)-250(the)-250(module)]TJ/F59 9.9626 Tf 187.993 0 Td [(psb_prec_mod)]TJ/F54 9.9626 Tf -173.049 -12.156 Td [(Integer)74(,)-433(r)18(eal)-396(and)-397(complex)-396(data)-396(types)-397(ar)18(e)-396(parametrized)-396(with)-397(a)-396(kind)-396(type)]TJ -14.944 -11.955 Td [(de\002ned)-250(in)-250(the)-250(library)-250(as)-250(follows:)]TJ +/F62 9.9626 Tf 199.382 448.89 Td [(Fetch)-250(halo)-250(elements)-250(fr)18(om)-250(neighbouring)-250(pr)18(ocesses;)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.528 Td [(psb)]TJ +/F59 9.9626 Tf -48.677 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 566.32 cm +1 0 0 1 167.9 427.155 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 566.121 Td [(spk)]TJ +/F59 9.9626 Tf 170.889 426.956 Td [(sum)]TJ ET q -1 0 0 1 137.275 566.32 cm +1 0 0 1 190.854 427.155 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 145.245 566.121 Td [(Kind)-407(parameter)-406(for)-407(short)-406(pr)18(ecision)-407(r)18(eal)-406(and)-407(complex)-406(data;)-485(corr)18(e-)]TJ -20.443 -11.955 Td [(sponds)-250(to)-250(a)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 52.901 0 Td [(REAL)]TJ +/F62 9.9626 Tf 198.824 426.956 Td [(Sum)-250(overlapped)-250(elements)]TJ 0 g 0 G -/F54 9.9626 Tf 23.412 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(4)-250(bytes;)]TJ -0 g 0 G -/F51 9.9626 Tf -101.22 -20.73 Td [(psb)]TJ +/F59 9.9626 Tf -48.119 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 533.635 cm +1 0 0 1 167.9 405.221 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 533.436 Td [(dpk)]TJ +/F59 9.9626 Tf 170.889 405.022 Td [(avg)]TJ ET q -1 0 0 1 138.939 533.635 cm +1 0 0 1 187.546 405.221 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 146.909 533.436 Td [(Kind)-420(parameter)-421(for)-420(long)-420(pr)18(ecision)-421(r)18(eal)-420(and)-420(complex)-421(data;)-505(corr)18(e-)]TJ -22.107 -11.955 Td [(sponds)-250(to)-250(a)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 52.901 0 Td [(DOUBLE)-525(PRECISION)]TJ +/F62 9.9626 Tf 195.517 405.022 Td [(A)92(verage)-250(overlapped)-250(elements)]TJ 0 g 0 G -/F54 9.9626 Tf 86.176 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(8)-250(bytes;)]TJ -0 g 0 G -/F51 9.9626 Tf -163.984 -20.73 Td [(psb)]TJ +/F59 9.9626 Tf -44.812 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 500.951 cm +1 0 0 1 167.9 383.288 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 500.751 Td [(mpk)]TJ +/F59 9.9626 Tf 170.889 383.088 Td [(comm)]TJ ET q -1 0 0 1 141.708 500.951 cm +1 0 0 1 199.163 383.288 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.152 383.088 Td [(halo)]TJ +ET +q +1 0 0 1 222.674 383.288 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 149.678 500.751 Td [(Kind)-250(parameter)-250(for)-250(4-bytes)-250(integer)-250(data,)-250(as)-250(is)-250(always)-250(used)-250(by)-250(MPI;)]TJ +/F62 9.9626 Tf 230.645 383.088 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F67 9.9626 Tf 124.92 0 Td [(halo_index)]TJ/F62 9.9626 Tf 54.794 0 Td [(list;)]TJ 0 g 0 G -/F51 9.9626 Tf -49.783 -20.729 Td [(psb)]TJ +/F59 9.9626 Tf -259.654 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 480.221 cm +1 0 0 1 167.9 361.354 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 480.022 Td [(epk)]TJ +/F59 9.9626 Tf 170.889 361.154 Td [(comm)]TJ ET q -1 0 0 1 137.833 480.221 cm +1 0 0 1 199.163 361.354 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.152 361.154 Td [(ext)]TJ +ET +q +1 0 0 1 216.029 361.354 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 145.803 480.022 Td [(Kind)-364(parameter)-363(for)-364(8-bytes)-363(integer)-364(data,)-391(a)-1(s)-363(is)-364(always)-363(used)-364(by)-363(the)]TJ/F59 9.9626 Tf -21.001 -11.955 Td [(sizeof)]TJ/F54 9.9626 Tf 33.873 0 Td [(methods;)]TJ +/F62 9.9626 Tf 224 361.154 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F67 9.9626 Tf 124.92 0 Td [(ext_index)]TJ/F62 9.9626 Tf 49.564 0 Td [(list;)]TJ 0 g 0 G -/F51 9.9626 Tf -58.78 -20.73 Td [(psb)]TJ +/F59 9.9626 Tf -247.779 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 447.537 cm +1 0 0 1 167.9 339.42 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 447.337 Td [(ipk)]TJ +/F59 9.9626 Tf 170.889 339.22 Td [(comm)]TJ ET q -1 0 0 1 136.169 447.537 cm +1 0 0 1 199.163 339.42 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.152 339.22 Td [(ovr)]TJ +ET +q +1 0 0 1 217.703 339.42 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 144.139 447.337 Td [(Kind)-398(parameter)-399(for)-398(\223local\224)-398(integer)-399(indices)-398(and)-398(data;)-473(with)-398(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(a)-250(4)-250(bytes)-250(integer;)]TJ +/F62 9.9626 Tf 225.673 339.22 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F67 9.9626 Tf 124.92 0 Td [(ovrlap_index)]TJ/F62 9.9626 Tf 65.255 0 Td [(list;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.729 Td [(psb)]TJ +/F59 9.9626 Tf -265.143 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 414.852 cm +1 0 0 1 167.9 317.486 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 414.653 Td [(lpk)]TJ +/F59 9.9626 Tf 170.889 317.286 Td [(comm)]TJ ET q -1 0 0 1 136.169 414.852 cm +1 0 0 1 199.163 317.486 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.152 317.286 Td [(mov)]TJ +ET +q +1 0 0 1 222.684 317.486 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 144.139 414.653 Td [(Kind)-328(parameter)-329(for)-328(\223global\224)-328(integer)-329(indices)-328(and)-328(data;)-368(with)-328(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(an)-250(8)-250(bytes)-250(integer;)]TJ -24.907 -20.529 Td [(The)-205(integer)-205(kinds)-205(for)-206(local)-205(and)-205(global)-205(indices)-205(can)-205(be)-206(chosen)-205(at)-205(con\002gur)18(e)-205(time)-205(to)]TJ 0 -11.955 Td [(hold)-266(4)-267(or)-266(8)-267(bytes,)-270(with)-266(the)-267(global)-266(indices)-266(at)-267(least)-266(as)-267(lar)18(ge)-266(as)-266(the)-267(local)-266(ones.)-359(T)92(o-)]TJ 0 -11.955 Td [(gether)-219(with)-220(the)-219(classes)-219(attributes)-219(we)-219(also)-220(discuss)-219(their)-219(methods.)-300(Most)-219(methods)]TJ 0 -11.955 Td [(detailed)-272(her)18(e)-272(only)-273(act)-272(on)-272(the)-272(local)-272(variable,)-278(i.e.)-376(their)-273(action)-272(is)-272(pur)18(ely)-272(local)-272(and)]TJ 0 -11.956 Td [(asynchr)18(onous)-359(unless)-360(otherwise)-359(stated.)-638(The)-359(list)-360(of)-359(methods)-359(her)18(e)-360(is)-359(not)-359(com-)]TJ 0 -11.955 Td [(pletely)-336(exhaustive;)-380(many)-336(methods,)-358(especially)-336(those)-336(that)-336(alter)-337(the)-336(contents)-336(of)]TJ 0 -11.955 Td [(the)-299(various)-298(objects,)-311(ar)18(e)-299(usually)-299(not)-299(needed)-298(by)-299(the)-299(end-user)74(,)-311(and)-298(ther)18(efor)18(e)-299(ar)18(e)]TJ 0 -11.955 Td [(described)-250(in)-250(the)-250(developer)-74('s)-250(documentation.)]TJ/F51 11.9552 Tf 0 -30.277 Td [(3.1)-1000(Descriptor)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -19.353 Td [(All)-241(the)-241(gener)1(a)-1(l)-240(matrix)-241(informations)-241(and)-240(elements)-241(to)-241(be)-241(exchanged)-240(among)-241(pr)18(o-)]TJ 0 -11.956 Td [(cesses)-402(ar)18(e)-401(stor)18(ed)-402(within)-401(a)-402(data)-401(str)8(uctur)18(e)-402(of)-401(the)-402(type)]TJ/F59 9.9626 Tf 242.575 0 Td [(psb)]TJ +/F62 9.9626 Tf 230.654 317.286 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F67 9.9626 Tf 124.921 0 Td [(ovr_mst_idx)]TJ/F62 9.9626 Tf 60.024 0 Td [(list;)]TJ/F59 11.9552 Tf -264.894 -31.99 Td [(3.2)-1000(Sparse)-250(Matrix)-250(class)]TJ/F62 9.9626 Tf 0 -19.937 Td [(The)]TJ/F67 9.9626 Tf 19.623 0 Td [(psb)]TJ ET q -1 0 0 1 358.788 237.097 cm +1 0 0 1 186.647 265.558 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 361.927 236.897 Td [(desc)]TJ +/F67 9.9626 Tf 189.785 265.359 Td [(Tspmat)]TJ ET q -1 0 0 1 383.476 237.097 cm +1 0 0 1 221.795 265.558 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 386.614 236.897 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)-765(Every)]TJ -307.64 -11.955 Td [(str)8(uctur)18(e)-371(of)-370(this)-371(type)-370(is)-371(associated)-370(with)-371(a)-371(discr)18(etization)-370(pattern)-371(and)-370(enables)]TJ 0 -11.955 Td [(data)-301(communications)-302(and)-301(other)-301(operations)-302(that)-301(ar)18(e)-302(nece)1(ssa)-1(r)1(y)-302(for)-301(implement-)]TJ 0 -11.955 Td [(ing)-250(the)-250(various)-250(algorithms)-250(of)-250(inter)18(est)-250(to)-250(us.)]TJ 14.944 -12.156 Td [(The)-265(data)-266(str)8(uctur)18(e)-265(itself)]TJ/F59 9.9626 Tf 107.448 0 Td [(psb_desc_type)]TJ/F54 9.9626 Tf 70.638 0 Td [(can)-265(be)-266(tr)18(eated)-265(as)-265(an)-265(opaque)-266(object)]TJ -193.03 -11.955 Td [(handled)-321(via)-321(the)-321(tools)-321(r)18(outines)-321(of)-321(Sec.)]TJ +/F67 9.9626 Tf 224.933 265.359 Td [(type)]TJ/F62 9.9626 Tf 23.868 0 Td [(class)-296(contains)-295(all)-296(information)-296(about)-296(the)-295(local)-296(portion)-296(of)]TJ -98.096 -11.955 Td [(the)-200(sparse)-199(matrix)-200(and)-199(its)-200(storage)-200(mode.)-293(Its)-199(design)-200(is)-200(based)-199(on)-200(the)-200(ST)74(A)74(TE)-199(design)]TJ 0 -11.955 Td [(pattern)-256([)]TJ +1 0 0 rg 1 0 0 RG + [(12)]TJ +0 g 0 G + [(])-255(as)-256(detailed)-256(in)-256([)]TJ +1 0 0 rg 1 0 0 RG + [(10)]TJ +0 g 0 G + [(];)-258(the)-256(type)-256(declaration)-255(is)-256(shown)-256(in)-256(\002gur)18(e)]TJ 0 0 1 rg 0 0 1 RG - [-321(6)]TJ + [-255(2)]TJ 0 g 0 G - [-321(or)-321(the)-321(query)-321(r)18(outines)-321(detailed)-321(below;)]TJ 0 -11.956 Td [(nevertheless)-250(we)-250(include)-250(her)18(e)-250(a)-250(description)-250(for)-250(the)-250(curious)-250(r)18(eader)74(.)]TJ 14.944 -12.156 Td [(First)-229(we)-228(describe)-228(the)]TJ/F59 9.9626 Tf 92.473 0 Td [(psb_indx_map)]TJ/F54 9.9626 Tf 65.04 0 Td [(type.)-303(This)-228(is)-229(a)-228(data)-229(str)8(uctur)18(e)-228(that)-229(keeps)]TJ -172.457 -11.955 Td [(track)-250(of)-250(a)-250(certain)-250(number)-250(of)-250(basic)-250(issues)-250(such)-250(as:)]TJ + [-256(wher)18(e)]TJ/F67 9.9626 Tf 0 -11.956 Td [(T)]TJ/F62 9.9626 Tf 7.721 0 Td [(is)-250(a)-250(placeholder)-250(for)-250(the)-250(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ 0 g 0 G - 13.888 -20.528 Td [(\225)]TJ +/F59 9.9626 Tf -7.721 -21.431 Td [(S)]TJ 0 g 0 G - [-500(The)-250(value)-250(of)-250(the)-250(communication)-250(context;)]TJ +/F62 9.9626 Tf 11.068 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ 0 g 0 G - 155.477 -29.888 Td [(9)]TJ +/F59 9.9626 Tf -11.068 -21.934 Td [(D)]TJ +0 g 0 G +/F62 9.9626 Tf 13.28 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0 g 0 G +/F59 9.9626 Tf -13.28 -21.934 Td [(C)]TJ +0 g 0 G +/F62 9.9626 Tf 12.174 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ +0 g 0 G +/F59 9.9626 Tf -12.174 -21.934 Td [(Z)]TJ +0 g 0 G +/F62 9.9626 Tf 11.626 0 Td [(Double)-250(pr)18(ecision)-250(complex;)]TJ +0 g 0 G +/F59 9.9626 Tf -11.626 -21.934 Td [(LS,LD,LC,LZ)]TJ +0 g 0 G +/F62 9.9626 Tf 65.026 0 Td [(Same)-214(numeric)-214(type)-215(as)-214(above,)-221(but)-214(with)]TJ/F67 9.9626 Tf 168.016 0 Td [(psb_lpk_)]TJ/F62 9.9626 Tf 43.978 0 Td [(integer)-214(indices.)]TJ +0 g 0 G + -110.146 -29.888 Td [(16)]TJ 0 g 0 G ET endstream endobj -894 0 obj +1050 0 obj << -/Length 6070 +/Length 7385 >> stream 0 g 0 G 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 164.593 706.129 Td [(\225)]TJ -0 g 0 G - [-500(The)-236(number)-236(of)-235(indices)-236(in)-236(the)-236(index)-236(space,)-238(i.e.)-306(global)-236(number)-235(of)-236(r)18(ows)-236(and)]TJ 11.018 -11.955 Td [(columns)-250(of)-250(a)-250(sparse)-250(matrix;)]TJ -0 g 0 G - -11.018 -20.409 Td [(\225)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +q +1 0 0 1 99.895 671.26 cm +0 0 343.711 38.854 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(The)-250(local)-250(set)-250(of)-250(indices,)-250(including:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 22.974 -20.408 Td [(\226)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 112.299 699.454 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(local)-250(indices)-250(\050and)-250(local)-250(r)18(ows\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -9.962 -16.182 Td [(\226)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(halo)-250(indices)-250(\050and)-250(ther)18(efor)18(e)-250(local)-250(columns\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -9.962 -16.181 Td [(\226)]TJ + [-525(psb_Tspmat_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(global)-250(indices)-250(corr)18(esponding)-250(to)-250(the)-250(local)-250(ones.)]TJ -46.824 -20.409 Td [(Ther)18(e)-301(ar)18(e)-301(many)-301(dif)18(fer)18(ent)-301(schemes)-301(for)-301(storing)-301(these)-301(data;)-326(ther)18(efor)18(e)-301(ther)18(e)-301(ar)18(e)-301(a)]TJ 0 -11.955 Td [(number)-299(of)-299(types)-300(extending)-299(the)-299(base)-299(one,)-312(and)-299(the)-299(descriptor)-300(str)8(uctur)18(e)-299(holds)-299(a)]TJ 0 -11.955 Td [(polymorphic)-212(object)-213(whose)-212(dynamic)-212(type)-213(can)-212(be)-212(any)-213(of)-212(the)-212(extended)-213(types.)-297(The)]TJ 0 -11.955 Td [(methods)-250(associated)-250(with)-250(this)-250(data)-250(type)-250(answer)-250(the)-250(following)-250(queries:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(class)]TJ 0 g 0 G - 13.888 -20.288 Td [(\225)]TJ + [(\050psb_T_base_sparse_mat\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(For)-411(a)-412(given)-411(set)-412(of)-411(local)-411(indices,)-452(\002nd)-412(the)-411(corr)18(esponding)-411(indices)-412(in)-411(the)]TJ 11.018 -11.955 Td [(global)-250(numbering;)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - -11.018 -20.408 Td [(\225)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(For)-357(a)-357(given)-357(set)-357(of)-358(global)-357(indices,)-384(\002nd)-357(the)-357(corr)18(esponding)-357(indices)-357(in)-357(the)]TJ 11.018 -11.955 Td [(local)-250(numbering,)-250(if)-250(any)111(,)-250(or)-250(r)18(eturn)-250(an)-250(invalid)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ 0 g 0 G - -11.018 -20.409 Td [(\225)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(Add)-250(a)-250(global)-250(index)-250(to)-250(the)-250(set)-250(of)-250(halo)-250(indices;)]TJ + [-525(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.408 Td [(\225)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(type)]TJ 0 g 0 G - [-500(Find)-250(the)-250(pr)18(ocess)-250(owner)-250(of)-250(each)-250(member)-250(of)-250(a)-250(set)-250(of)-250(global)-250(indices.)]TJ -13.888 -20.288 Td [(All)-295(methods)-295(but)-294(the)-295(last)-295(ar)18(e)-295(pur)18(ely)-295(local;)-317(the)-295(last)-295(method)-294(potentially)-295(r)18(equir)18(es)]TJ 0 -11.955 Td [(communication)-418(among)-419(pr)18(ocesses,)-460(and)-419(thus)-418(is)-418(a)-419(synchr)18(onous)-418(method.)-815(The)]TJ 0 -11.955 Td [(choice)-244(of)-244(a)-244(speci\002c)-244(dynamic)-244(type)-244(for)-244(the)-244(index)-244(map)-244(is)-244(made)-244(at)-244(the)-244(time)-244(the)-244(de-)]TJ 0 -11.955 Td [(scriptor)-210(is)-211(init)1(ially)-211(allocated,)-218(accor)18(ding)-210(to)-211(t)1(he)-211(mode)-210(of)-210(initialization)-211(\050see)-210(also)]TJ -0 0 1 rg 0 0 1 RG - [-210(6)]TJ + [-1050(psb_Tspmat_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [(\051.)]TJ 14.944 -12.076 Td [(The)-250(descriptor)-250(contents)-250(ar)18(e)-250(as)-250(follows:)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -20.288 Td [(indxmap)]TJ +/F62 9.9626 Tf 4.295 -41.429 Td [(Listing)-250(2:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(sparse)-250(matrix.)]TJ -16.699 -32.661 Td [(The)-190(actual)-190(data)-190(is)-190(contained)-190(in)-190(the)-190(polymorphic)-190(component)]TJ/F67 9.9626 Tf 259.484 0 Td [(a%a)]TJ/F62 9.9626 Tf 17.584 0 Td [(of)-190(type)]TJ/F67 9.9626 Tf 32.089 0 Td [(psb)]TJ +ET +q +1 0 0 1 425.371 603.645 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 428.509 603.446 Td [(T)]TJ +ET +q +1 0 0 1 434.367 603.645 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 437.505 603.446 Td [(base)]TJ +ET +q +1 0 0 1 459.054 603.645 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 462.193 603.446 Td [(sparse)]TJ +ET +q +1 0 0 1 494.202 603.645 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 497.341 603.446 Td [(mat)]TJ/F62 9.9626 Tf 15.691 0 Td [(;)]TJ -413.137 -11.955 Td [(its)-306(speci)1(\002c)-306(layout)-305(can)-306(be)-305(chosen)-306(dynamically)-305(among)-306(the)-305(pr)18(ede\002ned)-306(types,)-319(or)]TJ 0 -11.955 Td [(an)-305(entir)18(ely)-305(new)-305(storage)-305(layout)-305(can)-305(be)-305(implemented)-304(and)-305(passed)-305(to)-305(the)-305(library)]TJ 0 -11.955 Td [(at)-231(r)8(untime)-231(via)-231(the)]TJ/F67 9.9626 Tf 80.145 0 Td [(psb_spasb)]TJ/F62 9.9626 Tf 49.377 0 Td [(r)18(outine.)-304(The)-231(following)-231(very)-231(common)-231(formats)-231(ar)18(e)]TJ -129.522 -11.955 Td [(pr)18(ecompiled)-250(in)-250(PSBLAS)-250(and)-250(thus)-250(ar)18(e)-250(always)-250(available:)]TJ 0 g 0 G -/F54 9.9626 Tf 45.38 0 Td [(A)-190(polymorphic)-190(variable)-190(of)-190(a)-190(type)-190(that)-190(is)-190(any)-190(extension)-190(of)-190(the)-190(indx)]TJ +/F59 9.9626 Tf 0 -19.889 Td [(psb)]TJ ET q -1 0 0 1 478.491 370.98 cm +1 0 0 1 117.091 535.936 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 481.48 370.78 Td [(map)]TJ -305.869 -11.955 Td [(type)-250(described)-250(above.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -32.363 Td [(halo)]TJ +/F59 9.9626 Tf 120.08 535.737 Td [(T)]TJ ET q -1 0 0 1 171.228 326.661 cm +1 0 0 1 127.322 535.936 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.217 326.462 Td [(index)]TJ -0 g 0 G -/F54 9.9626 Tf 30.435 0 Td [(A)-331(list)-332(of)-331(the)-332(halo)-331(and)-332(boundary)-331(elements)-332(for)-331(the)-332(curr)18(ent)-331(pr)18(ocess)]TJ -29.041 -11.955 Td [(to)-247(be)-247(exchanged)-247(with)-246(other)-247(pr)18(ocesses;)-248(for)-247(each)-247(pr)18(ocesses)-247(with)-247(whic)1(h)-247(it)-247(is)]TJ 0 -11.956 Td [(necessary)-250(to)-250(communicate:)]TJ -0 g 0 G - 9.465 -20.408 Td [(1.)]TJ -0 g 0 G - [-500(Pr)18(ocess)-250(identi\002er;)]TJ -0 g 0 G - 0 -16.182 Td [(2.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ -0 g 0 G - 0 -16.181 Td [(3.)]TJ -0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ -0 g 0 G - 0 -16.182 Td [(4.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -0 g 0 G - 0 -16.182 Td [(5.)]TJ -0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -20.408 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.409 Td [(ext)]TJ +/F59 9.9626 Tf 130.311 535.737 Td [(coo)]TJ ET q -1 0 0 1 164.583 176.799 cm +1 0 0 1 146.411 535.936 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 167.572 176.599 Td [(index)]TJ -0 g 0 G -/F54 9.9626 Tf 30.435 0 Td [(A)-216(list)-217(of)-216(element)-217(indices)-216(to)-217(be)-216(exchanged)-217(to)-216(implement)-217(the)-216(mapping)]TJ -22.396 -11.955 Td [(between)-250(a)-250(base)-250(descriptor)-250(and)-250(a)-250(descriptor)-250(with)-250(overlap.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.408 Td [(ovrlap)]TJ +/F59 9.9626 Tf 149.399 535.737 Td [(sparse)]TJ ET q -1 0 0 1 180.642 132.48 cm +1 0 0 1 178.769 535.936 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 183.631 132.281 Td [(index)]TJ +/F59 9.9626 Tf 181.758 535.737 Td [(mat)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(A)-259(list)-259(of)-258(the)-259(overlap)-259(elements)-259(for)-259(the)-258(curr)18(ent)-259(pr)18(ocess,)-261(or)18(ganized)]TJ -38.456 -11.955 Td [(in)-250(gr)18(oups)-250(like)-250(the)-250(pr)18(evious)-250(vector:)]TJ -0 g 0 G - 141.968 -29.888 Td [(10)]TJ +/F62 9.9626 Tf 22.137 0 Td [(Coor)18(dinate)-250(storage;)]TJ 0 g 0 G +/F59 9.9626 Tf -104 -19.907 Td [(psb)]TJ ET - -endstream -endobj -905 0 obj -<< -/Length 5988 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 117.091 516.03 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F54 9.9626 Tf 134.267 706.129 Td [(1.)]TJ -0 g 0 G - [-500(Pr)18(ocess)-250(identi\002er;)]TJ -0 g 0 G - 0 -16.693 Td [(2.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ -0 g 0 G - 0 -16.694 Td [(3.)]TJ -0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ -0 g 0 G - 0 -16.693 Td [(4.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -0 g 0 G - 0 -16.693 Td [(5.)]TJ -0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(ovr)]TJ +/F59 9.9626 Tf 120.08 515.83 Td [(T)]TJ ET q -1 0 0 1 115.447 596.693 cm +1 0 0 1 127.322 516.03 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 118.436 596.494 Td [(mst)]TJ +/F59 9.9626 Tf 130.311 515.83 Td [(csr)]TJ ET q -1 0 0 1 135.631 596.693 cm +1 0 0 1 143.631 516.03 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 138.62 596.494 Td [(idx)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(A)-331(list)-332(to)-331(r)18(etrieve)-331(the)-332(value)-331(of)-331(each)-332(overlap)-331(element)-331(fr)18(om)-332(the)-331(r)18(e-)]TJ -33.185 -11.956 Td [(spective)-250(master)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(ovrlap)]TJ +/F59 9.9626 Tf 146.62 515.83 Td [(sparse)]TJ ET q -1 0 0 1 129.833 551.351 cm +1 0 0 1 175.989 516.03 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 132.822 551.152 Td [(elem)]TJ -0 g 0 G -/F54 9.9626 Tf 27.118 0 Td [(For)-250(all)-250(overlap)-250(points)-250(belonging)-250(to)-250(th)-250(ecurr)18(ent)-250(pr)18(ocess:)]TJ -0 g 0 G - -25.673 -21.431 Td [(1.)]TJ -0 g 0 G - [-500(Overlap)-250(point)-250(index;)]TJ -0 g 0 G - 0 -16.693 Td [(2.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(pr)18(ocesses)-250(sharing)-250(that)-250(overlap)-250(points;)]TJ -0 g 0 G - 0 -16.694 Td [(3.)]TJ +/F59 9.9626 Tf 178.978 515.83 Td [(mat)]TJ 0 g 0 G - [-500(Index)-250(of)-250(a)-250(\223master)-74(\224)-250(pr)18(ocess:)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(an)-250(allocatable)-250(integer)-250(array)-250(of)-250(rank)-250(two.)]TJ +/F62 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(r)18(ows;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(bnd)]TJ +/F59 9.9626 Tf -101.22 -19.906 Td [(psb)]TJ ET q -1 0 0 1 118.755 453.671 cm +1 0 0 1 117.091 496.123 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 121.743 453.472 Td [(elem)]TJ -0 g 0 G -/F54 9.9626 Tf 27.119 0 Td [(A)-235(list)-235(of)-235(all)-235(boundary)-235(points,)-238(i.e.)-305(points)-235(that)-235(have)-235(a)-235(connection)-235(with)]TJ -24.06 -11.955 Td [(other)-250(pr)18(ocesses.)]TJ -24.907 -21.055 Td [(The)-393(Fortran)-394(2003)-393(declaration)-394(for)]TJ/F59 9.9626 Tf 151.232 0 Td [(psb_desc_type)]TJ/F54 9.9626 Tf 71.913 0 Td [(str)8(uctur)18(es)-393(is)-394(as)-393(follows:)-597(A)]TJ -0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F59 9.9626 Tf 120.08 495.924 Td [(T)]TJ ET q -1 0 0 1 99.895 294.955 cm -0 0 343.711 104.608 re f +1 0 0 1 127.322 496.123 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 102.884 388.902 Td [(type)]TJ -0 g 0 G - [-525(psb_desc_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 18.829 -10.959 Td [(class)]TJ -0 g 0 G - [(\050psb_indx_map\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ -0 g 0 G - [-525(indxmap)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ -0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ -0 g 0 G - [-525(v_halo_index)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.958 Td [(type)]TJ +/F59 9.9626 Tf 130.311 495.924 Td [(csc)]TJ +ET +q +1 0 0 1 144.179 496.123 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.168 495.924 Td [(sparse)]TJ +ET +q +1 0 0 1 176.537 496.123 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 179.526 495.924 Td [(mat)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F62 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(columns;)]TJ -101.768 -19.889 Td [(The)-295(inner)-295(sparse)-294(matrix)-295(has)-295(an)-295(associated)-294(state,)-306(which)-295(can)-295(take)-294(the)-295(following)]TJ 0 -11.955 Td [(values:)]TJ 0 g 0 G - [-525(v_ext_index)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ +/F59 9.9626 Tf 0 -19.888 Td [(Build:)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F62 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.955 Td [(this)-250(state)-250(it)-250(is)-250(possible)-250(to)-250(add)-250(nonzer)18(o)-250(entries.)]TJ 0 g 0 G - [-525(v_ovrlap_index)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ +/F59 9.9626 Tf -24.907 -19.907 Td [(Assembled:)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F62 9.9626 Tf 58.381 0 Td [(State)-324(enter)18(ed)-325(after)-324(the)-325(assembly;)-362(computations)-324(using)-325(the)-324(sparse)]TJ -33.474 -11.955 Td [(matrix,)-250(such)-250(as)-250(matrix-vector)-250(pr)18(oducts,)-250(ar)18(e)-250(only)-250(possible)-250(in)-250(this)-250(state;)]TJ 0 g 0 G - [-525(v_ovr_mst_idx)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG - 0 -10.959 Td [(integer)]TJ +/F59 9.9626 Tf -24.907 -19.907 Td [(Update:)]TJ 0 g 0 G - [(,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F62 9.9626 Tf 40.678 0 Td [(State)-219(enter)18(ed)-220(after)-219(a)-219(r)18(einitalization;)-230(this)-219(is)-219(used)-220(to)-219(handle)-219(applications)]TJ -15.771 -11.955 Td [(in)-288(which)-288(the)-288(same)-288(sparsity)-289(pattern)-288(is)-288(used)-288(multiple)-288(times)-288(with)-288(dif)18(fer)18(ent)]TJ 0 -11.955 Td [(coef)18(\002cients.)-298(In)-213(this)-214(state)-213(it)-214(is)-213(only)-214(possible)-213(to)-214(enter)-213(coef)18(\002cients)-214(for)-213(alr)18(eady)]TJ 0 -11.956 Td [(existing)-250(nonzer)18(o)-250(entries.)]TJ -24.907 -19.888 Td [(The)-293(only)-292(storage)-293(variant)-292(supporting)-293(the)-293(build)-292(state)-293(is)-292(COO;)-293(all)-293(other)-292(variants)]TJ 0 -11.955 Td [(ar)18(e)-250(obtained)-250(by)-250(conversion)-250(to/fr)18(om)-250(it.)]TJ/F59 9.9626 Tf 0 -27.132 Td [(3.2.1)-1000(Sparse)-250(Matrix)-250(Methods)]TJ 0 -18.964 Td [(3.2.2)-1000(get)]TJ +ET +q +1 0 0 1 144.219 266.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 266.663 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ 0 g 0 G - [-525(ovrlap_elem\050:,:\051)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG - 0 -10.959 Td [(integer)]TJ +/F67 9.9626 Tf -47.313 -18.963 Td [(nr)-525(=)-525(a%get_nrows\050\051)]TJ 0 g 0 G - [(,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F59 9.9626 Tf 0 -21.872 Td [(T)90(ype:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-525(bnd_elem\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -18.829 -10.959 Td [(end)-525(type)]TJ +/F59 9.9626 Tf -29.828 -19.907 Td [(On)-250(Entry)]TJ 0 g 0 G - [-525(psb_desc_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G + 0 -19.907 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf -2.989 -41.43 Td [(Listing)-259(1:)-327(The)-259(PSBLAS)-259(de\002ned)-259(data)-258(type)-259(that)-259(contains)-259(the)-258(communication)-259(de-)]TJ 0 -11.955 Td [(scriptor)74(.)]TJ 0 -25.259 Td [(communication)-319(descriptor)-320(associated)-319(with)-319(a)-320(sparse)-319(matrix)-320(has)-319(a)-319(state,)-337(which)]TJ 0 -11.955 Td [(can)-250(take)-250(the)-250(following)-250(values:)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.054 Td [(Build:)]TJ + -56.339 -33.827 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.956 Td [(this)-220(state)-220(it)-220(is)-220(possible)-220(to)-220(add)-220(communication)-220(r)18(equir)18(ements)-220(among)-220(dif)18(fer)18(-)]TJ 0 -11.955 Td [(ent)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(Assembled:)]TJ + 0 -19.906 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 58.381 0 Td [(State)-308(enter)18(ed)-308(after)-308(the)-309(assembly;)-337(computations)-308(using)-308(the)-308(associ-)]TJ -33.474 -11.955 Td [(ated)-310(sparse)-310(matrix,)-325(such)-310(as)-310(matrix-vector)-309(pr)18(oducts,)-325(ar)18(e)-310(only)-310(possible)-310(in)]TJ 0 -11.955 Td [(this)-250(state.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(sparse)-250(matrix)]TJ/F67 9.9626 Tf 165.298 0 Td [(a)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G - 141.968 -29.888 Td [(11)]TJ + -76.431 -29.888 Td [(17)]TJ 0 g 0 G ET endstream endobj -921 0 obj +1055 0 obj << -/Length 4957 +/Length 3860 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.1.1)-1000(Descriptor)-250(Methods)]TJ 0 -19 Td [(3.1.2)-1000(get)]TJ -ET -q -1 0 0 1 195.029 687.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 198.017 687.129 Td [(local)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(3.2.3)-1000(get)]TJ ET q -1 0 0 1 220.194 687.328 cm +1 0 0 1 195.029 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 223.183 687.129 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(rows)]TJ -0 g 0 G +/F59 9.9626 Tf 198.017 706.129 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -/F59 9.9626 Tf -72.478 -19 Td [(nr)-525(=)-525(desc%get_local_rows\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf -47.312 -19.023 Td [(nc)-525(=)-525(a%get_ncols\050\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.001 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -20.049 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -20 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + 0 -20.048 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ 0 g 0 G + -56.338 -33.965 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-399(number)-398(of)-399(local)-398(r)18(ows,)-436(i.e.)-756(the)-398(number)-399(of)-399(r)18(ows)-398(owned)]TJ -47.87 -11.956 Td [(by)-350(the)-349(curr)18(ent)-350(pr)18(ocess;)-399(as)-350(explained)-350(in)]TJ -0 0 1 rg 0 0 1 RG - [-349(1)]TJ + 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G - [(,)-375(it)-350(is)-349(equal)-350(to)]TJ/F83 10.3811 Tf 249.705 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.433 0 Td [(+)]TJ/F83 10.3811 Tf 10.624 0 Td [(j)-23(B)]TJ/F52 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.003 0 Td [(.)-609(The)]TJ -293.569 -11.955 Td [(r)18(eturned)-250(value)-250(is)-250(speci\002c)-250(to)-250(the)-250(calling)-250(pr)18(ocess.)]TJ/F51 9.9626 Tf -24.906 -27.247 Td [(3.1.3)-1000(get)]TJ -ET -q -1 0 0 1 195.029 489.311 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 198.017 489.112 Td [(local)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(sparse)-250(matrix)]TJ/F67 9.9626 Tf 181.158 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ/F59 9.9626 Tf -259.165 -27.315 Td [(3.2.4)-1000(get)]TJ ET q -1 0 0 1 220.194 489.311 cm +1 0 0 1 195.029 531.915 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 223.183 489.112 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ +/F59 9.9626 Tf 198.017 531.716 Td [(nnzeros)-250(\227)-250(Get)-250(number)-250(of)-250(nonzero)-250(elements)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -72.478 -19 Td [(nc)-525(=)-525(desc%get_local_cols\050\051)]TJ +/F67 9.9626 Tf -47.312 -19.024 Td [(nz)-525(=)-525(a%get_nnzeros\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.001 Td [(desc)]TJ + 0 -20.048 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ + -56.338 -33.965 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ + 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-320(number)-320(of)-321(local)-320(cols,)-338(i.e.)-521(the)-320(number)-320(of)-321(indices)-320(used)-320(by)]TJ -47.87 -11.955 Td [(the)-322(curr)18(ent)-322(pr)18(ocess,)-340(including)-322(both)-322(local)-322(and)-322(halo)-322(indices;)-358(as)-322(explained)]TJ 0 -11.956 Td [(in)]TJ -0 0 1 rg 0 0 1 RG - [-284(1)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(nonzer)18(o)-250(elements)-250(stor)18(ed)-250(in)-250(sparse)-250(matrix)]TJ/F67 9.9626 Tf 251.284 0 Td [(a)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ/F59 9.9626 Tf -329.292 -22.041 Td [(Notes)]TJ 0 g 0 G - [(,)-294(i)1(t)-285(is)-284(equal)-285(to)]TJ/F83 10.3811 Tf 79.58 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.193 0 Td [(+)]TJ/F83 10.3811 Tf 10.383 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.192 0 Td [(+)]TJ/F83 10.3811 Tf 10.383 0 Td [(j)-24(H)]TJ/F52 7.5716 Tf 12.052 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.004 0 Td [(.)-413(The)-285(r)18(eturned)-284(value)-285(is)-284(speci\002c)-285(to)-284(the)]TJ -153.464 -11.955 Td [(calling)-250(pr)18(ocess.)]TJ/F51 9.9626 Tf -24.907 -27.247 Td [(3.1.4)-1000(get)]TJ -ET -q -1 0 0 1 195.029 279.339 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 198.017 279.14 Td [(global)]TJ +/F62 9.9626 Tf 12.453 -20.017 Td [(1.)]TJ +0 g 0 G + [-500(The)-395(function)-395(value)-395(is)-395(speci\002c)-395(to)-395(the)-395(storage)-395(format)-395(of)-396(matri)1(x)]TJ/F67 9.9626 Tf 295.646 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(;)-468(some)]TJ -288.422 -11.956 Td [(storage)-343(formats)-342(employ)-343(padding,)-366(thus)-343(the)-342(r)18(eturned)-343(value)-343(for)-342(the)-343(same)]TJ 0 -11.955 Td [(matrix)-250(may)-250(be)-250(dif)18(fer)18(ent)-250(for)-250(dif)18(fer)18(ent)-250(storage)-250(choices.)]TJ/F59 9.9626 Tf -24.907 -27.315 Td [(3.2.5)-1000(get)]TJ ET q -1 0 0 1 227.397 279.339 cm +1 0 0 1 195.029 291.533 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 230.386 279.14 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(rows)]TJ +/F59 9.9626 Tf 198.017 291.334 Td [(size)-398(\227)-397(Get)-398(maximum)-397(number)-398(of)-398(nonzero)-397(elements)-398(in)-398(a)-397(sparse)]TJ -17.424 -11.955 Td [(matrix)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -79.681 -19 Td [(nr)-525(=)-525(desc%get_global_rows\050\051)]TJ +/F67 9.9626 Tf -29.888 -19.024 Td [(maxnz)-525(=)-525(a%get_size\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.001 Td [(desc)]TJ + 0 -20.048 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ + -56.338 -33.965 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ + 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-351(number)-351(of)-350(global)-351(r)18(ows,)-376(i.e.)-613(the)-351(size)-351(of)-350(the)-351(global)-351(index)]TJ -47.87 -11.955 Td [(space.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-220(maximum)-220(number)-219(of)-220(nonzer)18(o)-220(elements)-220(that)-220(can)-219(be)-220(stor)18(ed)]TJ -47.87 -11.955 Td [(in)-250(sparse)-250(matrix)]TJ/F67 9.9626 Tf 73.294 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(using)-250(its)-250(curr)18(ent)-250(memory)-250(allocation.)]TJ 0 g 0 G - 141.967 -29.888 Td [(12)]TJ + 60.952 -29.888 Td [(18)]TJ 0 g 0 G ET endstream endobj -925 0 obj +1060 0 obj << -/Length 4367 +/Length 4447 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.1.5)-1000(get)]TJ -ET -q -1 0 0 1 144.219 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 147.208 706.129 Td [(global)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(3.2.6)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf 0 -20.135 Td [(memory_size)-525(=)-525(a%sizeof\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -23.732 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -35.687 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F59 9.9626 Tf -72.777 -30.58 Td [(3.2.7)-1000(get)]TJ ET q -1 0 0 1 176.587 706.328 cm +1 0 0 1 144.219 517.21 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 179.576 706.129 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ +/F59 9.9626 Tf 147.208 517.011 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -47.313 -20.135 Td [(write)]TJ +0 g 0 G + [(\050)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(,)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ 0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -79.681 -18.974 Td [(nr)-525(=)-525(desc%get_global_cols\050\051)]TJ + [-525(a%get_fmt\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.935 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.947 Td [(desc)]TJ + 0 -22.343 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ + -56.339 -35.686 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ + 0 -22.343 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(A)-244(short)-245(string)-244(describing)-245(the)-244(dynamic)-245(type)-244(of)-245(the)-244(matrix.)-308(Pr)18(e-)]TJ -47.87 -11.955 Td [(de\002ned)-250(values)-250(include)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 102.415 0 Td [(NULL)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-242(number)-242(of)-241(global)-242(cols;)-245(usually)-241(this)-242(is)-242(equal)-242(to)-241(the)-242(number)]TJ -47.87 -11.955 Td [(of)-250(global)-250(r)18(ows.)]TJ/F51 9.9626 Tf -24.907 -27.172 Td [(3.1.6)-1000(get)]TJ +/F62 9.9626 Tf 20.921 0 Td [(,)]TJ/F67 9.9626 Tf 4.981 0 Td [(COO)]TJ/F62 9.9626 Tf 15.691 0 Td [(,)]TJ/F67 9.9626 Tf 4.982 0 Td [(CSR)]TJ/F62 9.9626 Tf 18.181 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(CSC)]TJ/F62 9.9626 Tf 15.691 0 Td [(.)]TJ/F59 9.9626 Tf -227.127 -30.581 Td [(3.2.8)-1000(is)]TJ ET q -1 0 0 1 144.219 520.607 cm +1 0 0 1 138.122 315.533 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.208 520.408 Td [(global)]TJ +/F59 9.9626 Tf 141.111 315.333 Td [(bld,)-250(is)]TJ ET q -1 0 0 1 176.587 520.607 cm +1 0 0 1 169.922 315.533 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 179.576 520.408 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -79.681 -18.974 Td [(myidx)-525(=)-525(desc%get_global_indices\050[owned]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf 172.911 315.333 Td [(upd,)-250(is)]TJ +ET +q +1 0 0 1 204.493 315.533 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 207.482 315.333 Td [(asb)-250(\227)-250(Status)-250(check)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -107.587 -20.135 Td [(if)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.947 Td [(desc)]TJ + [-525(\050a%is_bld\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -91.287 -31.902 Td [(owned)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ 0 g 0 G -/F54 9.9626 Tf 35.975 0 Td [(Choose)-330(if)-329(you)-330(only)-329(want)-330(owned)-330(indices)-329(\050)]TJ/F59 9.9626 Tf 185.766 0 Td [(owned)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.true.)]TJ/F54 9.9626 Tf 62.764 0 Td [(\051)-330(or)-329(also)-330(halo)]TJ -259.598 -11.955 Td [(indices)-250(\050)]TJ/F59 9.9626 Tf 36.911 0 Td [(owned)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(\050a%is_upd\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 67.995 0 Td [(\051.)-310(Scope:)]TJ/F51 9.9626 Tf 40.328 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -166.813 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(;)-250(default:)]TJ/F59 9.9626 Tf 41.872 0 Td [(.true.)]TJ/F54 9.9626 Tf 31.382 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -163.436 -33.89 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ + [-525(\050a%is_asb\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-277(global)-277(indices,)-284(r)18(eturned)-277(as)-277(an)-277(allocatable)-277(integer)-277(array)-277(of)]TJ -47.87 -11.955 Td [(kind)]TJ/F59 9.9626 Tf 22.814 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 44.334 0 Td [(and)-250(rank)-250(1.)]TJ/F51 9.9626 Tf -92.055 -27.171 Td [(3.1.7)-1000(get)]TJ -ET -q -1 0 0 1 144.219 267.119 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 147.208 266.92 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -47.313 -18.975 Td [(ctxt)-525(=)-525(desc%get_context\050\051)]TJ +/F59 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ 0 g 0 G + 0 -22.343 Td [(a)]TJ 0 g 0 G - 0 -19.947 Td [(desc)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + -56.339 -35.686 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -22.343 Td [(Function)-250(value)]TJ 0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 72.777 0 Td [(A)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F67 9.9626 Tf 9.966 0 Td [(logical)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(communication)-250(context.)]TJ +/F62 9.9626 Tf 38.827 0 Td [(value)-222(indicating)-223(whether)-222(the)-222(matrix)-223(is)-222(in)-222(the)-223(Build,)]TJ -96.663 -11.955 Td [(Update)-250(or)-250(Assembled)-250(state,)-250(r)18(espectively)111(.)]TJ 0 g 0 G - 94.098 -29.888 Td [(13)]TJ + 141.968 -29.888 Td [(19)]TJ 0 g 0 G ET endstream endobj -829 0 obj +972 0 obj << /Type /ObjStm /N 100 -/First 875 -/Length 9086 ->> -stream -828 0 818 69 819 217 824 364 825 421 19 478 821 534 835 681 832 823 833 970 -837 1117 834 1173 840 1266 842 1380 23 1437 843 1493 844 1550 845 1607 846 1664 847 1721 -848 1778 849 1835 850 1892 839 1949 853 2068 838 2202 855 2351 856 2407 857 2463 858 2519 -859 2575 860 2631 861 2687 862 2743 863 2799 864 2855 865 2911 866 2967 867 3023 868 3079 -869 3135 852 3192 874 3285 851 3427 872 3579 876 3726 27 3783 877 3839 878 3896 879 3951 -880 4007 881 4064 882 4121 31 4178 873 4234 885 4353 883 4487 887 4634 35 4690 39 4745 -888 4800 884 4856 893 4949 889 5099 890 5245 891 5397 895 5549 896 5606 897 5663 898 5720 -899 5777 900 5834 892 5891 904 5971 901 6113 902 6265 906 6417 907 6473 908 6529 909 6585 -910 6641 911 6697 912 6753 913 6809 914 6865 916 6921 903 6977 920 7083 917 7225 918 7372 -922 7518 43 7575 47 7631 51 7687 55 7743 919 7799 924 7931 926 8045 59 8101 63 8156 -% 828 0 obj +/First 913 +/Length 9266 +>> +stream +969 0 952 57 975 150 951 292 973 444 977 591 27 648 978 704 979 761 980 816 +981 872 982 929 983 986 31 1043 974 1099 986 1218 984 1352 988 1499 35 1555 39 1610 +989 1665 985 1721 994 1814 990 1964 991 2110 992 2262 996 2414 997 2471 998 2528 999 2585 +1000 2642 1001 2700 993 2758 1005 2838 1002 2985 1003 3138 1007 3291 1008 3349 1009 3407 1010 3465 +1011 3523 1012 3581 1013 3639 1014 3697 1015 3755 1017 3813 1004 3871 1021 3980 1018 4127 1019 4275 +1023 4422 43 4481 47 4538 51 4595 55 4652 1020 4709 1025 4842 1027 4960 59 5018 63 5074 +67 5130 1024 5186 1030 5280 1032 5398 71 5457 75 5514 1033 5571 79 5630 83 5685 1029 5742 +1035 5836 1037 5954 87 6012 91 6068 95 6124 1034 6180 1042 6274 1038 6431 1039 6589 1040 6741 +1044 6889 99 6948 103 7005 1045 7063 1041 7122 1049 7216 1051 7334 1047 7392 1052 7450 107 7509 +111 7566 1048 7623 1054 7732 1056 7850 115 7909 119 7967 1057 8025 123 8084 1053 8141 1059 8235 +% 969 0 obj +<< +/D [953 0 R /XYZ 114.242 139.255 null] +>> +% 952 0 obj << -/BaseFont /Times-Roman -/Type /Font -/Subtype /Type1 +/Font << /F62 667 0 R /F67 913 0 R /F97 970 0 R >> +/ProcSet [ /PDF /Text ] >> -% 818 0 obj +% 975 0 obj +<< +/Type /Page +/Contents 976 0 R +/Resources 974 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 971 0 R +/Annots [ 951 0 R 973 0 R ] +>> +% 951 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [327.281 638.309 334.255 647.315] -/A << /S /GoTo /D (cite.2007c) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [269.647 674.054 284.093 683.464] +/A << /S /GoTo /D (subsection.2.3) >> +>> +% 973 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [168.073 254.848 175.046 266.907] +/A << /S /GoTo /D (section.3) >> +>> +% 977 0 obj +<< +/D [975 0 R /XYZ 149.705 753.953 null] +>> +% 27 0 obj +<< +/D [975 0 R /XYZ 150.705 716.092 null] +>> +% 978 0 obj +<< +/D [975 0 R /XYZ 150.705 671.065 null] +>> +% 979 0 obj +<< +/D [975 0 R /XYZ 150.705 648.1 null] +>> +% 980 0 obj +<< +/D [975 0 R /XYZ 150.705 573.59 null] +>> +% 981 0 obj +<< +/D [975 0 R /XYZ 150.705 516.424 null] +>> +% 982 0 obj +<< +/D [975 0 R /XYZ 150.705 483.864 null] +>> +% 983 0 obj +<< +/D [975 0 R /XYZ 150.705 463.343 null] +>> +% 31 0 obj +<< +/D [975 0 R /XYZ 150.705 408.307 null] +>> +% 974 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 986 0 obj +<< +/Type /Page +/Contents 987 0 R +/Resources 985 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 971 0 R +/Annots [ 984 0 R ] +>> +% 984 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [268.105 173.115 275.079 185.175] +/A << /S /GoTo /D (section.6) >> +>> +% 988 0 obj +<< +/D [986 0 R /XYZ 98.895 753.953 null] +>> +% 35 0 obj +<< +/D [986 0 R /XYZ 99.895 716.092 null] +>> +% 39 0 obj +<< +/D [986 0 R /XYZ 99.895 279.545 null] +>> +% 989 0 obj +<< +/D [986 0 R /XYZ 342.47 236.897 null] +>> +% 985 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 994 0 obj +<< +/Type /Page +/Contents 995 0 R +/Resources 993 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 971 0 R +/Annots [ 990 0 R 991 0 R 992 0 R ] +>> +% 990 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [482.63 399.338 489.604 411.398] +/A << /S /GoTo /D (section.6) >> +>> +% 991 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [354.987 193.202 369.432 205.262] +/A << /S /GoTo /D (subsection.3.3) >> +>> +% 992 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [354.987 148.883 369.432 160.943] +/A << /S /GoTo /D (subsection.3.3) >> +>> +% 996 0 obj +<< +/D [994 0 R /XYZ 149.705 753.953 null] >> -% 819 0 obj +% 997 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [337.243 638.21 344.217 647.166] -/A << /S /GoTo /D (cite.2007d) >> +/D [994 0 R /XYZ 150.705 293.402 null] >> -% 824 0 obj +% 998 0 obj << -/D [822 0 R /XYZ 149.705 753.953 null] +/D [994 0 R /XYZ 150.705 278.496 null] >> -% 825 0 obj +% 999 0 obj << -/D [822 0 R /XYZ 150.705 353.614 null] +/D [994 0 R /XYZ 150.705 261.039 null] >> -% 19 0 obj +% 1000 0 obj << -/D [822 0 R /XYZ 150.705 270.035 null] +/D [994 0 R /XYZ 150.705 244.857 null] >> -% 821 0 obj +% 1001 0 obj +<< +/D [994 0 R /XYZ 150.705 228.675 null] +>> +% 993 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> -/XObject << /Im3 820 0 R >> +/Font << /F62 667 0 R /F59 665 0 R >> /ProcSet [ /PDF /Text ] >> -% 835 0 obj +% 1005 0 obj << /Type /Page -/Contents 836 0 R -/Resources 834 0 R +/Contents 1006 0 R +/Resources 1004 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 832 0 R 833 0 R ] +/Parent 971 0 R +/Annots [ 1002 0 R 1003 0 R ] >> -% 832 0 obj +% 1002 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [404.739 354.196 411.713 366.255] -/A << /S /GoTo /D (section.3) >> +/Rect [304.177 614.119 318.623 626.179] +/A << /S /GoTo /D (subsection.3.3) >> >> -% 833 0 obj +% 1003 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [172.593 318.033 179.567 330.093] -/A << /S /GoTo /D (section.6) >> ->> -% 837 0 obj -<< -/D [835 0 R /XYZ 98.895 753.953 null] ->> -% 834 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 840 0 obj -<< -/Type /Page -/Contents 841 0 R -/Resources 839 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 780 0 R +/Rect [304.177 568.778 318.623 580.837] +/A << /S /GoTo /D (subsection.3.3) >> >> -% 842 0 obj +% 1007 0 obj << -/D [840 0 R /XYZ 149.705 753.953 null] +/D [1005 0 R /XYZ 98.895 753.953 null] >> -% 23 0 obj +% 1008 0 obj << -/D [840 0 R /XYZ 150.705 716.092 null] +/D [1005 0 R /XYZ 99.895 716.092 null] >> -% 843 0 obj +% 1009 0 obj << -/D [840 0 R /XYZ 150.705 282.521 null] +/D [1005 0 R /XYZ 99.895 702.226 null] >> -% 844 0 obj +% 1010 0 obj << -/D [840 0 R /XYZ 150.705 261.733 null] +/D [1005 0 R /XYZ 99.895 684.257 null] >> -% 845 0 obj +% 1011 0 obj << -/D [840 0 R /XYZ 150.705 240.946 null] +/D [1005 0 R /XYZ 99.895 667.564 null] >> -% 846 0 obj +% 1012 0 obj << -/D [840 0 R /XYZ 150.705 220.159 null] +/D [1005 0 R /XYZ 99.895 650.871 null] >> -% 847 0 obj +% 1013 0 obj << -/D [840 0 R /XYZ 150.705 188.012 null] +/D [1005 0 R /XYZ 99.895 541.236 null] >> -% 848 0 obj +% 1014 0 obj << -/D [840 0 R /XYZ 150.705 167.072 null] +/D [1005 0 R /XYZ 99.895 524.542 null] >> -% 849 0 obj +% 1015 0 obj << -/D [840 0 R /XYZ 150.705 148.646 null] +/D [1005 0 R /XYZ 99.895 507.849 null] >> -% 850 0 obj +% 1017 0 obj << -/D [840 0 R /XYZ 150.705 132.275 null] +/D [1005 0 R /XYZ 99.895 288.977 null] >> -% 839 0 obj +% 1004 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R /F85 814 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F102 1016 0 R >> /ProcSet [ /PDF /Text ] >> -% 853 0 obj +% 1021 0 obj << /Type /Page -/Contents 854 0 R -/Resources 852 0 R +/Contents 1022 0 R +/Resources 1020 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 838 0 R ] +/Parent 971 0 R +/Annots [ 1018 0 R 1019 0 R ] >> -% 838 0 obj +% 1018 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [176.109 690.964 182.386 703.958] -/A << /S /GoTo /D (Hfootnote.3) >> ->> -% 855 0 obj -<< -/D [853 0 R /XYZ 98.895 753.953 null] ->> -% 856 0 obj -<< -/D [853 0 R /XYZ 99.895 716.092 null] ->> -% 857 0 obj -<< -/D [853 0 R /XYZ 99.895 686.784 null] +/Rect [352.861 524.509 359.835 537.101] +/A << /S /GoTo /D (section.1) >> >> -% 858 0 obj +% 1019 0 obj << -/D [853 0 R /XYZ 99.895 618.259 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [186.147 314.537 193.12 327.129] +/A << /S /GoTo /D (section.1) >> >> -% 859 0 obj +% 1023 0 obj << -/D [853 0 R /XYZ 99.895 595.952 null] +/D [1021 0 R /XYZ 149.705 753.953 null] >> -% 860 0 obj +% 43 0 obj << -/D [853 0 R /XYZ 99.895 573.645 null] +/D [1021 0 R /XYZ 150.705 716.092 null] >> -% 861 0 obj +% 47 0 obj << -/D [853 0 R /XYZ 99.895 539.978 null] +/D [1021 0 R /XYZ 150.705 696.532 null] >> -% 862 0 obj +% 51 0 obj << -/D [853 0 R /XYZ 99.895 517.075 null] +/D [1021 0 R /XYZ 150.705 498.276 null] >> -% 863 0 obj +% 55 0 obj << -/D [853 0 R /XYZ 99.895 494.768 null] +/D [1021 0 R /XYZ 150.705 288.305 null] >> -% 864 0 obj +% 1020 0 obj << -/D [853 0 R /XYZ 99.895 469.873 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F60 666 0 R /F93 915 0 R >> +/ProcSet [ /PDF /Text ] >> -% 865 0 obj +% 1025 0 obj << -/D [853 0 R /XYZ 99.895 442.062 null] +/Type /Page +/Contents 1026 0 R +/Resources 1024 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1028 0 R >> -% 866 0 obj +% 1027 0 obj << -/D [853 0 R /XYZ 99.895 412.296 null] +/D [1025 0 R /XYZ 98.895 753.953 null] >> -% 867 0 obj +% 59 0 obj << -/D [853 0 R /XYZ 99.895 395.165 null] +/D [1025 0 R /XYZ 99.895 716.092 null] >> -% 868 0 obj +% 63 0 obj << -/D [853 0 R /XYZ 99.895 377.438 null] +/D [1025 0 R /XYZ 99.895 529.559 null] >> -% 869 0 obj +% 67 0 obj << -/D [853 0 R /XYZ 114.242 139.255 null] +/D [1025 0 R /XYZ 99.895 276.666 null] >> -% 852 0 obj +% 1024 0 obj << -/Font << /F54 586 0 R /F59 812 0 R /F89 870 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 874 0 obj +% 1030 0 obj << /Type /Page -/Contents 875 0 R -/Resources 873 0 R +/Contents 1031 0 R +/Resources 1029 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 851 0 R 872 0 R ] ->> -% 851 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [269.647 674.054 284.093 683.464] -/A << /S /GoTo /D (subsection.2.3) >> ->> -% 872 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [168.073 254.848 175.046 266.907] -/A << /S /GoTo /D (section.3) >> ->> -% 876 0 obj -<< -/D [874 0 R /XYZ 149.705 753.953 null] ->> -% 27 0 obj -<< -/D [874 0 R /XYZ 150.705 716.092 null] ->> -% 877 0 obj -<< -/D [874 0 R /XYZ 150.705 671.065 null] +/Parent 1028 0 R >> -% 878 0 obj +% 1032 0 obj << -/D [874 0 R /XYZ 150.705 648.1 null] +/D [1030 0 R /XYZ 149.705 753.953 null] >> -% 879 0 obj +% 71 0 obj << -/D [874 0 R /XYZ 150.705 573.59 null] +/D [1030 0 R /XYZ 150.705 716.092 null] >> -% 880 0 obj +% 75 0 obj << -/D [874 0 R /XYZ 150.705 516.424 null] +/D [1030 0 R /XYZ 150.705 519.544 null] >> -% 881 0 obj +% 1033 0 obj << -/D [874 0 R /XYZ 150.705 483.864 null] +/D [1030 0 R /XYZ 397.537 356.277 null] >> -% 882 0 obj +% 79 0 obj << -/D [874 0 R /XYZ 150.705 463.343 null] +/D [1030 0 R /XYZ 150.705 305.6 null] >> -% 31 0 obj +% 83 0 obj << -/D [874 0 R /XYZ 150.705 408.307 null] +/D [1030 0 R /XYZ 150.705 194.578 null] >> -% 873 0 obj +% 1029 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 885 0 obj +% 1035 0 obj << /Type /Page -/Contents 886 0 R -/Resources 884 0 R +/Contents 1036 0 R +/Resources 1034 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 883 0 R ] ->> -% 883 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [268.105 173.115 275.079 185.175] -/A << /S /GoTo /D (section.6) >> +/Parent 1028 0 R >> -% 887 0 obj +% 1037 0 obj << -/D [885 0 R /XYZ 98.895 753.953 null] +/D [1035 0 R /XYZ 98.895 753.953 null] >> -% 35 0 obj +% 87 0 obj << -/D [885 0 R /XYZ 99.895 716.092 null] +/D [1035 0 R /XYZ 99.895 583.842 null] >> -% 39 0 obj +% 91 0 obj << -/D [885 0 R /XYZ 99.895 279.545 null] +/D [1035 0 R /XYZ 99.895 466.211 null] >> -% 888 0 obj +% 95 0 obj << -/D [885 0 R /XYZ 342.47 236.897 null] +/D [1035 0 R /XYZ 99.895 256.035 null] >> -% 884 0 obj +% 1034 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 893 0 obj +% 1042 0 obj << /Type /Page -/Contents 894 0 R -/Resources 892 0 R +/Contents 1043 0 R +/Resources 1041 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 889 0 R 890 0 R 891 0 R ] +/Parent 1028 0 R +/Annots [ 1038 0 R 1039 0 R 1040 0 R ] >> -% 889 0 obj +% 1038 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [482.63 399.338 489.604 411.398] -/A << /S /GoTo /D (section.6) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [187.544 240.393 199.499 249.399] +/A << /S /GoTo /D (cite.DesignPatterns) >> >> -% 890 0 obj +% 1039 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [354.987 193.202 369.432 205.262] -/A << /S /GoTo /D (subsection.3.3) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [267.981 240.293 279.936 249.399] +/A << /S /GoTo /D (cite.Sparse03) >> >> -% 891 0 obj +% 1040 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [354.987 148.883 369.432 160.943] -/A << /S /GoTo /D (subsection.3.3) >> ->> -% 895 0 obj -<< -/D [893 0 R /XYZ 149.705 753.953 null] ->> -% 896 0 obj -<< -/D [893 0 R /XYZ 150.705 293.402 null] +/Rect [458.483 237.643 465.457 249.703] +/A << /S /GoTo /D (listing.2) >> >> -% 897 0 obj +% 1044 0 obj << -/D [893 0 R /XYZ 150.705 278.496 null] +/D [1042 0 R /XYZ 149.705 753.953 null] >> -% 898 0 obj +% 99 0 obj << -/D [893 0 R /XYZ 150.705 261.039 null] +/D [1042 0 R /XYZ 150.705 544.277 null] >> -% 899 0 obj +% 103 0 obj << -/D [893 0 R /XYZ 150.705 244.857 null] +/D [1042 0 R /XYZ 150.705 296.936 null] >> -% 900 0 obj +% 1045 0 obj << -/D [893 0 R /XYZ 150.705 228.675 null] +/D [1042 0 R /XYZ 170.328 265.359 null] >> -% 892 0 obj +% 1041 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 904 0 obj +% 1049 0 obj << /Type /Page -/Contents 905 0 R -/Resources 903 0 R +/Contents 1050 0 R +/Resources 1048 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 901 0 R 902 0 R ] ->> -% 901 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [304.177 614.119 318.623 626.179] -/A << /S /GoTo /D (subsection.3.3) >> ->> -% 902 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [304.177 568.778 318.623 580.837] -/A << /S /GoTo /D (subsection.3.3) >> +/Parent 1028 0 R >> -% 906 0 obj +% 1051 0 obj << -/D [904 0 R /XYZ 98.895 753.953 null] +/D [1049 0 R /XYZ 98.895 753.953 null] >> -% 907 0 obj +% 1047 0 obj << -/D [904 0 R /XYZ 99.895 716.092 null] +/D [1049 0 R /XYZ 99.895 665.282 null] >> -% 908 0 obj +% 1052 0 obj << -/D [904 0 R /XYZ 99.895 702.226 null] +/D [1049 0 R /XYZ 409.052 603.446 null] >> -% 909 0 obj +% 107 0 obj << -/D [904 0 R /XYZ 99.895 684.257 null] +/D [1049 0 R /XYZ 99.895 294.773 null] >> -% 910 0 obj +% 111 0 obj << -/D [904 0 R /XYZ 99.895 667.564 null] +/D [1049 0 R /XYZ 99.895 276.048 null] >> -% 911 0 obj +% 1048 0 obj << -/D [904 0 R /XYZ 99.895 650.871 null] +/Font << /F102 1016 0 R /F62 667 0 R /F67 913 0 R /F59 665 0 R >> +/ProcSet [ /PDF /Text ] >> -% 912 0 obj +% 1054 0 obj << -/D [904 0 R /XYZ 99.895 541.236 null] +/Type /Page +/Contents 1055 0 R +/Resources 1053 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1028 0 R >> -% 913 0 obj +% 1056 0 obj << -/D [904 0 R /XYZ 99.895 524.542 null] +/D [1054 0 R /XYZ 149.705 753.953 null] >> -% 914 0 obj +% 115 0 obj << -/D [904 0 R /XYZ 99.895 507.849 null] +/D [1054 0 R /XYZ 150.705 716.092 null] >> -% 916 0 obj +% 119 0 obj << -/D [904 0 R /XYZ 99.895 288.977 null] +/D [1054 0 R /XYZ 150.705 540.892 null] >> -% 903 0 obj +% 1057 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R >> -/ProcSet [ /PDF /Text ] +/D [1054 0 R /XYZ 150.705 358.382 null] >> -% 920 0 obj +% 123 0 obj << -/Type /Page -/Contents 921 0 R -/Resources 919 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 917 0 R 918 0 R ] +/D [1054 0 R /XYZ 150.705 300.51 null] >> -% 917 0 obj +% 1053 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [352.861 524.509 359.835 537.101] -/A << /S /GoTo /D (section.1) >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 918 0 obj +% 1059 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [186.147 314.537 193.12 327.129] -/A << /S /GoTo /D (section.1) >> +/Type /Page +/Contents 1060 0 R +/Resources 1058 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R >> -% 922 0 obj + +endstream +endobj +1066 0 obj << -/D [920 0 R /XYZ 149.705 753.953 null] +/Length 5883 >> -% 43 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(3.2.9)-1000(is)]TJ +ET +q +1 0 0 1 188.931 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 191.92 706.129 Td [(lower)55(,)-250(is)]TJ +ET +q +1 0 0 1 230.704 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 233.693 706.129 Td [(upper)55(,)-250(is)]TJ +ET +q +1 0 0 1 273.583 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 276.572 706.129 Td [(triangle,)-250(is)]TJ +ET +q +1 0 0 1 325.309 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 328.298 706.129 Td [(unit)-250(\227)-250(Format)-250(check)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -177.593 -19.573 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_triangle\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_upper\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_lower\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_unit\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0 g 0 G +/F59 9.9626 Tf 0 -22.86 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ +0 g 0 G + -56.338 -34.816 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(A)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F67 9.9626 Tf 10.803 0 Td [(logical)]TJ +0 g 0 G +/F62 9.9626 Tf 39.665 0 Td [(value)-306(indicating)-307(whether)-306(the)-307(matrix)-306(is)-306(triangular;)]TJ -98.338 -11.955 Td [(if)]TJ/F67 9.9626 Tf 8.595 0 Td [(is_triangle\050\051)]TJ/F62 9.9626 Tf 70.373 0 Td [(r)18(eturns)]TJ/F67 9.9626 Tf 34.119 0 Td [(.true.)]TJ/F62 9.9626 Tf 33.761 0 Td [(check)-239(also)-238(if)-239(it)-239(is)-239(lower)74(,)-241(upper)-238(and)-239(with)]TJ -146.848 -11.955 Td [(a)-250(unit)-250(\050i.e.)-310(assumed\051)-250(diagonal.)]TJ/F59 9.9626 Tf -24.907 -28.929 Td [(3.2.10)-1000(cscnv)-250(\227)-250(Convert)-250(to)-250(a)-250(dif)18(ferent)-250(storage)-250(format)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 0 -19.573 Td [(call)]TJ +0 g 0 G + [-1050(a%cscnv\050b,info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mold,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(dupl]\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-1050(a%cscnv\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mold,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(dupl]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.861 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -33.138 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 24.906 0 Td [(a)-250(string)-250(r)18(equesting)-250(a)-250(new)-250(format.)]TJ 0.001 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.183 Td [(mold)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(a)-236(variable)-236(of)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 56.403 0 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_sparse_mat\051)]TJ/F62 9.9626 Tf 148.803 0 Td [(r)18(equesting)-236(a)-236(new)-237(format)1(.)]TJ -209.081 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.182 Td [(dupl)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(an)-359(integer)-358(value)-359(speci\002ng)-358(how)-359(to)-359(handle)-358(duplicates)-359(\050see)-359(Named)-358(Con-)]TJ -1.653 -11.956 Td [(stants)-250(below\051)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.861 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.182 Td [(b,a)]TJ +0 g 0 G +/F62 9.9626 Tf 18.54 0 Td [(A)-250(copy)-250(of)]TJ/F67 9.9626 Tf 45.37 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(with)-250(a)-250(new)-250(storage)-250(format.)]TJ -46.724 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.456 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -21.183 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ -23.801 -23.175 Td [(The)]TJ/F67 9.9626 Tf 19.584 0 Td [(mold)]TJ/F62 9.9626 Tf 23.827 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ +0 g 0 G + 166.874 -29.888 Td [(20)]TJ +0 g 0 G +ET + +endstream +endobj +1070 0 obj << -/D [920 0 R /XYZ 150.705 716.092 null] +/Length 4616 >> -% 47 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(3.2.11)-1000(csclip)-250(\227)-250(Reduce)-250(to)-250(a)-250(submatrix)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 20.922 -20.279 Td [(call)]TJ +0 g 0 G + [-525(a%csclip\050b,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(imin,imax,jmin,jmax,rscale,cscale]\051)]TJ/F62 9.9626 Tf -21.669 -24.631 Td [(Returns)-190(the)-190(submatrix)]TJ/F67 9.9626 Tf 98.878 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F62 9.9626 Tf 115.068 0 Td [(,)-202(optionally)-190(r)18(escaling)-190(r)18(ow/-)]TJ -228.89 -11.955 Td [(col)-250(indices)-250(to)-250(the)-250(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F67 9.9626 Tf 103.85 0 Td [(1)]TJ +0 g 0 G + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 141.219 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -245.069 -21.961 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.639 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -34.594 Td [(imin,imax,jmin,jmax)]TJ +0 g 0 G +/F62 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.638 Td [(rscale,cscale)]TJ +0 g 0 G +/F62 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -60.025 -24.632 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.639 Td [(b)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(a)-250(submatrix)-250(of)]TJ/F67 9.9626 Tf 111.321 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ -102.713 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -22.639 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.801 -31 Td [(3.2.12)-1000(clean)]TJ +ET +q +1 0 0 1 159.153 364.307 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 162.142 364.108 Td [(zeros)-250(\227)-250(Eliminate)-250(zero)-250(coef)18(\002cients)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -62.247 -20.278 Td [(call)]TJ +0 g 0 G + [-525(a%clean_zeros\050info\051)]TJ/F62 9.9626 Tf 14.944 -12.634 Td [(Eliminates)-214(zer)18(o)-214(coef)18(\002cients)-214(in)-214(the)-214(input)-214(matrix.)-298(Note)-214(that)-214(depending)-214(on)-214(the)]TJ -14.944 -11.955 Td [(internal)-246(storage)-245(format,)-247(ther)18(e)-245(may)-246(still)-245(be)-246(some)-245(amount)-246(of)-246(ze)1(r)18(o)-246(padding)-246(in)-245(the)]TJ 0 -11.955 Td [(output.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -24.632 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.639 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -35.908 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.638 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(matrix)]TJ/F67 9.9626 Tf 50.659 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(without)-250(zer)18(o)-250(coef)18(\002cients.)]TJ -43.436 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -22.638 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +0 g 0 G + 143.074 -29.888 Td [(21)]TJ +0 g 0 G +ET + +endstream +endobj +1074 0 obj << -/D [920 0 R /XYZ 150.705 696.532 null] +/Length 4627 >> -% 51 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(3.2.13)-1000(get)]TJ +ET +q +1 0 0 1 200.01 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.999 706.129 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -52.294 -19.329 Td [(call)]TJ +0 g 0 G + [-525(a%get_diag\050d,info\051)]TJ/F62 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ +0 g 0 G +/F59 9.9626 Tf -14.944 -20.49 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(d)]TJ +0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ 13.839 -11.955 Td [(A)-250(one-dimensional)-250(array)-250(of)-250(the)-250(appr)18(opriate)-250(type.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.679 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.8 -28.213 Td [(3.2.14)-1000(clip)]TJ +ET +q +1 0 0 1 203.317 472.944 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 206.306 472.745 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -55.601 -19.329 Td [(call)]TJ +0 g 0 G + [-525(a%clip_diag\050b,info\051)]TJ/F62 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)]TJ/F67 9.9626 Tf 79.73 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ +0 g 0 G +/F59 9.9626 Tf -102.395 -20.49 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(b)]TJ +0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)]TJ/F67 9.9626 Tf 45.37 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -39.252 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.984 -20.679 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.8 -28.213 Td [(3.2.15)-1000(tril)-250(\227)-250(Return)-250(the)-250(lower)-250(triangle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 20.921 -19.329 Td [(call)]TJ +0 g 0 G + [-525(a%tril\050l,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(diag,imin,imax,jmin,jmax,rscale,cscale,u]\051)]TJ/F62 9.9626 Tf -21.668 -22.671 Td [(Returns)-309(the)-308(lower)-309(triangular)-308(part)-309(of)-309(submatrix)]TJ/F67 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F62 9.9626 Tf 115.067 0 Td [(,)]TJ -341.22 -11.956 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F67 9.9626 Tf 212.61 0 Td [(1)]TJ +0 g 0 G + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf -212.61 -11.955 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(upper)-250(triangle.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.49 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 166.874 -29.888 Td [(22)]TJ +0 g 0 G +ET + +endstream +endobj +1078 0 obj << -/D [920 0 R /XYZ 150.705 498.276 null] +/Length 6324 >> -% 55 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ +0 g 0 G +/F62 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F67 9.9626 Tf 149.76 0 Td [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F67 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=-)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ +0 g 0 G +/F59 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ +0 g 0 G +/F62 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ +0 g 0 G +/F62 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -18.75 Td [(l)]TJ +0 g 0 G +/F62 9.9626 Tf 8.299 0 Td [(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F67 9.9626 Tf 137.333 0 Td [(a)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ -125.956 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -18.75 Td [(u)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F67 9.9626 Tf 184.485 0 Td [(a)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ -175.878 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -18.749 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.801 -26.59 Td [(3.2.16)-1000(triu)-250(\227)-250(Return)-250(the)-250(upper)-250(triangle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 20.922 -18.964 Td [(call)]TJ +0 g 0 G + [-525(a%triu\050u,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(diag,imin,imax,jmin,jmax,rscale,cscale,l]\051)]TJ/F62 9.9626 Tf -21.669 -18.979 Td [(Returns)-289(the)-290(upper)-289(triangular)-290(part)-289(of)-290(submatrix)]TJ/F67 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F62 9.9626 Tf 115.068 0 Td [(,)]TJ -341.221 -11.955 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F67 9.9626 Tf 212.611 0 Td [(1)]TJ +0 g 0 G + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 141.219 0 Td [(,)]TJ -353.83 -11.956 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(lower)-250(triangle.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -17.574 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -18.75 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -18.75 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ +0 g 0 G +/F62 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F67 9.9626 Tf 149.76 0 Td [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F67 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=-)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ +0 g 0 G +/F59 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ +0 g 0 G +/F62 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ +0 g 0 G +/F62 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -18.75 Td [(u)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F67 9.9626 Tf 138.668 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ -130.06 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -18.75 Td [(l)]TJ +0 g 0 G +/F62 9.9626 Tf 8.299 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F67 9.9626 Tf 183.151 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ -171.773 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -18.75 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +0 g 0 G + 143.074 -29.888 Td [(23)]TJ +0 g 0 G +ET + +endstream +endobj +1084 0 obj << -/D [920 0 R /XYZ 150.705 288.305 null] +/Length 7619 >> -% 919 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(3.2.17)-1000(psb)]TJ +ET +q +1 0 0 1 202.769 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 205.758 706.129 Td [(set)]TJ +ET +q +1 0 0 1 219.078 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 222.067 706.129 Td [(mat)]TJ +ET +q +1 0 0 1 239.82 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 242.809 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -92.104 -18.964 Td [(call)]TJ +0 g 0 G + [-1050(psb_set_mat_default\050a\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.231 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(a)-203(variable)-203(of)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 55.42 0 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_sparse_mat\051)]TJ/F62 9.9626 Tf 148.475 0 Td [(r)18(equesting)-203(a)-204(new)-203(default)-203(stor)18(-)]TJ -188.951 -11.956 Td [(age)-250(format.)]TJ 0 -11.955 Td [(T)90(ype:)-310(r)18(equir)18(ed.)]TJ/F59 9.9626 Tf -24.907 -26.815 Td [(3.2.18)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 0 -18.964 Td [(call)]TJ +0 g 0 G + [-1050(a%clone\050b,info\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.231 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -32.138 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.232 Td [(b)]TJ +0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +0 g 0 G +/F59 9.9626 Tf -11.068 -19.231 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.8 -26.815 Td [(3.2.19)-1000(Named)-250(Constants)]TJ +0 g 0 G + 0 -18.964 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 372.049 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 371.85 Td [(dupl)]TJ +ET +q +1 0 0 1 193.066 372.049 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 196.055 371.85 Td [(ovwrt)]TJ +ET +q +1 0 0 1 223.222 372.049 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 231.193 371.85 Td [(Duplicate)-259(coef)18(\002cients)-259(shou)1(ld)-259(be)-259(overwritten)-259(\050i.e.)-336(ignor)18(e)-259(du-)]TJ -55.582 -11.955 Td [(plications\051)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.231 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 340.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 340.664 Td [(dupl)]TJ +ET +q +1 0 0 1 193.066 340.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 196.055 340.664 Td [(add)]TJ +ET +q +1 0 0 1 213.808 340.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 221.778 340.664 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(be)-250(added;)]TJ +0 g 0 G +/F59 9.9626 Tf -71.073 -19.232 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 321.632 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 321.432 Td [(dupl)]TJ +ET +q +1 0 0 1 193.066 321.632 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 196.055 321.432 Td [(err)]TJ +ET +q +1 0 0 1 209.384 321.632 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 217.355 321.432 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(trigger)-250(an)-250(err)18(or)-250(conditino)]TJ +0 g 0 G +/F59 9.9626 Tf -66.65 -19.231 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 302.4 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 302.201 Td [(upd)]TJ +ET +q +1 0 0 1 189.748 302.4 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 192.737 302.201 Td [(d\003t)]TJ +ET +q +1 0 0 1 208.827 302.4 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 216.797 302.201 Td [(Default)-250(update)-250(strategy)-250(for)-250(matrix)-250(coef)18(\002cients;)]TJ +0 g 0 G +/F59 9.9626 Tf -66.092 -19.231 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 283.169 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 282.97 Td [(upd)]TJ +ET +q +1 0 0 1 189.748 283.169 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 192.737 282.97 Td [(srch)]TJ +ET +q +1 0 0 1 212.144 283.169 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 220.114 282.97 Td [(Update)-250(strategy)-250(based)-250(on)-250(sear)18(ch)-250(into)-250(the)-250(data)-250(str)8(uctur)18(e;)]TJ +0 g 0 G +/F59 9.9626 Tf -69.409 -19.232 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 263.938 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 263.738 Td [(upd)]TJ +ET +q +1 0 0 1 189.748 263.938 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 192.737 263.738 Td [(perm)]TJ +ET +q +1 0 0 1 217.135 263.938 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 225.106 263.738 Td [(Update)-392(strategy)-393(based)-392(on)-393(additional)-392(permutation)-393(data)-392(\050see)]TJ -49.495 -11.955 Td [(tools)-250(r)18(outine)-250(description\051.)]TJ/F59 11.9552 Tf -24.906 -28.807 Td [(3.3)-1000(Dense)-250(V)111(ector)-250(Data)-250(Structure)]TJ/F62 9.9626 Tf 0 -18.964 Td [(The)]TJ/F67 9.9626 Tf 20.094 0 Td [(psb)]TJ +ET +q +1 0 0 1 187.117 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 190.255 204.012 Td [(T)]TJ +ET +q +1 0 0 1 196.113 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 199.252 204.012 Td [(vect)]TJ +ET +q +1 0 0 1 220.801 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 223.939 204.012 Td [(type)]TJ/F62 9.9626 Tf 24.338 0 Td [(data)-343(str)8(uctur)18(e)-343(encapsulates)-343(the)-343(dense)-343(vectors)-342(in)-343(a)-343(way)]TJ -97.572 -11.955 Td [(similar)-368(to)-368(sparse)-368(matrices,)-397(i.e.)-664(including)-368(a)-368(base)-368(type)]TJ/F67 9.9626 Tf 242.472 0 Td [(psb)]TJ +ET +q +1 0 0 1 409.495 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 412.633 192.057 Td [(T)]TJ +ET +q +1 0 0 1 418.491 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 421.63 192.057 Td [(base)]TJ +ET +q +1 0 0 1 443.178 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 446.317 192.057 Td [(vect)]TJ +ET +q +1 0 0 1 467.866 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 471.004 192.057 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ -341.22 -11.956 Td [(The)-263(user)-263(will)-263(not,)-266(in)-263(general,)-267(access)-263(the)-263(vector)-263(components)-263(dir)18(ectly)111(,)-266(but)-263(rather)]TJ 0 -11.955 Td [(via)-222(the)-222(r)18(out)1(ines)-222(of)-222(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-222(6)]TJ +0 g 0 G + [(.)-300(Among)-222(other)-222(simple)-222(things,)-227(we)-222(de\002ne)-222(her)18(e)-221(an)-222(extrac-)]TJ 0 -11.955 Td [(tion)-273(method)-274(that)-273(can)-274(be)-273(used)-274(to)-273(get)-274(a)-273(full)-274(copy)-273(of)-274(the)-273(part)-274(of)-273(the)-274(vector)-273(stor)18(ed)]TJ 0 -11.955 Td [(on)-250(the)-250(local)-250(pr)18(ocess.)]TJ 14.944 -11.955 Td [(The)-311(type)-311(declaration)-311(is)-310(shown)-311(in)-311(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-311(3)]TJ +0 g 0 G + [-311(wher)18(e)]TJ/F67 9.9626 Tf 217.442 0 Td [(T)]TJ/F62 9.9626 Tf 8.327 0 Td [(is)-311(a)-311(placeholder)-311(for)-310(the)]TJ -240.713 -11.955 Td [(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ +0 g 0 G + 166.874 -29.888 Td [(24)]TJ +0 g 0 G +ET + +endstream +endobj +1090 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/Length 5064 >> -% 924 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(I)]TJ +0 g 0 G +/F62 9.9626 Tf 8.857 0 Td [(Integer;)]TJ +0 g 0 G +/F59 9.9626 Tf -8.857 -20.359 Td [(S)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0 g 0 G +/F59 9.9626 Tf -11.069 -20.358 Td [(D)]TJ +0 g 0 G +/F62 9.9626 Tf 13.281 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0 g 0 G +/F59 9.9626 Tf -13.281 -20.359 Td [(C)]TJ +0 g 0 G +/F62 9.9626 Tf 12.175 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ +0 g 0 G +/F59 9.9626 Tf -12.175 -20.358 Td [(Z)]TJ +0 g 0 G +/F62 9.9626 Tf 11.627 0 Td [(Double)-250(pr)18(ecision)-250(complex.)]TJ -11.627 -20.251 Td [(The)-209(actual)-208(data)-209(is)-208(contained)-209(in)-209(the)-208(polymorphic)-209(component)]TJ/F67 9.9626 Tf 261.152 0 Td [(v%v)]TJ/F62 9.9626 Tf 15.691 0 Td [(;)-222(the)-209(separation)]TJ -276.843 -11.955 Td [(between)-353(the)-353(application)-353(and)-353(the)-353(actual)-353(data)-353(is)-353(esse)1(ntial)-353(for)-353(cases)-353(wher)18(e)-353(it)-353(is)]TJ 0 -11.955 Td [(necessary)-321(to)-321(link)-320(to)-321(data)-321(storage)-321(made)-320(available)-321(elsewher)18(e)-321(outside)-320(the)-321(dir)18(ect)]TJ 0 -11.955 Td [(contr)18(ol)-231(of)-231(the)-231(compiler/application,)-235(e.g.)-304(data)-231(stor)18(ed)-231(in)-231(a)-231(graphics)-231(accelerator)-74('s)]TJ 0 -11.955 Td [(private)-250(memory)111(.)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 99.895 452.975 cm +0 0 343.711 82.69 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 112.299 525.005 Td [(type)]TJ +0 g 0 G + [-525(psb_T_base_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(TYPE)]TJ +0 g 0 G + [(\050KIND_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_T_base_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(type)]TJ +0 g 0 G + [-525(psb_T_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_vect_type\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-1050(psb_T_vect_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 6.677 -41.429 Td [(Listing)-250(3:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(dense)-250(vector)74(.)]TJ/F59 9.9626 Tf -19.081 -39.929 Td [(3.3.1)-1000(V)111(ector)-250(Methods)]TJ 0 -19.174 Td [(3.3.2)-1000(get)]TJ +ET +q +1 0 0 1 144.219 358.919 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 358.719 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(dense)-250(vector)]TJ/F67 9.9626 Tf -47.313 -19.173 Td [(nr)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v%get_nrows\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.359 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.358 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -34.198 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.358 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(dense)-250(vector)]TJ/F67 9.9626 Tf 161.273 0 Td [(v)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ/F59 9.9626 Tf -239.281 -27.757 Td [(3.3.3)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(dense)-250(vector)]TJ/F67 9.9626 Tf 0 -19.174 Td [(memory_size)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v%sizeof\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.358 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 166.875 -29.888 Td [(25)]TJ +0 g 0 G +ET + +endstream +endobj +1097 0 obj << -/Type /Page -/Contents 925 0 R -/Resources 923 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/Length 3867 >> -% 926 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.338 -36.868 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -23.918 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.776 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F59 9.9626 Tf -72.776 -32.82 Td [(3.3.4)-1000(set)-250(\227)-250(Set)-250(contents)-250(of)-250(the)-250(vector)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 5.23 -20.898 Td [(call)]TJ +0 g 0 G + [-1050(v%set\050alpha[,first,last]\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-1050(v%set\050vect[,first,last]\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-1050(v%zero\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf -5.23 -24.913 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -23.918 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -23.918 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.338 -35.873 Td [(alpha)]TJ +0 g 0 G +/F62 9.9626 Tf 30.436 0 Td [(A)-250(scalar)-250(value.)]TJ -5.53 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -23.918 Td [(\002rst,last)]TJ +0 g 0 G +/F62 9.9626 Tf 41.215 0 Td [(Boundaries)-250(for)-250(setting)-250(in)-250(the)-250(vector)74(.)]TJ -16.309 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integers.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -23.918 Td [(vect)]TJ +0 g 0 G +/F62 9.9626 Tf 23.242 0 Td [(An)-250(array)]TJ 1.664 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ +0 g 0 G + [(.)]TJ -24.906 -25.91 Td [(Note)-336(that)-336(a)-335(call)-336(to)]TJ/F67 9.9626 Tf 84.614 0 Td [(v%zero\050\051)]TJ/F62 9.9626 Tf 45.189 0 Td [(is)-336(pr)18(ovided)-336(as)-335(a)-336(shorthand,)-358(but)-335(is)-336(equivalent)-336(to)]TJ -129.803 -11.956 Td [(a)-270(call)-270(to)]TJ/F67 9.9626 Tf 36.947 0 Td [(v%set\050zero\051)]TJ/F62 9.9626 Tf 60.225 0 Td [(with)-270(the)]TJ/F67 9.9626 Tf 39.456 0 Td [(zero)]TJ/F62 9.9626 Tf 23.613 0 Td [(constant)-270(having)-270(the)-271(appr)18(opriat)1(e)-271(type)-270(and)]TJ -160.241 -11.955 Td [(kind.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -25.91 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -23.918 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)74(,)-250(with)-250(updated)-250(entries)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G +/F62 9.9626 Tf 110.536 -41.843 Td [(26)]TJ +0 g 0 G +ET + +endstream +endobj +1104 0 obj << -/D [924 0 R /XYZ 98.895 753.953 null] +/Length 4464 >> -% 59 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(3.3.5)-1000(get)]TJ +ET +q +1 0 0 1 144.219 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 706.129 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -47.313 -19.66 Td [(extv)-525(=)-525(v%get_vect\050[n]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.994 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -21.362 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.361 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -33.316 Td [(n)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(Size)-250(to)-250(be)-250(r)18(eturned)]TJ 13.838 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(;)-250(default:)-310(entir)18(e)-250(vector)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -90.182 -34.95 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.361 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(An)-316(allocatable)-316(array)-316(holding)-316(a)-317(copy)-316(of)-316(the)-316(dense)-316(vector)-316(con-)]TJ -47.87 -11.955 Td [(tents.)-321(If)-254(the)-254(ar)18(gument)]TJ/F60 9.9626 Tf 98.086 0 Td [(n)]TJ/F62 9.9626 Tf 8.192 0 Td [(is)-254(speci\002ed,)-255(the)-253(size)-254(of)-254(the)-254(r)18(eturned)-254(array)-253(equals)]TJ -106.278 -11.955 Td [(the)-339(minimum)-339(between)]TJ/F60 9.9626 Tf 105.247 0 Td [(n)]TJ/F62 9.9626 Tf 9.041 0 Td [(and)-339(the)-339(internal)-339(size)-339(of)-339(the)-339(vector)74(,)-361(or)-339(0)-339(if)]TJ/F60 9.9626 Tf 188.353 0 Td [(n)]TJ/F62 9.9626 Tf 9.04 0 Td [(is)]TJ -311.681 -11.956 Td [(negative;)-314(otherwise,)-303(the)-292(size)-293(of)-292(the)-293(array)-292(is)-293(the)-292(same)-293(as)-292(the)-293(internal)-292(size)]TJ 0 -11.955 Td [(of)-250(the)-250(vector)74(.)]TJ/F59 9.9626 Tf -24.907 -29.183 Td [(3.3.6)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf 0 -19.659 Td [(call)-1050(x%clone\050y,info\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.995 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -21.361 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.362 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(dense)-250(vector)74(.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -34.95 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.361 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +0 g 0 G +/F59 9.9626 Tf -10.521 -21.361 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F59 11.9552 Tf -23.801 -31.176 Td [(3.4)-1000(Preconditioner)-250(data)-250(structure)]TJ/F62 9.9626 Tf 0 -19.659 Td [(Our)-396(base)-397(l)1(ibrary)-397(of)18(fers)-396(support)-396(for)-396(simple)-397(well)-396(known)-396(pr)18(econditioners)-396(like)]TJ 0 -11.956 Td [(Diagonal)-250(Scaling)-250(or)-250(Block)-250(Jacobi)-250(with)-250(incomplete)-250(factorization)-250(ILU\0500\051.)]TJ 14.944 -12.314 Td [(A)-361(pr)18(econditioner)-361(is)-361(held)-361(in)-361(the)]TJ/F67 9.9626 Tf 143.781 0 Td [(psb)]TJ +ET +q +1 0 0 1 274.939 168.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 278.077 168.146 Td [(prec)]TJ +ET +q +1 0 0 1 299.626 168.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 302.764 168.146 Td [(type)]TJ/F62 9.9626 Tf 24.519 0 Td [(data)-361(str)8(uctur)18(e)-361(r)18(eported)-361(in)]TJ -227.388 -11.955 Td [(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-282(4)]TJ +0 g 0 G + [(.)-407(The)]TJ/F67 9.9626 Tf 59.933 0 Td [(psb_prec_type)]TJ/F62 9.9626 Tf 70.808 0 Td [(data)-282(type)-283(may)-282(contain)-282(a)-283(simple)-282(pr)18(econditioning)]TJ -130.741 -11.955 Td [(matrix)-376(with)-376(the)-376(associated)-376(communication)-375(descriptor)74(.The)-376(internal)-376(pr)18(econdi-)]TJ 0 -11.955 Td [(tioner)-317(is)-317(allocated)-318(appr)18(opriately)-317(with)-317(the)-317(dynamic)-318(type)-317(corr)18(esponding)-317(to)-317(the)]TJ 0 -11.955 Td [(desir)18(ed)-250(pr)18(econditioner)74(.)]TJ +0 g 0 G + 166.875 -29.888 Td [(27)]TJ +0 g 0 G +ET + +endstream +endobj +1110 0 obj << -/D [924 0 R /XYZ 99.895 716.092 null] +/Length 4234 >> -% 63 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +q +1 0 0 1 150.705 671.26 cm +0 0 343.711 38.854 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 163.108 699.454 Td [(type)]TJ +0 g 0 G + [-525(psb_Tprec_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_prec_type\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(prec)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_Tprec_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 1.845 -41.429 Td [(Listing)-250(4:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(pr)18(econditioner)74(.)]TJ/F59 11.9552 Tf -14.248 -32.698 Td [(3.5)-1000(Heap)-250(data)-250(structure)]TJ/F62 9.9626 Tf 0 -18.964 Td [(Among)-310(the)-311(tools)-310(r)18(outines)-310(of)-310(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-311(6)]TJ +0 g 0 G + [(,)-325(we)-310(have)-311(a)-310(number)-310(of)-311(so)1(rting)-311(utilities;)-340(the)]TJ 0 -11.955 Td [(heap)-250(sort)-250(is)-250(implemented)-250(in)-250(terms)-250(of)-250(heaps)-250(having)-250(the)-250(following)-250(signatur)18(es:)]TJ +0 g 0 G +/F67 9.9626 Tf 0 -19.925 Td [(psb)]TJ +ET +q +1 0 0 1 167.023 552.764 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 170.162 552.565 Td [(T)]TJ +ET +q +1 0 0 1 176.02 552.764 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 179.158 552.565 Td [(heap)]TJ +0 g 0 G +/F62 9.9626 Tf 25.903 0 Td [(:)-333(a)-262(heap)-262(containing)-262(e)1(lements)-262(of)-262(type)-262(T)74(,)-261(wher)18(e)-262(T)-262(can)-261(be)]TJ/F67 9.9626 Tf 242.282 0 Td [(i,s,c,d,z)]TJ/F62 9.9626 Tf -271.731 -11.955 Td [(for)-250(integer)74(,)-250(r)18(eal)-250(and)-250(complex)-250(data;)]TJ +0 g 0 G +/F67 9.9626 Tf -24.907 -19.925 Td [(psb)]TJ +ET +q +1 0 0 1 167.023 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 170.162 520.685 Td [(T)]TJ +ET +q +1 0 0 1 176.02 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 179.158 520.685 Td [(idx)]TJ +ET +q +1 0 0 1 195.476 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 198.615 520.685 Td [(heap)]TJ +0 g 0 G +/F62 9.9626 Tf 25.902 0 Td [(:)-289(a)-207(heap)-207(containing)-207(elements)-207(of)-207(type)-207(T)74(,)-207(as)-207(above,)-215(together)-207(with)]TJ -48.906 -11.956 Td [(an)-250(integer)-250(index.)]TJ -24.906 -19.925 Td [(Given)-250(a)-250(heap)-250(object,)-250(the)-250(following)-250(methods)-250(ar)18(e)-250(de\002ned)-250(on)-250(it:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -19.925 Td [(init)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(Initialize)-250(memory;)-250(also)-250(choose)-250(ascending)-250(or)-250(descending)-250(or)18(der;)]TJ +0 g 0 G +/F59 9.9626 Tf -21.021 -19.925 Td [(howmany)]TJ +0 g 0 G +/F62 9.9626 Tf 50.371 0 Td [(Curr)18(ent)-250(heap)-250(occupancy;)]TJ +0 g 0 G +/F59 9.9626 Tf -50.371 -19.926 Td [(insert)]TJ +0 g 0 G +/F62 9.9626 Tf 30.983 0 Td [(Add)-250(an)-250(item)-250(\050or)-250(an)-250(item)-250(and)-250(its)-250(index\051;)]TJ +0 g 0 G +/F59 9.9626 Tf -30.983 -19.925 Td [(get)]TJ +ET +q +1 0 0 1 165.141 409.302 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 168.129 409.103 Td [(\002rst)]TJ +0 g 0 G +/F62 9.9626 Tf 22.685 0 Td [(Remove)-250(and)-250(r)18(eturn)-250(the)-250(\002rst)-250(element;)]TJ +0 g 0 G +/F59 9.9626 Tf -40.109 -19.925 Td [(dump)]TJ +0 g 0 G +/F62 9.9626 Tf 32.099 0 Td [(Print)-250(on)-250(\002le;)]TJ +0 g 0 G +/F59 9.9626 Tf -32.099 -19.926 Td [(free)]TJ +0 g 0 G +/F62 9.9626 Tf 22.695 0 Td [(Release)-250(memory)111(.)]TJ -22.695 -19.925 Td [(These)-305(objects)-305(ar)18(e)-305(used)-305(in)-305(AMG4PSBLAS)-305(to)-305(implement)-305(the)-305(factorization)-305(algo-)]TJ 0 -11.955 Td [(rithms.)]TJ +0 g 0 G + 166.874 -246.934 Td [(28)]TJ +0 g 0 G +ET + +endstream +endobj +1114 0 obj << -/D [924 0 R /XYZ 99.895 529.559 null] +/Length 158 >> +stream +0 g 0 G +0 g 0 G +BT +/F59 14.3462 Tf 99.895 705.784 Td [(4)-1000(Computational)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 166.875 -615.346 Td [(29)]TJ +0 g 0 G +ET endstream endobj -931 0 obj +1125 0 obj << -/Length 4754 +/Length 7465 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.1.8)-1000(Clone)-250(\227)-250(clone)-250(current)-250(object)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.1)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ/F62 9.9626 Tf -51.12 -19.189 Td [(This)-358(subr)18(outine)-358(is)-359(an)-358(interface)-358(to)-358(the)-358(computational)-359(kernel)-358(for)-358(dense)-358(matrix)]TJ 0 -11.955 Td [(sum:)]TJ/F60 9.9626 Tf 143.149 -12.304 Td [(y)]TJ/F91 10.3811 Tf 7.998 0 Td [(\040)]TJ/F68 9.9626 Tf 13.397 0 Td [(a)]TJ/F60 9.9626 Tf 7.616 0 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -175.407 -18.398 Td [(call)]TJ +0 g 0 G + [-525(psb_geaxpby\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +q +1 0 0 1 227.737 629.682 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q +BT +/F60 9.9626 Tf 234.009 621.114 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(,)]TJ/F68 9.9626 Tf 5.105 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(,)]TJ/F68 9.9626 Tf 5.355 0 Td [(b)]TJ/F59 9.9626 Tf 89.359 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 227.737 617.328 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q +BT +/F62 9.9626 Tf 233.715 608.761 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 370.948 608.96 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 373.937 608.761 Td [(geaxpby)]TJ -140.222 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 370.948 597.005 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 373.937 596.805 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 370.948 585.05 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 373.937 584.85 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 370.948 573.094 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 373.937 572.895 Td [(geaxpby)]TJ +ET +q +1 0 0 1 227.737 569.109 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 280.768 540.731 Td [(T)92(able)-250(1:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%clone\050descout,info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -130.063 -35.05 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.39 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.598 Td [(desc)]TJ + 0 -20.391 Td [(alpha)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.368 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -34.377 Td [(On)-250(Return)]TJ + [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.907 -20.391 Td [(x)]TJ 0 g 0 G - 0 -20.597 Td [(descout)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(o)1(r)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 348.869 Td [(T)]TJ +ET +q +1 0 0 1 445.669 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 348.869 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 348.869 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-312(1)]TJ 0 g 0 G -/F54 9.9626 Tf 39.84 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ + [(.)-494(The)-312(rank)-312(of)]TJ/F60 9.9626 Tf 274.834 0 Td [(x)]TJ/F62 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -39.84 -20.597 Td [(info)]TJ +/F59 9.9626 Tf -82.958 -20.391 Td [(beta)]TJ +0 g 0 G +/F62 9.9626 Tf 24.348 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.618 0 Td [(b)]TJ/F62 9.9626 Tf 5.524 0 Td [(.)]TJ -49.584 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.097 Td [(3.1.9)-1000(CNV)-250(\227)-250(convert)-250(internal)-250(storage)-250(format)]TJ + [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.906 -20.391 Td [(y)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%cnv\050mold\051)]TJ +/F62 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 191.754 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(.)]TJ -182.473 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 188.537 Td [(T)]TJ +ET +q +1 0 0 1 445.669 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 188.537 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 188.537 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf -297.884 -11.956 Td [(containing)-276(numbers)-277(of)-276(the)-276(type)-276(indicated)-277(in)-276(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-276(1)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(.)-389(The)-276(rank)-277(of)]TJ/F60 9.9626 Tf 288.67 0 Td [(y)]TJ/F62 9.9626 Tf 7.859 0 Td [(must)]TJ -296.529 -11.955 Td [(be)-250(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 65.888 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -95.999 -20.39 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 144.236 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ 0 g 0 G - 0 -20.597 Td [(desc)]TJ +/F62 9.9626 Tf 114.879 -29.888 Td [(30)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +ET + +endstream +endobj +1132 0 obj +<< +/Length 2404 +>> +stream 0 g 0 G -/F51 9.9626 Tf -77.917 -32.553 Td [(mold)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(desir)18(ed)-250(integer)-250(storage)-250(format.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 221.926 0 Td [(psb)]TJ +BT +/F62 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 413.855 356.476 cm +1 0 0 1 273.363 694.373 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 416.994 356.277 Td [(T)]TJ +/F67 9.9626 Tf 276.501 694.174 Td [(desc)]TJ ET q -1 0 0 1 422.851 356.476 cm +1 0 0 1 298.05 694.373 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 425.99 356.277 Td [(base)]TJ +/F67 9.9626 Tf 301.189 694.174 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -222.215 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.68 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 447.539 356.476 cm +1 0 0 1 385.864 604.709 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 450.677 356.277 Td [(vect)]TJ +/F67 9.9626 Tf 389.002 604.51 Td [(T)]TJ ET q -1 0 0 1 472.226 356.476 cm +1 0 0 1 394.86 604.709 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 475.364 356.277 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -345.58 -22.59 Td [(The)]TJ/F59 9.9626 Tf 19.583 0 Td [(mold)]TJ/F54 9.9626 Tf 23.828 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ/F51 9.9626 Tf 0 -28.096 Td [(3.1.10)-1000(psb)]TJ +/F67 9.9626 Tf 397.998 604.51 Td [(vect)]TJ ET q -1 0 0 1 202.769 293.835 cm +1 0 0 1 419.547 604.709 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.685 604.51 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -434.371 Td [(31)]TJ +0 g 0 G +ET + +endstream +endobj +1142 0 obj +<< +/Length 7647 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(4.2)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(gedot)-250(\227)-250(Dot)-250(Product)]TJ/F62 9.9626 Tf -51.12 -18.976 Td [(This)-250(function)-250(computes)-250(dot)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F60 9.9626 Tf 254.647 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.481 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -286.93 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.459 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 7.597 0 Td [(ar)18(e)-250(r)18(eal)-250(vectors)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F60 9.9626 Tf 104.717 -23.132 Td [(d)-25(o)-35(t)]TJ/F91 10.3811 Tf 16.337 0 Td [(\040)]TJ/F60 9.9626 Tf 13.566 0 Td [(x)]TJ/F60 7.5716 Tf 5.399 4.115 Td [(T)]TJ/F60 9.9626 Tf 5.525 -4.115 Td [(y)]TJ/F62 9.9626 Tf -189.778 -21.93 Td [(Else)-250(if)]TJ/F60 9.9626 Tf 29.474 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 7.596 0 Td [(ar)18(e)-250(complex)-250(vectors)-250(then)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F60 9.9626 Tf 83.965 -23.132 Td [(d)-25(o)-35(t)]TJ/F91 10.3811 Tf 16.336 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F60 7.5716 Tf 5.588 4.115 Td [(H)]TJ/F60 9.9626 Tf 6.812 -4.115 Td [(y)]TJ/F67 9.9626 Tf -175.572 -21.937 Td [(psb_gedot\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,global]\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +q +1 0 0 1 233.929 570.686 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +Q +BT +/F60 9.9626 Tf 240.031 562.118 Td [(d)-25(o)-35(t)]TJ/F62 9.9626 Tf 13.444 0 Td [(,)]TJ/F60 9.9626 Tf 5.276 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F59 9.9626 Tf 91.76 0 Td [(Function)]TJ +ET +q +1 0 0 1 233.929 558.332 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +Q +BT +/F62 9.9626 Tf 239.906 549.765 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 377.14 549.964 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 205.758 293.636 Td [(cd)]TJ +/F62 9.9626 Tf 380.129 549.765 Td [(gedot)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 216.867 293.835 cm +1 0 0 1 377.14 538.009 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 219.855 293.636 Td [(get)]TJ +/F62 9.9626 Tf 380.129 537.809 Td [(gedot)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 234.291 293.835 cm +1 0 0 1 377.14 526.053 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 237.28 293.636 Td [(large)]TJ +/F62 9.9626 Tf 380.129 525.854 Td [(gedot)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 260.572 293.835 cm +1 0 0 1 377.14 514.098 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 263.561 293.636 Td [(threshold)-190(\227)-190(Get)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -112.856 -19.29 Td [(ith)-525(=)-525(psb_cd_get_large_threshold\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.421 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 380.129 513.899 Td [(gedot)]TJ +ET +q +1 0 0 1 233.929 510.113 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 280.768 481.735 Td [(T)92(able)-250(2:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F59 9.9626 Tf -130.063 -34.507 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.951 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.951 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 359.506 Td [(T)]TJ +ET +q +1 0 0 1 445.669 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 359.506 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 359.506 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.956 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-312(2)]TJ +0 g 0 G + [(.)-494(The)-312(rank)-312(of)]TJ/F60 9.9626 Tf 274.834 0 Td [(x)]TJ/F62 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -82.958 -19.951 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 267.824 Td [(T)]TJ +ET +q +1 0 0 1 445.669 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 267.824 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 267.824 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-313(2)]TJ 0 g 0 G - 0 -20.598 Td [(Function)-250(value)]TJ + [(.)-500(The)-314(rank)-313(of)]TJ/F60 9.9626 Tf 274.898 0 Td [(y)]TJ/F62 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.956 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 53.116 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(curr)18(ent)-250(value)-250(for)-250(the)-250(size)-250(thr)18(eshold.)]TJ/F51 9.9626 Tf -72.777 -28.096 Td [(3.1.11)-1000(psb)]TJ -ET -q -1 0 0 1 202.769 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 205.758 182.634 Td [(cd)]TJ +/F59 9.9626 Tf -83.227 -19.95 Td [(desc)]TJ ET q -1 0 0 1 216.867 182.833 cm +1 0 0 1 171.218 224.162 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 219.855 182.634 Td [(set)]TJ +/F59 9.9626 Tf 174.207 223.963 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 233.175 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 324.173 176.341 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 236.164 182.634 Td [(large)]TJ +/F67 9.9626 Tf 327.311 176.142 Td [(desc)]TJ ET q -1 0 0 1 259.457 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 348.86 176.341 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 262.445 182.634 Td [(threshold)-190(\227)-190(Set)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ +/F67 9.9626 Tf 351.998 176.142 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -111.74 -19.289 Td [(call)-525(psb_cd_set_large_threshold\050ith\051)]TJ +/F59 9.9626 Tf -222.214 -19.951 Td [(global)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(14)]TJ + 76.693 -29.888 Td [(32)]TJ 0 g 0 G ET endstream endobj -936 0 obj +1148 0 obj << -/Length 5640 +/Length 4270 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(ith)]TJ +/F62 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F67 9.9626 Tf 165.318 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 17.704 0 Td [(the)-250(new)-250(thr)18(eshold)-250(for)-250(communication)-250(descriptors.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(gr)18(eater)-250(than)-250(zer)18(o.)]TJ -24.907 -20.813 Td [(Note:)-665(the)-427(thr)18(eshold)-428(value)-427(is)-428(only)-427(queried)-428(by)-427(the)-428(library)-427(at)-428(the)-427(time)-428(a)-427(call)]TJ 0 -11.955 Td [(to)]TJ/F59 9.9626 Tf 12.451 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 50.837 0 Td [(is)-378(executed,)-409(ther)18(efor)18(e)-378(changing)-378(the)-378(thr)18(eshold)-377(has)-378(no)-378(ef)18(fect)-378(on)]TJ -63.288 -11.955 Td [(communication)-339(descriptors)-340(that)-339(have)-339(alr)18(eady)-340(been)-339(initialized.)-578(Mor)18(eover)-339(the)]TJ 0 -11.955 Td [(thr)18(eshold)-250(must)-250(have)-250(the)-250(same)-250(value)-250(on)-250(all)-250(pr)18(ocesses.)]TJ/F51 9.9626 Tf 0 -26.933 Td [(3.1.12)-1000(get)]TJ -ET -q -1 0 0 1 149.2 574.896 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 152.189 574.697 Td [(p)]TJ -ET -q -1 0 0 1 158.874 574.896 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 161.863 574.697 Td [(adjcncy)-250(\227)-250(Get)-250(process)-250(adjacency)-250(list)]TJ + [(.true.)]TJ 0 g 0 G +/F59 9.9626 Tf -190.225 -31.881 Td [(On)-250(Return)]TJ 0 g 0 G -/F59 9.9626 Tf -61.968 -18.964 Td [(list)-525(=)-1050(desc%get_p_adjcncy\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F60 9.9626 Tf 126.33 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(.)]TJ -206.483 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.133 0 Td [(global)]TJ/F62 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F67 9.9626 Tf 121.612 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Return)]TJ + [(.false.)]TJ/F62 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -19.484 Td [(Function)-250(value)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-190(curr)18(ent)-190(list)-190(of)-190(adjacent)-190(pr)18(ocesses,)-202(i.e.)-290(pr)18(ocesses)-190(with)-190(which)]TJ -47.87 -11.955 Td [(the)-250(curr)18(ent)-250(one)-250(has)-250(to)-250(exchange)-250(halo)-250(data.)]TJ/F51 9.9626 Tf -24.907 -26.933 Td [(3.1.13)-1000(set)]TJ -ET -q -1 0 0 1 148.085 457.264 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 151.073 457.065 Td [(p)]TJ -ET -q -1 0 0 1 157.758 457.264 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 160.747 457.065 Td [(adjcncy)-250(\227)-250(Set)-250(process)-250(adjacency)-250(list)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ 0 g 0 G +/F62 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ 0 g 0 G -/F59 9.9626 Tf -60.852 -18.964 Td [(call)-525(desc%set_p_adjcncy\050list\051)]TJ + [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-204(multiple)-204(dot)-204(pr)18(oducts)-204(at)-204(the)-204(same)-204(time;)-219(in)-204(this)-204(case,)-213(it)-204(is)-204(possible)]TJ 0 -11.955 Td [(to)-250(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf 52.303 -19.925 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.483 Td [(list)]TJ + [-525(psb_gedot\050x1,y1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 19.358 0 Td [(the)-250(list)-250(of)-250(adjacent)-250(pr)18(ocesses.)]TJ 5.549 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F59 9.9626 Tf 250.209 0 Td [(psb_ipk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -316.959 -20.813 Td [(Note:)-596(this)-392(method)-393(can)-393(be)-393(called)-393(after)-392(a)-393(call)-393(to)]TJ/F59 9.9626 Tf 216.367 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 50.987 0 Td [(and)-393(befor)18(e)-393(a)-392(call)]TJ -267.354 -11.955 Td [(to)]TJ/F59 9.9626 Tf 11.711 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)-470(The)-304(user)-303(is)-304(specifying)-303(her)18(e)-304(some)-303(knowledge)-304(about)-303(which)-304(pr)18(o-)]TJ -58.784 -11.955 Td [(cesses)-208(ar)18(e)-208(topol)1(ogical)-208(neighbours)-208(of)-208(the)-207(curr)18(ent)-208(pr)18(ocess.)-296(The)-208(availability)-207(of)-208(this)]TJ 0 -11.955 Td [(information)-250(may)-250(speed)-250(up)-250(the)-250(execution)-250(of)-250(the)-250(assembly)-250(call)]TJ/F59 9.9626 Tf 269.656 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ/F51 9.9626 Tf -316.729 -26.934 Td [(3.1.14)-1000(fnd)]TJ -ET -q -1 0 0 1 151.412 247.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 154.401 246.89 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(process)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 0 -11.956 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(2)]TJ 0 g 0 G -/F59 9.9626 Tf -54.506 -18.964 Td [(call)-525(desc%fnd_owner\050idx,iprc,info\051)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ + [-525(psb_gedot\050x2,y2,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.484 Td [(idx)]TJ + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(the)-250(list)-250(of)-250(global)-250(indices)-250(for)-250(which)-250(we)-250(need)-250(the)-250(owning)-250(pr)18(ocesses.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F59 9.9626 Tf 250.209 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -150.084 -29.888 Td [(15)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_gedot\050x3,y3,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-525(psb_sum\050ctxt,vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ +0 g 0 G + [(\051\051)]TJ/F62 9.9626 Tf -52.303 -19.925 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ +0 g 0 G + 141.968 -282.939 Td [(33)]TJ 0 g 0 G ET endstream endobj -943 0 obj +1159 0 obj << -/Length 7676 +/Length 8519 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -21.934 Td [(iprc)]TJ -0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(the)-250(list)-250(of)-250(pr)18(ocesses)-250(owning)-250(the)-250(indices)-250(in)]TJ/F59 9.9626 Tf 184.993 0 Td [(idx)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ -198.463 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-408(as:)-627(an)-408(allocatable)-408(one-dimensional)-408(array)-409(of)-408(integers)-408(of)-408(kind)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_ipk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -66.749 -23.926 Td [(Note:)-349(this)-269(method)-269(may)-270(or)-269(may)-269(not)-269(actually)-270(r)18(equir)18(e)-269(communications,)-274(depend-)]TJ 0 -11.956 Td [(ing)-283(on)-283(the)-283(ex)1(a)-1(c)1(t)-283(internal)-283(data)-283(storage;)-299(given)-283(that)-283(the)-283(choice)-283(of)-282(storage)-283(may)-283(be)]TJ 0 -11.955 Td [(alter)18(ed)-376(by)-375(r)8(untime)-376(parameters,)-407(it)-376(is)-375(necessary)-376(for)-376(safety)-375(that)-376(this)-375(method)-376(is)]TJ 0 -11.955 Td [(called)-250(by)-250(all)-250(pr)18(ocesses.)]TJ/F51 9.9626 Tf 0 -29.998 Td [(3.1.15)-1000(Named)-250(Constants)]TJ -0 g 0 G - 0 -19.937 Td [(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.3)-1000(psb)]TJ ET q -1 0 0 1 167.9 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 170.889 514.692 Td [(none)]TJ -ET -q -1 0 0 1 194.182 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +/F59 11.9552 Tf 201.825 706.129 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Product)]TJ/F62 9.9626 Tf -51.12 -18.964 Td [(This)-283(subr)18(outine)-284(computes)-283(a)-284(series)-284(of)-283(dot)-284(pr)18(oducts)-283(among)-284(the)-283(columns)-284(of)-283(two)]TJ 0 -11.955 Td [(dense)-250(matrices)]TJ/F60 9.9626 Tf 68.208 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(:)]TJ/F60 9.9626 Tf 24.807 -13.101 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.293 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F62 9.9626 Tf 4.274 0 Td [(:)-12(,)]TJ/F60 9.9626 Tf 6.821 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F60 7.5716 Tf 4.343 4.114 Td [(T)]TJ/F60 9.9626 Tf 5.525 -4.114 Td [(y)]TJ/F93 10.3811 Tf 5.23 0 Td [(\050)]TJ/F62 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F60 9.9626 Tf 6.821 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F62 9.9626 Tf -214.288 -16.876 Td [(If)-300(the)-299(matrices)-300(ar)18(e)-299(complex,)-312(then)-300(the)-300(usual)-299(convention)-300(applies,)-312(i.e.)-459(the)-299(conju-)]TJ 0 -11.955 Td [(gate)-239(transpose)-239(of)]TJ/F60 9.9626 Tf 77.351 0 Td [(x)]TJ/F62 9.9626 Tf 7.589 0 Td [(is)-239(used.)-307(If)]TJ/F60 9.9626 Tf 45.493 0 Td [(x)]TJ/F62 9.9626 Tf 7.589 0 Td [(and)]TJ/F60 9.9626 Tf 19.375 0 Td [(y)]TJ/F62 9.9626 Tf 7.489 0 Td [(ar)18(e)-239(of)-239(rank)-240(one,)-241(then)]TJ/F60 9.9626 Tf 92.601 0 Td [(r)-17(e)-25(s)]TJ/F62 9.9626 Tf 14.552 0 Td [(is)-239(a)-240(scalar)75(,)-242(else)-239(it)]TJ -272.039 -11.955 Td [(is)-250(a)-250(rank)-250(one)-250(array)111(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 202.152 514.692 Td [(Generic)-250(no-op;)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 20.174 -11.955 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -51.447 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 492.957 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 492.758 Td [(root)]TJ -ET -q -1 0 0 1 189.758 492.957 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [-525(psb_gedots\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 197.728 492.758 Td [(Default)-250(r)18(oot)-250(pr)18(ocess)-250(for)-250(br)18(oadcast)-250(and)-250(scatter)-250(operations;)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -47.023 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 471.023 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 470.824 Td [(nohalo)]TJ -ET -q -1 0 0 1 203.038 471.023 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 211.008 470.824 Td [(Do)-250(not)-250(fetch)-250(halo)-250(elements;)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -60.303 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 449.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 448.89 Td [(halo)]TJ -ET -q -1 0 0 1 191.412 449.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q 0 g 0 G -BT -/F54 9.9626 Tf 199.382 448.89 Td [(Fetch)-250(halo)-250(elements)-250(fr)18(om)-250(neighbouring)-250(pr)18(ocesses;)]TJ 0 g 0 G -/F51 9.9626 Tf -48.677 -21.934 Td [(psb)]TJ ET q -1 0 0 1 167.9 427.155 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 230.392 595.704 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F51 9.9626 Tf 170.889 426.956 Td [(sum)]TJ +/F60 9.9626 Tf 236.394 587.136 Td [(r)-17(e)-25(s)]TJ/F62 9.9626 Tf 12.17 0 Td [(,)]TJ/F60 9.9626 Tf 5.275 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F59 9.9626 Tf 93.135 0 Td [(Subroutine)]TJ ET q -1 0 0 1 190.854 427.155 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 230.392 583.351 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 198.824 426.956 Td [(Sum)-250(overlapped)-250(elements)]TJ -0 g 0 G -/F51 9.9626 Tf -48.119 -21.934 Td [(psb)]TJ +/F62 9.9626 Tf 236.369 574.783 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 167.9 405.221 cm +1 0 0 1 373.603 574.982 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 405.022 Td [(avg)]TJ +/F62 9.9626 Tf 376.592 574.783 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 187.546 405.221 cm +1 0 0 1 373.603 563.027 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 195.517 405.022 Td [(A)92(verage)-250(overlapped)-250(elements)]TJ -0 g 0 G -/F51 9.9626 Tf -44.812 -21.934 Td [(psb)]TJ +/F62 9.9626 Tf 376.592 562.828 Td [(gedots)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 167.9 383.288 cm +1 0 0 1 373.603 551.072 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 383.088 Td [(comm)]TJ +/F62 9.9626 Tf 376.592 550.872 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 199.163 383.288 cm +1 0 0 1 373.603 539.116 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 202.152 383.088 Td [(halo)]TJ +/F62 9.9626 Tf 376.592 538.917 Td [(gedots)]TJ ET q -1 0 0 1 222.674 383.288 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 230.392 535.131 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 230.645 383.088 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(halo_index)]TJ/F54 9.9626 Tf 54.794 0 Td [(list;)]TJ +/F62 9.9626 Tf 280.768 506.753 Td [(T)92(able)-250(3:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F59 9.9626 Tf -130.063 -32.002 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.22 Td [(On)-250(Entry)]TJ +0 g 0 G 0 g 0 G -/F51 9.9626 Tf -259.654 -21.934 Td [(psb)]TJ + 0 -19.22 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ ET q -1 0 0 1 167.9 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 436.673 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 170.889 361.154 Td [(comm)]TJ +/F67 9.9626 Tf 439.811 388.49 Td [(T)]TJ ET q -1 0 0 1 199.163 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 445.669 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 202.152 361.154 Td [(ext)]TJ +/F67 9.9626 Tf 448.807 388.49 Td [(vect)]TJ ET q -1 0 0 1 216.029 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 224 361.154 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(ext_index)]TJ/F54 9.9626 Tf 49.564 0 Td [(list;)]TJ +/F67 9.9626 Tf 473.495 388.49 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -247.779 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 339.22 Td [(comm)]TJ -ET -q -1 0 0 1 199.163 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 202.152 339.22 Td [(ovr)]TJ +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-312(3)]TJ +0 g 0 G + [(.)-494(The)-312(rank)-312(of)]TJ/F60 9.9626 Tf 274.834 0 Td [(x)]TJ/F62 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -82.958 -19.221 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 217.703 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 436.673 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 225.673 339.22 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(ovrlap_index)]TJ/F54 9.9626 Tf 65.255 0 Td [(list;)]TJ -0 g 0 G -/F51 9.9626 Tf -265.143 -21.934 Td [(psb)]TJ +/F67 9.9626 Tf 439.811 297.539 Td [(T)]TJ ET q -1 0 0 1 167.9 317.486 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 445.669 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 170.889 317.286 Td [(comm)]TJ +/F67 9.9626 Tf 448.807 297.539 Td [(vect)]TJ ET q -1 0 0 1 199.163 317.486 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 202.152 317.286 Td [(mov)]TJ +/F67 9.9626 Tf 473.495 297.539 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.956 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-313(3)]TJ +0 g 0 G + [(.)-500(The)-314(rank)-313(of)]TJ/F60 9.9626 Tf 274.898 0 Td [(y)]TJ/F62 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 53.116 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -83.227 -19.22 Td [(desc)]TJ ET q -1 0 0 1 222.684 317.486 cm +1 0 0 1 171.218 254.607 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 230.654 317.286 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.921 0 Td [(ovr_mst_idx)]TJ/F54 9.9626 Tf 60.024 0 Td [(list;)]TJ/F51 11.9552 Tf -264.894 -31.99 Td [(3.2)-1000(Sparse)-250(Matrix)-250(class)]TJ/F54 9.9626 Tf 0 -19.937 Td [(The)]TJ/F59 9.9626 Tf 19.623 0 Td [(psb)]TJ +/F59 9.9626 Tf 174.207 254.408 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 186.647 265.558 cm +1 0 0 1 324.173 206.786 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 189.785 265.359 Td [(Tspmat)]TJ +/F67 9.9626 Tf 327.311 206.587 Td [(desc)]TJ ET q -1 0 0 1 221.795 265.558 cm +1 0 0 1 348.86 206.786 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 224.933 265.359 Td [(type)]TJ/F54 9.9626 Tf 23.868 0 Td [(class)-296(contains)-295(all)-296(information)-296(about)-296(the)-295(local)-296(portion)-296(of)]TJ -98.096 -11.955 Td [(the)-200(sparse)-199(matrix)-200(and)-199(its)-200(storage)-200(mode.)-293(Its)-199(design)-200(is)-200(based)-199(on)-200(the)-200(ST)74(A)74(TE)-199(design)]TJ 0 -11.955 Td [(pattern)-256([)]TJ -1 0 0 rg 1 0 0 RG - [(13)]TJ +/F67 9.9626 Tf 351.998 206.587 Td [(type)]TJ 0 g 0 G - [(])-255(as)-256(detailed)-256(in)-256([)]TJ -1 0 0 rg 1 0 0 RG - [(11)]TJ -0 g 0 G - [(];)-258(the)-256(type)-256(declaration)-255(is)-256(shown)-256(in)-256(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-255(2)]TJ -0 g 0 G - [-256(wher)18(e)]TJ/F59 9.9626 Tf 0 -11.956 Td [(T)]TJ/F54 9.9626 Tf 7.721 0 Td [(is)-250(a)-250(placeholder)-250(for)-250(the)-250(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ -0 g 0 G -/F51 9.9626 Tf -7.721 -21.431 Td [(S)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ +/F59 9.9626 Tf -222.214 -19.22 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -11.068 -21.934 Td [(D)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ -0 g 0 G -/F51 9.9626 Tf -13.28 -21.934 Td [(C)]TJ -0 g 0 G -/F54 9.9626 Tf 12.174 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ -0 g 0 G -/F51 9.9626 Tf -12.174 -21.934 Td [(Z)]TJ -0 g 0 G -/F54 9.9626 Tf 11.626 0 Td [(Double)-250(pr)18(ecision)-250(complex;)]TJ + 0 -19.221 Td [(res)]TJ 0 g 0 G -/F51 9.9626 Tf -11.626 -21.934 Td [(LS,LD,LC,LZ)]TJ +/F62 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F60 9.9626 Tf 126.33 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -151.968 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-289(as:)-389(a)-290(number)-290(or)-289(a)-290(rank-one)-289(array)-290(of)-289(the)-290(data)-289(type)-290(indicated)-289(in)]TJ 0 -11.955 Td [(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ 0 g 0 G -/F54 9.9626 Tf 65.026 0 Td [(Same)-214(numeric)-214(type)-215(as)-214(above,)-221(but)-214(with)]TJ/F59 9.9626 Tf 168.016 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 43.978 0 Td [(integer)-214(indices.)]TJ + [(.)]TJ 0 g 0 G - -110.146 -29.888 Td [(16)]TJ + 141.967 -29.888 Td [(34)]TJ 0 g 0 G ET endstream endobj -950 0 obj +1063 0 obj +<< +/Type /ObjStm +/N 100 +/First 957 +/Length 10603 +>> +stream +1061 0 127 58 131 115 135 172 1058 229 1065 323 1067 441 139 500 143 558 1064 616 +1069 710 1071 828 147 886 151 943 1068 1000 1073 1094 1075 1212 155 1271 159 1329 163 1387 +1072 1445 1077 1539 1079 1657 167 1715 1076 1771 1083 1865 1080 2013 1081 2159 1085 2307 171 2366 +175 2424 179 2481 183 2538 1086 2596 1082 2655 1089 2749 1091 2867 1087 2925 187 2983 191 3040 +195 3097 1088 3154 1096 3263 1093 3411 1094 3557 1098 3702 199 3761 1095 3819 1103 3913 1100 4052 +1105 4200 204 4258 208 4315 212 4371 1106 4428 1102 4486 1109 4593 1101 4732 1111 4879 1107 4938 +216 4997 1108 5055 1113 5164 1115 5282 220 5340 1112 5397 1124 5478 1116 5662 1117 5808 1118 5952 +1119 6098 1120 6244 1121 6388 1126 6534 224 6593 1099 6651 1123 6710 1131 6857 1122 7014 1128 7161 +1129 7305 1133 7451 1130 7509 1141 7616 1135 7791 1136 7932 1137 8078 1138 8222 1139 8367 1143 8514 +228 8573 1144 8631 1140 8690 1147 8810 1145 8949 1149 9095 1150 9153 1146 9211 1158 9318 1151 9502 +% 1061 0 obj +<< +/D [1059 0 R /XYZ 98.895 753.953 null] +>> +% 127 0 obj +<< +/D [1059 0 R /XYZ 99.895 716.092 null] +>> +% 131 0 obj +<< +/D [1059 0 R /XYZ 99.895 526.761 null] +>> +% 135 0 obj +<< +/D [1059 0 R /XYZ 99.895 326.359 null] +>> +% 1058 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1065 0 obj +<< +/Type /Page +/Contents 1066 0 R +/Resources 1064 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +>> +% 1067 0 obj +<< +/D [1065 0 R /XYZ 149.705 753.953 null] +>> +% 139 0 obj +<< +/D [1065 0 R /XYZ 150.705 716.092 null] +>> +% 143 0 obj +<< +/D [1065 0 R /XYZ 150.705 474.131 null] +>> +% 1064 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1069 0 obj +<< +/Type /Page +/Contents 1070 0 R +/Resources 1068 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +>> +% 1071 0 obj +<< +/D [1069 0 R /XYZ 98.895 753.953 null] +>> +% 147 0 obj +<< +/D [1069 0 R /XYZ 99.895 716.092 null] +>> +% 151 0 obj +<< +/D [1069 0 R /XYZ 99.895 376.562 null] +>> +% 1068 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1073 0 obj +<< +/Type /Page +/Contents 1074 0 R +/Resources 1072 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +>> +% 1075 0 obj +<< +/D [1073 0 R /XYZ 149.705 753.953 null] +>> +% 155 0 obj +<< +/D [1073 0 R /XYZ 150.705 716.092 null] +>> +% 159 0 obj +<< +/D [1073 0 R /XYZ 150.705 484.709 null] +>> +% 163 0 obj +<< +/D [1073 0 R /XYZ 150.705 251.325 null] +>> +% 1072 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1077 0 obj +<< +/Type /Page +/Contents 1078 0 R +/Resources 1076 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +>> +% 1079 0 obj +<< +/D [1077 0 R /XYZ 98.895 753.953 null] +>> +% 167 0 obj +<< +/D [1077 0 R /XYZ 99.895 476.15 null] +>> +% 1076 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1083 0 obj +<< +/Type /Page +/Contents 1084 0 R +/Resources 1082 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +/Annots [ 1080 0 R 1081 0 R ] +>> +% 1080 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [248.894 164.341 255.868 176.4] +/A << /S /GoTo /D (section.6) >> +>> +% 1081 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [343.512 128.475 350.485 140.535] +/A << /S /GoTo /D (listing.3) >> +>> +% 1085 0 obj +<< +/D [1083 0 R /XYZ 149.705 753.953 null] +>> +% 171 0 obj +<< +/D [1083 0 R /XYZ 150.705 716.092 null] +>> +% 175 0 obj +<< +/D [1083 0 R /XYZ 150.705 586.94 null] +>> +% 179 0 obj +<< +/D [1083 0 R /XYZ 150.705 402.59 null] +>> +% 183 0 obj +<< +/D [1083 0 R /XYZ 150.705 234.114 null] +>> +% 1086 0 obj +<< +/D [1083 0 R /XYZ 170.799 204.012 null] +>> +% 1082 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1089 0 obj +<< +/Type /Page +/Contents 1090 0 R +/Resources 1088 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +>> +% 1091 0 obj +<< +/D [1089 0 R /XYZ 98.895 753.953 null] +>> +% 1087 0 obj +<< +/D [1089 0 R /XYZ 99.895 446.997 null] +>> +% 187 0 obj +<< +/D [1089 0 R /XYZ 99.895 387.147 null] +>> +% 191 0 obj +<< +/D [1089 0 R /XYZ 99.895 370.604 null] +>> +% 195 0 obj +<< +/D [1089 0 R /XYZ 99.895 194.093 null] +>> +% 1088 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1096 0 obj +<< +/Type /Page +/Contents 1097 0 R +/Resources 1095 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +/Annots [ 1093 0 R 1094 0 R ] +>> +% 1093 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 383.557 435.942 395.616] +/A << /S /GoTo /D (table.1) >> +>> +% 1094 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 240.08 435.942 252.139] +/A << /S /GoTo /D (table.1) >> +>> +% 1098 0 obj +<< +/D [1096 0 R /XYZ 149.705 753.953 null] +>> +% 199 0 obj +<< +/D [1096 0 R /XYZ 150.705 610.712 null] +>> +% 1095 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1103 0 obj +<< +/Type /Page +/Contents 1104 0 R +/Resources 1102 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +/Annots [ 1100 0 R ] +>> +% 1100 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [127.814 152.385 134.788 164.445] +/A << /S /GoTo /D (listing.4) >> +>> +% 1105 0 obj +<< +/D [1103 0 R /XYZ 98.895 753.953 null] +>> +% 204 0 obj +<< +/D [1103 0 R /XYZ 99.895 716.092 null] +>> +% 208 0 obj +<< +/D [1103 0 R /XYZ 99.895 430.41 null] +>> +% 212 0 obj +<< +/D [1103 0 R /XYZ 99.895 226.203 null] +>> +% 1106 0 obj +<< +/D [1103 0 R /XYZ 258.62 168.146 null] +>> +% 1102 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1109 0 obj +<< +/Type /Page +/Contents 1110 0 R +/Resources 1108 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +/Annots [ 1101 0 R ] +>> +% 1101 0 obj << -/Length 7032 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [297.461 580.64 304.435 592.699] +/A << /S /GoTo /D (section.6) >> >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -q -1 0 0 1 99.895 671.26 cm -0 0 343.711 38.854 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 112.299 699.454 Td [(type)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ -0 g 0 G - [-525(psb_Tspmat_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(class)]TJ -0 g 0 G - [(\050psb_T_base_sparse_mat\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ -0 g 0 G - [-525(a)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ -0 g 0 G - [-1050(psb_Tspmat_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 4.295 -41.429 Td [(Listing)-250(2:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(sparse)-250(matrix.)]TJ -16.699 -32.661 Td [(The)-190(actual)-190(data)-190(is)-190(contained)-190(in)-190(the)-190(polymorphic)-190(component)]TJ/F59 9.9626 Tf 259.484 0 Td [(a%a)]TJ/F54 9.9626 Tf 17.584 0 Td [(of)-190(type)]TJ/F59 9.9626 Tf 32.089 0 Td [(psb)]TJ -ET -q -1 0 0 1 425.371 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 428.509 603.446 Td [(T)]TJ -ET -q -1 0 0 1 434.367 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 437.505 603.446 Td [(base)]TJ -ET -q -1 0 0 1 459.054 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 462.193 603.446 Td [(sparse)]TJ -ET -q -1 0 0 1 494.202 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 497.341 603.446 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(;)]TJ -413.137 -11.955 Td [(its)-306(speci)1(\002c)-306(layout)-305(can)-306(be)-305(chosen)-306(dynamically)-305(among)-306(the)-305(pr)18(ede\002ned)-306(types,)-319(or)]TJ 0 -11.955 Td [(an)-305(entir)18(ely)-305(new)-305(storage)-305(layout)-305(can)-305(be)-305(implemented)-304(and)-305(passed)-305(to)-305(the)-305(library)]TJ 0 -11.955 Td [(at)-231(r)8(untime)-231(via)-231(the)]TJ/F59 9.9626 Tf 80.145 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 49.377 0 Td [(r)18(outine.)-304(The)-231(following)-231(very)-231(common)-231(formats)-231(ar)18(e)]TJ -129.522 -11.955 Td [(pr)18(ecompiled)-250(in)-250(PSBLAS)-250(and)-250(thus)-250(ar)18(e)-250(always)-250(available:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.889 Td [(psb)]TJ -ET -q -1 0 0 1 117.091 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 120.08 535.737 Td [(T)]TJ -ET -q -1 0 0 1 127.322 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 130.311 535.737 Td [(coo)]TJ -ET -q -1 0 0 1 146.411 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 149.399 535.737 Td [(sparse)]TJ -ET -q -1 0 0 1 178.769 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 181.758 535.737 Td [(mat)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Coor)18(dinate)-250(storage;)]TJ -0 g 0 G -/F51 9.9626 Tf -104 -19.907 Td [(psb)]TJ -ET -q -1 0 0 1 117.091 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 120.08 515.83 Td [(T)]TJ -ET -q -1 0 0 1 127.322 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 130.311 515.83 Td [(csr)]TJ -ET -q -1 0 0 1 143.631 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 146.62 515.83 Td [(sparse)]TJ -ET -q -1 0 0 1 175.989 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 178.978 515.83 Td [(mat)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(r)18(ows;)]TJ -0 g 0 G -/F51 9.9626 Tf -101.22 -19.906 Td [(psb)]TJ -ET -q -1 0 0 1 117.091 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 120.08 495.924 Td [(T)]TJ -ET -q -1 0 0 1 127.322 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 130.311 495.924 Td [(csc)]TJ -ET -q -1 0 0 1 144.179 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 147.168 495.924 Td [(sparse)]TJ -ET -q -1 0 0 1 176.537 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 179.526 495.924 Td [(mat)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(columns;)]TJ -101.768 -19.889 Td [(The)-295(inner)-295(sparse)-294(matrix)-295(has)-295(an)-295(associated)-294(state,)-306(which)-295(can)-295(take)-294(the)-295(following)]TJ 0 -11.955 Td [(values:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.888 Td [(Build:)]TJ -0 g 0 G -/F54 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.955 Td [(this)-250(state)-250(it)-250(is)-250(possible)-250(to)-250(add)-250(nonzer)18(o)-250(entries.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.907 Td [(Assembled:)]TJ +% 1111 0 obj +<< +/D [1109 0 R /XYZ 149.705 753.953 null] +>> +% 1107 0 obj +<< +/D [1109 0 R /XYZ 150.705 665.282 null] +>> +% 216 0 obj +<< +/D [1109 0 R /XYZ 150.705 613.372 null] +>> +% 1108 0 obj +<< +/Font << /F102 1016 0 R /F62 667 0 R /F59 665 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1113 0 obj +<< +/Type /Page +/Contents 1114 0 R +/Resources 1112 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +>> +% 1115 0 obj +<< +/D [1113 0 R /XYZ 98.895 753.953 null] +>> +% 220 0 obj +<< +/D [1113 0 R /XYZ 99.895 716.092 null] +>> +% 1112 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1124 0 obj +<< +/Type /Page +/Contents 1125 0 R +/Resources 1123 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +/Annots [ 1116 0 R 1117 0 R 1118 0 R 1119 0 R 1120 0 R 1121 0 R ] +>> +% 1116 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 413.274 435.942 425.334] +/A << /S /GoTo /D (table.1) >> +>> +% 1117 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 345.063 495.412 357.123] +/A << /S /GoTo /D (vdata) >> +>> +% 1118 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.755 333.108 388.729 345.168] +/A << /S /GoTo /D (table.1) >> +>> +% 1119 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 252.942 435.942 265.002] +/A << /S /GoTo /D (table.1) >> +>> +% 1120 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 184.731 495.412 196.791] +/A << /S /GoTo /D (vdata) >> +>> +% 1121 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [397.868 172.776 404.842 184.835] +/A << /S /GoTo /D (table.1) >> +>> +% 1126 0 obj +<< +/D [1124 0 R /XYZ 149.705 753.953 null] +>> +% 224 0 obj +<< +/D [1124 0 R /XYZ 150.705 716.092 null] +>> +% 1099 0 obj +<< +/D [1124 0 R /XYZ 150.705 558.947 null] +>> +% 1123 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F68 1127 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1131 0 obj +<< +/Type /Page +/Contents 1132 0 R +/Resources 1130 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +/Annots [ 1122 0 R 1128 0 R 1129 0 R ] +>> +% 1122 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 690.368 323.106 702.428] +/A << /S /GoTo /D (descdata) >> +>> +% 1128 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 600.704 444.603 612.764] +/A << /S /GoTo /D (vdata) >> +>> +% 1129 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [344.963 588.749 351.937 600.809] +/A << /S /GoTo /D (table.1) >> +>> +% 1133 0 obj +<< +/D [1131 0 R /XYZ 98.895 753.953 null] +>> +% 1130 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1141 0 obj +<< +/Type /Page +/Contents 1142 0 R +/Resources 1140 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +/Annots [ 1135 0 R 1136 0 R 1137 0 R 1138 0 R 1139 0 R ] +>> +% 1135 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 355.7 495.412 367.76] +/A << /S /GoTo /D (vdata) >> +>> +% 1136 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.755 343.745 388.729 355.804] +/A << /S /GoTo /D (table.2) >> +>> +% 1137 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 264.018 495.412 276.078] +/A << /S /GoTo /D (vdata) >> +>> +% 1138 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.88 252.063 388.854 264.123] +/A << /S /GoTo /D (table.2) >> +>> +% 1139 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 172.336 373.916 184.396] +/A << /S /GoTo /D (descdata) >> +>> +% 1143 0 obj +<< +/D [1141 0 R /XYZ 149.705 753.953 null] +>> +% 228 0 obj +<< +/D [1141 0 R /XYZ 150.705 716.092 null] +>> +% 1144 0 obj +<< +/D [1141 0 R /XYZ 150.705 499.951 null] +>> +% 1140 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1147 0 obj +<< +/Type /Page +/Contents 1148 0 R +/Resources 1146 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +/Annots [ 1145 0 R ] +>> +% 1145 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [378.159 602.697 385.133 614.756] +/A << /S /GoTo /D (table.2) >> +>> +% 1149 0 obj +<< +/D [1147 0 R /XYZ 98.895 753.953 null] +>> +% 1150 0 obj +<< +/D [1147 0 R /XYZ 99.895 512.639 null] +>> +% 1146 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1158 0 obj +<< +/Type /Page +/Contents 1159 0 R +/Resources 1157 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +/Annots [ 1151 0 R 1152 0 R 1153 0 R 1154 0 R 1155 0 R 1156 0 R ] +>> +% 1151 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 384.684 495.412 396.744] +/A << /S /GoTo /D (vdata) >> +>> + +endstream +endobj +1165 0 obj +<< +/Length 582 +>> +stream 0 g 0 G -/F54 9.9626 Tf 58.381 0 Td [(State)-324(enter)18(ed)-325(after)-324(the)-325(assembly;)-362(computations)-324(using)-325(the)-324(sparse)]TJ -33.474 -11.955 Td [(matrix,)-250(such)-250(as)-250(matrix-vector)-250(pr)18(oducts,)-250(ar)18(e)-250(only)-250(possible)-250(in)-250(this)-250(state;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.907 Td [(Update:)]TJ 0 g 0 G -/F54 9.9626 Tf 40.678 0 Td [(State)-219(enter)18(ed)-220(after)-219(a)-219(r)18(einitalization;)-230(this)-219(is)-219(used)-220(to)-219(handle)-219(applications)]TJ -15.771 -11.955 Td [(in)-288(which)-288(the)-288(same)-288(sparsity)-289(pattern)-288(is)-288(used)-288(multiple)-288(times)-288(with)-288(dif)18(fer)18(ent)]TJ 0 -11.955 Td [(coef)18(\002cients.)-298(In)-213(this)-214(state)-213(it)-214(is)-213(only)-214(possible)-213(to)-214(enter)-213(coef)18(\002cients)-214(for)-213(alr)18(eady)]TJ 0 -11.956 Td [(existing)-250(nonzer)18(o)-250(entries.)]TJ -24.907 -19.888 Td [(The)-293(only)-292(storage)-293(variant)-292(supporting)-293(the)-293(build)-292(state)-293(is)-292(COO;)-293(all)-293(other)-292(variants)]TJ 0 -11.955 Td [(ar)18(e)-250(obtained)-250(by)-250(conversion)-250(to/fr)18(om)-250(it.)]TJ/F51 9.9626 Tf 0 -27.132 Td [(3.2.1)-1000(Sparse)-250(Matrix)-250(Methods)]TJ 0 -18.964 Td [(3.2.2)-1000(get)]TJ -ET -q -1 0 0 1 144.219 266.863 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q BT -/F51 9.9626 Tf 147.208 266.663 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.313 -18.963 Td [(nr)-525(=)-525(a%get_nrows\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.872 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.907 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.907 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -33.827 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.906 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 165.298 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ -0 g 0 G - -76.431 -29.888 Td [(17)]TJ + 141.968 -567.87 Td [(35)]TJ 0 g 0 G ET endstream endobj -955 0 obj +1172 0 obj << -/Length 3860 +/Length 7777 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.3)-1000(get)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.4)-1000(psb)]TJ ET q -1 0 0 1 195.029 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 198.017 706.129 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.312 -19.023 Td [(nc)-525(=)-525(a%get_ncols\050\051)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.12 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 233.576 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -238.781 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.459 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(in\002nity)-250(norm)-250(as:)]TJ/F60 9.9626 Tf 117.807 -18.736 Td [(a)-25(m)-40(a)-42(x)]TJ/F91 10.3811 Tf 25.761 0 Td [(\040)]TJ/F62 9.9626 Tf 13.272 0 Td [(max)]TJ/F60 7.5716 Tf 8.355 -7.21 Td [(i)]TJ/F91 10.3811 Tf 12.349 7.21 Td [(j)]TJ/F60 9.9626 Tf 3.298 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F62 9.9626 Tf -206.019 -23.313 Td [(else)-250(if)]TJ/F60 9.9626 Tf 28.159 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(the)-250(in\002nity-norm)-250(as:)]TJ/F60 9.9626 Tf 63.42 -18.737 Td [(a)-25(m)-40(a)-42(x)]TJ/F91 10.3811 Tf 25.761 0 Td [(\040)]TJ/F62 9.9626 Tf 13.273 0 Td [(max)]TJ/F60 7.5716 Tf 8.354 -7.21 Td [(i)]TJ/F93 10.3811 Tf 12.35 7.21 Td [(\050)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ/F60 9.9626 Tf 3.028 0 Td [(r)-17(e)]TJ/F93 10.3811 Tf 8.17 0 Td [(\050)]TJ/F60 9.9626 Tf 4.443 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F93 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ/F93 10.3811 Tf 5.066 0 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(j)]TJ/F60 9.9626 Tf 3.058 0 Td [(i)-32(m)]TJ/F93 10.3811 Tf 11.088 0 Td [(\050)]TJ/F60 9.9626 Tf 4.444 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F93 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ/F93 10.3811 Tf 3.128 0 Td [(\051)]TJ/F67 9.9626 Tf -225.616 -22.974 Td [(psb_geamax\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.049 Td [(On)-250(Entry)]TJ + [-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_normi\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.048 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ + [-525([,global]\051)]TJ 0 g 0 G 0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 181.158 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ/F51 9.9626 Tf -259.165 -27.315 Td [(3.2.4)-1000(get)]TJ ET q -1 0 0 1 195.029 531.915 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.304 566.399 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S Q BT -/F51 9.9626 Tf 198.017 531.716 Td [(nnzeros)-250(\227)-250(Get)-250(number)-250(of)-250(nonzero)-250(elements)-250(in)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.312 -19.024 Td [(nz)-525(=)-525(a%get_nnzeros\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ -0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(nonzer)18(o)-250(elements)-250(stor)18(ed)-250(in)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 251.284 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ/F51 9.9626 Tf -329.292 -22.041 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -20.017 Td [(1.)]TJ -0 g 0 G - [-500(The)-395(function)-395(value)-395(is)-395(speci\002c)-395(to)-395(the)-395(storage)-395(format)-395(of)-396(matri)1(x)]TJ/F59 9.9626 Tf 295.646 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(;)-468(some)]TJ -288.422 -11.956 Td [(storage)-343(formats)-342(employ)-343(padding,)-366(thus)-343(the)-342(r)18(eturned)-343(value)-343(for)-342(the)-343(same)]TJ 0 -11.955 Td [(matrix)-250(may)-250(be)-250(dif)18(fer)18(ent)-250(for)-250(dif)18(fer)18(ent)-250(storage)-250(choices.)]TJ/F51 9.9626 Tf -24.907 -27.315 Td [(3.2.5)-1000(get)]TJ +/F60 9.9626 Tf 185.556 557.832 Td [(a)-25(m)-40(a)-42(x)-7779(x)]TJ/F59 9.9626 Tf 220.764 0 Td [(Function)]TJ ET q -1 0 0 1 195.029 291.533 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.304 554.046 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S Q BT -/F51 9.9626 Tf 198.017 291.334 Td [(size)-398(\227)-397(Get)-398(maximum)-397(number)-398(of)-398(nonzero)-397(elements)-398(in)-398(a)-397(sparse)]TJ -17.424 -11.955 Td [(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -29.888 -19.024 Td [(maxnz)-525(=)-525(a%get_size\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ -0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-220(maximum)-220(number)-219(of)-220(nonzer)18(o)-220(elements)-220(that)-220(can)-219(be)-220(stor)18(ed)]TJ -47.87 -11.955 Td [(in)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 73.294 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(using)-250(its)-250(curr)18(ent)-250(memory)-250(allocation.)]TJ -0 g 0 G - 60.952 -29.888 Td [(18)]TJ -0 g 0 G +/F62 9.9626 Tf 185.282 545.478 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET - -endstream -endobj -960 0 obj -<< -/Length 4133 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 422.639 545.677 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.2.6)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -20.135 Td [(memory_size)-525(=)-525(a%sizeof\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -23.732 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -35.687 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F51 9.9626 Tf -72.777 -30.58 Td [(3.2.7)-1000(get)]TJ +/F62 9.9626 Tf 425.628 545.478 Td [(geamax)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 144.219 517.21 cm +1 0 0 1 422.639 533.722 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.208 517.011 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -47.313 -20.135 Td [(write)]TJ -0 g 0 G - [(\050)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(*)]TJ -0 g 0 G - [(,)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(*)]TJ -0 g 0 G - [(\051)-525(a%get_fmt\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -35.686 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-244(short)-245(string)-244(describing)-245(the)-244(dynamic)-245(type)-244(of)-245(the)-244(matrix.)-308(Pr)18(e-)]TJ -47.87 -11.955 Td [(de\002ned)-250(values)-250(include)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 102.415 0 Td [(NULL)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(COO)]TJ/F54 9.9626 Tf 15.691 0 Td [(,)]TJ/F59 9.9626 Tf 4.982 0 Td [(CSR)]TJ/F54 9.9626 Tf 18.181 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(CSC)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ/F51 9.9626 Tf -227.127 -30.581 Td [(3.2.8)-1000(is)]TJ +/F62 9.9626 Tf 425.628 533.523 Td [(geamax)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 138.122 315.533 cm +1 0 0 1 422.639 521.767 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 141.111 315.333 Td [(bld,)-250(is)]TJ +/F62 9.9626 Tf 425.628 521.568 Td [(geamax)]TJ -240.346 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 169.922 315.533 cm +1 0 0 1 422.639 509.812 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 172.911 315.333 Td [(upd,)-250(is)]TJ +/F62 9.9626 Tf 425.628 509.612 Td [(geamax)]TJ ET q -1 0 0 1 204.493 315.533 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.304 505.827 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S Q -BT -/F51 9.9626 Tf 207.482 315.333 Td [(asb)-250(\227)-250(Status)-250(check)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -107.587 -20.135 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_bld\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_upd\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_asb\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(a)]TJ +BT +/F62 9.9626 Tf 280.768 477.448 Td [(T)92(able)-250(4:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G - -56.339 -35.686 Td [(On)-250(Return)]TJ 0 g 0 G +/F59 9.9626 Tf -130.063 -30.014 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 9.966 0 Td [(logical)]TJ +/F59 9.9626 Tf -29.828 -18.652 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 38.827 0 Td [(value)-222(indicating)-223(whether)-222(the)-222(matrix)-223(is)-222(in)-222(the)-223(Build,)]TJ -96.663 -11.955 Td [(Update)-250(or)-250(Assembled)-250(state,)-250(r)18(espectively)111(.)]TJ 0 g 0 G - 141.968 -29.888 Td [(19)]TJ + 0 -18.653 Td [(x)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ ET - -endstream -endobj -965 0 obj -<< -/Length 5143 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 436.673 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.9)-1000(is)]TJ +/F67 9.9626 Tf 439.811 362.308 Td [(T)]TJ ET q -1 0 0 1 188.931 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 445.669 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 191.92 706.129 Td [(lower)55(,)-250(is)]TJ +/F67 9.9626 Tf 448.807 362.308 Td [(vect)]TJ ET q -1 0 0 1 230.704 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 233.693 706.129 Td [(upper)55(,)-250(is)]TJ +/F67 9.9626 Tf 473.495 362.308 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(4)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -18.652 Td [(desc)]TJ ET q -1 0 0 1 273.583 706.328 cm +1 0 0 1 171.218 331.9 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 276.572 706.129 Td [(triangle,)-250(is)]TJ +/F59 9.9626 Tf 174.207 331.701 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 284.079 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 327.311 283.88 Td [(desc)]TJ ET q -1 0 0 1 325.309 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 348.86 284.079 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 328.298 706.129 Td [(unit)-250(\227)-250(Format)-250(check)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -177.593 -19.573 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_triangle\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_upper\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +/F67 9.9626 Tf 351.998 283.88 Td [(type)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [-525(\050a%is_lower\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +/F59 9.9626 Tf -222.214 -18.653 Td [(global)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(if)]TJ +/F62 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F67 9.9626 Tf 165.319 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-525(\050a%is_unit\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ + [(.true.)]TJ 0 g 0 G +/F59 9.9626 Tf -190.225 -30.607 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.86 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -18.653 Td [(Function)-250(value)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(vector)]TJ/F60 9.9626 Tf 128.562 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -181.637 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.133 0 Td [(global)]TJ/F62 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F67 9.9626 Tf 121.612 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.)]TJ/F62 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G - 0 -21.183 Td [(a)]TJ + 141.968 -29.888 Td [(36)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ +ET + +endstream +endobj +1177 0 obj +<< +/Length 3043 +>> +stream 0 g 0 G - -56.338 -34.816 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -21.183 Td [(Function)-250(value)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 10.803 0 Td [(logical)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 39.665 0 Td [(value)-306(indicating)-307(whether)-306(the)-307(matrix)-306(is)-306(triangular;)]TJ -98.338 -11.955 Td [(if)]TJ/F59 9.9626 Tf 8.595 0 Td [(is_triangle\050\051)]TJ/F54 9.9626 Tf 70.373 0 Td [(r)18(eturns)]TJ/F59 9.9626 Tf 34.119 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.761 0 Td [(check)-239(also)-238(if)-239(it)-239(is)-239(lower)74(,)-241(upper)-238(and)-239(with)]TJ -146.848 -11.955 Td [(a)-250(unit)-250(\050i.e.)-310(assumed\051)-250(diagonal.)]TJ/F51 9.9626 Tf -24.907 -28.929 Td [(3.2.10)-1000(cscnv)-250(\227)-250(Convert)-250(to)-250(a)-250(dif)18(ferent)-250(storage)-250(format)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -19.573 Td [(call)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - [-1050(a%cscnv\050b,info)-525([,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(type)]TJ + [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-332(at)-333(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(,)-525(mold,)-525(dupl]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +/F67 9.9626 Tf 52.303 -19.926 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G - [-1050(a%cscnv\050info)-525([,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(type)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(,)-525(mold,)-525(dupl]\051)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.861 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(psb_geamax\050x1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(2)]TJ 0 g 0 G - 0 -21.183 Td [(a)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.138 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.906 0 Td [(a)-250(string)-250(r)18(equesting)-250(a)-250(new)-250(format.)]TJ 0.001 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [-525(psb_geamax\050x2,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.183 Td [(mold)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(a)-236(variable)-236(of)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 56.403 0 Td [(class)]TJ + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G - [(\050psb_T_base_sparse_mat\051)]TJ/F54 9.9626 Tf 148.803 0 Td [(r)18(equesting)-236(a)-236(new)-237(format)1(.)]TJ -209.081 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -21.182 Td [(dupl)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(an)-359(integer)-358(value)-359(speci\002ng)-358(how)-359(to)-359(handle)-358(duplicates)-359(\050see)-359(Named)-358(Con-)]TJ -1.653 -11.956 Td [(stants)-250(below\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -22.861 Td [(On)-250(Return)]TJ + [-525(psb_geamax\050x3,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -21.182 Td [(b,a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 18.54 0 Td [(A)-250(copy)-250(of)]TJ/F59 9.9626 Tf 45.37 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(with)-250(a)-250(new)-250(storage)-250(format.)]TJ -46.724 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.456 0 Td [(.)]TJ + [-525(psb_amx\050ctxt,vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -21.183 Td [(info)]TJ + [(:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ -23.801 -23.175 Td [(The)]TJ/F59 9.9626 Tf 19.584 0 Td [(mold)]TJ/F54 9.9626 Tf 23.827 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ + [(\051\051)]TJ/F62 9.9626 Tf -52.303 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G - 166.874 -29.888 Td [(20)]TJ + 141.968 -402.49 Td [(37)]TJ 0 g 0 G ET endstream endobj -969 0 obj +1186 0 obj << -/Length 4477 +/Length 6432 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.2.11)-1000(csclip)-250(\227)-250(Reduce)-250(to)-250(a)-250(submatrix)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.922 -20.279 Td [(call)]TJ -0 g 0 G - [-525(a%csclip\050b,info[,&)]TJ 15.691 -11.955 Td [(&)-525(imin,imax,jmin,jmax,rscale,cscale]\051)]TJ/F54 9.9626 Tf -21.669 -24.631 Td [(Returns)-190(the)-190(submatrix)]TJ/F59 9.9626 Tf 98.878 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.068 0 Td [(,)-202(optionally)-190(r)18(escaling)-190(r)18(ow/-)]TJ -228.89 -11.955 Td [(col)-250(indices)-250(to)-250(the)-250(range)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 103.85 0 Td [(1)]TJ -0 g 0 G - [(:imax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ -0 g 0 G - [(imin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(,)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.5)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ/F62 9.9626 Tf -51.12 -18.964 Td [(This)-256(subr)18(outine)-255(computes)-256(a)-256(series)-255(of)-256(in\002nity)-256(norms)-256(on)-255(the)-256(columns)-256(of)-255(a)-256(dense)]TJ 0 -11.955 Td [(matrix)]TJ/F60 9.9626 Tf 31.785 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(:)]TJ/F60 9.9626 Tf 88.539 -11.955 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.294 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 7.042 0 Td [(\040)]TJ/F62 9.9626 Tf 13.273 0 Td [(max)]TJ/F60 7.5716 Tf 7.759 -7.336 Td [(k)]TJ/F91 10.3811 Tf 12.944 7.336 Td [(j)]TJ/F60 9.9626 Tf 3.298 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(k)]TJ/F62 9.9626 Tf 4.598 0 Td [(,)]TJ/F60 9.9626 Tf 4.206 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:jmax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -195.028 -22.296 Td [(call)]TJ 0 g 0 G - [(jmin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [-525(psb_geamaxs\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 141.219 0 Td [(.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -245.069 -21.961 Td [(T)90(ype:)]TJ + [-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ 0 g 0 G +ET +q +1 0 0 1 177.192 626.591 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q +BT +/F60 9.9626 Tf 183.195 618.023 Td [(r)-17(e)-25(s)-8868(x)]TJ/F59 9.9626 Tf 221.013 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 177.192 614.237 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q +BT +/F62 9.9626 Tf 183.17 605.669 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 420.527 605.868 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 423.516 605.669 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 420.527 593.913 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 423.516 593.714 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 420.527 581.958 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 423.516 581.759 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 420.527 570.003 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 423.516 569.804 Td [(geamaxs)]TJ +ET +q +1 0 0 1 177.192 566.018 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q 0 g 0 G - 0 -22.639 Td [(a)]TJ +BT +/F62 9.9626 Tf 280.768 537.639 Td [(T)92(able)-250(5:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -34.594 Td [(imin,imax,jmin,jmax)]TJ 0 g 0 G -/F54 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +/F59 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.638 Td [(rscale,cscale)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -60.025 -24.632 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.926 Td [(x)]TJ 0 g 0 G - 0 -22.639 Td [(b)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 415.5 Td [(T)]TJ +ET +q +1 0 0 1 445.669 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 415.5 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 415.5 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(a)-250(submatrix)-250(of)]TJ/F59 9.9626 Tf 111.321 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -102.713 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +/F62 9.9626 Tf -297.884 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -22.639 Td [(info)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.801 -31 Td [(3.2.12)-1000(clean)]TJ +/F59 9.9626 Tf -24.906 -19.925 Td [(desc)]TJ ET q -1 0 0 1 159.153 364.307 cm +1 0 0 1 171.218 383.818 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 162.142 364.108 Td [(zeros)-250(\227)-250(Eliminate)-250(zero)-250(coef)18(\002cients)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -62.247 -20.278 Td [(call)]TJ -0 g 0 G - [-525(a%clean_zeros\050info\051)]TJ/F54 9.9626 Tf 14.944 -12.634 Td [(Eliminates)-214(zer)18(o)-214(coef)18(\002cients)-214(in)-214(the)-214(input)-214(matrix.)-298(Note)-214(that)-214(depending)-214(on)-214(the)]TJ -14.944 -11.955 Td [(internal)-246(storage)-245(format,)-247(ther)18(e)-245(may)-246(still)-245(be)-246(some)-245(amount)-246(of)-246(ze)1(r)18(o)-246(padding)-246(in)-245(the)]TJ 0 -11.955 Td [(output.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.632 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf 174.207 383.619 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.639 Td [(a)]TJ +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 335.998 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 327.311 335.798 Td [(desc)]TJ +ET +q +1 0 0 1 348.86 335.998 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 351.998 335.798 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -35.908 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -222.214 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -22.638 Td [(a)]TJ + 0 -19.925 Td [(res)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(matrix)]TJ/F59 9.9626 Tf 50.659 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(zer)18(o)-250(coef)18(\002cients.)]TJ -43.436 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +/F62 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(the)-250(columns)-250(of)]TJ/F60 9.9626 Tf 166.26 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -164.82 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(Speci\002ed)-330(as:)-470(a)-330(number)-330(or)-330(a)-330(rank-one)-330(array)-329(of)-330(long)-330(pr)18(ecision)-330(r)18(eal)-330(num-)]TJ 0 -11.955 Td [(bers.)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -22.638 Td [(info)]TJ +/F59 9.9626 Tf -24.906 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 143.074 -29.888 Td [(21)]TJ + 141.968 -89.943 Td [(38)]TJ 0 g 0 G ET endstream endobj -973 0 obj +1194 0 obj << -/Length 4488 +/Length 7404 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.13)-1000(get)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.6)-1000(psb)]TJ ET q -1 0 0 1 200.01 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 202.999 706.129 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -52.294 -19.329 Td [(call)]TJ -0 g 0 G - [-525(a%get_diag\050d,info\051)]TJ/F54 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -20.49 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.121 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 206.349 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.46 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F60 9.9626 Tf 125.989 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F91 10.3811 Tf 25.352 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.007 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F62 9.9626 Tf -195.526 -21.269 Td [(else)-250(if)]TJ/F60 9.9626 Tf 28.159 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F60 9.9626 Tf 71.974 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F91 10.3811 Tf 25.353 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F93 10.3811 Tf 8.17 0 Td [(\050)]TJ/F60 9.9626 Tf 4.443 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F93 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(k)]TJ/F60 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F93 10.3811 Tf 11.088 0 Td [(\050)]TJ/F60 9.9626 Tf 4.444 0 Td [(x)]TJ/F93 10.3811 Tf 5.329 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F67 9.9626 Tf -216.928 -19.411 Td [(psb_geasum\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.679 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + [-525([,global]\051)-190(psb_norm1\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.679 Td [(d)]TJ + [-525([,global]\051)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ 13.839 -11.955 Td [(A)-250(one-dimensional)-250(array)-250(of)-250(the)-250(appr)18(opriate)-250(type.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.679 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.213 Td [(3.2.14)-1000(clip)]TJ ET q -1 0 0 1 203.317 472.944 cm +1 0 0 1 128.44 576.025 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q +BT +/F60 9.9626 Tf 134.691 567.457 Td [(a)-25(s)-25(u)-25(m)-7810(x)]TJ/F59 9.9626 Tf 220.765 0 Td [(Function)]TJ +ET +q +1 0 0 1 128.44 563.671 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q +BT +/F62 9.9626 Tf 134.417 555.103 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 371.775 555.303 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 206.306 472.745 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -55.601 -19.329 Td [(call)]TJ -0 g 0 G - [-525(a%clip_diag\050b,info\051)]TJ/F54 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)]TJ/F59 9.9626 Tf 79.73 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -0 g 0 G -/F51 9.9626 Tf -102.395 -20.49 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 374.763 555.103 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 371.775 543.347 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 374.763 543.148 Td [(geasum)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 371.775 531.392 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 374.763 531.193 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 371.775 519.437 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 374.763 519.238 Td [(geasum)]TJ +ET +q +1 0 0 1 128.44 515.452 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q 0 g 0 G +BT +/F62 9.9626 Tf 229.958 487.074 Td [(T)92(able)-250(6:)-310(Data)-250(types)]TJ 0 g 0 G - 0 -20.679 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -130.063 -33.561 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - 0 -20.679 Td [(b)]TJ +/F59 9.9626 Tf -29.828 -19.665 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)]TJ/F59 9.9626 Tf 45.37 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -39.252 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -184.984 -20.679 Td [(info)]TJ + 0 -19.666 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.213 Td [(3.2.15)-1000(tril)-250(\227)-250(Return)-250(the)-250(lower)-250(triangle)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.921 -19.329 Td [(call)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 385.864 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 389.002 366.361 Td [(T)]TJ +ET +q +1 0 0 1 394.86 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 397.998 366.361 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.685 366.361 Td [(type)]TJ 0 g 0 G - [-525(a%tril\050l,info[,&)]TJ 15.691 -11.955 Td [(&)-525(diag,imin,imax,jmin,jmax,rscale,cscale,u]\051)]TJ/F54 9.9626 Tf -21.668 -22.671 Td [(Returns)-309(the)-308(lower)-309(triangular)-308(part)-309(of)-309(submatrix)]TJ/F59 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.067 0 Td [(,)]TJ -341.22 -11.956 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 212.61 0 Td [(1)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(6)]TJ 0 g 0 G - [(:imax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(.)]TJ 0 g 0 G - [(imin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ +/F59 9.9626 Tf -24.907 -19.666 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 334.939 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 123.397 334.74 Td [(a)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 287.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 276.501 286.919 Td [(desc)]TJ +ET +q +1 0 0 1 298.05 287.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 301.189 286.919 Td [(type)]TJ 0 g 0 G - [(,)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(:jmax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ +/F59 9.9626 Tf -222.215 -19.665 Td [(global)]TJ 0 g 0 G - [(jmin)]TJ +/F62 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F67 9.9626 Tf 165.318 0 Td [(global)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [(=)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(.true.)]TJ 0 g 0 G -/F54 9.9626 Tf -212.61 -11.955 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(upper)-250(triangle.)]TJ +/F59 9.9626 Tf -190.225 -31.621 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.49 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -19.666 Td [(Function)-250(value)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(vector)]TJ/F60 9.9626 Tf 102.161 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -155.236 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.133 0 Td [(global)]TJ/F62 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F67 9.9626 Tf 121.612 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.)]TJ/F62 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(22)]TJ + 141.968 -29.888 Td [(39)]TJ 0 g 0 G ET endstream endobj -977 0 obj +1199 0 obj << -/Length 6185 +/Length 3046 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(a)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F59 9.9626 Tf 149.76 0 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-333(i)1(s)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +/F67 9.9626 Tf 20.922 -19.926 Td [(vres\050)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F59 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=-)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [-525(psb_geasum\050x1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ + 31.382 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(2)]TJ 0 g 0 G -/F51 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - 0 -18.75 Td [(l)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 137.333 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ -125.956 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ + [-525(psb_geasum\050x2,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(u)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 184.485 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ -175.878 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -18.749 Td [(info)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.801 -26.59 Td [(3.2.16)-1000(triu)-250(\227)-250(Return)-250(the)-250(upper)-250(triangle)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.922 -18.964 Td [(call)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - [-525(a%triu\050u,info[,&)]TJ 15.691 -11.955 Td [(&)-525(diag,imin,imax,jmin,jmax,rscale,cscale,l]\051)]TJ/F54 9.9626 Tf -21.669 -18.979 Td [(Returns)-289(the)-290(upper)-289(triangular)-290(part)-289(of)-290(submatrix)]TJ/F59 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.068 0 Td [(,)]TJ -341.221 -11.955 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 212.611 0 Td [(1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:imax)]TJ + [-525(psb_geasum\050x3,desc_a,info,global)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(=)]TJ 0 g 0 G - [(imin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G + [-525(psb_sum\050ctxt,vres\050)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG [(1)]TJ 0 g 0 G - [(,)]TJ + [(:)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(3)]TJ 0 g 0 G - [(:jmax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(\051\051)]TJ/F62 9.9626 Tf -52.304 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G - [(jmin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + 141.968 -402.49 Td [(40)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +ET + +endstream +endobj +1207 0 obj +<< +/Length 7502 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 99.895 706.129 Td [(4.7)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 151.016 706.129 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.121 -20.192 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(1-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F60 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(:)]TJ/F60 9.9626 Tf 120.031 -13.856 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.293 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)]TJ/F62 9.9626 Tf 13.273 0 Td [(max)]TJ/F60 7.5716 Tf 7.76 -7.335 Td [(k)]TJ/F91 10.3811 Tf 12.944 7.335 Td [(j)]TJ/F60 9.9626 Tf 3.298 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.273 0 Td [(k)]TJ/F62 9.9626 Tf 4.598 0 Td [(,)]TJ/F60 9.9626 Tf 4.206 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ/F62 9.9626 Tf -215.202 -24.535 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 206.349 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.46 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F60 9.9626 Tf 125.227 -23.185 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.293 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.007 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F62 9.9626 Tf -196.039 -23.185 Td [(else)-250(if)]TJ/F60 9.9626 Tf 28.159 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F60 9.9626 Tf 71.212 -23.185 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.294 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F93 10.3811 Tf 8.169 0 Td [(\050)]TJ/F60 9.9626 Tf 4.444 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F93 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(k)]TJ/F60 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F93 10.3811 Tf 11.088 0 Td [(\050)]TJ/F60 9.9626 Tf 4.443 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -212.21 -21.96 Td [(call)]TJ +0 g 0 G + [-525(psb_geasums\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 141.219 0 Td [(,)]TJ -353.83 -11.956 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(lower)-250(triangle.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +q +1 0 0 1 126.328 514.627 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q +BT +/F60 9.9626 Tf 132.33 506.059 Td [(r)-17(e)-25(s)-8868(x)]TJ/F59 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 126.328 502.274 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q +BT +/F62 9.9626 Tf 132.305 493.706 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 369.663 493.905 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 372.651 493.706 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 369.663 481.95 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 372.651 481.751 Td [(geasums)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 369.663 469.995 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 372.651 469.795 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 369.663 458.04 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 372.651 457.84 Td [(geasums)]TJ +ET +q +1 0 0 1 126.328 454.054 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q 0 g 0 G -/F51 9.9626 Tf 0 -17.574 Td [(T)90(ype:)]TJ +BT +/F62 9.9626 Tf 229.958 425.676 Td [(T)92(able)-250(7:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -18.75 Td [(On)-250(Entry)]TJ 0 g 0 G +/F59 9.9626 Tf -130.063 -37.636 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -18.75 Td [(a)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F59 9.9626 Tf -29.828 -22.46 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ 0 g 0 G -/F54 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F59 9.9626 Tf 149.76 0 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + 0 -22.459 Td [(x)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 385.864 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 389.002 295.3 Td [(T)]TJ +ET +q +1 0 0 1 394.86 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 397.998 295.3 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.685 295.3 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F59 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=-)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(7)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ +/F59 9.9626 Tf -24.907 -22.459 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 261.085 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 123.397 260.886 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 213.264 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 276.501 213.065 Td [(desc)]TJ +ET +q +1 0 0 1 298.05 213.264 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 301.189 213.065 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ +/F59 9.9626 Tf -222.215 -22.459 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ 0 g 0 G -/F51 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ + 0 -22.46 Td [(res)]TJ 0 g 0 G +/F62 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F60 9.9626 Tf 176.182 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Short)-255(as:)-320(a)-255(long)-254(pr)18(ecision)-255(r)18(eal)-255(number)74(.)-325(Speci\002ed)-255(as:)-320(a)-254(long)-255(pr)18(ecision)-255(r)18(eal)]TJ 0 -11.955 Td [(number)74(.)]TJ 0 g 0 G - 0 -18.75 Td [(u)]TJ + 141.968 -29.888 Td [(41)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 138.668 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -130.06 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +ET + +endstream +endobj +1212 0 obj +<< +/Length 583 +>> +stream 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(l)]TJ 0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 183.151 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -171.773 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(info)]TJ +BT +/F59 9.9626 Tf 150.705 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 143.074 -29.888 Td [(23)]TJ + 141.968 -567.87 Td [(42)]TJ 0 g 0 G ET endstream endobj -983 0 obj +1219 0 obj << -/Length 7619 +/Length 6687 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.17)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.8)-1000(psb)]TJ ET q -1 0 0 1 202.769 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 205.758 706.129 Td [(set)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.121 -20.076 Td [(This)-250(function)-250(computes)-250(the)-250(2-norm)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 206.349 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.46 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F60 9.9626 Tf 122.551 -25.46 Td [(n)-15(r)-35(m)]TJ/F62 9.9626 Tf 17.788 0 Td [(2)]TJ/F91 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ ET q -1 0 0 1 219.078 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 287.432 658.569 cm +[]0 d 0 J 0.408 w 0 0 m 16.592 0 l S Q BT -/F51 9.9626 Tf 222.067 706.129 Td [(mat)]TJ +/F60 9.9626 Tf 287.726 648.638 Td [(x)]TJ/F60 7.5716 Tf 5.399 2.88 Td [(T)]TJ/F60 9.9626 Tf 5.694 -2.88 Td [(x)]TJ/F62 9.9626 Tf -198.924 -23.065 Td [(else)-250(if)]TJ/F60 9.9626 Tf 28.159 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F60 9.9626 Tf 103.113 -25.46 Td [(n)-15(r)-35(m)]TJ/F62 9.9626 Tf 17.789 0 Td [(2)]TJ/F91 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ ET q -1 0 0 1 239.82 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 286.694 610.044 cm +[]0 d 0 J 0.408 w 0 0 m 18.069 0 l S Q BT -/F51 9.9626 Tf 242.809 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -92.104 -18.964 Td [(call)]TJ -0 g 0 G - [-1050(psb_set_mat_default\050a\051)]TJ +/F60 9.9626 Tf 286.988 600.113 Td [(x)]TJ/F60 7.5716 Tf 5.588 2.88 Td [(H)]TJ/F60 9.9626 Tf 6.982 -2.88 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ +ET +q +1 0 0 1 128.689 575.464 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +Q +BT +/F60 9.9626 Tf 134.791 566.896 Td [(n)-15(r)-35(m)]TJ/F62 9.9626 Tf 17.788 0 Td [(2)]TJ/F60 9.9626 Tf 82.505 0 Td [(x)]TJ/F59 9.9626 Tf 120.621 0 Td [(Function)]TJ +ET +q +1 0 0 1 128.689 563.111 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +Q +BT +/F62 9.9626 Tf 134.667 554.543 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 372.024 554.742 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.012 554.543 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 372.024 542.787 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.012 542.588 Td [(genrm2)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 372.024 530.832 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.012 530.632 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 372.024 518.876 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.012 518.677 Td [(genrm2)]TJ +ET +q +1 0 0 1 128.689 514.891 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +Q 0 g 0 G +BT +/F62 9.9626 Tf 229.958 486.513 Td [(T)92(able)-250(8:)-310(Data)-250(types)]TJ 0 g 0 G - 0 -19.231 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(a)-203(variable)-203(of)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 55.42 0 Td [(class)]TJ +/F67 9.9626 Tf -115.119 -27.631 Td [(psb_genrm2\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\050psb_T_base_sparse_mat\051)]TJ/F54 9.9626 Tf 148.475 0 Td [(r)18(equesting)-203(a)-204(new)-203(default)-203(stor)18(-)]TJ -188.951 -11.956 Td [(age)-250(format.)]TJ 0 -11.955 Td [(T)90(ype:)-310(r)18(equir)18(ed.)]TJ/F51 9.9626 Tf -24.907 -26.815 Td [(3.2.18)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -18.964 Td [(call)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-1050(a%clone\050b,info\051)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ + [-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_norm2\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525([,global]\051)]TJ 0 g 0 G - 0 -19.231 Td [(a)]TJ +/F59 9.9626 Tf 0 -36.169 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -32.138 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -29.828 -22.221 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.232 Td [(b)]TJ + 0 -22.221 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 385.864 318.695 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 389.002 318.495 Td [(T)]TJ +ET +q +1 0 0 1 394.86 318.695 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 397.998 318.495 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 318.695 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.685 318.495 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -11.068 -19.231 Td [(info)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(8)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -26.815 Td [(3.2.19)-1000(Named)-250(Constants)]TJ + [(.)]TJ 0 g 0 G - 0 -18.964 Td [(psb)]TJ +/F59 9.9626 Tf -24.907 -22.221 Td [(desc)]TJ ET q -1 0 0 1 167.9 372.049 cm +1 0 0 1 120.408 284.518 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 371.85 Td [(dupl)]TJ +/F59 9.9626 Tf 123.397 284.319 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 193.066 372.049 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 273.363 236.698 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 196.055 371.85 Td [(ovwrt)]TJ +/F67 9.9626 Tf 276.501 236.499 Td [(desc)]TJ ET q -1 0 0 1 223.222 372.049 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 298.05 236.698 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q +BT +/F67 9.9626 Tf 301.189 236.499 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -222.215 -22.221 Td [(global)]TJ +0 g 0 G +/F62 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.956 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F67 9.9626 Tf 165.318 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.true.)]TJ +0 g 0 G +/F59 9.9626 Tf -190.225 -34.176 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 166.875 -29.888 Td [(43)]TJ +0 g 0 G +ET + +endstream +endobj +1225 0 obj +<< +/Length 4322 +>> +stream +0 g 0 G +0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 231.193 371.85 Td [(Duplicate)-259(coef)18(\002cients)-259(shou)1(ld)-259(be)-259(overwritten)-259(\050i.e.)-336(ignor)18(e)-259(du-)]TJ -55.582 -11.955 Td [(plications\051)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(Function)-250(V)111(alue)]TJ +0 g 0 G +/F62 9.9626 Tf 73.882 0 Td [(is)-250(the)-250(2-norm)-250(of)-250(vector)]TJ/F60 9.9626 Tf 102.161 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -156.342 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.133 0 Td [(global)]TJ/F62 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F67 9.9626 Tf 121.612 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.false.)]TJ/F62 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.231 Td [(psb)]TJ + [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ 24.981 -17.933 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(1)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET q -1 0 0 1 167.9 340.863 cm +1 0 0 1 278.034 495.12 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 340.664 Td [(dupl)]TJ +/F62 9.9626 Tf 281.627 494.921 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(1)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ ET q -1 0 0 1 193.066 340.863 cm +1 0 0 1 367.96 495.12 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 196.055 340.664 Td [(add)]TJ +/F62 9.9626 Tf 371.922 494.921 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(2)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET q -1 0 0 1 213.808 340.863 cm +1 0 0 1 278.034 483.165 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 221.778 340.664 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(be)-250(added;)]TJ -0 g 0 G -/F51 9.9626 Tf -71.073 -19.232 Td [(psb)]TJ +/F62 9.9626 Tf 281.627 482.966 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(2)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ ET q -1 0 0 1 167.9 321.632 cm +1 0 0 1 367.96 483.165 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 321.432 Td [(dupl)]TJ +/F62 9.9626 Tf 371.922 482.966 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(3)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET q -1 0 0 1 193.066 321.632 cm +1 0 0 1 278.034 471.21 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 196.055 321.432 Td [(err)]TJ +/F62 9.9626 Tf 281.627 471.011 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(3)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ ET q -1 0 0 1 209.384 321.632 cm +1 0 0 1 367.96 471.21 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 217.355 321.432 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(trigger)-250(an)-250(err)18(or)-250(conditino)]TJ -0 g 0 G -/F51 9.9626 Tf -66.65 -19.231 Td [(psb)]TJ +/F62 9.9626 Tf 371.922 471.011 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -170.658 -11.956 Td [(c)-175(a)-175(l)-174(l)-831(p)-56(s)-56(b)]TJ ET q -1 0 0 1 167.9 302.4 cm +1 0 0 1 247.952 459.255 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 302.201 Td [(upd)]TJ +/F62 9.9626 Tf 251.497 459.055 Td [(n)-56(r)-56(m)-55(2)-190(\050)-264(c)-132(t)-131(x)-131(t)-438(,)-283(v)-107(r)-107(e)-108(s)-300(\050)-193(1)-193(:)-193(3)-193(\051)-193(\051)]TJ +0 g 0 G +0 g 0 G + -75.886 -21.917 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.956 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ +0 g 0 G + 141.968 -334.744 Td [(44)]TJ +0 g 0 G ET -q -1 0 0 1 189.748 302.4 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1238 0 obj +<< +/Length 6244 +>> +stream +0 g 0 G +0 g 0 G BT -/F51 9.9626 Tf 192.737 302.201 Td [(d\003t)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.9)-1000(psb)]TJ ET q -1 0 0 1 208.827 302.4 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 216.797 302.201 Td [(Default)-250(update)-250(strategy)-250(for)-250(matrix)-250(coef)18(\002cients;)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.121 -18.964 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(2-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F60 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(:)]TJ/F60 9.9626 Tf 126.858 -11.955 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.294 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.006 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F62 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F60 9.9626 Tf 6.821 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.744 Td [(2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -186.919 -16.189 Td [(call)]TJ +0 g 0 G + [-525(psb_genrm2s\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0 g 0 G +0 g 0 G 0 g 0 G -/F51 9.9626 Tf -66.092 -19.231 Td [(psb)]TJ ET q -1 0 0 1 167.9 283.169 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 126.577 630.954 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S Q BT -/F51 9.9626 Tf 170.889 282.97 Td [(upd)]TJ +/F60 9.9626 Tf 132.579 622.386 Td [(r)-17(e)-25(s)-8868(x)]TJ/F59 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ ET q -1 0 0 1 189.748 283.169 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 126.577 618.6 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S Q BT -/F51 9.9626 Tf 192.737 282.97 Td [(srch)]TJ +/F62 9.9626 Tf 132.554 610.032 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 212.144 283.169 cm +1 0 0 1 369.912 610.231 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 220.114 282.97 Td [(Update)-250(strategy)-250(based)-250(on)-250(sear)18(ch)-250(into)-250(the)-250(data)-250(str)8(uctur)18(e;)]TJ -0 g 0 G -/F51 9.9626 Tf -69.409 -19.232 Td [(psb)]TJ +/F62 9.9626 Tf 372.9 610.032 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 167.9 263.938 cm +1 0 0 1 369.912 598.276 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 263.738 Td [(upd)]TJ +/F62 9.9626 Tf 372.9 598.077 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 189.748 263.938 cm +1 0 0 1 369.912 586.321 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 192.737 263.738 Td [(perm)]TJ +/F62 9.9626 Tf 372.9 586.122 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 217.135 263.938 cm +1 0 0 1 369.912 574.366 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 225.106 263.738 Td [(Update)-392(strategy)-393(based)-392(on)-393(additional)-392(permutation)-393(data)-392(\050see)]TJ -49.495 -11.955 Td [(tools)-250(r)18(outine)-250(description\051.)]TJ/F51 11.9552 Tf -24.906 -28.807 Td [(3.3)-1000(Dense)-250(V)111(ector)-250(Data)-250(Structure)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)]TJ/F59 9.9626 Tf 20.094 0 Td [(psb)]TJ +/F62 9.9626 Tf 372.9 574.167 Td [(genrm2s)]TJ ET q -1 0 0 1 187.117 204.211 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 126.577 570.381 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S Q +0 g 0 G BT -/F59 9.9626 Tf 190.255 204.012 Td [(T)]TJ +/F62 9.9626 Tf 229.958 542.002 Td [(T)92(able)-250(9:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F59 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 196.113 204.211 cm +1 0 0 1 385.864 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 199.252 204.012 Td [(vect)]TJ +/F67 9.9626 Tf 389.002 419.863 Td [(T)]TJ ET q -1 0 0 1 220.801 204.211 cm +1 0 0 1 394.86 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 223.939 204.012 Td [(type)]TJ/F54 9.9626 Tf 24.338 0 Td [(data)-343(str)8(uctur)18(e)-343(encapsulates)-343(the)-343(dense)-343(vectors)-342(in)-343(a)-343(way)]TJ -97.572 -11.955 Td [(similar)-368(to)-368(sparse)-368(matrices,)-397(i.e.)-664(including)-368(a)-368(base)-368(type)]TJ/F59 9.9626 Tf 242.472 0 Td [(psb)]TJ +/F67 9.9626 Tf 397.998 419.863 Td [(vect)]TJ ET q -1 0 0 1 409.495 192.256 cm +1 0 0 1 419.547 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 412.633 192.057 Td [(T)]TJ +/F67 9.9626 Tf 422.685 419.863 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.883 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(9)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(desc)]TJ ET q -1 0 0 1 418.491 192.256 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 388.181 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 421.63 192.057 Td [(base)]TJ +/F59 9.9626 Tf 123.397 387.982 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 443.178 192.256 cm +1 0 0 1 273.363 340.361 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 446.317 192.057 Td [(vect)]TJ +/F67 9.9626 Tf 276.501 340.161 Td [(desc)]TJ ET q -1 0 0 1 467.866 192.256 cm +1 0 0 1 298.05 340.361 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 471.004 192.057 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -341.22 -11.956 Td [(The)-263(user)-263(will)-263(not,)-266(in)-263(general,)-267(access)-263(the)-263(vector)-263(components)-263(dir)18(ectly)111(,)-266(but)-263(rather)]TJ 0 -11.955 Td [(via)-222(the)-222(r)18(out)1(ines)-222(of)-222(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-222(6)]TJ +/F67 9.9626 Tf 301.189 340.161 Td [(type)]TJ 0 g 0 G - [(.)-300(Among)-222(other)-222(simple)-222(things,)-227(we)-222(de\002ne)-222(her)18(e)-221(an)-222(extrac-)]TJ 0 -11.955 Td [(tion)-273(method)-274(that)-273(can)-274(be)-273(used)-274(to)-273(get)-274(a)-273(full)-274(copy)-273(of)-274(the)-273(part)-274(of)-273(the)-274(vector)-273(stor)18(ed)]TJ 0 -11.955 Td [(on)-250(the)-250(local)-250(pr)18(ocess.)]TJ 14.944 -11.955 Td [(The)-311(type)-311(declaration)-311(is)-310(shown)-311(in)-311(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-311(3)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [-311(wher)18(e)]TJ/F59 9.9626 Tf 217.442 0 Td [(T)]TJ/F54 9.9626 Tf 8.327 0 Td [(is)-311(a)-311(placeholder)-311(for)-310(the)]TJ -240.713 -11.955 Td [(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ +/F59 9.9626 Tf -222.215 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - 166.874 -29.888 Td [(24)]TJ +0 g 0 G + 0 -19.925 Td [(res)]TJ +0 g 0 G +/F62 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F60 9.9626 Tf 176.182 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -106.261 Td [(45)]TJ 0 g 0 G ET endstream endobj -989 0 obj +1245 0 obj << -/Length 4359 +/Length 5385 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(I)]TJ -0 g 0 G -/F54 9.9626 Tf 8.857 0 Td [(Integer;)]TJ -0 g 0 G -/F51 9.9626 Tf -8.857 -20.359 Td [(S)]TJ -0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ -0 g 0 G -/F51 9.9626 Tf -11.069 -20.358 Td [(D)]TJ -0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.10)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(matrix)]TJ/F60 9.9626 Tf 208.231 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(:)]TJ/F60 9.9626 Tf -74.65 -33.873 Td [(n)-15(r)-35(m)]TJ/F62 9.9626 Tf 17.788 0 Td [(1)]TJ/F91 10.3811 Tf 7.873 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.335 0 Td [(A)]TJ/F91 10.3811 Tf 7.442 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F62 9.9626 Tf -198.652 -20.06 Td [(wher)18(e:)]TJ 0 g 0 G -/F51 9.9626 Tf -13.281 -20.359 Td [(C)]TJ +/F60 9.9626 Tf 0.622 -19.925 Td [(A)]TJ 0 g 0 G -/F54 9.9626 Tf 12.175 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ +/F62 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F60 9.9626 Tf 125.981 0 Td [(A)]TJ 0 g 0 G -/F51 9.9626 Tf -12.175 -20.358 Td [(Z)]TJ 0 g 0 G -/F54 9.9626 Tf 11.627 0 Td [(Double)-250(pr)18(ecision)-250(complex.)]TJ -11.627 -20.251 Td [(The)-209(actual)-208(data)-209(is)-208(contained)-209(in)-209(the)-208(polymorphic)-209(component)]TJ/F59 9.9626 Tf 261.152 0 Td [(v%v)]TJ/F54 9.9626 Tf 15.691 0 Td [(;)-222(the)-209(separation)]TJ -276.843 -11.955 Td [(between)-353(the)-353(application)-353(and)-353(the)-353(actual)-353(data)-353(is)-353(esse)1(ntial)-353(for)-353(cases)-353(wher)18(e)-353(it)-353(is)]TJ 0 -11.955 Td [(necessary)-321(to)-321(link)-320(to)-321(data)-321(storage)-321(made)-320(available)-321(elsewher)18(e)-321(outside)-320(the)-321(dir)18(ect)]TJ 0 -11.955 Td [(contr)18(ol)-231(of)-231(the)-231(compiler/application,)-235(e.g.)-304(data)-231(stor)18(ed)-231(in)-231(a)-231(graphics)-231(accelerator)-74('s)]TJ 0 -11.955 Td [(private)-250(memory)111(.)]TJ 0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 99.895 452.975 cm -0 0 343.711 82.69 re f +1 0 0 1 229.61 588.515 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +Q +BT +/F60 9.9626 Tf 236.21 579.947 Td [(A)]TJ/F59 9.9626 Tf 120.293 0 Td [(Function)]TJ +ET +q +1 0 0 1 229.61 576.161 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +Q +BT +/F62 9.9626 Tf 235.587 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 372.821 567.793 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.81 567.594 Td [(spnrm1)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 372.821 555.838 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.81 555.638 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 372.821 543.882 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.81 543.683 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 372.821 531.927 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.81 531.728 Td [(spnrm1)]TJ +ET +q +1 0 0 1 229.61 527.942 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 112.299 525.005 Td [(type)]TJ +/F62 9.9626 Tf 278.277 499.564 Td [(T)92(able)-250(10:)-310(Data)-250(types)]TJ 0 g 0 G - [-525(psb_T_base_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(TYPE)]TJ 0 g 0 G - [(\050KIND_\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ 0 g 0 G - [-525(v\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ +/F67 9.9626 Tf -127.572 -33.873 Td [(psb_spnrm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_norm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G - [-525(psb_T_base_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(type)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - [-525(psb_T_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(class)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [(\050psb_T_base_vect_type\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ 0 g 0 G - [-525(v)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ + 0 -19.926 Td [(a)]TJ 0 g 0 G - [-1050(psb_T_vect_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 194.722 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -187.095 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.242 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 344.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 327.311 344.147 Td [(Tspmat)]TJ +ET +q +1 0 0 1 359.321 344.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 362.459 344.147 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 6.677 -41.429 Td [(Listing)-250(3:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(dense)-250(vector)74(.)]TJ/F51 9.9626 Tf -19.081 -39.929 Td [(3.3.1)-1000(V)111(ector)-250(Methods)]TJ 0 -19.174 Td [(3.3.2)-1000(get)]TJ +/F59 9.9626 Tf -232.675 -19.926 Td [(desc)]TJ ET q -1 0 0 1 144.219 358.919 cm +1 0 0 1 171.218 324.421 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.208 358.719 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(dense)-250(vector)]TJ/F59 9.9626 Tf -47.313 -19.173 Td [(nr)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(v%get_nrows\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.359 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.358 Td [(v)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -34.198 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf 174.207 324.221 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 276.6 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 327.311 276.401 Td [(desc)]TJ +ET +q +1 0 0 1 348.86 276.6 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 351.998 276.401 Td [(type)]TJ 0 g 0 G - 0 -20.358 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(dense)-250(vector)]TJ/F59 9.9626 Tf 161.273 0 Td [(v)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ/F51 9.9626 Tf -239.281 -27.757 Td [(3.3.3)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(dense)-250(vector)]TJ/F59 9.9626 Tf 0 -19.174 Td [(memory_size)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ +/F59 9.9626 Tf -222.214 -19.926 Td [(On)-250(Return)]TJ 0 g 0 G - [-525(v%sizeof\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F60 9.9626 Tf 150.4 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -205.587 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.358 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -24.906 -19.926 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(25)]TJ + 141.968 -54.456 Td [(46)]TJ 0 g 0 G ET endstream endobj -996 0 obj +1252 0 obj << -/Length 3735 +/Length 5404 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(v)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.11)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(matrix)]TJ/F60 9.9626 Tf 235.459 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(:)]TJ/F60 9.9626 Tf -102.327 -33.873 Td [(n)-15(r)-35(m)-18(i)]TJ/F91 10.3811 Tf 23.698 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.336 0 Td [(A)]TJ/F91 10.3811 Tf 7.442 0 Td [(k)]TJ/F104 7.5716 Tf 5.409 -1.494 Td [(\245)]TJ/F62 9.9626 Tf -196.335 -20.424 Td [(wher)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ +/F60 9.9626 Tf 0.623 -19.925 Td [(A)]TJ 0 g 0 G - -56.338 -36.868 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F60 9.9626 Tf 125.981 0 Td [(A)]TJ 0 g 0 G 0 g 0 G - 0 -23.918 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F51 9.9626 Tf -72.776 -32.82 Td [(3.3.4)-1000(set)-250(\227)-250(Set)-250(contents)-250(of)-250(the)-250(vector)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 5.23 -20.898 Td [(call)]TJ +ET +q +1 0 0 1 179.842 588.515 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +Q +BT +/F60 9.9626 Tf 186.442 579.947 Td [(A)]TJ/F59 9.9626 Tf 120.292 0 Td [(Function)]TJ +ET +q +1 0 0 1 179.842 576.161 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +Q +BT +/F62 9.9626 Tf 185.819 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 323.053 567.793 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 326.042 567.594 Td [(spnrmi)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 323.053 555.838 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 326.042 555.638 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 323.053 543.882 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 326.042 543.683 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 323.053 531.927 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 326.042 531.728 Td [(spnrmi)]TJ +ET +q +1 0 0 1 179.842 527.942 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +Q 0 g 0 G - [-1050(v%set\050alpha[,first,last]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +BT +/F62 9.9626 Tf 227.467 499.564 Td [(T)92(able)-250(11:)-310(Data)-250(types)]TJ 0 g 0 G - [-1050(v%set\050vect[,first,last]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ 0 g 0 G - [-1050(v%zero\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf -5.23 -24.913 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F67 9.9626 Tf -127.572 -33.873 Td [(psb_spnrmi\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_normi\050A,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -23.918 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - 0 -23.918 Td [(v)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G - -56.338 -35.873 Td [(alpha)]TJ + 0 -19.926 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(A)-250(scalar)-250(value.)]TJ -5.53 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 194.722 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(.)]TJ -187.096 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG - [-250(1)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 344.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 276.501 344.147 Td [(Tspmat)]TJ +ET +q +1 0 0 1 308.511 344.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 311.649 344.147 Td [(type)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -23.918 Td [(\002rst,last)]TJ +/F59 9.9626 Tf -232.676 -19.926 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 324.421 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 123.397 324.221 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 41.215 0 Td [(Boundaries)-250(for)-250(setting)-250(in)-250(the)-250(vector)74(.)]TJ -16.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integers.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 276.6 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 276.501 276.401 Td [(desc)]TJ +ET +q +1 0 0 1 298.05 276.6 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 301.189 276.401 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -23.918 Td [(vect)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.242 0 Td [(An)-250(array)]TJ 1.664 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ +/F59 9.9626 Tf -222.215 -19.926 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ -24.906 -25.91 Td [(Note)-336(that)-336(a)-335(call)-336(to)]TJ/F59 9.9626 Tf 84.614 0 Td [(v%zero\050\051)]TJ/F54 9.9626 Tf 45.189 0 Td [(is)-336(pr)18(ovided)-336(as)-335(a)-336(shorthand,)-358(but)-335(is)-336(equivalent)-336(to)]TJ -129.803 -11.956 Td [(a)-270(call)-270(to)]TJ/F59 9.9626 Tf 36.947 0 Td [(v%set\050zero\051)]TJ/F54 9.9626 Tf 60.225 0 Td [(with)-270(the)]TJ/F59 9.9626 Tf 39.456 0 Td [(zero)]TJ/F54 9.9626 Tf 23.613 0 Td [(constant)-270(having)-270(the)-271(appr)18(opriat)1(e)-271(type)-270(and)]TJ -160.241 -11.955 Td [(kind.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -25.91 Td [(On)-250(Return)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(in\002nity-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F60 9.9626 Tf 177.627 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -232.814 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G - 0 -23.918 Td [(v)]TJ +/F59 9.9626 Tf -24.907 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)74(,)-250(with)-250(updated)-250(entries)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 110.536 -41.843 Td [(26)]TJ + 141.968 -54.456 Td [(47)]TJ 0 g 0 G ET endstream endobj -1003 0 obj +1263 0 obj << -/Length 4464 +/Length 7972 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.3.5)-1000(get)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.12)-1000(psb)]TJ ET q -1 0 0 1 144.219 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 147.208 706.129 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ -0 g 0 G +/F59 11.9552 Tf 207.803 706.129 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Product)]TJ/F62 9.9626 Tf -57.098 -19.303 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct:)]TJ/F60 9.9626 Tf 140.147 -24.611 Td [(y)]TJ/F91 10.3811 Tf 7.998 0 Td [(\040)]TJ/F68 9.9626 Tf 13.397 0 Td [(a)]TJ/F60 9.9626 Tf 6.008 0 Td [(A)-42(x)]TJ/F93 10.3811 Tf 14.878 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G -/F59 9.9626 Tf -47.313 -19.66 Td [(extv)-525(=)-525(v%get_vect\050[n]\051)]TJ +/F62 9.9626 Tf 133.513 0 Td [(\0501\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.994 Td [(T)90(ype:)]TJ +/F60 9.9626 Tf -194.745 -20.13 Td [(y)]TJ/F91 10.3811 Tf 7.998 0 Td [(\040)]TJ/F68 9.9626 Tf 13.398 0 Td [(a)]TJ/F60 9.9626 Tf 6.007 0 Td [(A)]TJ/F60 7.5716 Tf 7.511 4.115 Td [(T)]TJ/F60 9.9626 Tf 5.694 -4.115 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 130.715 0 Td [(\0502\051)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.362 Td [(On)-250(Entry)]TJ +/F60 9.9626 Tf -195.482 -20.129 Td [(y)]TJ/F91 10.3811 Tf 7.998 0 Td [(\040)]TJ/F68 9.9626 Tf 13.397 0 Td [(a)]TJ/F60 9.9626 Tf 6.008 0 Td [(A)]TJ/F60 7.5716 Tf 7.7 4.114 Td [(H)]TJ/F60 9.9626 Tf 6.981 -4.114 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G +/F62 9.9626 Tf 129.977 0 Td [(\0503\051)]TJ 0 g 0 G - 0 -21.361 Td [(v)]TJ + -317.15 -18.633 Td [(wher)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ +/F60 9.9626 Tf -14.65 -20.451 Td [(x)]TJ 0 g 0 G - -56.339 -33.316 Td [(n)]TJ +/F62 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 115.61 0 Td [(x)]TJ/F62 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Size)-250(to)-250(be)-250(r)18(eturned)]TJ 13.838 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(;)-250(default:)-310(entir)18(e)-250(vector)74(.)]TJ +/F60 9.9626 Tf -131.167 -19.132 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -90.182 -34.95 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 115.441 0 Td [(y)]TJ/F62 7.5716 Tf 5.2 -1.494 Td [(:)-13(,)-12(:)]TJ 0 g 0 G +/F60 9.9626 Tf -130.23 -19.131 Td [(A)]TJ 0 g 0 G - 0 -21.361 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 12.299 0 Td [(is)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 118.41 0 Td [(A)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(An)-316(allocatable)-316(array)-316(holding)-316(a)-317(copy)-316(of)-316(the)-316(dense)-316(vector)-316(con-)]TJ -47.87 -11.955 Td [(tents.)-321(If)-254(the)-254(ar)18(gument)]TJ/F52 9.9626 Tf 98.086 0 Td [(n)]TJ/F54 9.9626 Tf 8.192 0 Td [(is)-254(speci\002ed,)-255(the)-253(size)-254(of)-254(the)-254(r)18(eturned)-254(array)-253(equals)]TJ -106.278 -11.955 Td [(the)-339(minimum)-339(between)]TJ/F52 9.9626 Tf 105.247 0 Td [(n)]TJ/F54 9.9626 Tf 9.041 0 Td [(and)-339(the)-339(internal)-339(size)-339(of)-339(the)-339(vector)74(,)-361(or)-339(0)-339(if)]TJ/F52 9.9626 Tf 188.353 0 Td [(n)]TJ/F54 9.9626 Tf 9.04 0 Td [(is)]TJ -311.681 -11.956 Td [(negative;)-314(otherwise,)-303(the)-292(size)-293(of)-292(the)-293(array)-292(is)-293(the)-292(same)-293(as)-292(the)-293(internal)-292(size)]TJ 0 -11.955 Td [(of)-250(the)-250(vector)74(.)]TJ/F51 9.9626 Tf -24.907 -29.183 Td [(3.3.6)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf 0 -19.659 Td [(call)-1050(x%clone\050y,info\051)]TJ +ET +q +1 0 0 1 230.392 517.986 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F60 9.9626 Tf 236.992 509.418 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(,)]TJ/F60 9.9626 Tf 5.275 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(,)]TJ/F68 9.9626 Tf 5.106 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(,)]TJ/F68 9.9626 Tf 5.355 0 Td [(b)]TJ/F59 9.9626 Tf 76.437 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 230.392 505.633 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F62 9.9626 Tf 236.369 497.065 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 373.603 497.264 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 497.065 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 373.603 485.309 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 485.11 Td [(spmm)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 373.603 473.354 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 473.154 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 373.603 461.398 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 461.199 Td [(spmm)]TJ +ET +q +1 0 0 1 230.392 457.413 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q 0 g 0 G -/F51 9.9626 Tf 0 -22.995 Td [(T)90(ype:)]TJ +BT +/F62 9.9626 Tf 278.277 429.035 Td [(T)92(able)-250(12:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.361 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -107.398 -24.261 Td [(call)]TJ 0 g 0 G - 0 -21.362 Td [(x)]TJ + [-525(psb_spmm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(dense)-250(vector)74(.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -77.918 -34.95 Td [(On)-250(Return)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -21.361 Td [(y)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -10.521 -21.361 Td [(info)]TJ + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 11.9552 Tf -23.801 -31.176 Td [(3.4)-1000(Preconditioner)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -19.659 Td [(Our)-396(base)-397(l)1(ibrary)-397(of)18(fers)-396(support)-396(for)-396(simple)-397(well)-396(known)-396(pr)18(econditioners)-396(like)]TJ 0 -11.956 Td [(Diagonal)-250(Scaling)-250(or)-250(Block)-250(Jacobi)-250(with)-250(incomplete)-250(factorization)-250(ILU\0500\051.)]TJ 14.944 -12.314 Td [(A)-361(pr)18(econditioner)-361(is)-361(held)-361(in)-361(the)]TJ/F59 9.9626 Tf 143.781 0 Td [(psb)]TJ -ET -q -1 0 0 1 274.939 168.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 278.077 168.146 Td [(prec)]TJ -ET -q -1 0 0 1 299.626 168.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 302.764 168.146 Td [(type)]TJ/F54 9.9626 Tf 24.519 0 Td [(data)-361(str)8(uctur)18(e)-361(r)18(eported)-361(in)]TJ -227.388 -11.955 Td [(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-282(4)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.944 -11.955 Td [(call)]TJ 0 g 0 G - [(.)-407(The)]TJ/F59 9.9626 Tf 59.933 0 Td [(psb_prec_type)]TJ/F54 9.9626 Tf 70.808 0 Td [(data)-282(type)-283(may)-282(contain)-282(a)-283(simple)-282(pr)18(econditioning)]TJ -130.741 -11.955 Td [(matrix)-376(with)-376(the)-376(associated)-376(communication)-375(descriptor)74(.The)-376(internal)-376(pr)18(econdi-)]TJ 0 -11.955 Td [(tioner)-317(is)-317(allocated)-318(appr)18(opriately)-317(with)-317(the)-317(dynamic)-318(type)-317(corr)18(esponding)-317(to)-317(the)]TJ 0 -11.955 Td [(desir)18(ed)-250(pr)18(econditioner)74(.)]TJ + [-525(psb_spmm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 166.875 -29.888 Td [(27)]TJ + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1009 0 obj -<< -/Length 3969 ->> -stream + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(y,desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -q -1 0 0 1 150.705 671.26 cm -0 0 343.711 38.854 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 163.108 699.454 Td [(type)]TJ + [-525(trans,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_Tprec_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.415 -10.959 Td [(class)]TJ + [-525(work\051)]TJ 0 g 0 G - [(\050psb_T_base_prec_type\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F59 9.9626 Tf -5.23 -22.618 Td [(T)90(ype:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-525(prec)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.415 -10.959 Td [(end)-525(type)]TJ +/F59 9.9626 Tf -29.828 -20.626 Td [(On)-250(Entry)]TJ 0 g 0 G - [-525(psb_Tprec_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G + 0 -20.626 Td [(alpha)]TJ 0 g 0 G -/F54 9.9626 Tf 1.845 -41.429 Td [(Listing)-250(4:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(pr)18(econditioner)74(.)]TJ/F51 11.9552 Tf -14.248 -32.698 Td [(3.5)-1000(Heap)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -18.964 Td [(Among)-310(the)-311(tools)-310(r)18(outines)-310(of)-310(sec.)]TJ +/F62 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.368 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-311(6)]TJ + [-250(12)]TJ 0 g 0 G - [(,)-325(we)-310(have)-311(a)-310(number)-310(of)-311(so)1(rting)-311(utilities;)-340(the)]TJ 0 -11.955 Td [(heap)-250(sort)-250(is)-250(implemented)-250(in)-250(terms)-250(of)-250(heaps)-250(having)-250(the)-250(following)-250(signatur)18(es:)]TJ + [(.)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -19.925 Td [(psb)]TJ +/F59 9.9626 Tf -24.907 -20.626 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 164.964 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -157.337 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.242 0 Td [(psb)]TJ ET q -1 0 0 1 167.023 552.764 cm +1 0 0 1 324.173 212.882 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 170.162 552.565 Td [(T)]TJ +/F67 9.9626 Tf 327.311 212.682 Td [(Tspmat)]TJ ET q -1 0 0 1 176.02 552.764 cm +1 0 0 1 359.321 212.882 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 179.158 552.565 Td [(heap)]TJ +/F67 9.9626 Tf 362.459 212.682 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 25.903 0 Td [(:)-333(a)-262(heap)-262(containing)-262(e)1(lements)-262(of)-262(type)-262(T)74(,)-261(wher)18(e)-262(T)-262(can)-261(be)]TJ/F59 9.9626 Tf 242.282 0 Td [(i,s,c,d,z)]TJ/F54 9.9626 Tf -271.731 -11.955 Td [(for)-250(integer)74(,)-250(r)18(eal)-250(and)-250(complex)-250(data;)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -24.907 -19.925 Td [(psb)]TJ +/F59 9.9626 Tf -232.675 -20.625 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ ET q -1 0 0 1 167.023 520.884 cm +1 0 0 1 436.673 144.435 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 170.162 520.685 Td [(T)]TJ +/F67 9.9626 Tf 439.811 144.236 Td [(T)]TJ ET q -1 0 0 1 176.02 520.884 cm +1 0 0 1 445.669 144.435 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 179.158 520.685 Td [(idx)]TJ +/F67 9.9626 Tf 448.807 144.236 Td [(vect)]TJ ET q -1 0 0 1 195.476 520.884 cm +1 0 0 1 470.356 144.435 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 198.615 520.685 Td [(heap)]TJ -0 g 0 G -/F54 9.9626 Tf 25.902 0 Td [(:)-289(a)-207(heap)-207(containing)-207(elements)-207(of)-207(type)-207(T)74(,)-207(as)-207(above,)-215(together)-207(with)]TJ -48.906 -11.956 Td [(an)-250(integer)-250(index.)]TJ -24.906 -19.925 Td [(Given)-250(a)-250(heap)-250(object,)-250(the)-250(following)-250(methods)-250(ar)18(e)-250(de\002ned)-250(on)-250(it:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(init)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(Initialize)-250(memory;)-250(also)-250(choose)-250(ascending)-250(or)-250(descending)-250(or)18(der;)]TJ -0 g 0 G -/F51 9.9626 Tf -21.021 -19.925 Td [(howmany)]TJ -0 g 0 G -/F54 9.9626 Tf 50.371 0 Td [(Curr)18(ent)-250(heap)-250(occupancy;)]TJ -0 g 0 G -/F51 9.9626 Tf -50.371 -19.926 Td [(insert)]TJ -0 g 0 G -/F54 9.9626 Tf 30.983 0 Td [(Add)-250(an)-250(item)-250(\050or)-250(an)-250(item)-250(and)-250(its)-250(index\051;)]TJ -0 g 0 G -/F51 9.9626 Tf -30.983 -19.925 Td [(get)]TJ -ET -q -1 0 0 1 165.141 409.302 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 168.129 409.103 Td [(\002rst)]TJ -0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(Remove)-250(and)-250(r)18(eturn)-250(the)-250(\002rst)-250(element;)]TJ -0 g 0 G -/F51 9.9626 Tf -40.109 -19.925 Td [(dump)]TJ +/F67 9.9626 Tf 473.495 144.236 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(Print)-250(on)-250(\002le;)]TJ -0 g 0 G -/F51 9.9626 Tf -32.099 -19.926 Td [(free)]TJ +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-278(12)]TJ 0 g 0 G -/F54 9.9626 Tf 22.695 0 Td [(Release)-250(memory)111(.)]TJ -22.695 -19.925 Td [(These)-305(objects)-305(ar)18(e)-305(used)-305(in)-305(AMG4PSBLAS)-305(to)-305(implement)-305(the)-305(factorization)-305(algo-)]TJ 0 -11.955 Td [(rithms.)]TJ + [(.)-395(The)-278(rank)-279(of)]TJ/F60 9.9626 Tf 275.498 0 Td [(x)]TJ/F62 9.9626 Tf 7.978 0 Td [(must)-278(be)]TJ -283.476 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G - 166.874 -246.934 Td [(28)]TJ + 83.916 -29.888 Td [(48)]TJ 0 g 0 G ET endstream endobj -928 0 obj +1162 0 obj +<< +/Type /ObjStm +/N 100 +/First 984 +/Length 11501 +>> +stream +1152 0 1153 146 1154 290 1155 435 1156 582 1160 726 232 785 1161 843 1157 901 1164 1034 +1166 1152 1163 1210 1171 1291 1167 1448 1168 1592 1169 1738 1173 1885 236 1944 1174 2002 1170 2061 +1176 2194 1178 2312 1179 2370 1175 2427 1185 2521 1181 2678 1182 2822 1183 2968 1187 3115 240 3174 +1188 3232 1184 3291 1193 3424 1189 3581 1190 3725 1191 3868 1195 4015 244 4073 1196 4130 1192 4187 +1198 4320 1200 4438 1201 4497 1197 4555 1206 4649 1202 4806 1203 4950 1204 5096 1208 5243 248 5301 +1209 5358 1205 5416 1211 5549 1213 5667 1210 5726 1218 5807 1214 5964 1215 6107 1216 6253 1220 6400 +252 6458 1221 6515 1217 6572 1224 6692 1226 6810 1227 6869 1228 6928 1229 6987 1230 7046 1231 7105 +1232 7164 1223 7223 1237 7330 1233 7487 1234 7631 1235 7777 1239 7924 256 7982 1240 8039 1236 8097 +1244 8230 1241 8378 1242 8523 1246 8670 260 8729 1247 8787 1243 8845 1251 8965 1248 9113 1249 9258 +1253 9405 264 9463 1255 9520 1250 9577 1262 9712 1256 9878 1257 10025 1258 10170 1259 10312 1264 10458 +% 1152 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.755 372.729 388.729 384.789] +/A << /S /GoTo /D (table.3) >> +>> +% 1153 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 293.733 495.412 305.793] +/A << /S /GoTo /D (vdata) >> +>> +% 1154 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.88 281.778 388.854 293.837] +/A << /S /GoTo /D (table.3) >> +>> +% 1155 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 202.781 373.916 214.841] +/A << /S /GoTo /D (descdata) >> +>> +% 1156 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [200.458 119.17 207.432 128.58] +/A << /S /GoTo /D (table.2) >> +>> +% 1160 0 obj +<< +/D [1158 0 R /XYZ 149.705 753.953 null] +>> +% 232 0 obj +<< +/D [1158 0 R /XYZ 150.705 716.092 null] +>> +% 1161 0 obj +<< +/D [1158 0 R /XYZ 150.705 524.97 null] +>> +% 1157 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1164 0 obj << -/Type /ObjStm -/N 100 -/First 881 -/Length 8939 ->> -stream -67 0 923 55 930 148 932 262 71 319 75 375 933 431 79 488 83 542 929 598 -935 691 937 805 87 861 91 916 95 971 934 1026 942 1119 938 1269 939 1426 940 1577 -944 1724 99 1781 103 1837 945 1894 941 1951 949 2044 951 2158 947 2214 952 2270 107 2327 -111 2383 948 2439 954 2545 956 2659 115 2716 119 2773 957 2830 123 2887 953 2943 959 3036 -961 3150 127 3206 131 3262 135 3318 958 3374 964 3467 966 3581 139 3638 143 3695 963 3752 -968 3845 970 3959 147 4015 151 4071 967 4127 972 4220 974 4334 155 4391 159 4448 163 4505 -971 4562 976 4655 978 4769 167 4825 975 4880 982 4973 979 5115 980 5260 984 5407 171 5464 -175 5521 179 5577 183 5633 985 5690 981 5747 988 5840 990 5954 986 6010 187 6066 191 6122 -195 6178 987 6234 995 6340 992 6482 993 6627 997 6771 199 6828 994 6885 1002 6978 999 7115 -1004 7262 204 7320 208 7377 212 7433 1005 7490 1001 7548 1008 7655 1000 7793 1010 7940 1006 7999 -% 67 0 obj +/Type /Page +/Contents 1165 0 R +/Resources 1163 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +>> +% 1166 0 obj << -/D [924 0 R /XYZ 99.895 276.666 null] +/D [1164 0 R /XYZ 98.895 753.953 null] >> -% 923 0 obj +% 1163 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 930 0 obj +% 1171 0 obj << /Type /Page -/Contents 931 0 R -/Resources 929 0 R +/Contents 1172 0 R +/Resources 1170 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/Parent 1134 0 R +/Annots [ 1167 0 R 1168 0 R 1169 0 R ] >> -% 932 0 obj +% 1167 0 obj << -/D [930 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 358.503 495.412 370.562] +/A << /S /GoTo /D (vdata) >> >> -% 71 0 obj +% 1168 0 obj << -/D [930 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [377.462 346.547 384.436 358.607] +/A << /S /GoTo /D (table.4) >> >> -% 75 0 obj +% 1169 0 obj << -/D [930 0 R /XYZ 150.705 519.544 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 280.074 373.916 292.134] +/A << /S /GoTo /D (descdata) >> >> -% 933 0 obj +% 1173 0 obj << -/D [930 0 R /XYZ 397.537 356.277 null] +/D [1171 0 R /XYZ 149.705 753.953 null] >> -% 79 0 obj +% 236 0 obj << -/D [930 0 R /XYZ 150.705 305.6 null] +/D [1171 0 R /XYZ 150.705 716.092 null] >> -% 83 0 obj +% 1174 0 obj << -/D [930 0 R /XYZ 150.705 194.578 null] +/D [1171 0 R /XYZ 150.705 495.665 null] >> -% 929 0 obj +% 1170 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 935 0 obj +% 1176 0 obj << /Type /Page -/Contents 936 0 R -/Resources 934 0 R +/Contents 1177 0 R +/Resources 1175 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R ->> -% 937 0 obj -<< -/D [935 0 R /XYZ 98.895 753.953 null] ->> -% 87 0 obj -<< -/D [935 0 R /XYZ 99.895 583.842 null] +/Parent 1180 0 R >> -% 91 0 obj +% 1178 0 obj << -/D [935 0 R /XYZ 99.895 466.211 null] +/D [1176 0 R /XYZ 98.895 753.953 null] >> -% 95 0 obj +% 1179 0 obj << -/D [935 0 R /XYZ 99.895 256.035 null] +/D [1176 0 R /XYZ 99.895 632.19 null] >> -% 934 0 obj +% 1175 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 942 0 obj +% 1185 0 obj << /Type /Page -/Contents 943 0 R -/Resources 941 0 R +/Contents 1186 0 R +/Resources 1184 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R -/Annots [ 938 0 R 939 0 R 940 0 R ] +/Parent 1180 0 R +/Annots [ 1181 0 R 1182 0 R 1183 0 R ] >> -% 938 0 obj +% 1181 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [187.544 240.293 199.499 249.399] -/A << /S /GoTo /D (cite.DesignPatterns) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 411.694 495.412 423.754] +/A << /S /GoTo /D (vdata) >> >> -% 939 0 obj +% 1182 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [267.981 240.393 279.936 249.399] -/A << /S /GoTo /D (cite.Sparse03) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [377.462 399.739 384.436 411.798] +/A << /S /GoTo /D (table.5) >> >> -% 940 0 obj +% 1183 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [458.483 237.643 465.457 249.703] -/A << /S /GoTo /D (listing.2) >> ->> -% 944 0 obj -<< -/D [942 0 R /XYZ 149.705 753.953 null] +/Rect [306.858 331.993 373.916 344.052] +/A << /S /GoTo /D (descdata) >> >> -% 99 0 obj +% 1187 0 obj << -/D [942 0 R /XYZ 150.705 544.277 null] +/D [1185 0 R /XYZ 149.705 753.953 null] >> -% 103 0 obj +% 240 0 obj << -/D [942 0 R /XYZ 150.705 296.936 null] +/D [1185 0 R /XYZ 150.705 716.092 null] >> -% 945 0 obj +% 1188 0 obj << -/D [942 0 R /XYZ 170.328 265.359 null] +/D [1185 0 R /XYZ 150.705 555.856 null] >> -% 941 0 obj +% 1184 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 949 0 obj +% 1193 0 obj << /Type /Page -/Contents 950 0 R -/Resources 948 0 R +/Contents 1194 0 R +/Resources 1192 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/Parent 1180 0 R +/Annots [ 1189 0 R 1190 0 R 1191 0 R ] >> -% 951 0 obj +% 1189 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 362.555 444.603 374.615] +/A << /S /GoTo /D (vdata) >> +>> +% 1190 0 obj << -/D [949 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 350.6 333.626 362.66] +/A << /S /GoTo /D (table.6) >> >> -% 947 0 obj +% 1191 0 obj << -/D [949 0 R /XYZ 99.895 665.282 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 283.114 323.106 295.173] +/A << /S /GoTo /D (descdata) >> >> -% 952 0 obj +% 1195 0 obj << -/D [949 0 R /XYZ 409.052 603.446 null] +/D [1193 0 R /XYZ 98.895 753.953 null] >> -% 107 0 obj +% 244 0 obj << -/D [949 0 R /XYZ 99.895 294.773 null] +/D [1193 0 R /XYZ 99.895 716.092 null] >> -% 111 0 obj +% 1196 0 obj << -/D [949 0 R /XYZ 99.895 276.048 null] +/D [1193 0 R /XYZ 99.895 505.29 null] >> -% 948 0 obj +% 1192 0 obj << -/Font << /F94 915 0 R /F54 586 0 R /F59 812 0 R /F51 584 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 954 0 obj +% 1198 0 obj << /Type /Page -/Contents 955 0 R -/Resources 953 0 R +/Contents 1199 0 R +/Resources 1197 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R ->> -% 956 0 obj -<< -/D [954 0 R /XYZ 149.705 753.953 null] ->> -% 115 0 obj -<< -/D [954 0 R /XYZ 150.705 716.092 null] ->> -% 119 0 obj -<< -/D [954 0 R /XYZ 150.705 540.892 null] +/Parent 1180 0 R >> -% 957 0 obj +% 1200 0 obj << -/D [954 0 R /XYZ 150.705 358.382 null] +/D [1198 0 R /XYZ 149.705 753.953 null] >> -% 123 0 obj +% 1201 0 obj << -/D [954 0 R /XYZ 150.705 300.51 null] +/D [1198 0 R /XYZ 150.705 632.19 null] >> -% 953 0 obj +% 1197 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 959 0 obj +% 1206 0 obj << /Type /Page -/Contents 960 0 R -/Resources 958 0 R +/Contents 1207 0 R +/Resources 1205 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R ->> -% 961 0 obj -<< -/D [959 0 R /XYZ 98.895 753.953 null] ->> -% 127 0 obj -<< -/D [959 0 R /XYZ 99.895 716.092 null] ->> -% 131 0 obj -<< -/D [959 0 R /XYZ 99.895 526.761 null] +/Parent 1180 0 R +/Annots [ 1202 0 R 1203 0 R 1204 0 R ] >> -% 135 0 obj +% 1202 0 obj << -/D [959 0 R /XYZ 99.895 326.359 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 291.495 444.603 303.554] +/A << /S /GoTo /D (vdata) >> >> -% 958 0 obj +% 1203 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 279.539 333.626 291.599] +/A << /S /GoTo /D (table.7) >> >> -% 964 0 obj +% 1204 0 obj << -/Type /Page -/Contents 965 0 R -/Resources 963 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 209.259 323.106 221.319] +/A << /S /GoTo /D (descdata) >> >> -% 966 0 obj +% 1208 0 obj << -/D [964 0 R /XYZ 149.705 753.953 null] +/D [1206 0 R /XYZ 98.895 753.953 null] >> -% 139 0 obj +% 248 0 obj << -/D [964 0 R /XYZ 150.705 716.092 null] +/D [1206 0 R /XYZ 99.895 716.092 null] >> -% 143 0 obj +% 1209 0 obj << -/D [964 0 R /XYZ 150.705 474.131 null] +/D [1206 0 R /XYZ 99.895 443.893 null] >> -% 963 0 obj +% 1205 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 968 0 obj +% 1211 0 obj << /Type /Page -/Contents 969 0 R -/Resources 967 0 R +/Contents 1212 0 R +/Resources 1210 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R ->> -% 970 0 obj -<< -/D [968 0 R /XYZ 98.895 753.953 null] ->> -% 147 0 obj -<< -/D [968 0 R /XYZ 99.895 716.092 null] +/Parent 1180 0 R >> -% 151 0 obj +% 1213 0 obj << -/D [968 0 R /XYZ 99.895 376.562 null] +/D [1211 0 R /XYZ 149.705 753.953 null] >> -% 967 0 obj +% 1210 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 972 0 obj +% 1218 0 obj << /Type /Page -/Contents 973 0 R -/Resources 971 0 R +/Contents 1219 0 R +/Resources 1217 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/Parent 1222 0 R +/Annots [ 1214 0 R 1215 0 R 1216 0 R ] >> -% 974 0 obj -<< -/D [972 0 R /XYZ 149.705 753.953 null] ->> -% 155 0 obj -<< -/D [972 0 R /XYZ 150.705 716.092 null] ->> -% 159 0 obj +% 1214 0 obj << -/D [972 0 R /XYZ 150.705 484.709 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 314.69 444.603 326.749] +/A << /S /GoTo /D (vdata) >> >> -% 163 0 obj +% 1215 0 obj << -/D [972 0 R /XYZ 150.705 251.325 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 302.734 333.626 314.794] +/A << /S /GoTo /D (table.8) >> >> -% 971 0 obj +% 1216 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 232.693 323.106 244.753] +/A << /S /GoTo /D (descdata) >> >> -% 976 0 obj +% 1220 0 obj << -/Type /Page -/Contents 977 0 R -/Resources 975 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/D [1218 0 R /XYZ 98.895 753.953 null] >> -% 978 0 obj +% 252 0 obj << -/D [976 0 R /XYZ 98.895 753.953 null] +/D [1218 0 R /XYZ 99.895 716.092 null] >> -% 167 0 obj +% 1221 0 obj << -/D [976 0 R /XYZ 99.895 476.15 null] +/D [1218 0 R /XYZ 99.895 504.73 null] >> -% 975 0 obj +% 1217 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 982 0 obj +% 1224 0 obj << /Type /Page -/Contents 983 0 R -/Resources 981 0 R +/Contents 1225 0 R +/Resources 1223 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R -/Annots [ 979 0 R 980 0 R ] ->> -% 979 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [248.894 164.341 255.868 176.4] -/A << /S /GoTo /D (section.6) >> +/Parent 1222 0 R >> -% 980 0 obj +% 1226 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [343.512 128.475 350.485 140.535] -/A << /S /GoTo /D (listing.3) >> +/D [1224 0 R /XYZ 149.705 753.953 null] >> -% 984 0 obj +% 1227 0 obj << -/D [982 0 R /XYZ 149.705 753.953 null] +/D [1224 0 R /XYZ 150.705 564.444 null] >> -% 171 0 obj +% 1228 0 obj << -/D [982 0 R /XYZ 150.705 716.092 null] +/D [1224 0 R /XYZ 150.705 504.067 null] >> -% 175 0 obj +% 1229 0 obj << -/D [982 0 R /XYZ 150.705 586.94 null] +/D [1224 0 R /XYZ 175.611 506.876 null] >> -% 179 0 obj +% 1230 0 obj << -/D [982 0 R /XYZ 150.705 402.59 null] +/D [1224 0 R /XYZ 175.611 494.921 null] >> -% 183 0 obj +% 1231 0 obj << -/D [982 0 R /XYZ 150.705 234.114 null] +/D [1224 0 R /XYZ 175.611 482.966 null] >> -% 985 0 obj +% 1232 0 obj << -/D [982 0 R /XYZ 170.799 204.012 null] +/D [1224 0 R /XYZ 175.611 471.011 null] >> -% 981 0 obj +% 1223 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 988 0 obj +% 1237 0 obj << /Type /Page -/Contents 989 0 R -/Resources 987 0 R +/Contents 1238 0 R +/Resources 1236 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R +/Parent 1222 0 R +/Annots [ 1233 0 R 1234 0 R 1235 0 R ] >> -% 990 0 obj +% 1233 0 obj << -/D [988 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 416.057 444.603 428.117] +/A << /S /GoTo /D (vdata) >> >> -% 986 0 obj +% 1234 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 404.102 333.626 416.161] +/A << /S /GoTo /D (table.9) >> +>> +% 1235 0 obj << -/D [988 0 R /XYZ 99.895 446.997 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 336.356 323.106 348.415] +/A << /S /GoTo /D (descdata) >> >> -% 187 0 obj +% 1239 0 obj << -/D [988 0 R /XYZ 99.895 387.147 null] +/D [1237 0 R /XYZ 98.895 753.953 null] >> -% 191 0 obj +% 256 0 obj << -/D [988 0 R /XYZ 99.895 370.604 null] +/D [1237 0 R /XYZ 99.895 716.092 null] >> -% 195 0 obj +% 1240 0 obj << -/D [988 0 R /XYZ 99.895 194.093 null] +/D [1237 0 R /XYZ 99.895 560.219 null] >> -% 987 0 obj +% 1236 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 995 0 obj +% 1244 0 obj << /Type /Page -/Contents 996 0 R -/Resources 994 0 R +/Contents 1245 0 R +/Resources 1243 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 992 0 R 993 0 R ] +/Parent 1222 0 R +/Annots [ 1241 0 R 1242 0 R ] >> -% 992 0 obj +% 1241 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 383.557 435.942 395.616] -/A << /S /GoTo /D (table.1) >> +/Rect [306.858 340.341 384.376 352.401] +/A << /S /GoTo /D (spdata) >> >> -% 993 0 obj +% 1242 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 240.08 435.942 252.139] -/A << /S /GoTo /D (table.1) >> +/Rect [306.858 272.595 373.916 284.655] +/A << /S /GoTo /D (descdata) >> >> -% 997 0 obj +% 1246 0 obj << -/D [995 0 R /XYZ 149.705 753.953 null] +/D [1244 0 R /XYZ 149.705 753.953 null] >> -% 199 0 obj +% 260 0 obj << -/D [995 0 R /XYZ 150.705 610.712 null] +/D [1244 0 R /XYZ 150.705 716.092 null] >> -% 994 0 obj +% 1247 0 obj +<< +/D [1244 0 R /XYZ 150.705 517.78 null] +>> +% 1243 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1002 0 obj +% 1251 0 obj << /Type /Page -/Contents 1003 0 R -/Resources 1001 0 R +/Contents 1252 0 R +/Resources 1250 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 999 0 R ] +/Parent 1222 0 R +/Annots [ 1248 0 R 1249 0 R ] >> -% 999 0 obj +% 1248 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [127.814 152.385 134.788 164.445] -/A << /S /GoTo /D (listing.4) >> ->> -% 1004 0 obj -<< -/D [1002 0 R /XYZ 98.895 753.953 null] +/Rect [256.048 340.341 333.567 352.401] +/A << /S /GoTo /D (spdata) >> >> -% 204 0 obj +% 1249 0 obj << -/D [1002 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 272.595 323.106 284.655] +/A << /S /GoTo /D (descdata) >> >> -% 208 0 obj +% 1253 0 obj << -/D [1002 0 R /XYZ 99.895 430.41 null] +/D [1251 0 R /XYZ 98.895 753.953 null] >> -% 212 0 obj +% 264 0 obj << -/D [1002 0 R /XYZ 99.895 226.203 null] +/D [1251 0 R /XYZ 99.895 716.092 null] >> -% 1005 0 obj +% 1255 0 obj << -/D [1002 0 R /XYZ 258.62 168.146 null] +/D [1251 0 R /XYZ 99.895 517.78 null] >> -% 1001 0 obj +% 1250 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F104 1254 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1008 0 obj +% 1262 0 obj << /Type /Page -/Contents 1009 0 R -/Resources 1007 0 R +/Contents 1263 0 R +/Resources 1261 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 1000 0 R ] +/Parent 1222 0 R +/Annots [ 1256 0 R 1257 0 R 1258 0 R 1259 0 R ] >> -% 1000 0 obj +% 1256 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [297.461 580.64 304.435 592.699] -/A << /S /GoTo /D (section.6) >> ->> -% 1010 0 obj -<< -/D [1008 0 R /XYZ 149.705 753.953 null] ->> -% 1006 0 obj -<< -/D [1008 0 R /XYZ 150.705 665.282 null] +/Rect [428.968 277.323 440.924 289.383] +/A << /S /GoTo /D (table.12) >> >> - -endstream -endobj -1014 0 obj +% 1257 0 obj << -/Length 158 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 208.877 384.376 220.936] +/A << /S /GoTo /D (spdata) >> >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 99.895 705.784 Td [(4)-1000(Computational)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 166.875 -615.346 Td [(29)]TJ -0 g 0 G -ET - -endstream -endobj -1025 0 obj +% 1258 0 obj << -/Length 7171 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.1)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ/F54 9.9626 Tf -51.12 -19.189 Td [(This)-358(subr)18(outine)-358(is)-359(an)-358(interface)-358(to)-358(the)-358(computational)-359(kernel)-358(for)-358(dense)-358(matrix)]TJ 0 -11.955 Td [(sum:)]TJ/F52 9.9626 Tf 143.149 -12.304 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td [(a)]TJ/F52 9.9626 Tf 7.616 0 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -175.407 -18.398 Td [(call)]TJ -0 g 0 G - [-525(psb_geaxpby\050alpha,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 227.737 629.682 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -BT -/F52 9.9626 Tf 234.009 621.114 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(,)]TJ/F60 9.9626 Tf 5.105 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 89.359 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 227.737 617.328 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -BT -/F54 9.9626 Tf 233.715 608.761 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 370.948 608.96 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 608.761 Td [(geaxpby)]TJ -140.222 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 370.948 597.005 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 596.805 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 370.948 585.05 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 584.85 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 370.948 573.094 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 572.895 Td [(geaxpby)]TJ -ET -q -1 0 0 1 227.737 569.109 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 280.768 540.731 Td [(T)92(able)-250(1:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -130.063 -35.05 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.39 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.391 Td [(alpha)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.391 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(o)1(r)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 348.869 Td [(T)]TJ -ET -q -1 0 0 1 445.669 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 348.869 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 348.869 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-312(1)]TJ -0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -82.958 -20.391 Td [(beta)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.618 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.584 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.391 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 191.754 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -182.473 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 188.537 Td [(T)]TJ -ET -q -1 0 0 1 445.669 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 188.537 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 188.537 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-276(numbers)-277(of)-276(the)-276(type)-276(indicated)-277(in)-276(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-276(1)]TJ -0 g 0 G - [(.)-389(The)-276(rank)-277(of)]TJ/F52 9.9626 Tf 288.67 0 Td [(y)]TJ/F54 9.9626 Tf 7.859 0 Td [(must)]TJ -296.529 -11.955 Td [(be)-250(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 65.888 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -95.999 -20.39 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 144.236 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ -0 g 0 G -/F54 9.9626 Tf 114.879 -29.888 Td [(30)]TJ -0 g 0 G -ET +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 140.43 495.412 152.49] +/A << /S /GoTo /D (vdata) >> +>> +% 1259 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [379.43 128.475 391.385 140.535] +/A << /S /GoTo /D (table.12) >> +>> +% 1264 0 obj +<< +/D [1262 0 R /XYZ 149.705 753.953 null] +>> endstream endobj -1032 0 obj +1279 0 obj << -/Length 2404 +/Length 6532 >> stream 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 694.373 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 694.174 Td [(desc)]TJ -ET -q -1 0 0 1 298.05 694.373 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 694.174 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(beta)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -21.918 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.617 0 Td [(b)]TJ/F62 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(12)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -19.925 Td [(y)]TJ +/F59 9.9626 Tf -24.907 -18.597 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 385.864 604.709 cm +1 0 0 1 385.864 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 604.51 Td [(T)]TJ +/F67 9.9626 Tf 389.002 591.891 Td [(T)]TJ ET q -1 0 0 1 394.86 604.709 cm +1 0 0 1 394.86 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 604.51 Td [(vect)]TJ +/F67 9.9626 Tf 397.998 591.891 Td [(vect)]TJ ET q -1 0 0 1 419.547 604.709 cm +1 0 0 1 419.547 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 604.51 Td [(type)]TJ +/F67 9.9626 Tf 422.685 591.891 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf -297.883 -11.956 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -434.371 Td [(31)]TJ -0 g 0 G -ET - -endstream -endobj -1042 0 obj -<< -/Length 7447 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.2)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(gedot)-250(\227)-250(Dot)-250(Product)]TJ/F54 9.9626 Tf -51.12 -18.976 Td [(This)-250(function)-250(computes)-250(dot)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 254.647 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.481 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -286.93 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.459 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 7.597 0 Td [(ar)18(e)-250(r)18(eal)-250(vectors)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F52 9.9626 Tf 104.717 -23.132 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.337 0 Td [(\040)]TJ/F52 9.9626 Tf 13.566 0 Td [(x)]TJ/F52 7.5716 Tf 5.399 4.115 Td [(T)]TJ/F52 9.9626 Tf 5.525 -4.115 Td [(y)]TJ/F54 9.9626 Tf -189.778 -21.93 Td [(Else)-250(if)]TJ/F52 9.9626 Tf 29.474 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 7.596 0 Td [(ar)18(e)-250(complex)-250(vectors)-250(then)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F52 9.9626 Tf 83.965 -23.132 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.336 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F52 7.5716 Tf 5.588 4.115 Td [(H)]TJ/F52 9.9626 Tf 6.812 -4.115 Td [(y)]TJ/F59 9.9626 Tf -175.572 -21.937 Td [(psb_gedot\050x,)-525(y,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -0 g 0 G + [-280(12)]TJ 0 g 0 G + [(.)-400(The)-280(rank)-280(of)]TJ/F60 9.9626 Tf 275.562 0 Td [(y)]TJ/F62 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 53.115 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -83.228 -18.597 Td [(desc)]TJ ET q -1 0 0 1 233.929 570.686 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S -Q -BT -/F52 9.9626 Tf 240.031 562.118 Td [(d)-25(o)-35(t)]TJ/F54 9.9626 Tf 13.444 0 Td [(,)]TJ/F52 9.9626 Tf 5.276 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 91.76 0 Td [(Function)]TJ -ET -q -1 0 0 1 233.929 558.332 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S -Q -BT -/F54 9.9626 Tf 239.906 549.765 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 377.14 549.964 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 380.129 549.765 Td [(gedot)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 377.14 538.009 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 380.129 537.809 Td [(gedot)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 377.14 526.053 cm +1 0 0 1 120.408 549.583 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 380.129 525.854 Td [(gedot)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F59 9.9626 Tf 123.397 549.383 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 377.14 514.098 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 273.363 501.762 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 380.129 513.899 Td [(gedot)]TJ +/F67 9.9626 Tf 276.501 501.563 Td [(desc)]TJ ET q -1 0 0 1 233.929 510.113 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +1 0 0 1 298.05 501.762 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 280.768 481.735 Td [(T)92(able)-250(2:)-310(Data)-250(types)]TJ +/F67 9.9626 Tf 301.189 501.563 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -222.215 -18.597 Td [(trans)]TJ 0 g 0 G -/F51 9.9626 Tf -130.063 -34.507 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 27.666 0 Td [(indicates)-250(what)-250(kind)-250(of)-250(operation)-250(to)-250(perform.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -2.759 -18.597 Td [(trans)-250(=)-250(N)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.951 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 46.983 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G 0 g 0 G - 0 -19.951 Td [(x)]TJ +/F59 9.9626 Tf -46.983 -14.612 Td [(trans)-250(=)-250(T)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 45.33 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 359.506 Td [(T)]TJ -ET -q -1 0 0 1 445.669 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 359.506 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 359.506 Td [(type)]TJ + [-250(2)]TJ +0 g 0 G +0 g 0 G +/F59 9.9626 Tf -45.33 -14.612 Td [(trans)-250(=)-250(C)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +/F62 9.9626 Tf 45.878 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ 0 0 1 rg 0 0 1 RG - [-312(2)]TJ + [-250(3)]TJ 0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ + -45.878 -18.597 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F93 10.3811 Tf 25.193 0 Td [(=)]TJ/F60 9.9626 Tf 11.434 0 Td [(N)]TJ/F62 9.9626 Tf -75.267 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -82.958 -19.951 Td [(y)]TJ +/F59 9.9626 Tf -24.907 -18.596 Td [(work)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 268.023 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 267.824 Td [(T)]TJ -ET -q -1 0 0 1 445.669 268.023 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 267.824 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 268.023 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 267.824 Td [(type)]TJ +/F62 9.9626 Tf 28.782 0 Td [(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-270(as:)-351(a)-270(rank)-270(one)-270(array)-271(of)-270(the)-270(same)-270(type)-271(of)]TJ/F60 9.9626 Tf 220.875 0 Td [(x)]TJ/F62 9.9626 Tf 7.898 0 Td [(and)]TJ/F60 9.9626 Tf 19.684 0 Td [(y)]TJ/F62 9.9626 Tf 7.798 0 Td [(with)-270(the)-270(T)74(AR-)]TJ -256.255 -11.955 Td [(GET)-250(attribute.)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-313(2)]TJ +/F59 9.9626 Tf -24.907 -18.597 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)-500(The)-314(rank)-313(of)]TJ/F52 9.9626 Tf 274.898 0 Td [(y)]TJ/F54 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.956 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.116 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -83.227 -19.95 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 224.162 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 223.963 Td [(a)]TJ + 0 -18.597 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(matrix)]TJ/F60 9.9626 Tf 144.939 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -135.659 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 324.173 176.341 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 327.311 176.142 Td [(desc)]TJ -ET -q -1 0 0 1 348.86 176.341 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 351.998 176.142 Td [(type)]TJ + [-250(12)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.951 Td [(global)]TJ +/F59 9.9626 Tf -24.907 -18.597 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 76.693 -29.888 Td [(32)]TJ + 141.968 -36.529 Td [(49)]TJ 0 g 0 G ET endstream endobj -1048 0 obj +1286 0 obj << -/Length 3827 +/Length 8092 >> stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.13)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(spsm)-250(\227)-250(T)111(riangular)-250(System)-250(Solve)]TJ/F62 9.9626 Tf -57.098 -19.83 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(T)90(riangular)-250(System)-250(Solve:)]TJ/F60 9.9626 Tf 122.724 -35.213 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F62 7.5716 Tf 6.228 0 Td [(1)]TJ/F60 9.9626 Tf 4.577 -4.115 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -82.196 -16.139 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F91 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F62 7.5716 Tf 6.227 0 Td [(1)]TJ/F60 9.9626 Tf 4.578 -4.114 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.139 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F62 7.5716 Tf 6.228 0 Td [(1)]TJ/F60 9.9626 Tf 4.607 -4.114 Td [(D)-52(x)]TJ/F93 10.3811 Tf 15.536 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F60 7.5716 Tf 6.421 0 Td [(T)]TJ/F60 9.9626 Tf 5.694 -4.114 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -83.506 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F91 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F60 7.5716 Tf 6.42 0 Td [(T)]TJ/F60 9.9626 Tf 5.695 -4.114 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F60 7.5716 Tf 6.421 0 Td [(T)]TJ/F60 9.9626 Tf 5.724 -4.114 Td [(D)-52(x)]TJ/F93 10.3811 Tf 15.536 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.091 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F60 7.5716 Tf 6.61 0 Td [(H)]TJ/F60 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.648 0 Td [(y)]TJ -84.982 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F91 7.8896 Tf 14.775 4.115 Td [(\000)]TJ/F60 7.5716 Tf 6.61 0 Td [(H)]TJ/F60 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.504 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -93.281 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F60 7.5716 Tf 6.61 0 Td [(H)]TJ/F60 9.9626 Tf 7.012 -4.115 Td [(D)-52(x)]TJ/F93 10.3811 Tf 15.536 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.648 0 Td [(y)]TJ/F62 9.9626 Tf -201.061 -38.202 Td [(wher)18(e:)]TJ 0 g 0 G - [(.true.)]TJ +/F60 9.9626 Tf -14.65 -21.265 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -31.881 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 115.61 0 Td [(x)]TJ/F62 7.5716 Tf 5.201 -1.495 Td [(:)-12(,)-13(:)]TJ 0 g 0 G +/F60 9.9626 Tf -131.167 -20.218 Td [(y)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 115.441 0 Td [(y)]TJ/F62 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F52 9.9626 Tf 126.33 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -206.483 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F60 9.9626 Tf -130.599 -20.218 Td [(T)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F62 9.9626 Tf 11.432 0 Td [(is)-250(the)-250(global)-250(sparse)-250(block)-250(triangular)-250(submatrix)]TJ/F60 9.9626 Tf 206.797 0 Td [(T)]TJ 0 g 0 G - [(.)]TJ + -218.159 -21.712 Td [(D)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +/F62 9.9626 Tf 12.956 0 Td [(is)-250(the)-250(scaling)-250(diagonal)-250(matrix.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 6.894 -21.266 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ + [-525(psb_spsm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-204(multiple)-204(dot)-204(pr)18(oducts)-204(at)-204(the)-204(same)-204(time;)-219(in)-204(this)-204(case,)-213(it)-204(is)-204(possible)]TJ 0 -11.955 Td [(to)-250(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 52.303 -19.925 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(t,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gedot\050x1,y1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.\051)]TJ 0 -11.956 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gedot\050x2,y2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.944 -11.955 Td [(call)]TJ 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ + [-525(psb_spsm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gedot\050x3,y3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(t,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_sum\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.303 -19.925 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -282.939 Td [(33)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1059 0 obj -<< -/Length 8275 ->> -stream + [-525(trans,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(unit,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.3)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Product)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-283(subr)18(outine)-284(computes)-283(a)-284(series)-284(of)-283(dot)-284(pr)18(oducts)-283(among)-284(the)-283(columns)-284(of)-283(two)]TJ 0 -11.955 Td [(dense)-250(matrices)]TJ/F52 9.9626 Tf 68.208 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(:)]TJ/F52 9.9626 Tf 24.807 -13.101 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-12(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F52 7.5716 Tf 4.343 4.114 Td [(T)]TJ/F52 9.9626 Tf 5.525 -4.114 Td [(y)]TJ/F85 10.3811 Tf 5.23 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf -214.288 -16.876 Td [(If)-300(the)-299(matrices)-300(ar)18(e)-299(complex,)-312(then)-300(the)-300(usual)-299(convention)-300(applies,)-312(i.e.)-459(the)-299(conju-)]TJ 0 -11.955 Td [(gate)-239(transpose)-239(of)]TJ/F52 9.9626 Tf 77.351 0 Td [(x)]TJ/F54 9.9626 Tf 7.589 0 Td [(is)-239(used.)-307(If)]TJ/F52 9.9626 Tf 45.493 0 Td [(x)]TJ/F54 9.9626 Tf 7.589 0 Td [(and)]TJ/F52 9.9626 Tf 19.375 0 Td [(y)]TJ/F54 9.9626 Tf 7.489 0 Td [(ar)18(e)-239(of)-239(rank)-240(one,)-241(then)]TJ/F52 9.9626 Tf 92.601 0 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf 14.552 0 Td [(is)-239(a)-240(scalar)75(,)-242(else)-239(it)]TJ -272.039 -11.955 Td [(is)-250(a)-250(rank)-250(one)-250(array)111(.)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.174 -11.955 Td [(call)]TJ + [-525(choice,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gedots\050res,)-525(x,)-525(y,)-525(desc_a,)-525(info\051)]TJ + [-525(diag,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(work\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 230.392 595.704 cm +1 0 0 1 230.392 339.439 cm []0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 236.394 587.136 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf 12.17 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 93.135 0 Td [(Subroutine)]TJ +/F60 9.9626 Tf 236.623 330.871 Td [(T)]TJ/F62 9.9626 Tf 6.451 0 Td [(,)]TJ/F60 9.9626 Tf 5.275 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(,)]TJ/F60 9.9626 Tf 5.306 0 Td [(D)]TJ/F62 9.9626 Tf 7.975 0 Td [(,)]TJ/F68 9.9626 Tf 5.105 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(,)]TJ/F68 9.9626 Tf 5.355 0 Td [(b)]TJ/F59 9.9626 Tf 64.393 0 Td [(Subroutine)]TJ ET q -1 0 0 1 230.392 583.351 cm +1 0 0 1 230.392 327.085 cm []0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 236.369 574.783 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 236.369 318.517 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 373.603 574.982 cm +1 0 0 1 373.603 318.716 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 574.783 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 376.592 318.517 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 373.603 563.027 cm +1 0 0 1 373.603 306.761 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 562.828 Td [(gedots)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 376.592 306.562 Td [(spsm)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 373.603 551.072 cm +1 0 0 1 373.603 294.806 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 550.872 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 376.592 294.607 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 373.603 539.116 cm +1 0 0 1 373.603 282.851 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 538.917 Td [(gedots)]TJ +/F62 9.9626 Tf 376.592 282.652 Td [(spsm)]TJ ET q -1 0 0 1 230.392 535.131 cm +1 0 0 1 230.392 278.866 cm []0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 280.768 506.753 Td [(T)92(able)-250(3:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 278.277 250.487 Td [(T)92(able)-250(13:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -32.002 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -127.572 -38.916 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.22 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -21.712 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.22 Td [(x)]TJ + 0 -21.713 Td [(alpha)]TJ +0 g 0 G +/F62 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.368 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G + 141.967 -29.888 Td [(50)]TJ +0 g 0 G +ET + +endstream +endobj +1297 0 obj +<< +/Length 7295 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(t)]TJ +0 g 0 G +/F62 9.9626 Tf 8.299 0 Td [(the)-250(global)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 171.221 0 Td [(T)]TJ/F62 9.9626 Tf 6.451 0 Td [(.)]TJ -161.064 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(type)-250(speci\002ed)-250(in)-250(\247)]TJ +0 0 1 rg 0 0 1 RG + [-250(3)]TJ +0 g 0 G + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F59 9.9626 Tf -24.907 -20.65 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 388.689 cm +1 0 0 1 385.864 590.037 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 388.49 Td [(T)]TJ +/F67 9.9626 Tf 389.002 589.838 Td [(T)]TJ ET q -1 0 0 1 445.669 388.689 cm +1 0 0 1 394.86 590.037 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 388.49 Td [(vect)]TJ +/F67 9.9626 Tf 397.998 589.838 Td [(vect)]TJ ET q -1 0 0 1 470.356 388.689 cm +1 0 0 1 419.547 590.037 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 388.49 Td [(type)]TJ +/F67 9.9626 Tf 422.685 589.838 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-312(3)]TJ + [-278(13)]TJ +0 g 0 G + [(.)-395(The)-278(rank)-278(of)]TJ/F60 9.9626 Tf 275.498 0 Td [(x)]TJ/F62 9.9626 Tf 7.977 0 Td [(must)-278(be)]TJ -283.475 -11.956 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -82.959 -20.649 Td [(beta)]TJ +0 g 0 G +/F62 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.617 0 Td [(b)]TJ/F62 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ 0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -82.958 -19.221 Td [(y)]TJ +/F59 9.9626 Tf -24.907 -20.65 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 297.738 cm +1 0 0 1 385.864 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 297.539 Td [(T)]TJ +/F67 9.9626 Tf 389.002 428.986 Td [(T)]TJ ET q -1 0 0 1 445.669 297.738 cm +1 0 0 1 394.86 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 297.539 Td [(vect)]TJ +/F67 9.9626 Tf 397.998 428.986 Td [(vect)]TJ ET q -1 0 0 1 470.356 297.738 cm +1 0 0 1 419.547 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 297.539 Td [(type)]TJ +/F67 9.9626 Tf 422.685 428.986 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-313(3)]TJ + [-280(13)]TJ 0 g 0 G - [(.)-500(The)-314(rank)-313(of)]TJ/F52 9.9626 Tf 274.898 0 Td [(y)]TJ/F54 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.116 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ + [(.)-400(The)-280(rank)-280(of)]TJ/F60 9.9626 Tf 275.562 0 Td [(y)]TJ/F62 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 53.115 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -83.227 -19.22 Td [(desc)]TJ +/F59 9.9626 Tf -83.228 -20.65 Td [(desc)]TJ ET q -1 0 0 1 171.218 254.607 cm +1 0 0 1 120.408 384.625 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 254.408 Td [(a)]TJ +/F59 9.9626 Tf 123.397 384.426 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 206.786 cm +1 0 0 1 273.363 336.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 206.587 Td [(desc)]TJ +/F67 9.9626 Tf 276.501 336.605 Td [(desc)]TJ ET q -1 0 0 1 348.86 206.786 cm +1 0 0 1 298.05 336.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 206.587 Td [(type)]TJ +/F67 9.9626 Tf 301.189 336.605 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.22 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -222.215 -20.649 Td [(trans)]TJ 0 g 0 G +/F62 9.9626 Tf 27.666 0 Td [(specify)-250(with)]TJ/F60 9.9626 Tf 56.398 0 Td [(unitd)]TJ/F62 9.9626 Tf 24.637 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ 0 g 0 G - 0 -19.221 Td [(res)]TJ +/F59 9.9626 Tf -83.794 -20.65 Td [(trans)-250(=)-250('N')]TJ 0 g 0 G -/F54 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F52 9.9626 Tf 126.33 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.968 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-289(as:)-389(a)-290(number)-290(or)-289(a)-290(rank-one)-289(array)-290(of)-289(the)-290(data)-289(type)-290(indicated)-289(in)]TJ 0 -11.955 Td [(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F62 9.9626 Tf 52.522 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(transposed)-250(matrix)]TJ 0 g 0 G - [(.)]TJ +/F59 9.9626 Tf -52.522 -16.303 Td [(trans)-250(=)-250('T')]TJ 0 g 0 G - 141.967 -29.888 Td [(34)]TJ +/F62 9.9626 Tf 50.869 0 Td [(the)-250(operation)-250(is)-250(with)-250(transposed)-250(matrix.)]TJ +0 g 0 G +/F59 9.9626 Tf -50.869 -16.302 Td [(trans)-250(=)-250('C')]TJ +0 g 0 G +/F62 9.9626 Tf 51.417 0 Td [(the)-250(operation)-250(is)-250(with)-250(conjugate)-250(transposed)-250(matrix.)]TJ -51.417 -20.65 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F93 10.3811 Tf 25.193 0 Td [(=)]TJ/F60 9.9626 Tf 11.434 0 Td [(N)]TJ/F62 9.9626 Tf -75.267 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.65 Td [(unitd)]TJ +0 g 0 G +/F62 9.9626 Tf 29.878 0 Td [(specify)-250(with)]TJ/F60 9.9626 Tf 56.398 0 Td [(trans)]TJ/F62 9.9626 Tf 23.521 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ +0 g 0 G +/F59 9.9626 Tf -84.89 -20.649 Td [(unitd)-250(=)-250('U')]TJ +0 g 0 G +/F62 9.9626 Tf 54.186 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(scaling)]TJ +0 g 0 G +/F59 9.9626 Tf -54.186 -16.303 Td [(unitd)-250(=)-250('L)74(')]TJ +0 g 0 G +/F62 9.9626 Tf 51.785 0 Td [(the)-250(operation)-250(is)-250(with)-250(left)-250(scaling)]TJ +0 g 0 G +/F59 9.9626 Tf -51.785 -16.302 Td [(unitd)-250(=)-250('R')]TJ +0 g 0 G +/F62 9.9626 Tf 53.628 0 Td [(the)-250(operation)-250(is)-250(with)-250(right)-250(scaling.)]TJ +0 g 0 G + 88.34 -29.888 Td [(51)]TJ 0 g 0 G ET endstream endobj -1064 0 obj +1303 0 obj << -/Length 582 +/Length 4541 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F62 9.9626 Tf 175.611 706.129 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(u)-25(n)-18(i)-32(t)-25(d)]TJ/F93 10.3811 Tf 26.159 0 Td [(=)]TJ/F60 9.9626 Tf 10.927 0 Td [(U)]TJ/F62 9.9626 Tf -75.726 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F59 9.9626 Tf -24.906 -19.925 Td [(choice)]TJ 0 g 0 G - 141.968 -567.87 Td [(35)]TJ +/F62 9.9626 Tf 33.753 0 Td [(speci\002es)-250(the)-250(update)-250(of)-250(overlap)-250(elements)-250(to)-250(be)-250(performed)-250(on)-250(exit:)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -3.865 -19.925 Td [(psb_none_)]TJ +0 g 0 G +0 g 0 G + 0 -15.941 Td [(psb_sum_)]TJ +0 g 0 G +0 g 0 G + 0 -15.94 Td [(psb_avg_)]TJ +0 g 0 G +0 g 0 G + 0 -15.94 Td [(psb_square_root_)]TJ/F62 9.9626 Tf -4.982 -19.925 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F67 9.9626 Tf 38.515 0 Td [(psb_avg_)]TJ/F62 9.9626 Tf -38.515 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(diag)]TJ +0 g 0 G +/F62 9.9626 Tf 24.906 0 Td [(the)-250(diagonal)-250(scaling)-250(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(d)-18(i)-47(a)-47(g)]TJ/F93 10.3811 Tf 18.52 0 Td [(\050)]TJ/F62 9.9626 Tf 4.15 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)-289(=)]TJ/F62 9.9626 Tf 18.002 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(n)-25(o)-35(s)-25(c)-40(a)-25(l)-48(i)-32(n)-47(g)]TJ/F93 10.3811 Tf 41.384 0 Td [(\051)]TJ/F62 9.9626 Tf -135.182 -11.955 Td [(Speci\002ed)-293(as:)-395(a)-293(rank)-293(one)-293(array)-292(containing)-293(numbers)-293(of)-293(the)-292(type)-293(indicated)]TJ 0 -11.955 Td [(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.926 Td [(work)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(a)-250(work)-250(array)111(.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-344(as:)-498(a)-344(rank)-343(one)-344(array)-344(of)-344(the)-344(same)-344(type)-344(of)]TJ/F60 9.9626 Tf 229.679 0 Td [(x)]TJ/F62 9.9626 Tf 8.631 0 Td [(with)-344(the)-344(T)74(ARGET)]TJ -238.31 -11.955 Td [(attribute.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.926 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -73.723 Td [(52)]TJ 0 g 0 G ET endstream endobj -1071 0 obj +1314 0 obj << -/Length 7477 +/Length 7550 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.4)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.14)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 233.576 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -238.781 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.459 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(in\002nity)-250(norm)-250(as:)]TJ/F52 9.9626 Tf 117.807 -18.736 Td [(a)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 25.761 0 Td [(\040)]TJ/F54 9.9626 Tf 13.272 0 Td [(max)]TJ/F52 7.5716 Tf 8.355 -7.21 Td [(i)]TJ/F83 10.3811 Tf 12.349 7.21 Td [(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf -206.019 -23.313 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(the)-250(in\002nity-norm)-250(as:)]TJ/F52 9.9626 Tf 63.42 -18.737 Td [(a)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 25.761 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 8.354 -7.21 Td [(i)]TJ/F85 10.3811 Tf 12.35 7.21 Td [(\050)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F52 9.9626 Tf 3.028 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.17 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(j)]TJ/F52 9.9626 Tf 3.058 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F85 10.3811 Tf 3.128 0 Td [(\051)]TJ/F59 9.9626 Tf -225.616 -22.974 Td [(psb_geamax\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_normi\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(gemlt)-250(\227)-250(Entrywise)-250(Product)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F60 9.9626 Tf 299.677 0 Td [(x)]TJ/F62 9.9626 Tf 7.697 0 Td [(and)]TJ/F60 9.9626 Tf 19.481 0 Td [(y)]TJ -187.918 -21.112 Td [(d)-25(o)-35(t)]TJ/F91 10.3811 Tf 16.336 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F60 9.9626 Tf 4.274 0 Td [(y)]TJ/F93 10.3811 Tf 5.231 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ/F67 9.9626 Tf -187.465 -21.111 Td [(psb_gemlt\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 179.304 566.399 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S +1 0 0 1 183.035 630.896 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q BT -/F52 9.9626 Tf 185.556 557.832 Td [(a)-25(m)-40(a)-42(x)-7779(x)]TJ/F51 9.9626 Tf 220.764 0 Td [(Function)]TJ +/F60 9.9626 Tf 189.137 622.328 Td [(d)-25(o)-35(t)]TJ/F62 9.9626 Tf 13.444 0 Td [(,)]TJ/F60 9.9626 Tf 5.275 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F59 9.9626 Tf 91.759 0 Td [(Function)]TJ ET q -1 0 0 1 179.304 554.046 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S +1 0 0 1 183.035 618.542 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q BT -/F54 9.9626 Tf 185.282 545.478 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 189.012 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 422.639 545.677 cm +1 0 0 1 326.246 610.173 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 425.628 545.478 Td [(geamax)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 329.235 609.974 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 422.639 533.722 cm +1 0 0 1 326.246 598.218 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 425.628 533.523 Td [(geamax)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 329.235 598.019 Td [(gemlt)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 422.639 521.767 cm +1 0 0 1 326.246 586.263 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 425.628 521.568 Td [(geamax)]TJ -240.346 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 329.235 586.064 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 422.639 509.812 cm +1 0 0 1 326.246 574.308 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 425.628 509.612 Td [(geamax)]TJ +/F62 9.9626 Tf 329.235 574.109 Td [(gemlt)]TJ ET q -1 0 0 1 179.304 505.827 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S +1 0 0 1 183.035 570.323 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 280.768 477.448 Td [(T)92(able)-250(4:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(14:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -30.014 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -18.652 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -18.653 Td [(x)]TJ + 0 -19.603 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 174.06 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 362.508 cm +1 0 0 1 280.646 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 362.308 Td [(T)]TJ +/F67 9.9626 Tf 283.785 421.578 Td [(T)]TJ ET q -1 0 0 1 445.669 362.508 cm +1 0 0 1 289.642 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 362.308 Td [(vect)]TJ +/F67 9.9626 Tf 292.781 421.578 Td [(vect)]TJ ET q -1 0 0 1 470.356 362.508 cm +1 0 0 1 314.33 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 362.308 Td [(type)]TJ +/F67 9.9626 Tf 317.468 421.578 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(4)]TJ + [-250(2)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -18.652 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -19.603 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 173.89 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 171.218 331.9 cm +1 0 0 1 280.646 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 283.785 342.199 Td [(T)]TJ +ET +q +1 0 0 1 289.642 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 292.781 342.199 Td [(vect)]TJ +ET +q +1 0 0 1 314.33 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 317.468 342.199 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 310.84 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 331.701 Td [(a)]TJ +/F59 9.9626 Tf 123.397 310.641 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 284.079 cm +1 0 0 1 273.363 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 283.88 Td [(desc)]TJ +/F67 9.9626 Tf 276.501 262.82 Td [(desc)]TJ ET q -1 0 0 1 348.86 284.079 cm +1 0 0 1 298.05 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 283.88 Td [(type)]TJ +/F67 9.9626 Tf 301.189 262.82 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -18.653 Td [(global)]TJ +/F59 9.9626 Tf -222.215 -19.602 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.319 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(.true.)]TJ + 0 -19.603 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -30.607 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.68 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ +ET +q +1 0 0 1 280.646 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 283.785 175.794 Td [(T)]TJ +ET +q +1 0 0 1 289.642 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 292.781 175.794 Td [(vect)]TJ +ET +q +1 0 0 1 314.33 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 317.468 175.794 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(14)]TJ 0 g 0 G - 0 -18.653 Td [(Function)-250(value)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 128.562 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -181.637 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -24.907 -19.603 Td [(info)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ 0 g 0 G - 141.968 -29.888 Td [(36)]TJ +/F62 9.9626 Tf 114.88 -29.888 Td [(53)]TJ 0 g 0 G ET endstream endobj -1076 0 obj -<< -/Length 2600 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-332(at)-333(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 52.303 -19.926 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geamax\050x1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geamax\050x2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geamax\050x3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_amx\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +1319 0 obj +<< +/Length 314 +>> +stream 0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.303 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ +BT +/F62 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -402.49 Td [(37)]TJ + 141.968 -603.736 Td [(54)]TJ 0 g 0 G ET endstream endobj -1085 0 obj +1330 0 obj << -/Length 6238 +/Length 7518 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.5)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.15)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-256(subr)18(outine)-255(computes)-256(a)-256(series)-255(of)-256(in\002nity)-256(norms)-256(on)-255(the)-256(columns)-256(of)-255(a)-256(dense)]TJ 0 -11.955 Td [(matrix)]TJ/F52 9.9626 Tf 31.785 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 88.539 -11.955 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.042 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 7.759 -7.336 Td [(k)]TJ/F83 10.3811 Tf 12.944 7.336 Td [(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(k)]TJ/F54 9.9626 Tf 4.598 0 Td [(,)]TJ/F52 9.9626 Tf 4.206 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -195.028 -22.296 Td [(call)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(division)-250(between)-250(two)-250(vectors)]TJ/F60 9.9626 Tf 300.604 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf -188.347 -21.112 Td [(/)]TJ/F91 10.3811 Tf 9.054 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F93 10.3811 Tf 5.329 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.274 0 Td [(/)]TJ/F60 9.9626 Tf 6.286 0 Td [(y)]TJ/F93 10.3811 Tf 5.231 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ/F67 9.9626 Tf -186.967 -21.111 Td [(psb_gediv\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_geamaxs\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([flag\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 177.192 626.591 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 183.199 630.896 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q BT -/F52 9.9626 Tf 183.195 618.023 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.013 0 Td [(Subroutine)]TJ +/F62 9.9626 Tf 189.301 622.328 Td [(/)-13(,)]TJ/F60 9.9626 Tf 11.437 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(,)]TJ/F60 9.9626 Tf 5.105 0 Td [(y)]TJ/F59 9.9626 Tf 99.043 0 Td [(Function)]TJ ET q -1 0 0 1 177.192 614.237 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 183.199 618.542 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q BT -/F54 9.9626 Tf 183.17 605.669 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 189.177 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 420.527 605.868 cm +1 0 0 1 326.41 610.173 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 605.669 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 329.399 609.974 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 420.527 593.913 cm +1 0 0 1 326.41 598.218 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 593.714 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 329.399 598.019 Td [(gediv)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 420.527 581.958 cm +1 0 0 1 326.41 586.263 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 581.759 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 329.399 586.064 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 420.527 570.003 cm +1 0 0 1 326.41 574.308 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 569.804 Td [(geamaxs)]TJ +/F62 9.9626 Tf 329.399 574.109 Td [(gediv)]TJ ET q -1 0 0 1 177.192 566.018 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 183.199 570.323 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 280.768 537.639 Td [(T)92(able)-250(5:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(15:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(x)]TJ + 0 -19.603 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 174.06 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 415.699 cm +1 0 0 1 280.646 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 415.5 Td [(T)]TJ +/F67 9.9626 Tf 283.785 421.578 Td [(T)]TJ ET q -1 0 0 1 445.669 415.699 cm +1 0 0 1 289.642 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 415.5 Td [(vect)]TJ +/F67 9.9626 Tf 292.781 421.578 Td [(vect)]TJ ET q -1 0 0 1 470.356 415.699 cm +1 0 0 1 314.33 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 415.5 Td [(type)]TJ +/F67 9.9626 Tf 317.468 421.578 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(5)]TJ + [-250(2)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -19.603 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 173.89 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 171.218 383.818 cm +1 0 0 1 280.646 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 283.785 342.199 Td [(T)]TJ +ET +q +1 0 0 1 289.642 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 292.781 342.199 Td [(vect)]TJ +ET +q +1 0 0 1 314.33 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 317.468 342.199 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 310.84 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 383.619 Td [(a)]TJ +/F59 9.9626 Tf 123.397 310.641 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 335.998 cm +1 0 0 1 273.363 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 335.798 Td [(desc)]TJ +/F67 9.9626 Tf 276.501 262.82 Td [(desc)]TJ ET q -1 0 0 1 348.86 335.998 cm +1 0 0 1 298.05 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 335.798 Td [(type)]TJ +/F67 9.9626 Tf 301.189 262.82 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F59 9.9626 Tf -222.215 -19.602 Td [(\003ag)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.925 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 21.589 0 Td [(check)-280(if)-280(any)-280(of)-280(the)]TJ/F60 9.9626 Tf 84.137 0 Td [(y)]TJ/F93 10.3811 Tf 5.23 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)-343(=)]TJ/F62 9.9626 Tf 19.108 0 Td [(0,)-287(and)-280(in)-280(case)-280(r)18(eturns)-280(err)18(or)-280(halting)-280(the)-280(compu-)]TJ -112.449 -11.956 Td [(tation.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F67 9.9626 Tf 132.133 0 Td [(flag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.true.)]TJ 0 g 0 G - 0 -19.925 Td [(res)]TJ +/F59 9.9626 Tf -157.04 -19.603 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(the)-250(columns)-250(of)]TJ/F52 9.9626 Tf 166.26 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.82 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(Speci\002ed)-330(as:)-470(a)-330(number)-330(or)-330(a)-330(rank-one)-330(array)-329(of)-330(long)-330(pr)18(ecision)-330(r)18(eal)-330(num-)]TJ 0 -11.955 Td [(bers.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ + 0 -19.603 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.849 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ -151.111 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ 0 g 0 G - 141.968 -89.943 Td [(38)]TJ + 85.819 -29.888 Td [(55)]TJ 0 g 0 G ET endstream endobj -1093 0 obj +1336 0 obj << -/Length 7104 +/Length 1288 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.6)-1000(psb)]TJ +/F62 9.9626 Tf 175.611 706.129 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 331.456 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 334.594 706.129 Td [(T)]TJ +ET +q +1 0 0 1 340.452 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 343.59 706.129 Td [(vect)]TJ +ET +q +1 0 0 1 365.139 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 368.277 706.129 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 24.453 0 Td [(containing)-354(numbers)-355(of)]TJ -217.119 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(14)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -535.99 Td [(56)]TJ +0 g 0 G +ET + +endstream +endobj +1345 0 obj +<< +/Length 7434 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 99.895 706.129 Td [(4.16)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 125.989 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F83 10.3811 Tf 25.352 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.007 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F54 9.9626 Tf -195.526 -21.269 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 71.974 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F83 10.3811 Tf 25.353 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.17 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F85 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F59 9.9626 Tf -216.928 -19.411 Td [(psb_geasum\050x,)-525(desc_a,)-525(info)-525([,global]\051)-190(psb_norm1\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(inverse)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 252.097 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)-250(puts)-250(it)-250(into)]TJ/F60 9.9626 Tf 69.951 0 Td [(y)]TJ/F62 9.9626 Tf -184.401 -18.334 Td [(/)]TJ/F91 10.3811 Tf 9.054 0 Td [(\040)]TJ/F62 9.9626 Tf 13.272 0 Td [(1)-13(/)]TJ/F60 9.9626 Tf 11.562 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(.)]TJ/F67 9.9626 Tf -181.059 -18.334 Td [(psb_geinv\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([flag\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 128.44 576.025 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 183.343 637.562 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q BT -/F52 9.9626 Tf 134.691 567.457 Td [(a)-25(s)-25(u)-25(m)-7810(x)]TJ/F51 9.9626 Tf 220.765 0 Td [(Function)]TJ +/F62 9.9626 Tf 189.446 628.995 Td [(/)-12(,)]TJ/F60 9.9626 Tf 11.437 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F59 9.9626 Tf 99.042 0 Td [(Function)]TJ ET q -1 0 0 1 128.44 563.671 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 183.343 625.209 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q BT -/F54 9.9626 Tf 134.417 555.103 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 189.321 616.641 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 371.775 555.303 cm +1 0 0 1 326.555 616.84 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 555.103 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 329.544 616.641 Td [(geinv)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 371.775 543.347 cm +1 0 0 1 326.555 604.885 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 543.148 Td [(geasum)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 329.544 604.686 Td [(geinv)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 371.775 531.392 cm +1 0 0 1 326.555 592.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 531.193 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 329.544 592.731 Td [(geinv)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 371.775 519.437 cm +1 0 0 1 326.555 580.975 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 519.238 Td [(geasum)]TJ +/F62 9.9626 Tf 329.544 580.775 Td [(geinv)]TJ ET q -1 0 0 1 128.44 515.452 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 183.343 576.99 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 229.958 487.074 Td [(T)92(able)-250(6:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 227.467 548.611 Td [(T)92(able)-250(16:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -33.561 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -127.572 -29.451 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.665 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -18.492 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.666 Td [(x)]TJ + 0 -18.491 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 174.06 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 385.864 366.56 cm +1 0 0 1 280.646 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 366.361 Td [(T)]TJ +/F67 9.9626 Tf 283.785 434.356 Td [(T)]TJ ET q -1 0 0 1 394.86 366.56 cm +1 0 0 1 289.642 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 366.361 Td [(vect)]TJ +/F67 9.9626 Tf 292.781 434.356 Td [(vect)]TJ ET q -1 0 0 1 419.547 366.56 cm +1 0 0 1 314.33 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 366.361 Td [(type)]TJ +/F67 9.9626 Tf 317.468 434.356 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(6)]TJ + [-250(2)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.666 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -18.492 Td [(desc)]TJ ET q -1 0 0 1 120.408 334.939 cm +1 0 0 1 120.408 404.108 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 334.74 Td [(a)]TJ +/F59 9.9626 Tf 123.397 403.909 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 287.119 cm +1 0 0 1 273.363 356.288 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 286.919 Td [(desc)]TJ +/F67 9.9626 Tf 276.501 356.088 Td [(desc)]TJ ET q -1 0 0 1 298.05 287.119 cm +1 0 0 1 298.05 356.288 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 286.919 Td [(type)]TJ +/F67 9.9626 Tf 301.189 356.088 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -19.665 Td [(global)]TJ +/F59 9.9626 Tf -222.215 -18.491 Td [(\003ag)]TJ 0 g 0 G -/F54 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ +/F62 9.9626 Tf 21.589 0 Td [(check)-278(if)-279(any)-278(of)-278(the)]TJ/F60 9.9626 Tf 84.227 0 Td [(x)]TJ/F93 10.3811 Tf 5.329 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)-340(=)]TJ/F62 9.9626 Tf 19.049 0 Td [(0,)-285(and)-279(in)-278(case)-279(r)18(eturns)-278(err)18(or)-278(halting)-279(the)-278(compu-)]TJ -112.58 -11.955 Td [(tation.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F67 9.9626 Tf 132.133 0 Td [(flag)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG [(=)]TJ 0 g 0 G [(.true.)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -31.621 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -157.04 -18.492 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.666 Td [(Function)-250(value)]TJ + 0 -18.491 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 102.161 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -155.236 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.849 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ -151.669 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ +ET +q +1 0 0 1 280.646 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 283.785 204.972 Td [(T)]TJ +ET +q +1 0 0 1 289.642 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 292.781 204.972 Td [(vect)]TJ +ET +q +1 0 0 1 314.33 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 317.468 204.972 Td [(type)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(16)]TJ 0 g 0 G - 141.968 -29.888 Td [(39)]TJ + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -18.492 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -36.266 Td [(57)]TJ 0 g 0 G ET endstream endobj -1098 0 obj +1351 0 obj << -/Length 2603 +/Length 623 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-333(i)1(s)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 20.922 -19.926 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geasum\050x1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 31.382 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geasum\050x2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geasum\050x3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_sum\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ +/F59 14.3462 Tf 150.705 706.042 Td [(5)-1000(Communication)-250(routines)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-303(r)18(outines)-302(in)-303(this)-303(chapter)-302(implement)-303(various)-303(global)-302(communication)-303(opera-)]TJ 0 -11.955 Td [(tors)-271(on)-271(vectors)-271(associated)-271(with)-271(a)-272(discr)18(etization)-271(mesh.)-373(For)-271(auxiliary)-271(communi-)]TJ 0 -11.955 Td [(cation)-250(r)18(outines)-250(not)-250(tied)-250(to)-250(a)-250(discr)18(etization)-250(space)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(6)]TJ 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.304 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ + [(.)]TJ 0 g 0 G - 141.968 -402.49 Td [(40)]TJ + 166.874 -568.992 Td [(58)]TJ 0 g 0 G ET endstream endobj -1106 0 obj +1359 0 obj << -/Length 7308 +/Length 6634 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.7)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(5.1)-1000(psb)]TJ ET q 1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -20.192 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(1-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F52 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 120.031 -13.856 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 7.76 -7.335 Td [(k)]TJ/F83 10.3811 Tf 12.944 7.335 Td [(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.273 0 Td [(k)]TJ/F54 9.9626 Tf 4.598 0 Td [(,)]TJ/F52 9.9626 Tf 4.206 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F54 9.9626 Tf -215.202 -24.535 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 125.227 -23.185 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.007 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F54 9.9626 Tf -196.039 -23.185 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 71.212 -23.185 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.169 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F85 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -212.21 -21.96 Td [(call)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ/F62 9.9626 Tf -51.121 -19.15 Td [(These)-250(subr)18(outines)-250(gathers)-250(the)-250(values)-250(of)-250(the)-250(halo)-250(elements:)]TJ/F60 9.9626 Tf 158.568 -25.014 Td [(x)]TJ/F91 10.3811 Tf 8.097 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F62 9.9626 Tf -180.232 -22.11 Td [(wher)18(e:)]TJ +0 g 0 G +/F60 9.9626 Tf 0.294 -20.212 Td [(x)]TJ 0 g 0 G - [-525(psb_geasums\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ +/F62 9.9626 Tf 10.187 0 Td [(is)-250(a)-250(global)-250(dense)-250(submatrix.)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 126.328 514.627 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 179.582 596.326 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 132.33 506.059 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +/F68 9.9626 Tf 185.685 587.758 Td [(a)]TJ/F62 9.9626 Tf 5.384 0 Td [(,)]TJ/F60 9.9626 Tf 5.276 0 Td [(x)]TJ/F59 9.9626 Tf 110.13 0 Td [(Subroutine)]TJ ET q -1 0 0 1 126.328 502.274 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 179.582 583.972 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 132.305 493.706 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 185.56 575.404 Td [(Integer)-8983(psb)]TJ ET q -1 0 0 1 369.663 493.905 cm +1 0 0 1 322.794 575.603 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 493.706 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 325.783 575.404 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 369.663 481.95 cm +1 0 0 1 322.794 563.648 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 481.751 Td [(geasums)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 325.783 563.449 Td [(halo)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 369.663 469.995 cm +1 0 0 1 322.794 551.693 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 469.795 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 325.783 551.494 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 369.663 458.04 cm +1 0 0 1 322.794 539.738 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 457.84 Td [(geasums)]TJ +/F62 9.9626 Tf 325.783 539.539 Td [(halo)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 126.328 454.054 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 322.794 527.783 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 325.783 527.583 Td [(halo)]TJ +ET +q +1 0 0 1 179.582 523.798 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 229.958 425.676 Td [(T)92(able)-250(7:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 227.467 495.419 Td [(T)92(able)-250(17:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -127.572 -24.102 Td [(call)]TJ 0 g 0 G + [-525(psb_halo\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -130.063 -37.636 Td [(T)90(ype:)]TJ + [-525(info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-525(psb_halo\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -22.46 Td [(On)-250(Entry)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -22.459 Td [(x)]TJ + [-525(work,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(data)]TJ +0 g 0 G + [(\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.301 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.308 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.309 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 89.688 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -79.949 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 385.864 295.5 cm +1 0 0 1 385.864 348.823 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 295.3 Td [(T)]TJ +/F67 9.9626 Tf 389.002 348.623 Td [(T)]TJ ET q -1 0 0 1 394.86 295.5 cm +1 0 0 1 394.86 348.823 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 295.3 Td [(vect)]TJ +/F67 9.9626 Tf 397.998 348.623 Td [(vect)]TJ ET q -1 0 0 1 419.547 295.5 cm +1 0 0 1 419.547 348.823 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 295.3 Td [(type)]TJ +/F67 9.9626 Tf 422.685 348.623 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(7)]TJ + [-250(17)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.459 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -20.308 Td [(desc)]TJ ET q -1 0 0 1 120.408 261.085 cm +1 0 0 1 120.408 316.559 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 260.886 Td [(a)]TJ +/F59 9.9626 Tf 123.397 316.36 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 213.264 cm +1 0 0 1 309.258 268.738 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 213.065 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 268.539 Td [(desc)]TJ ET q -1 0 0 1 298.05 213.264 cm +1 0 0 1 333.945 268.738 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 213.065 Td [(type)]TJ +/F67 9.9626 Tf 337.084 268.539 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -22.459 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -20.308 Td [(work)]TJ 0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F60 9.9626 Tf 218.454 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G - 0 -22.46 Td [(res)]TJ +/F59 9.9626 Tf -248.566 -20.309 Td [(data)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F52 9.9626 Tf 176.182 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Short)-255(as:)-320(a)-255(long)-254(pr)18(ecision)-255(r)18(eal)-255(number)74(.)-325(Speci\002ed)-255(as:)-320(a)-254(long)-255(pr)18(ecision)-255(r)18(eal)]TJ 0 -11.955 Td [(number)74(.)]TJ +/F62 9.9626 Tf 24.349 0 Td [(index)-250(list)-250(selector)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)74(.)-290(V)92(alues:)]TJ/F67 9.9626 Tf 136.507 0 Td [(psb_comm_halo_)]TJ/F62 9.9626 Tf 73.224 0 Td [(,)]TJ/F67 9.9626 Tf 2.491 0 Td [(psb_comm_mov_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_comm_ext_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)]TJ -352.714 -11.955 Td [(default:)]TJ/F67 9.9626 Tf 39.042 0 Td [(psb_comm_halo_)]TJ/F62 9.9626 Tf 73.225 0 Td [(.)-634(Chooses)-358(the)-358(index)-358(list)-358(on)-357(which)-358(to)-358(base)-358(the)]TJ -112.267 -11.955 Td [(data)-250(exchange.)]TJ 0 g 0 G - 141.968 -29.888 Td [(41)]TJ + 141.968 -29.888 Td [(59)]TJ +0 g 0 G +ET + +endstream +endobj +1366 0 obj +<< +/Length 3039 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F60 9.9626 Tf 117.085 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Returned)-285(as:)-381(a)-285(rank)-285(one)-286(or)-285(two)-285(array)-285(containing)-285(numbers)-286(of)-285(type)-285(speci-)]TJ 0 -11.955 Td [(\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(17)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.68 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -164.68 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value)-250(that)-250(contains)-250(an)-250(err)18(or)-250(code.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +1 0 0 1 210.511 336.406 cm +q +.45 0 0 .45 0 0 cm +q +1 0 0 1 0 0 cm +/Im4 Do +Q +Q +0 g 0 G +1 0 0 1 -210.511 -336.406 cm +BT +/F62 9.9626 Tf 240.086 304.526 Td [(Figur)18(e)-250(3:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ +0 g 0 G +0 g 0 G +/F59 11.9552 Tf -89.381 -23.91 Td [(Usage)-325(Example)]TJ/F62 9.9626 Tf 87.482 0 Td [(Consider)-325(the)-325(discr)18(etization)-324(mesh)-325(depicted)-325(in)-325(\002g.)]TJ +0 0 1 rg 0 0 1 RG + [-325(3)]TJ +0 g 0 G + [(,)-343(parti-)]TJ -87.482 -11.956 Td [(tioned)-219(among)-220(two)-219(pr)18(ocesses)-220(as)-219(shown)-220(b)1(y)-220(the)-219(dashed)-220(line;)-229(the)-220(data)-219(distribution)]TJ 0 -11.955 Td [(is)-343(such)-342(that)-343(each)-343(pr)18(ocess)-343(will)-342(own)-343(32)-343(entries)-343(in)-342(the)-343(index)-343(space,)-366(with)-342(a)-343(halo)]TJ 0 -11.955 Td [(made)-355(of)-355(8)-355(entries)-355(place)1(d)-355(at)-355(local)-355(indices)-355(33)-355(thr)18(ough)-355(40.)-624(If)-355(pr)18(ocess)-355(0)-355(assigns)]TJ 0 -11.955 Td [(an)-280(initial)-280(value)-280(of)-281(1)-280(to)-280(its)-280(entries)-280(in)-280(the)]TJ/F60 9.9626 Tf 173.857 0 Td [(x)]TJ/F62 9.9626 Tf 7.997 0 Td [(vector)74(,)-288(and)-280(pr)18(ocess)-280(1)-280(assigns)-280(a)-280(value)]TJ -181.854 -11.955 Td [(of)-314(2,)-329(then)-313(after)-314(a)-314(c)1(a)-1(l)1(l)-314(to)]TJ/F67 9.9626 Tf 106.994 0 Td [(psb_halo)]TJ/F62 9.9626 Tf 44.966 0 Td [(the)-314(conten)1(ts)-314(of)-314(the)-313(local)-314(vectors)-313(will)-314(be)-313(the)]TJ -151.96 -11.955 Td [(following:)]TJ +0 g 0 G + 166.874 -118.447 Td [(60)]TJ 0 g 0 G ET endstream endobj -1111 0 obj +1362 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/try8x8.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 1369 0 R +/BBox [0 0 498 439] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 1370 0 R +>>/Font << /R8 1371 0 R/R10 1372 0 R>> +>> +/Length 3349 +/Filter /FlateDecode +>> +stream +xœ]›½Ž$¹„ý~ŠötkìÿªXtÒj½ÃY}Ð-q’¡×WWfDFr±Æ$ɬªo›1d%ç÷gù¨ÏrÿÃÏ×o¿Ìç¯ÿ}Œãù¿‡=¿üù1Žùœs<³hÔñü÷ã×^Ù½ŽÔ}ì®c©¿õÅþVzê_=ú©þ>⩽¦ÇŽí'ž»!ïˆw È;\)ònÒBBZÃ!-$äݤ…5=ÖOÈztRZHLk€Óâ•/©ÇDµX=&¬·œÖcâZ ¼× b!r+“ÈÙ@¶xå €ì1‘­d‰ì-Gö˜ÈÖ²Ç5ÙGù¬|‡|7ˆ|Ç+_@d‹ùnÙâ@¶-ä»Ed‹k ²¹×И…D¶-^ù {LdkÙc"{Ë‘=&²µ€ìqÍ@ö‘@>Cqò)ÍY¼òD>³ì¬Eä3 Ï[@>³ô¬Eä3‹ÏiŠy´Ÿ…aeMò³xå èf-ËÏZô³–åç-8ZËò³=­eù9M€>åyÝœÅFê@l#mÄH?Ki³h¤œy¤ž1ÒŽ–FêÙb¤®+Ô‘ ¬ü|cîÇÌ3FŒ9FŒ™#7³Fê#Æ#ÆŒcÖH1ÇÈ!€•ŸïÌ¥‰ùŽÅì#ÎÌgƈ1ÇH=cÄ™9âÌ>âÌ1RDàÌ9°òó¹+˜-fŒsŒ3GnfÔ3FŒ9FŒ#Ƭ‘"cŽ‘C+?ߘëÁlq0cĘcĘ9r3k¤ž1bÌ1bÌ1fsŒXùùÎ<¤A‹Å<¤Áqæ! j¤ž1âÌ#k#Î<²922Á!€•ŸoÌ—$xe^à•õwI~WVß%ñ]Y{—¤weå]Þ•tw…ì®MuIt›æ’ä6Å%ÁmzKrÛԖĶi-I-+MBK½ð9 Ë1>úó(çÇ,Ým;V¥¯Ç(ó£ñ7P?#¿Óz_þq"ûýmó»ÜÁëw¿›`ùÁòz|ÛíÌÚÑìwM,¤…fÍ~F~§Ã:Z?kF³ßE±Öš5í©ž, Í]ËÌ`ÍŒf¿ cí/4kÞÈï4R •¹¡U|ö$´»é3Ðü.ßvsò°ohSaAB»›†æù~éhíèÍ~ëÇŽFhÖ´§z~° dáµ2š-Eb‡%4kÞÈï´EG«kmh…Ÿ}éÚÝt€E4ÜåÛn5öM¶|Š=aBëò;Ýhc“­¯bß(4k:@Èwù¶¹‹Eï/õ}3^<1sÁ„˜Ýatε©Û³>øµI`AÌ&GR€0wªÆIØ¿ÿüúOQu}ýçN5h=u÷ |ù§¨z|ù·õœÅîOïžó¶û××cdg¶ÌóôXñ½°ø³y‰ìÛ¶Ö +;ãÍï,_0 Ž 7k‚3ãŠM·à¼¿Dz,íÙ·qž<à¬åõ|Ád8¬Ðlåã…ëͺâM༿Dz¬áçpŸ?,pw SpeŸV.Å<î®cB`jgý%Òc±æp0©È>ðùÃÔxó»åõ|Á$8®¹,vŸÂõf_ñNFpÞ_"=Ve/,ÐÖWøù»­în`\òµmqeqÏ‚0‹÷H ®CHåàÆ&3°x×pÖ€‘œ-¯§,„[áú‰ù˜›&ÔÀd.¹œŒfÅì…e1,jÙ’µ¤07®Æ©Ø•@!Lqu aî\ƒFT7¡ fâJî–÷±ÃÝŽ+ÜM/»Þ·×üÀ"Îw·ÈZh!öùA¾™¿ÂóùÄý­ß˜?´Cœõá¾/¬ƒÏûAúÈ~—ö•ÒïüúÉi™Y“âˆì 1 ÃÄ<qVÖÆ¢4"›,YIsCk1#›0B3¡õ¤‹¹¡ð§ºÙ +U1š\o{¯Wµçlµ…ëé}ùëÑJ“ë­´\ZŽ!¶Ybþín¨جòþÖ_µ«Oâã;¼ª'®7wÓ›þàs×[i¸´"óØ]Œù·»©|Þ_µµOæÃûºªÝ'®7wS"øÜõVZ .-ÊÏÌW9p=òYÕþV<‰ï檶 ä뜸ùÌõVZ .­ËïŸÚÿŠ'óÁõ¦6¨¸Þ]/ÊœÁçý‡Ö†ªp.¾Êy ëÏú§¶ÀâI|t½©=*ù:ç…®>ë?´6T5|t1䜺îoýS»`ñ$>ºÞÔ6×»ëEi9ø¼ÿÐÚP•‹«+󕘸ø¬j#,žÌ7¤ºø:ç…®¾N}0ÿÐ qÖ‡»[”̓Ïû§öÂâI|—ä®ç×ONËÌê˜Gd±J³0|Ìó'aem,J#²gÙ\O…Ÿõ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®·Õ}›v¬½•p=Uí_^K¸žWƒ‘_´FCl³ÄüÛÝpÁf•÷·þ¦±x+¹M;V\﵇ª¢žçý%%ŠÖh»‹1ÿv7H>ïoÚ‹'ó¡jÛ´cÅõ^€¨ªLàyÞ_Re¢h†xf¾Êy€ë‘Ïú›vÄâI|¬Ð6íXÉ×9/p=òYI剢5šÇîbÌ?8p=Þßú›vÄâI|¬Æ6íXq½—"ªjxž÷—T£(Z£!^™¯Ä<¸ë‘Ïú›vÄâÉ|Cú`á|ó×#_§>˜_´FCœõa7ÁçýM;bñ$¾Kòˆúƒ_?9-3«cR‘]b•f!} ù‹“°²6¥Ùmd×Ëe×–ö¬ñÕ‹oÞöÅã÷n&´žt17´þT7[¡*fBK®—ÎÁx ›ªZèàÏ]-r½SkC”yñÔŒÏòÍÝN.xëŸÚ‹'ñÑõ¦v¬¸WU.ð<ôkm¨S1࣋!ßÜ-5Ÿ÷OíˆÅ“ùàzS;V\²«Šxúµ6Ô òÁÅÀW9t½©ò¦øÊ>¿t½©+ù:ç…®>ï×ÚP§]ÀGCþÁy ëMU0ü¹ÈŸ×îzéÜŠÇp±© +†¸úµ6ÔÉ–O¹¬ÌWbàzSE 0¬àË®—Ψxܳ>P…Uƒ|úˆò¬Öhˆ³>ÜÝâð^ðyÿÔŽX<‰ï’<ÂÇfÔ1â@aÐMŠC5Ú¢"mZ½!qVÖÆ¢4"{^›ëéŠÏúÜÐZÌÈ&ŒÐÅLh=ébnh#ü©n¶BUÌ„&×]Õ ÝÅFS5Cg_QUÍðӂȯZ£!ö²{U5'"ýÈCS5 žßÛæzÖæI¿®+®÷ó'MÕ <Ïû«Ö†:%>A©ªfè\dðy׎X<™§úºv¬¸Þ¡4U3ð<ï¯ZêD ùp¥ªš¡â³þ®±xOðuíXÉ×9/<ŠRUÍÀ‰¿|p+q¥ªš¡³¨Áçý];bñ$>žÖëÚ±âz?ŽÒTÍÀó¼¿jm¨“~/žú[™¯Ä<¸ë‘Ïú»vÄâÉ|Cú€‹‘¯s^x(¥ªšá]ú€[!ÎúðS)MÕ ÞPÌïms=k_’} ×ONËÌê˜Gd×X¥YgSjT3âÔr°-J#²{Ë®ç_ŤŒ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®÷Ó[ÎÏŸŸåùËãý¤†¿õ8šÇ÷‰<ÿÓ”ÿüúøýQ=Æ×oÏ?~}üøåzÖöüúχÿeK}ŽãýÜò±ž_{üôCýô¹Ï£üÐ>}¥”:~ü<ðóÄωŸ×§Ÿ¿þåíÝíýŸšÇóë/Îñé뿟ߟvñvÿ®Ý¾k×ïÚeoë»öõ]{Þí·¯??·~¢ûCÞŸ#²ÚaYïá÷…–aXŽí)Ãüã—÷'W¯û³êíãýŸ¼žkà®3Ô{οþõÝ*ŸþðøÓ×Çßßÿþ{Ü +endstream +endobj +1375 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 13073 +>> +stream +xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs +ÏÇo”dz%!)&cÒ攤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ +¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ +ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– +Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ +Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 +«CÁ +»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w +v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7Š†%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ +ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ +èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ÎœG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøȪ‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) +sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/ÌåÆæªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²㶢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( +Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ +ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï +;žÓ¸‹FÄd4½…‚ïLûí“‹=wŽ°%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ +GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ×Ø.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïؤžAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ +‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ß•Õ¼Ò2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ +œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu +¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr +0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äó澞7á= +%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl +»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•Ñµa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçƱ€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 +ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ +ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( +½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η + Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi +õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø +¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ +Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*Ž†Ž7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@Ë©Ô·W‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥ÅgrŽ§t0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª +ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& +‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä +]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² + ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ +SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døѪ‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. +¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæ؇ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« +g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 +Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E +G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA +à= +®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ +è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞƇŒ¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ +endstream +endobj +1376 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 11578 +>> +stream +xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» +bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o +‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU +‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûĪ¾XËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ +´Õ= +«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið +ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É +|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajMè‚šÉm¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøЉ•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ +SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi +‹» xŸÃk\_¥OÌ +O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôÑ°€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| +ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#Š°6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ +·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²G᧊RjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä +„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy +GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØΓë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO +è ~nè¸Ó\“¡ÍÑeåf +Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QŠ¦Žmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW +å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊá¡žž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ +Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe +€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'ඊ + iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ +Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ +(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígÏ°çÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ +œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ +Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} +lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒÑ‘Ð IòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L +³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië +H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ +Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú +?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” +å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ +Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, +¬Á®¶QU¯Ž§_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm +êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ +ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ +Ê Å±»ò + Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄEéƶîÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG +mÙ_¨&òK + ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë +ó‹‹Ê„²êlŽG™ƒýUª€ÝWŽ¶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ +åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ +Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ +ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ +ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ +endstream +endobj +1269 0 obj << -/Length 583 +/Type /ObjStm +/N 100 +/First 996 +/Length 13380 >> stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -567.87 Td [(42)]TJ -0 g 0 G -ET - -endstream -endobj -1118 0 obj +268 0 1265 58 1266 116 1267 175 1268 234 1261 293 1278 440 1260 642 1270 789 1271 933 +1272 1079 1273 1226 1274 1377 1275 1528 1276 1679 1280 1825 1277 1883 1285 2017 1282 2156 1287 2301 +272 2360 1288 2418 1284 2477 1296 2624 1283 2817 1289 2965 1290 3109 1291 3256 1292 3403 1293 3546 +1294 3693 1298 3838 1295 3896 1302 4030 1299 4178 1300 4325 1304 4472 1301 4531 1313 4651 1305 4844 +1306 4988 1307 5133 1308 5277 1309 5422 1310 5569 1311 5713 1315 5860 276 5918 1316 5975 1312 6033 +1318 6166 1320 6284 1317 6343 1329 6424 1321 6599 1322 6743 1323 6888 1324 7032 1325 7177 1331 7324 +280 7382 1332 7439 1328 7497 1335 7630 1326 7778 1327 7922 1337 8069 1334 8128 1344 8222 1338 8397 +1339 8539 1340 8684 1341 8831 1342 8975 1346 9122 284 9180 1347 9237 1343 9295 1350 9428 1348 9567 +1352 9715 288 9774 1349 9832 1358 9913 1353 10070 1354 10214 1355 10361 1360 10508 292 10566 1361 10623 +1357 10681 1365 10815 1369 10963 1370 11090 1371 11133 1372 11340 1373 11578 1374 11854 1356 12090 1363 12237 +% 268 0 obj << -/Length 6387 +/D [1262 0 R /XYZ 150.705 716.092 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.8)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -20.076 Td [(This)-250(function)-250(computes)-250(the)-250(2-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F52 9.9626 Tf 122.551 -25.46 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(2)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ -ET -q -1 0 0 1 287.432 658.569 cm -[]0 d 0 J 0.408 w 0 0 m 16.592 0 l S -Q -BT -/F52 9.9626 Tf 287.726 648.638 Td [(x)]TJ/F52 7.5716 Tf 5.399 2.88 Td [(T)]TJ/F52 9.9626 Tf 5.694 -2.88 Td [(x)]TJ/F54 9.9626 Tf -198.924 -23.065 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F52 9.9626 Tf 103.113 -25.46 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.789 0 Td [(2)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ -ET -q -1 0 0 1 286.694 610.044 cm -[]0 d 0 J 0.408 w 0 0 m 18.069 0 l S -Q -BT -/F52 9.9626 Tf 286.988 600.113 Td [(x)]TJ/F52 7.5716 Tf 5.588 2.88 Td [(H)]TJ/F52 9.9626 Tf 6.982 -2.88 Td [(x)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 128.689 575.464 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S -Q -BT -/F52 9.9626 Tf 134.791 566.896 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(2)]TJ/F52 9.9626 Tf 82.505 0 Td [(x)]TJ/F51 9.9626 Tf 120.621 0 Td [(Function)]TJ -ET -q -1 0 0 1 128.689 563.111 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S -Q -BT -/F54 9.9626 Tf 134.667 554.543 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 372.024 554.742 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 375.012 554.543 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 372.024 542.787 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 375.012 542.588 Td [(genrm2)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 372.024 530.832 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 375.012 530.632 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 372.024 518.876 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 375.012 518.677 Td [(genrm2)]TJ -ET -q -1 0 0 1 128.689 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 229.958 486.513 Td [(T)92(able)-250(8:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -115.119 -27.631 Td [(psb_genrm2\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_norm2\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -36.169 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.221 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.221 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.864 318.695 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 318.495 Td [(T)]TJ -ET -q -1 0 0 1 394.86 318.695 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 397.998 318.495 Td [(vect)]TJ -ET -q -1 0 0 1 419.547 318.695 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.685 318.495 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(8)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.221 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 284.518 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 284.319 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 236.698 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 236.499 Td [(desc)]TJ -ET -q -1 0 0 1 298.05 236.698 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 236.499 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -22.221 Td [(global)]TJ -0 g 0 G -/F54 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.956 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.true.)]TJ -0 g 0 G -/F51 9.9626 Tf -190.225 -34.176 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(43)]TJ -0 g 0 G -ET - -endstream -endobj -1011 0 obj +% 1265 0 obj << -/Type /ObjStm -/N 100 -/First 987 -/Length 12089 ->> -stream -216 0 1007 58 1013 165 1015 282 220 340 1012 397 1024 478 1016 661 1017 807 1018 951 -1019 1097 1020 1243 1021 1387 1026 1533 224 1592 998 1650 1023 1708 1031 1855 1022 2012 1028 2159 -1029 2303 1033 2449 1030 2507 1041 2614 1035 2789 1036 2930 1037 3076 1038 3220 1039 3365 1043 3512 -228 3571 1044 3629 1040 3688 1047 3808 1045 3947 1049 4093 1050 4151 1046 4209 1058 4316 1051 4500 -1052 4644 1053 4790 1054 4934 1055 5079 1056 5226 1060 5370 232 5429 1061 5487 1057 5545 1063 5678 -1065 5796 1062 5854 1070 5935 1066 6092 1067 6236 1068 6382 1072 6529 236 6588 1073 6646 1069 6705 -1075 6838 1077 6956 1078 7014 1074 7071 1084 7165 1080 7322 1081 7466 1082 7612 1086 7759 240 7818 -1087 7876 1083 7935 1092 8068 1088 8225 1089 8369 1090 8512 1094 8659 244 8717 1095 8774 1091 8831 -1097 8964 1099 9082 1100 9141 1096 9199 1105 9293 1101 9450 1102 9594 1103 9740 1107 9887 248 9945 -1108 10002 1104 10060 1110 10193 1112 10311 1109 10370 1117 10451 1113 10608 1114 10751 1115 10897 1119 11044 -% 216 0 obj +/D [1262 0 R /XYZ 290.728 674.17 null] +>> +% 1266 0 obj << -/D [1008 0 R /XYZ 150.705 613.372 null] +/D [1262 0 R /XYZ 287.931 654.041 null] >> -% 1007 0 obj +% 1267 0 obj +<< +/D [1262 0 R /XYZ 287.193 633.911 null] +>> +% 1268 0 obj +<< +/D [1262 0 R /XYZ 150.705 447.252 null] +>> +% 1261 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F68 1127 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1278 0 obj +<< +/Type /Page +/Contents 1279 0 R +/Resources 1277 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1281 0 R +/Annots [ 1260 0 R 1270 0 R 1271 0 R 1272 0 R 1273 0 R 1274 0 R 1275 0 R 1276 0 R ] +>> +% 1260 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [378.159 654.503 390.114 666.562] +/A << /S /GoTo /D (table.12) >> +>> +% 1270 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 588.085 444.603 600.145] +/A << /S /GoTo /D (vdata) >> +>> +% 1271 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [328.746 576.13 340.701 588.189] +/A << /S /GoTo /D (table.12) >> +>> +% 1272 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 497.757 323.106 509.817] +/A << /S /GoTo /D (descdata) >> +>> +% 1273 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [338.139 460.563 345.113 472.623] +/A << /S /GoTo /D (equation.4.1) >> +>> +% 1274 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [336.486 445.951 343.459 458.011] +/A << /S /GoTo /D (equation.4.2) >> +>> +% 1275 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [337.034 431.339 344.007 443.399] +/A << /S /GoTo /D (equation.4.3) >> +>> +% 1276 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [202.52 189.579 214.475 201.639] +/A << /S /GoTo /D (table.12) >> +>> +% 1280 0 obj +<< +/D [1278 0 R /XYZ 98.895 753.953 null] +>> +% 1277 0 obj << -/Font << /F94 915 0 R /F54 586 0 R /F51 584 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F68 1127 0 R /F60 666 0 R /F67 913 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1013 0 obj +% 1285 0 obj +<< +/Type /Page +/Contents 1286 0 R +/Resources 1284 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1281 0 R +/Annots [ 1282 0 R ] +>> +% 1282 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 116.52 440.924 128.58] +/A << /S /GoTo /D (table.13) >> +>> +% 1287 0 obj << -/Type /Page -/Contents 1014 0 R -/Resources 1012 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 991 0 R +/D [1285 0 R /XYZ 149.705 753.953 null] >> -% 1015 0 obj +% 272 0 obj << -/D [1013 0 R /XYZ 98.895 753.953 null] +/D [1285 0 R /XYZ 150.705 716.092 null] >> -% 220 0 obj +% 1288 0 obj << -/D [1013 0 R /XYZ 99.895 716.092 null] +/D [1285 0 R /XYZ 150.705 268.704 null] >> -% 1012 0 obj +% 1284 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F68 1127 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1024 0 obj +% 1296 0 obj << /Type /Page -/Contents 1025 0 R -/Resources 1023 0 R +/Contents 1297 0 R +/Resources 1295 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 1016 0 R 1017 0 R 1018 0 R 1019 0 R 1020 0 R 1021 0 R ] +/Parent 1281 0 R +/Annots [ 1283 0 R 1289 0 R 1290 0 R 1291 0 R 1292 0 R 1293 0 R 1294 0 R ] >> -% 1016 0 obj +% 1283 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 413.274 435.942 425.334] -/A << /S /GoTo /D (table.1) >> +/Rect [305.144 654.503 312.117 666.562] +/A << /S /GoTo /D (section.3) >> >> -% 1017 0 obj +% 1289 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 345.063 495.412 357.123] +/Rect [368.549 586.032 444.603 598.092] /A << /S /GoTo /D (vdata) >> >> -% 1018 0 obj +% 1290 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 333.108 388.729 345.168] -/A << /S /GoTo /D (table.1) >> +/Rect [328.621 574.077 340.576 586.136] +/A << /S /GoTo /D (table.13) >> >> -% 1019 0 obj +% 1291 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 252.942 435.942 265.002] -/A << /S /GoTo /D (table.1) >> +/Rect [378.159 493.651 390.114 505.711] +/A << /S /GoTo /D (table.13) >> >> -% 1020 0 obj +% 1292 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 184.731 495.412 196.791] +/Rect [368.549 425.181 444.603 437.24] /A << /S /GoTo /D (vdata) >> >> -% 1021 0 obj +% 1293 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [397.868 172.776 404.842 184.835] -/A << /S /GoTo /D (table.1) >> ->> -% 1026 0 obj -<< -/D [1024 0 R /XYZ 149.705 753.953 null] +/Rect [328.746 413.225 340.701 425.285] +/A << /S /GoTo /D (table.13) >> >> -% 224 0 obj +% 1294 0 obj << -/D [1024 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 332.8 323.106 344.859] +/A << /S /GoTo /D (descdata) >> >> -% 998 0 obj +% 1298 0 obj << -/D [1024 0 R /XYZ 150.705 558.947 null] +/D [1296 0 R /XYZ 98.895 753.953 null] >> -% 1023 0 obj +% 1295 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R /F68 1127 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1031 0 obj +% 1302 0 obj << /Type /Page -/Contents 1032 0 R -/Resources 1030 0 R +/Contents 1303 0 R +/Resources 1301 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1022 0 R 1028 0 R 1029 0 R ] ->> -% 1022 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 690.368 323.106 702.428] -/A << /S /GoTo /D (descdata) >> +/Parent 1281 0 R +/Annots [ 1299 0 R 1300 0 R ] >> -% 1028 0 obj +% 1299 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 600.704 444.603 612.764] -/A << /S /GoTo /D (vdata) >> +/Rect [211.646 410.079 223.601 419.489] +/A << /S /GoTo /D (table.13) >> >> -% 1029 0 obj +% 1300 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [344.963 588.749 351.937 600.809] -/A << /S /GoTo /D (table.1) >> +/Rect [253.329 228.102 265.284 240.161] +/A << /S /GoTo /D (table.13) >> >> -% 1033 0 obj +% 1304 0 obj << -/D [1031 0 R /XYZ 98.895 753.953 null] +/D [1302 0 R /XYZ 149.705 753.953 null] >> -% 1030 0 obj +% 1301 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F60 666 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1041 0 obj +% 1313 0 obj << /Type /Page -/Contents 1042 0 R -/Resources 1040 0 R +/Contents 1314 0 R +/Resources 1312 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1035 0 R 1036 0 R 1037 0 R 1038 0 R 1039 0 R ] +/Parent 1281 0 R +/Annots [ 1305 0 R 1306 0 R 1307 0 R 1308 0 R 1309 0 R 1310 0 R 1311 0 R ] >> -% 1035 0 obj +% 1305 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 355.7 495.412 367.76] +/Rect [263.331 417.772 339.385 429.832] /A << /S /GoTo /D (vdata) >> >> -% 1036 0 obj +% 1306 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 343.745 388.729 355.804] +/Rect [224.557 405.817 231.53 417.877] /A << /S /GoTo /D (table.2) >> >> -% 1037 0 obj +% 1307 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 264.018 495.412 276.078] +/Rect [263.331 338.393 339.385 350.453] /A << /S /GoTo /D (vdata) >> >> -% 1038 0 obj +% 1308 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.88 252.063 388.854 264.123] +/Rect [224.557 326.438 231.53 338.498] /A << /S /GoTo /D (table.2) >> >> -% 1039 0 obj +% 1309 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 172.336 373.916 184.396] +/Rect [256.048 259.015 323.106 271.074] /A << /S /GoTo /D (descdata) >> >> -% 1043 0 obj +% 1310 0 obj << -/D [1041 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [263.331 171.988 339.385 184.048] +/A << /S /GoTo /D (vdata) >> >> -% 228 0 obj +% 1311 0 obj << -/D [1041 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [242.868 160.033 254.823 172.093] +/A << /S /GoTo /D (table.14) >> >> -% 1044 0 obj +% 1315 0 obj << -/D [1041 0 R /XYZ 150.705 499.951 null] +/D [1313 0 R /XYZ 98.895 753.953 null] >> -% 1040 0 obj +% 276 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [1313 0 R /XYZ 99.895 716.092 null] >> -% 1047 0 obj +% 1316 0 obj << -/Type /Page -/Contents 1048 0 R -/Resources 1046 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1045 0 R ] +/D [1313 0 R /XYZ 99.895 560.161 null] >> -% 1045 0 obj +% 1312 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 602.697 385.133 614.756] -/A << /S /GoTo /D (table.2) >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1049 0 obj +% 1318 0 obj << -/D [1047 0 R /XYZ 98.895 753.953 null] +/Type /Page +/Contents 1319 0 R +/Resources 1317 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1281 0 R >> -% 1050 0 obj +% 1320 0 obj << -/D [1047 0 R /XYZ 99.895 512.639 null] +/D [1318 0 R /XYZ 149.705 753.953 null] >> -% 1046 0 obj +% 1317 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F59 665 0 R >> /ProcSet [ /PDF /Text ] >> -% 1058 0 obj +% 1329 0 obj << /Type /Page -/Contents 1059 0 R -/Resources 1057 0 R +/Contents 1330 0 R +/Resources 1328 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1051 0 R 1052 0 R 1053 0 R 1054 0 R 1055 0 R 1056 0 R ] +/Parent 1333 0 R +/Annots [ 1321 0 R 1322 0 R 1323 0 R 1324 0 R 1325 0 R ] >> -% 1051 0 obj +% 1321 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 384.684 495.412 396.744] +/Rect [263.331 417.772 339.385 429.832] /A << /S /GoTo /D (vdata) >> >> -% 1052 0 obj +% 1322 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 372.729 388.729 384.789] -/A << /S /GoTo /D (table.3) >> +/Rect [224.557 405.817 231.53 417.877] +/A << /S /GoTo /D (table.2) >> >> -% 1053 0 obj +% 1323 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 293.733 495.412 305.793] +/Rect [263.331 338.393 339.385 350.453] /A << /S /GoTo /D (vdata) >> >> -% 1054 0 obj +% 1324 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.88 281.778 388.854 293.837] -/A << /S /GoTo /D (table.3) >> +/Rect [224.557 326.438 231.53 338.498] +/A << /S /GoTo /D (table.2) >> >> -% 1055 0 obj +% 1325 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 202.781 373.916 214.841] +/Rect [256.048 259.015 323.106 271.074] /A << /S /GoTo /D (descdata) >> >> -% 1056 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [200.458 119.17 207.432 128.58] -/A << /S /GoTo /D (table.2) >> ->> -% 1060 0 obj +% 1331 0 obj << -/D [1058 0 R /XYZ 149.705 753.953 null] +/D [1329 0 R /XYZ 98.895 753.953 null] >> -% 232 0 obj +% 280 0 obj << -/D [1058 0 R /XYZ 150.705 716.092 null] +/D [1329 0 R /XYZ 99.895 716.092 null] >> -% 1061 0 obj +% 1332 0 obj << -/D [1058 0 R /XYZ 150.705 524.97 null] +/D [1329 0 R /XYZ 99.895 560.161 null] >> -% 1057 0 obj +% 1328 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1063 0 obj +% 1335 0 obj << /Type /Page -/Contents 1064 0 R -/Resources 1062 0 R +/Contents 1336 0 R +/Resources 1334 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R +/Parent 1333 0 R +/Annots [ 1326 0 R 1327 0 R ] >> -% 1065 0 obj +% 1326 0 obj << -/D [1063 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [314.141 702.323 390.195 714.383] +/A << /S /GoTo /D (vdata) >> >> -% 1062 0 obj +% 1327 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [293.677 690.368 305.632 702.428] +/A << /S /GoTo /D (table.14) >> +>> +% 1337 0 obj +<< +/D [1335 0 R /XYZ 149.705 753.953 null] +>> +% 1334 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F62 667 0 R /F67 913 0 R /F59 665 0 R >> /ProcSet [ /PDF /Text ] >> -% 1070 0 obj +% 1344 0 obj << /Type /Page -/Contents 1071 0 R -/Resources 1069 0 R +/Contents 1345 0 R +/Resources 1343 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1066 0 R 1067 0 R 1068 0 R ] +/Parent 1333 0 R +/Annots [ 1338 0 R 1339 0 R 1340 0 R 1341 0 R 1342 0 R ] >> -% 1066 0 obj +% 1338 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 358.503 495.412 370.562] +/Rect [263.331 430.55 339.385 442.61] /A << /S /GoTo /D (vdata) >> >> -% 1067 0 obj +% 1339 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 346.547 384.436 358.607] -/A << /S /GoTo /D (table.4) >> +/Rect [224.557 418.595 231.53 430.655] +/A << /S /GoTo /D (table.2) >> >> -% 1068 0 obj +% 1340 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 280.074 373.916 292.134] +/Rect [256.048 352.283 323.106 364.342] /A << /S /GoTo /D (descdata) >> >> -% 1072 0 obj -<< -/D [1070 0 R /XYZ 149.705 753.953 null] ->> -% 236 0 obj -<< -/D [1070 0 R /XYZ 150.705 716.092 null] ->> -% 1073 0 obj +% 1341 0 obj << -/D [1070 0 R /XYZ 150.705 495.665 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [263.331 201.166 339.385 213.226] +/A << /S /GoTo /D (vdata) >> >> -% 1069 0 obj +% 1342 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [242.868 189.211 254.823 201.271] +/A << /S /GoTo /D (table.16) >> >> -% 1075 0 obj +% 1346 0 obj << -/Type /Page -/Contents 1076 0 R -/Resources 1074 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R +/D [1344 0 R /XYZ 98.895 753.953 null] >> -% 1077 0 obj +% 284 0 obj << -/D [1075 0 R /XYZ 98.895 753.953 null] +/D [1344 0 R /XYZ 99.895 716.092 null] >> -% 1078 0 obj +% 1347 0 obj << -/D [1075 0 R /XYZ 99.895 632.19 null] +/D [1344 0 R /XYZ 99.895 566.828 null] >> -% 1074 0 obj +% 1343 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1084 0 obj +% 1350 0 obj << /Type /Page -/Contents 1085 0 R -/Resources 1083 0 R +/Contents 1351 0 R +/Resources 1349 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1080 0 R 1081 0 R 1082 0 R ] ->> -% 1080 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 411.694 495.412 423.754] -/A << /S /GoTo /D (vdata) >> ->> -% 1081 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 399.739 384.436 411.798] -/A << /S /GoTo /D (table.5) >> +/Parent 1333 0 R +/Annots [ 1348 0 R ] >> -% 1082 0 obj +% 1348 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 331.993 373.916 344.052] -/A << /S /GoTo /D (descdata) >> ->> -% 1086 0 obj -<< -/D [1084 0 R /XYZ 149.705 753.953 null] +/Rect [378.029 655.624 385.003 667.684] +/A << /S /GoTo /D (section.6) >> >> -% 240 0 obj +% 1352 0 obj << -/D [1084 0 R /XYZ 150.705 716.092 null] +/D [1350 0 R /XYZ 149.705 753.953 null] >> -% 1087 0 obj +% 288 0 obj << -/D [1084 0 R /XYZ 150.705 555.856 null] +/D [1350 0 R /XYZ 150.705 716.092 null] >> -% 1083 0 obj +% 1349 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1092 0 obj +% 1358 0 obj << /Type /Page -/Contents 1093 0 R -/Resources 1091 0 R +/Contents 1359 0 R +/Resources 1357 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1088 0 R 1089 0 R 1090 0 R ] +/Parent 1333 0 R +/Annots [ 1353 0 R 1354 0 R 1355 0 R ] >> -% 1088 0 obj +% 1353 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 362.555 444.603 374.615] +/Rect [368.549 344.818 444.603 356.877] /A << /S /GoTo /D (vdata) >> >> -% 1089 0 obj +% 1354 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 350.6 333.626 362.66] -/A << /S /GoTo /D (table.6) >> +/Rect [326.652 332.863 338.608 344.922] +/A << /S /GoTo /D (table.17) >> >> -% 1090 0 obj +% 1355 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 283.114 323.106 295.173] +/Rect [291.943 264.733 359.001 276.793] /A << /S /GoTo /D (descdata) >> >> -% 1094 0 obj +% 1360 0 obj << -/D [1092 0 R /XYZ 98.895 753.953 null] +/D [1358 0 R /XYZ 98.895 753.953 null] >> -% 244 0 obj +% 292 0 obj << -/D [1092 0 R /XYZ 99.895 716.092 null] +/D [1358 0 R /XYZ 99.895 716.092 null] >> -% 1095 0 obj +% 1361 0 obj << -/D [1092 0 R /XYZ 99.895 505.29 null] +/D [1358 0 R /XYZ 99.895 513.636 null] >> -% 1091 0 obj +% 1357 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F68 1127 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1097 0 obj +% 1365 0 obj << /Type /Page -/Contents 1098 0 R -/Resources 1096 0 R +/Contents 1366 0 R +/Resources 1364 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R +/Parent 1333 0 R +/Annots [ 1356 0 R 1363 0 R ] >> -% 1099 0 obj +% 1369 0 obj << -/D [1097 0 R /XYZ 149.705 753.953 null] +/Producer (GPL Ghostscript 9.22) +/CreationDate (D:20180323100645Z00'00') +/ModDate (D:20180323100645Z00'00') >> -% 1100 0 obj +% 1370 0 obj << -/D [1097 0 R /XYZ 150.705 632.19 null] +/Type /ExtGState +/OPM 1 >> -% 1096 0 obj +% 1371 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/BaseFont /XYUGDR+Times-Roman +/FontDescriptor 1373 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 500 500 500 500 500] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1105 0 obj +% 1372 0 obj << -/Type /Page -/Contents 1106 0 R -/Resources 1104 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1101 0 R 1102 0 R 1103 0 R ] +/BaseFont /XISTAL+Times-Bold +/FontDescriptor 1374 0 R +/Type /Font +/FirstChar 48 +/LastChar 80 +/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1101 0 obj +% 1373 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 291.495 444.603 303.554] -/A << /S /GoTo /D (vdata) >> +/Type /FontDescriptor +/FontName /XYUGDR+Times-Roman +/FontBBox [ 0 -14 476 688] +/Flags 65568 +/Ascent 688 +/CapHeight 688 +/Descent -14 +/ItalicAngle 0 +/StemV 71 +/MissingWidth 250 +/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) +/FontFile3 1375 0 R >> -% 1102 0 obj +% 1374 0 obj +<< +/Type /FontDescriptor +/FontName /XISTAL+Times-Bold +/FontBBox [ 0 -13 600 688] +/Flags 65568 +/Ascent 688 +/CapHeight 676 +/Descent -13 +/ItalicAngle 0 +/StemV 90 +/MissingWidth 250 +/CharSet (/P/one/zero) +/FontFile3 1376 0 R +>> +% 1356 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 279.539 333.626 291.599] -/A << /S /GoTo /D (table.7) >> +/Rect [231.023 625.272 242.978 634.682] +/A << /S /GoTo /D (table.17) >> >> -% 1103 0 obj +% 1363 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 209.259 323.106 221.319] -/A << /S /GoTo /D (descdata) >> ->> -% 1107 0 obj -<< -/D [1105 0 R /XYZ 98.895 753.953 null] +/Rect [458.157 276.439 465.131 290.202] +/A << /S /GoTo /D (figure.3) >> >> -% 248 0 obj + +endstream +endobj +1380 0 obj << -/D [1105 0 R /XYZ 99.895 716.092 null] +/Length 3048 >> -% 1108 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 8.9664 Tf 209.77 645.656 Td [(Pr)18(ocess)-250(0)-7729(Pr)18(ocess)-250(1)]TJ -31.696 -10.959 Td [(I)-1333(GLOB\050I\051)-1334(X\050I\051)-4663(I)-1333(GLOB\050I\051)-1333(X\050I\051)]TJ -1.462 -10.959 Td [(1)-4607(1)-1754(1.0)-4500(1)-4107(33)-1753(2.0)]TJ 0 -10.959 Td [(2)-4607(2)-1754(1.0)-4500(2)-4107(34)-1753(2.0)]TJ 0 -10.959 Td [(3)-4607(3)-1754(1.0)-4500(3)-4107(35)-1753(2.0)]TJ 0 -10.959 Td [(4)-4607(4)-1754(1.0)-4500(4)-4107(36)-1753(2.0)]TJ 0 -10.959 Td [(5)-4607(5)-1754(1.0)-4500(5)-4107(37)-1753(2.0)]TJ 0 -10.959 Td [(6)-4607(6)-1754(1.0)-4500(6)-4107(38)-1753(2.0)]TJ 0 -10.959 Td [(7)-4607(7)-1754(1.0)-4500(7)-4107(39)-1753(2.0)]TJ 0 -10.958 Td [(8)-4607(8)-1754(1.0)-4500(8)-4107(40)-1753(2.0)]TJ 0 -10.959 Td [(9)-4607(9)-1754(1.0)-4500(9)-4107(41)-1753(2.0)]TJ -4.483 -10.959 Td [(10)-4107(10)-1754(1.0)-4000(10)-4107(42)-1753(2.0)]TJ 0 -10.959 Td [(11)-4107(11)-1754(1.0)-4000(11)-4107(43)-1753(2.0)]TJ 0 -10.959 Td [(12)-4107(12)-1754(1.0)-4000(12)-4107(44)-1753(2.0)]TJ 0 -10.959 Td [(13)-4107(13)-1754(1.0)-4000(13)-4107(45)-1753(2.0)]TJ 0 -10.959 Td [(14)-4107(14)-1754(1.0)-4000(14)-4107(46)-1753(2.0)]TJ 0 -10.959 Td [(15)-4107(15)-1754(1.0)-4000(15)-4107(47)-1753(2.0)]TJ 0 -10.959 Td [(16)-4107(16)-1754(1.0)-4000(16)-4107(48)-1753(2.0)]TJ 0 -10.959 Td [(17)-4107(17)-1754(1.0)-4000(17)-4107(49)-1753(2.0)]TJ 0 -10.958 Td [(18)-4107(18)-1754(1.0)-4000(18)-4107(50)-1753(2.0)]TJ 0 -10.959 Td [(19)-4107(19)-1754(1.0)-4000(19)-4107(51)-1753(2.0)]TJ 0 -10.959 Td [(20)-4107(20)-1754(1.0)-4000(20)-4107(52)-1753(2.0)]TJ 0 -10.959 Td [(21)-4107(21)-1754(1.0)-4000(21)-4107(53)-1753(2.0)]TJ 0 -10.959 Td [(22)-4107(22)-1754(1.0)-4000(22)-4107(54)-1753(2.0)]TJ 0 -10.959 Td [(23)-4107(23)-1754(1.0)-4000(23)-4107(55)-1753(2.0)]TJ 0 -10.959 Td [(24)-4107(24)-1754(1.0)-4000(24)-4107(56)-1753(2.0)]TJ 0 -10.959 Td [(25)-4107(25)-1754(1.0)-4000(25)-4107(57)-1753(2.0)]TJ 0 -10.959 Td [(26)-4107(26)-1754(1.0)-4000(26)-4107(58)-1753(2.0)]TJ 0 -10.959 Td [(27)-4107(27)-1754(1.0)-4000(27)-4107(59)-1753(2.0)]TJ 0 -10.958 Td [(28)-4107(28)-1754(1.0)-4000(28)-4107(60)-1753(2.0)]TJ 0 -10.959 Td [(29)-4107(29)-1754(1.0)-4000(29)-4107(61)-1753(2.0)]TJ 0 -10.959 Td [(30)-4107(30)-1754(1.0)-4000(30)-4107(62)-1753(2.0)]TJ 0 -10.959 Td [(31)-4107(31)-1754(1.0)-4000(31)-4107(63)-1753(2.0)]TJ 0 -10.959 Td [(32)-4107(32)-1754(1.0)-4000(32)-4107(64)-1753(2.0)]TJ 0 -10.959 Td [(33)-4107(33)-1754(2.0)-4000(33)-4107(25)-1753(1.0)]TJ 0 -10.959 Td [(34)-4107(34)-1754(2.0)-4000(34)-4107(26)-1753(1.0)]TJ 0 -10.959 Td [(35)-4107(35)-1754(2.0)-4000(35)-4107(27)-1753(1.0)]TJ 0 -10.959 Td [(36)-4107(36)-1754(2.0)-4000(36)-4107(28)-1753(1.0)]TJ 0 -10.959 Td [(37)-4107(37)-1754(2.0)-4000(37)-4107(29)-1753(1.0)]TJ 0 -10.958 Td [(38)-4107(38)-1754(2.0)-4000(38)-4107(30)-1753(1.0)]TJ 0 -10.959 Td [(39)-4107(39)-1754(2.0)-4000(39)-4107(31)-1753(1.0)]TJ 0 -10.959 Td [(40)-4107(40)-1754(2.0)-4000(40)-4107(32)-1753(1.0)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 94.641 -105.903 Td [(61)]TJ +0 g 0 G +ET + +endstream +endobj +1388 0 obj << -/D [1105 0 R /XYZ 99.895 443.893 null] +/Length 7819 >> -% 1104 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(5.2)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ/F62 9.9626 Tf -51.12 -18.964 Td [(These)-250(subr)18(outines)-250(applies)-250(an)-250(overlap)-250(operator)-250(to)-250(the)-250(input)-250(vector:)]TJ/F60 9.9626 Tf 154.518 -23.824 Td [(x)]TJ/F91 10.3811 Tf 8.097 0 Td [(\040)]TJ/F60 9.9626 Tf 13.497 0 Td [(Q)-42(x)]TJ/F62 9.9626 Tf -176.112 -21.014 Td [(wher)18(e:)]TJ +0 g 0 G +/F60 9.9626 Tf 0.294 -19.203 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(submatrix)]TJ/F60 9.9626 Tf 131.351 0 Td [(x)]TJ +0 g 0 G + -141.607 -19.564 Td [(Q)]TJ +0 g 0 G +/F62 9.9626 Tf 12.857 0 Td [(is)-250(the)-250(overlap)-250(operator;)-250(it)-250(is)-250(the)-250(composition)-250(of)-250(two)-250(operators)]TJ/F60 9.9626 Tf 271.931 0 Td [(P)]TJ/F60 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F62 9.9626 Tf 6.445 1.494 Td [(and)]TJ/F60 9.9626 Tf 19.681 0 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F62 9.9626 Tf 5.401 -3.616 Td [(.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +q +1 0 0 1 230.392 581.71 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F60 9.9626 Tf 236.663 573.142 Td [(x)]TJ/F59 9.9626 Tf 120.622 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 230.392 569.356 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F62 9.9626 Tf 236.369 560.788 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 373.603 560.988 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 560.788 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 373.603 549.032 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 548.833 Td [(ovrl)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 373.603 537.077 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 536.878 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 373.603 525.122 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 524.923 Td [(ovrl)]TJ +ET +q +1 0 0 1 230.392 521.137 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 278.277 492.758 Td [(T)92(able)-250(18:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -127.572 -23.549 Td [(call)]TJ +0 g 0 G + [-525(psb_ovrl\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-525(psb_ovrl\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(update)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(update_type,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(work)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(work\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.014 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.564 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.564 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 89.687 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -79.948 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 349.49 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 349.291 Td [(T)]TJ +ET +q +1 0 0 1 445.669 349.49 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 349.291 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 349.49 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 349.291 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(18)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.564 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 317.971 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 317.772 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 270.151 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 269.951 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 270.151 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 387.893 269.951 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -19.564 Td [(update)]TJ +0 g 0 G +/F62 9.9626 Tf 36.523 0 Td [(Update)-250(operator)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -11.617 -31.519 Td [(update)-250(=)-250(psb)]TJ +ET +q +1 0 0 1 235.367 219.067 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 238.356 218.868 Td [(none)]TJ +ET +q +1 0 0 1 261.648 219.067 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 269.619 218.868 Td [(Do)-250(nothing;)]TJ +0 g 0 G +/F59 9.9626 Tf -94.008 -15.579 Td [(update)-250(=)-250(psb)]TJ +ET +q +1 0 0 1 235.367 203.488 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 238.356 203.289 Td [(add)]TJ +ET +q +1 0 0 1 256.109 203.488 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 264.079 203.289 Td [(Sum)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F60 9.9626 Tf 137.239 0 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F62 9.9626 Tf 5.4 -3.617 Td [(;)]TJ +0 g 0 G +/F59 9.9626 Tf -237.512 -15.579 Td [(update)-250(=)-250(psb)]TJ +ET +q +1 0 0 1 235.367 187.91 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 238.356 187.71 Td [(avg)]TJ +ET +q +1 0 0 1 255.013 187.91 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 262.983 187.71 Td [(A)92(verage)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F60 9.9626 Tf 153.667 0 Td [(P)]TJ/F60 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F60 9.9626 Tf 4.278 1.494 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F62 9.9626 Tf 5.401 -3.617 Td [(;)]TJ -262.547 -19.564 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(u)-80(p)-25(d)-40(a)-25(t)-25(e)]TJ +ET +q +1 0 0 1 244.034 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F60 9.9626 Tf 247.147 144.236 Td [(t)-25(y)-80(p)-25(e)]TJ/F93 10.3811 Tf 21.467 0 Td [(=)]TJ/F60 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ +ET +q +1 0 0 1 294.938 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F60 9.9626 Tf 298.201 144.236 Td [(a)-25(v)-47(g)]TJ +ET +q +1 0 0 1 314.026 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 175.611 132.281 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(integer)-250(variable.)]TJ +0 g 0 G + 141.968 -29.888 Td [(62)]TJ +0 g 0 G +ET + +endstream +endobj +1397 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Length 5447 >> -% 1110 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(work)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(dimensional)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F60 9.9626 Tf 252.794 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -282.907 -19.925 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F60 9.9626 Tf 117.084 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(18)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(If)-241(ther)18(e)-240(is)-241(no)-241(overlap)-240(in)-241(the)-241(data)-240(distribution)-241(associated)-241(with)-240(the)-241(descrip-)]TJ 12.453 -11.955 Td [(tor)74(,)-250(no)-250(operations)-250(ar)18(e)-250(performed;)]TJ +0 g 0 G + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(The)-284(operator)]TJ/F60 9.9626 Tf 72.855 0 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F62 9.9626 Tf 8.232 -3.617 Td [(performs)-284(the)-284(r)18(eduction)-285(sum)-284(of)-284(overlap)-284(elements;)-302(it)-284(is)-284(a)]TJ -75.039 -11.955 Td [(\223pr)18(olongation\224)-265(operator)]TJ/F60 9.9626 Tf 110.535 0 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F62 9.9626 Tf 8.044 -3.616 Td [(that)-265(r)18(eplicates)-266(ov)1(erlap)-266(elements,)-269(accounting)]TJ -124.984 -11.955 Td [(for)-250(the)-250(physical)-250(r)18(eplication)-250(of)-250(data;)]TJ +0 g 0 G + -12.453 -19.925 Td [(3.)]TJ +0 g 0 G + [-500(The)-190(operator)]TJ/F60 9.9626 Tf 70.978 0 Td [(P)]TJ/F60 7.5716 Tf 5.423 -1.495 Td [(a)]TJ/F62 9.9626 Tf 5.848 1.495 Td [(performs)-190(a)-190(scaling)-190(on)-190(the)-190(overlap)-190(elements)-190(by)-190(the)-190(amount)]TJ -69.796 -11.956 Td [(of)-325(r)18(eplication;)-363(thus,)-343(when)-325(combined)-325(with)-325(the)-325(r)18(eduction)-325(operator)74(,)-344(it)-325(im-)]TJ 0 -11.955 Td [(plements)-250(the)-250(average)-250(of)-250(r)18(eplicated)-250(elements)-250(over)-250(all)-250(of)-250(their)-250(instances.)]TJ/F59 11.9552 Tf -24.907 -19.925 Td [(Example)-320(of)-320(use)]TJ/F62 9.9626 Tf 87.879 0 Td [(Consider)-320(the)-320(discr)18(etization)-320(mesh)-320(depicted)-320(in)-320(\002g.)]TJ +0 0 1 rg 0 0 1 RG + [-320(4)]TJ +0 g 0 G + [(,)-337(parti-)]TJ -87.879 -11.955 Td [(tioned)-262(among)-262(two)-263(pr)18(ocesse)1(s)-263(as)-262(shown)-262(by)-262(the)-262(dashed)-263(li)1(nes,)-266(with)-262(an)-262(overlap)-262(of)]TJ 0 -11.955 Td [(1)-261(extr)1(a)-261(layer)-260(with)-261(r)18(espect)-260(to)-261(the)-260(partition)-261(of)-260(\002g.)]TJ +0 0 1 rg 0 0 1 RG + [-261(3)]TJ +0 g 0 G + [(;)-265(the)-261(data)-260(distribution)-261(is)-260(such)]TJ 0 -11.956 Td [(that)-267(each)-268(pr)18(ocess)-267(will)-267(own)-267(40)-268(entries)-267(in)-267(the)-267(index)-268(space,)-271(with)-267(an)-268(overlap)-267(of)-267(16)]TJ 0 -11.955 Td [(entries)-249(placed)-248(at)-249(local)-249(i)1(ndices)-249(25)-249(thr)18(ough)-248(40;)-249(the)-249(halo)-249(will)-248(r)8(un)-249(fr)18(om)-249(local)-248(index)]TJ 0 -11.955 Td [(41)-236(thr)18(ough)-237(local)-236(index)-237(48..)-305(If)-236(pr)18(ocess)-237(0)-236(assigns)-237(an)-236(initial)-236(value)-237(of)-236(1)-236(to)-237(its)-236(entries)]TJ 0 -11.955 Td [(in)-259(the)]TJ/F60 9.9626 Tf 27.963 0 Td [(x)]TJ/F62 9.9626 Tf 7.782 0 Td [(vector)74(,)-261(and)-258(pr)18(ocess)-259(1)-259(assigns)-258(a)-259(value)-259(of)-258(2,)-261(then)-259(after)-258(a)-259(call)-259(to)]TJ/F67 9.9626 Tf 266.124 0 Td [(psb_ovrl)]TJ/F62 9.9626 Tf -301.869 -11.955 Td [(with)]TJ/F67 9.9626 Tf 22.816 0 Td [(psb_avg_)]TJ/F62 9.9626 Tf 44.404 0 Td [(and)-257(a)-257(call)-257(to)]TJ/F67 9.9626 Tf 55.983 0 Td [(psb_halo_)]TJ/F62 9.9626 Tf 49.635 0 Td [(the)-257(contents)-257(of)-257(the)-257(local)-257(vectors)-257(will)-258(b)1(e)]TJ -172.838 -11.955 Td [(the)-250(following)-250(\050showing)-250(a)-250(transition)-250(among)-250(the)-250(two)-250(subdomains\051)]TJ +0 g 0 G + 166.875 -143.462 Td [(63)]TJ +0 g 0 G +ET + +endstream +endobj +1405 0 obj << -/Type /Page -/Contents 1111 0 R -/Resources 1109 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R +/Length 3551 >> -% 1112 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 7.9701 Tf 265.805 653.177 Td [(Pr)18(ocess)-250(0)-8396(Pr)18(ocess)-250(1)]TJ -31.163 -9.464 Td [(I)-1500(GLOB\050I\051)-1500(X\050I\051)-5163(I)-1500(GLOB\050I\051)-1500(X\050I\051)]TJ -1.299 -9.465 Td [(1)-4774(1)-1920(1.0)-5000(1)-4274(33)-1920(1.5)]TJ 0 -9.464 Td [(2)-4774(2)-1920(1.0)-5000(2)-4274(34)-1920(1.5)]TJ 0 -9.465 Td [(3)-4774(3)-1920(1.0)-5000(3)-4274(35)-1920(1.5)]TJ 0 -9.464 Td [(4)-4774(4)-1920(1.0)-5000(4)-4274(36)-1920(1.5)]TJ 0 -9.465 Td [(5)-4774(5)-1920(1.0)-5000(5)-4274(37)-1920(1.5)]TJ 0 -9.464 Td [(6)-4774(6)-1920(1.0)-5000(6)-4274(38)-1920(1.5)]TJ 0 -9.465 Td [(7)-4774(7)-1920(1.0)-5000(7)-4274(39)-1920(1.5)]TJ 0 -9.464 Td [(8)-4774(8)-1920(1.0)-5000(8)-4274(40)-1920(1.5)]TJ 0 -9.465 Td [(9)-4774(9)-1920(1.0)-5000(9)-4274(41)-1920(2.0)]TJ -3.985 -9.464 Td [(10)-4274(10)-1920(1.0)-4500(10)-4274(42)-1920(2.0)]TJ 0 -9.465 Td [(11)-4274(11)-1920(1.0)-4500(11)-4274(43)-1920(2.0)]TJ 0 -9.464 Td [(12)-4274(12)-1920(1.0)-4500(12)-4274(44)-1920(2.0)]TJ 0 -9.465 Td [(13)-4274(13)-1920(1.0)-4500(13)-4274(45)-1920(2.0)]TJ 0 -9.464 Td [(14)-4274(14)-1920(1.0)-4500(14)-4274(46)-1920(2.0)]TJ 0 -9.465 Td [(15)-4274(15)-1920(1.0)-4500(15)-4274(47)-1920(2.0)]TJ 0 -9.464 Td [(16)-4274(16)-1920(1.0)-4500(16)-4274(48)-1920(2.0)]TJ 0 -9.465 Td [(17)-4274(17)-1920(1.0)-4500(17)-4274(49)-1920(2.0)]TJ 0 -9.464 Td [(18)-4274(18)-1920(1.0)-4500(18)-4274(50)-1920(2.0)]TJ 0 -9.465 Td [(19)-4274(19)-1920(1.0)-4500(19)-4274(51)-1920(2.0)]TJ 0 -9.464 Td [(20)-4274(20)-1920(1.0)-4500(20)-4274(52)-1920(2.0)]TJ 0 -9.465 Td [(21)-4274(21)-1920(1.0)-4500(21)-4274(53)-1920(2.0)]TJ 0 -9.464 Td [(22)-4274(22)-1920(1.0)-4500(22)-4274(54)-1920(2.0)]TJ 0 -9.465 Td [(23)-4274(23)-1920(1.0)-4500(23)-4274(55)-1920(2.0)]TJ 0 -9.464 Td [(24)-4274(24)-1920(1.0)-4500(24)-4274(56)-1920(2.0)]TJ 0 -9.465 Td [(25)-4274(25)-1920(1.5)-4500(25)-4274(57)-1920(2.0)]TJ 0 -9.464 Td [(26)-4274(26)-1920(1.5)-4500(26)-4274(58)-1920(2.0)]TJ 0 -9.465 Td [(27)-4274(27)-1920(1.5)-4500(27)-4274(59)-1920(2.0)]TJ 0 -9.464 Td [(28)-4274(28)-1920(1.5)-4500(28)-4274(60)-1920(2.0)]TJ 0 -9.465 Td [(29)-4274(29)-1920(1.5)-4500(29)-4274(61)-1920(2.0)]TJ 0 -9.464 Td [(30)-4274(30)-1920(1.5)-4500(30)-4274(62)-1920(2.0)]TJ 0 -9.465 Td [(31)-4274(31)-1920(1.5)-4500(31)-4274(63)-1920(2.0)]TJ 0 -9.464 Td [(32)-4274(32)-1920(1.5)-4500(32)-4274(64)-1920(2.0)]TJ 0 -9.465 Td [(33)-4274(33)-1920(1.5)-4500(33)-4274(25)-1920(1.5)]TJ 0 -9.464 Td [(34)-4274(34)-1920(1.5)-4500(34)-4274(26)-1920(1.5)]TJ 0 -9.465 Td [(35)-4274(35)-1920(1.5)-4500(35)-4274(27)-1920(1.5)]TJ 0 -9.464 Td [(36)-4274(36)-1920(1.5)-4500(36)-4274(28)-1920(1.5)]TJ 0 -9.465 Td [(37)-4274(37)-1920(1.5)-4500(37)-4274(29)-1920(1.5)]TJ 0 -9.464 Td [(38)-4274(38)-1920(1.5)-4500(38)-4274(30)-1920(1.5)]TJ 0 -9.465 Td [(39)-4274(39)-1920(1.5)-4500(39)-4274(31)-1920(1.5)]TJ 0 -9.464 Td [(40)-4274(40)-1920(1.5)-4500(40)-4274(32)-1920(1.5)]TJ 0 -9.465 Td [(41)-4274(41)-1920(2.0)-4500(41)-4274(17)-1920(1.0)]TJ 0 -9.464 Td [(42)-4274(42)-1920(2.0)-4500(42)-4274(18)-1920(1.0)]TJ 0 -9.465 Td [(43)-4274(43)-1920(2.0)-4500(43)-4274(19)-1920(1.0)]TJ 0 -9.464 Td [(44)-4274(44)-1920(2.0)-4500(44)-4274(20)-1920(1.0)]TJ 0 -9.465 Td [(45)-4274(45)-1920(2.0)-4500(45)-4274(21)-1920(1.0)]TJ 0 -9.464 Td [(46)-4274(46)-1920(2.0)-4500(46)-4274(22)-1920(1.0)]TJ 0 -9.465 Td [(47)-4274(47)-1920(2.0)-4500(47)-4274(23)-1920(1.0)]TJ 0 -9.464 Td [(48)-4274(48)-1920(2.0)-4500(48)-4274(24)-1920(1.0)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 88.221 -98.979 Td [(64)]TJ +0 g 0 G +ET + +endstream +endobj +1409 0 obj << -/D [1110 0 R /XYZ 149.705 753.953 null] +/Length 321 >> -% 1109 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +1 0 0 1 104.053 292.88 cm +q +.65 0 0 .65 0 0 cm +q +1 0 0 1 0 0 cm +/Im5 Do +Q +Q +0 g 0 G +1 0 0 1 -104.053 -292.88 cm +BT +/F62 9.9626 Tf 189.276 261 Td [(Figur)18(e)-250(4:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G + 77.494 -170.562 Td [(65)]TJ +0 g 0 G +ET + +endstream +endobj +1392 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/try8x8_ov.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 1411 0 R +/BBox [0 0 516 439] +/Resources << /ProcSet [ /PDF /Text ] +/ExtGState << +/R7 1412 0 R +>>/Font << /R8 1413 0 R/R10 1414 0 R>> >> -% 1117 0 obj -<< -/Type /Page -/Contents 1118 0 R -/Resources 1116 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1113 0 R 1114 0 R 1115 0 R ] ->> -% 1113 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 314.69 444.603 326.749] -/A << /S /GoTo /D (vdata) >> ->> -% 1114 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 302.734 333.626 314.794] -/A << /S /GoTo /D (table.8) >> ->> -% 1115 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 232.693 323.106 244.753] -/A << /S /GoTo /D (descdata) >> +/Length 3413 +/Filter /FlateDecode >> -% 1119 0 obj +stream +xœ…›Aä¸ …ïõ+ê8s˜^Ë’-é SvÒ·Ážj‘éÃ&‡üýØä{|ÔI°‡¦(ÚõmÑ|%ZšßŸÛ[yn÷øûúxüôµ?û×£½Ï?lîùõO³ögïíùaV+íùÇ_Í.µÐ=äÝ¥Mù÷:éß·šü³†ÿèòןZKúضÅÇÖŸ[J'¥™Ä´8ÍžùºMTÕmÂúÈiÝ&®ÀëvÉ ö™@î%o3ï‘o{æ ˆlv ß#"›È6²Ù|ˆlvÉD¶"ï×Õ@6“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>È}òmò= òmÏ|‘Íä{Dd³ÙF@6;ï‘Í.€È6CäZ£âÌ$² €löÌÙm"ÛÈnÙGŽì6‘md·K²Ïòˆò33‡ÊÏì™/ òÈåg#"\~>òÈåg#"\~N³ [‹ò3“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>ßòãÐœqhÌtÌÔ3͇ÆL9óÌ.‡æL€CÚ ½·Ðf4f :f š37´fj‹ƒŽƒæL9óÌ.ƒŽ™.ƒÚ ]f h³33Í™Z3µÅŒAÇŒAs¦œyfAÇL€A Í › +ÑlA7bÌ8tS!j¦¶˜qè– ‘3åÌ3»º¥BäÄ– z¨G.á*¹‡jpäªÀ‘ p¨þF.¿¡ê©ø†jo,¥—*o)¼TwKÙ¥ª[Š.ÕÜRr©â–‚Kõ–Ë-U[rCïÌÜŽöVŸç8ßúV]¾c­úz£¿íü½?ZÄ”à+ÄÌ£¾õgD_œßå6^¸û=t‹–×ãûªjn–Œf¿9±¼š ñ…ÖÑêY2šý&Å +\h6´Oõø`Ih®]¦³g4û¡ŒŽ@h6¼!PN¶õ­à»7#¡ÝCèæwù¾*”›uA«H… íšÇMGÛšÑì×?ú¡ÙÐ>ÕãƒEhÐ!3ÇÌh¶$‰¾Kh6¼!P­Ì¹ müî·º ÝC˜DÃ]¾¯jcf]ÊÀ–QÑ)&´Š2@üAZ[ÊÀÖYÑM +͆e€»|_ôŬ롾oÆ‹;2Ñ—"è¨F:çš÷#}]Ëè‰/~.%0QŒ&Gª@_©v&a}þùøwQU=þ}¥j”ž²jþ.ªÿ²®3Ûõ©ìûu¹Ë–:Ù×ÕÉîý 2çË=ÄÇÒïÅe ¥†ñ·p¡?·TòþæwžÄÇ•šÛ%ñ™v©>÷ûÊ®ôÈm—)Æßê¥.=øÜoŸ‹øàÉ|X”Ù"È• +×›€éíAð¹ßÖCˆù\«ÈW˜hùÌï ]|Ûš_®¿Ü®™¯2/7ò™ß×k5ø Ln»`1þ` p¼¿ùís<‰K-³]³p½I™ÞØŸûm•ƒøX¦½¸d›™o‹<¸Ê‘ÏüÎ0ƒ/ÉܲÜ2»æú0=Ó«&ñUÖãcEF¾–ëÃMo£‚ÏýΠúHZ—×XfB¿p}gZz®ŽÎâˆh®ÃGCüdf®ÉÒˆh²äÒH•Ñ´=2²FÔEOh5ÕE_ÐZèSYd…UÑZR½Ôçº ë§T/^†]·=O©Þ©UŸ:á»bÏâMÝüŸg÷7¿3x|ð$>ªžÛ%ñ¹ºÅk¼àsÿ©¥Ÿš^ðQÅoê/ú‚Ïýö¹ˆžÌÕ³¦*æ×»ºÅ Èàsÿ©õŸú[òAÅÀW˜ªøÌï ]|Ûš_ªžÛ5óUæ…ª>óŸZª•U ñó@ÕÃýÍoŸ‹øàI|T=³¡b~½«[¼ô >÷ŸZ ªk}±ƒ™o‹<@õÀg~g˜Á—U/µŸn×\®nñ¶Z|•õÁøS+3ع>\Ýâ…vð¹ßTYõÔqºIóë;ÓÒsutGDŸ±@33tÌã'“0smL–FD“%—FªŒ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zË{¿ª^t¯[¨žÞ§¿û¾…êùÛ@Ä­Ñ`[–«v ,«¼¿ù«º]ñ$>¾È«jHq½©›v‚ÏýEkC½TŒñ·ºi¯ øÜ_ÕòŠ'óá]UWŠëMÝ´‡|î/Zê}ù\ÅÈW˜¨ùÌ_Õ÷Š'ññõ\UkJ¾Ê¼@õÈgþ¢µ¡^í*Æøƒy€êñþæ¯j~Å“øø&®ª?Åõ¦nÚ7 +>÷­ õïÅ7z3óm‘W=ò™¿ªOækª¨ù*óÕ#_e}0¾h;ׇ©›öÄ‚ÏýUm°xßPyPÇp}gZz®ŽÎâˆè«43©cˆŸLÂ̵1Y][V½üέ¦†5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½´â6Tl4©^lÉ]·íMª×µ6ÔNÉ‹»&ž%Ä›ºõ)ÕÃýÍ?Ô‹'ñQõ†:V\ïꛉÁ¿Ö†ÚU ñ¦n±Ý|îêˆÅ“ù zC+®wu‹mÐàƒ_kCí*¾Â\Ýb[=øÜ?Ô‹'ñ •Gè˜_ß™–ž«£³8"ºÇ*ÍÌÐ1ŸLÂ̵1Y=Æ¢zÚð¬÷mŒ,…uÑZMuÑ´úTYaUô„&Õ[ö…›:ÖZG¨žvõ_ºP=ß-Fü®5lËãouÃYË*ïoþ¦ŽX<‰½M+®7uÓy„àsÿ®µ¡6‰Ácü­n:±|îoêˆÅ“ù°§ÛÔ±âzS7¤>÷ïZj?˜|®bä+ÌT|æoêˆÅ“ø¸}ÛÔ±’¯2/P=ò™×ÚP[¿àƒŠ1þ` z¼¿ù›:bñ$>îÔ6u¬¸ÞÔM§W‚Ïý»Ö†Úå}qÇwf¾-òàªG>ó7uÄâÉ|Mõ#_e^ z䫬ÆïZ£ÁÎõaꦓ9Áçþ¦ŽX<‰o¨<¨c¸¾3-=WGgqDô«43©cˆŸLÂ̵1YÝö¬zy?¶¥ž5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½tNÆm¨ØÔn†]·ÚÍðÓ3ˆïZ£Áö, ífณguj7 ?ÏUõÒ‘Ø%ñ¹º ífàóÜßµ6Ô¡ðQņv3tè)øÜ?Õ‹'óAõ¦:V\ïê6´›Ïs×ÚPçcÈÚÍÐq-ñ™ª#Oâ£êMu¬ä«Ì Uoh7Ç_jðQõºÖnŒ?˜ªÞÔn†.â繪^:Óâ6Tlj7Cà‚Ïý]kCzyñÌÌ|[äª7µ›†|YõÒñ·k®W·¡Ý òUÖã»Öh°s}¸º ífðþõÁøy®ª§“*nRÇfìfÄàë,Žˆî±J33tlÄnFI ¶ÉÒˆèy.ª§Ó)žõ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zíÐn†Û®b­i7Cg _Vµ›á§ _µFƒmYbü­n81iYåýͨ#OâãAÀC+®7uÓ©ÈàsÕÚP‡Ácü­n:7|î?Ô‹'óáÌß¡Ž×›ºé®?Œ÷Æå‡ñ¶ŽùÃxü0î÷øúz~ÙëDõ¹žDí‡E]Ó×…aXŽí!Í>øïŸ×düoõ–‘¿Ö«\¯ç­ÜýÌ•çë»ýô—w-ÿ/Iÿõv×ï!o'ÈŸÿ`[G. +endstream +endobj +1417 0 obj << -/D [1117 0 R /XYZ 98.895 753.953 null] +/Filter /FlateDecode +/Subtype /Type1C +/Length 13073 >> - +stream +xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs +ÏÇo”dz%!)&cÒ攤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ +¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ +ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– +Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ +Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 +«CÁ +»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w +v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7Š†%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ +ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ +èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ÎœG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøȪ‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) +sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/ÌåÆæªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²㶢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( +Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ +ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï +;žÓ¸‹FÄd4½…‚ïLûí“‹=wŽ°%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ +GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ×Ø.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïؤžAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ +‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ß•Õ¼Ò2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ +œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu +¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr +0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äó澞7á= +%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl +»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•Ñµa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçƱ€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 +ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ +ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( +½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η + Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi +õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø +¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ +Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*Ž†Ž7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@Ë©Ô·W‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥ÅgrŽ§t0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª +ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& +‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä +]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² + ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ +SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døѪ‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. +¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæ؇ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« +g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 +Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E +G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA +à= +®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ +è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞƇŒ¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ endstream endobj -1125 0 obj +1418 0 obj << -/Length 4322 +/Filter /FlateDecode +/Subtype /Type1C +/Length 11578 >> stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(Function)-250(V)111(alue)]TJ -0 g 0 G -/F54 9.9626 Tf 73.882 0 Td [(is)-250(the)-250(2-norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 102.161 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -156.342 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ 24.981 -17.933 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(1)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ -ET -q -1 0 0 1 278.034 495.12 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 281.627 494.921 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(1)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ -ET -q -1 0 0 1 367.96 495.12 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 371.922 494.921 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(2)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ -ET -q -1 0 0 1 278.034 483.165 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 281.627 482.966 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(2)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ -ET -q -1 0 0 1 367.96 483.165 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 371.922 482.966 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(3)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ -ET -q -1 0 0 1 278.034 471.21 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 281.627 471.011 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(3)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ -ET -q -1 0 0 1 367.96 471.21 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 371.922 471.011 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -170.658 -11.956 Td [(c)-175(a)-175(l)-174(l)-831(p)-56(s)-56(b)]TJ -ET -q -1 0 0 1 247.952 459.255 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 251.497 459.055 Td [(n)-56(r)-56(m)-55(2)-190(\050)-264(c)-132(t)-131(x)-131(t)-438(,)-283(v)-107(r)-107(e)-108(s)-300(\050)-193(1)-193(:)-193(3)-193(\051)-193(\051)]TJ -0 g 0 G -0 g 0 G - -75.886 -21.917 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.956 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ -0 g 0 G - 141.968 -334.744 Td [(44)]TJ -0 g 0 G -ET - +xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» +bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o +‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU +‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûĪ¾XËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ +´Õ= +«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið +ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É +|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajMè‚šÉm¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøЉ•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ +SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi +‹» xŸÃk\_¥OÌ +O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôÑ°€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| +ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#Š°6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ +·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²G᧊RjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä +„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy +GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØΓë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO +è ~nè¸Ó\“¡ÍÑeåf +Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QŠ¦Žmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW +å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊá¡žž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ +Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe +€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'ඊ + iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ +Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ +(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígÏ°çÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ +œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ +Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} +lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒÑ‘Ð IòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L +³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië +H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ +Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú +?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” +å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ +Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, +¬Á®¶QU¯Ž§_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm +êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ +ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ +Ê Å±»ò + Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄEéƶîÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG +mÙ_¨&òK + ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë +ó‹‹Ê„²êlŽG™ƒýUª€ÝWŽ¶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ +åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ +Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ +ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ +ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ endstream endobj -1138 0 obj +1424 0 obj << -/Length 6050 +/Length 8493 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.9)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(5.3)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -18.964 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(2-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F52 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 126.858 -11.955 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.006 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -186.919 -16.189 Td [(call)]TJ -0 g 0 G - [-525(psb_genrm2s\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 126.577 630.954 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S -Q -BT -/F52 9.9626 Tf 132.579 622.386 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ/F62 9.9626 Tf -51.12 -19.441 Td [(These)-280(subr)18(outines)-280(collect)-280(the)-280(portions)-280(of)-280(g)1(lobal)-280(dense)-280(matrix)-280(distributed)-280(over)]TJ 0 -11.955 Td [(all)-250(pr)18(ocess)-250(into)-250(one)-250(single)-250(array)-250(stor)18(ed)-250(on)-250(one)-250(pr)18(ocess.)]TJ/F60 9.9626 Tf 120.161 -25.465 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 126.577 618.6 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S -Q -BT -/F54 9.9626 Tf 132.554 610.032 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 369.912 610.231 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 372.9 610.032 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 369.912 598.276 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 372.9 598.077 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 369.912 586.321 cm +1 0 0 1 289.521 649.467 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.9 586.122 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F60 9.9626 Tf 292.803 649.268 Td [(x)]TJ/F91 10.3811 Tf 8.097 0 Td [(\040)]TJ/F60 9.9626 Tf 13.398 0 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ/F93 10.3811 Tf 27.705 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 369.912 574.366 cm +1 0 0 1 359.144 649.467 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.9 574.167 Td [(genrm2s)]TJ -ET -q -1 0 0 1 126.577 570.381 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 229.958 542.002 Td [(T)92(able)-250(9:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.864 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 419.863 Td [(T)]TJ -ET -q -1 0 0 1 394.86 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 397.998 419.863 Td [(vect)]TJ -ET -q -1 0 0 1 419.547 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.685 419.863 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.883 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(9)]TJ +/F60 9.9626 Tf 362.427 649.268 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F93 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F62 9.9626 Tf -219.744 -22.41 Td [(wher)18(e:)]TJ 0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(desc)]TJ +/F60 9.9626 Tf 0.344 -20.664 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 120.408 388.181 cm +1 0 0 1 169.703 606.393 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 387.982 Td [(a)]TJ +/F60 9.9626 Tf 172.986 606.194 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 340.361 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 340.161 Td [(desc)]TJ +/F62 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(submatrix)]TJ/F60 9.9626 Tf 103.256 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 298.05 340.361 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 305.084 606.393 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 301.189 340.161 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -19.925 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(res)]TJ +/F60 9.9626 Tf 308.366 606.194 Td [(x)]TJ/F62 7.5716 Tf 5.106 -1.858 Td [(1)-13(:)]TJ/F60 7.5716 Tf 5.963 0 Td [(m)]TJ/F62 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F60 7.5716 Tf 7.856 0 Td [(n)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F52 9.9626 Tf 176.182 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -106.261 Td [(45)]TJ -0 g 0 G -ET - -endstream -endobj -1145 0 obj -<< -/Length 5385 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.10)-1000(psb)]TJ +/F60 9.9626 Tf -182.447 -19.051 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 163.696 585.484 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(matrix)]TJ/F52 9.9626 Tf 208.231 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(:)]TJ/F52 9.9626 Tf -74.65 -33.873 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(1)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.335 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F54 9.9626 Tf -198.652 -20.06 Td [(wher)18(e:)]TJ +/F60 9.9626 Tf 166.979 585.285 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ 0 g 0 G -/F52 9.9626 Tf 0.622 -19.925 Td [(A)]TJ +/F62 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 234.034 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 125.981 0 Td [(A)]TJ +/F60 9.9626 Tf -266.027 -20.91 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ +0 g 0 G +/F62 9.9626 Tf 32.563 0 Td [(is)-250(the)-250(collect)-250(function.)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 229.61 588.515 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 230.392 543.107 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 236.21 579.947 Td [(A)]TJ/F51 9.9626 Tf 120.293 0 Td [(Function)]TJ +/F60 9.9626 Tf 236.663 534.539 Td [(x)]TJ/F60 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F62 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F60 9.9626 Tf 4.276 0 Td [(y)]TJ/F59 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ ET q -1 0 0 1 229.61 576.161 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 230.392 530.753 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 235.587 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 236.369 522.185 Td [(Integer)-8983(psb)]TJ ET q -1 0 0 1 372.821 567.793 cm +1 0 0 1 373.603 522.385 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 567.594 Td [(spnrm1)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 376.592 522.185 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 372.821 555.838 cm +1 0 0 1 373.603 510.429 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 555.638 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 376.592 510.23 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 372.821 543.882 cm +1 0 0 1 373.603 498.474 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 543.683 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 376.592 498.275 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 372.821 531.927 cm +1 0 0 1 373.603 486.519 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 531.728 Td [(spnrm1)]TJ +/F62 9.9626 Tf 376.592 486.32 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 229.61 527.942 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 373.603 474.564 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 474.365 Td [(gather)]TJ +ET +q +1 0 0 1 230.392 470.579 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 278.277 499.564 Td [(T)92(able)-250(10:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 278.277 442.2 Td [(T)92(able)-250(19:)-310(Data)-250(types)]TJ +0 g 0 G 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -127.572 -27.052 Td [(call)]TJ 0 g 0 G + [-525(psb_gather\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -127.572 -33.873 Td [(psb_spnrm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_norm1\050A,)-525(desc_a,)-525(info\051)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-525(root\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-190(call)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(psb_gather\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(a)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(root\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.902 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.91 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.909 Td [(loc)]TJ +ET +q +1 0 0 1 164.583 350.626 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 167.571 350.427 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 194.722 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -187.095 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ +ET +q +1 0 0 1 371.853 350.626 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F60 9.9626 Tf 375.135 350.427 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ -204.73 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.242 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 344.346 cm +1 0 0 1 436.673 302.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 344.147 Td [(Tspmat)]TJ +/F67 9.9626 Tf 439.811 302.606 Td [(T)]TJ ET q -1 0 0 1 359.321 344.346 cm +1 0 0 1 445.669 302.805 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 302.606 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 302.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 362.459 344.147 Td [(type)]TJ +/F67 9.9626 Tf 473.495 302.606 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(19)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -232.675 -19.926 Td [(desc)]TJ +/F59 9.9626 Tf -24.906 -20.91 Td [(desc)]TJ ET q -1 0 0 1 171.218 324.421 cm +1 0 0 1 171.218 269.941 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 324.221 Td [(a)]TJ +/F59 9.9626 Tf 174.207 269.741 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 276.6 cm +1 0 0 1 360.068 222.12 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 276.401 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 221.921 Td [(desc)]TJ ET q -1 0 0 1 348.86 276.6 cm +1 0 0 1 384.755 222.12 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 276.401 Td [(type)]TJ +/F67 9.9626 Tf 387.893 221.921 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.926 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -20.91 Td [(root)]TJ 0 g 0 G +/F62 9.9626 Tf 23.252 0 Td [(The)-253(pr)18(ocess)-254(that)-253(holds)-253(the)-253(global)-254(copy)111(.)-319(If)]TJ/F60 9.9626 Tf 182.635 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F93 10.3811 Tf 19.983 0 Td [(=)]TJ/F91 10.3811 Tf 11.147 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-253(all)-254(t)1(he)-254(pr)18(ocesses)-253(will)]TJ -220.305 -11.955 Td [(have)-250(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F91 10.3811 Tf 142.42 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)]TJ/F91 10.3811 Tf 7.873 0 Td [(\024)]TJ/F60 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F91 10.3811 Tf 19.923 0 Td [(\024)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)]TJ/F91 10.3811 Tf 43.89 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1.)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F52 9.9626 Tf 150.4 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -205.587 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F59 9.9626 Tf -301.107 -20.909 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -54.456 Td [(46)]TJ +/F62 9.9626 Tf 166.874 -29.888 Td [(66)]TJ 0 g 0 G ET endstream endobj -1152 0 obj +1429 0 obj << -/Length 5403 +/Length 1417 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.11)-1000(psb)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(glob)]TJ ET q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 120.976 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(matrix)]TJ/F52 9.9626 Tf 235.459 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(:)]TJ/F52 9.9626 Tf -102.327 -33.873 Td [(n)-15(r)-35(m)-18(i)]TJ/F83 10.3811 Tf 23.698 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.336 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)]TJ/F96 7.5716 Tf 5.409 -1.494 Td [(\245)]TJ/F54 9.9626 Tf -196.335 -20.424 Td [(wher)18(e:)]TJ +/F59 9.9626 Tf 123.965 706.129 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(wher)18(e)-250(the)-250(local)-250(parts)-250(must)-250(be)-250(gather)18(ed.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)-250(with)-250(the)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 202.459 0 Td [(ALLOCATABLE)]TJ 0 g 0 G -/F52 9.9626 Tf 0.623 -19.925 Td [(A)]TJ +/F62 9.9626 Tf 60.024 0 Td [(attribute.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 125.981 0 Td [(A)]TJ +/F59 9.9626 Tf -287.39 -19.925 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G + 141.968 -500.124 Td [(67)]TJ 0 g 0 G ET -q -1 0 0 1 179.842 588.515 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S -Q + +endstream +endobj +1436 0 obj +<< +/Length 7428 +>> +stream +0 g 0 G +0 g 0 G BT -/F52 9.9626 Tf 186.442 579.947 Td [(A)]TJ/F51 9.9626 Tf 120.292 0 Td [(Function)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(5.4)-1000(psb)]TJ ET q -1 0 0 1 179.842 576.161 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 185.819 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ/F62 9.9626 Tf -51.12 -20.363 Td [(These)-223(subr)18(outines)-223(scatters)-224(the)-223(portions)-223(of)-224(global)-223(dense)-223(matrix)-223(owned)-224(by)-223(a)-223(pr)18(o-)]TJ 0 -11.955 Td [(cess)-250(to)-250(all)-250(the)-250(pr)18(ocesses)-250(in)-250(the)-250(pr)18(ocesses)-250(grid.)]TJ/F60 9.9626 Tf 119.478 -26.893 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 323.053 567.793 cm +1 0 0 1 283.05 647.117 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 567.594 Td [(spnrmi)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F60 9.9626 Tf 286.333 646.918 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 5.642 1.96 Td [(\040)]TJ/F60 9.9626 Tf 13.398 0 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ/F93 10.3811 Tf 28.632 0 Td [(\050)]TJ/F60 9.9626 Tf 4.493 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 323.053 555.838 cm +1 0 0 1 362.3 647.117 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 555.638 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F60 9.9626 Tf 365.583 646.918 Td [(x)]TJ/F93 10.3811 Tf 5.329 0 Td [(\051)]TJ/F62 9.9626 Tf -220.207 -23.362 Td [(wher)18(e:)]TJ +0 g 0 G +/F60 9.9626 Tf 0.344 -22.091 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 323.053 543.882 cm +1 0 0 1 169.703 601.664 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 543.683 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F60 9.9626 Tf 172.986 601.465 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(matrix)]TJ/F60 9.9626 Tf 87.515 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 323.053 531.927 cm +1 0 0 1 289.343 601.664 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 531.728 Td [(spnrmi)]TJ +/F60 9.9626 Tf 292.626 601.465 Td [(x)]TJ/F62 7.5716 Tf 5.105 -1.858 Td [(1)-13(:)]TJ/F60 7.5716 Tf 5.963 0 Td [(m)]TJ/F62 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F60 7.5716 Tf 7.856 0 Td [(n)]TJ +0 g 0 G +/F60 9.9626 Tf -166.706 -20.955 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 179.842 527.942 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +1 0 0 1 163.696 578.851 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 227.467 499.564 Td [(T)92(able)-250(11:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G +/F60 9.9626 Tf 166.979 578.652 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ 0 g 0 G -/F59 9.9626 Tf -127.572 -33.873 Td [(psb_spnrmi\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_normi\050A,)-525(desc_a,)-525(info\051)]TJ +/F62 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 234.034 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F60 9.9626 Tf -266.027 -22.813 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 33.489 0 Td [(is)-250(the)-250(scatter)-250(function.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 194.722 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(.)]TJ -187.096 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 344.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 230.392 532.667 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F59 9.9626 Tf 276.501 344.147 Td [(Tspmat)]TJ +/F60 9.9626 Tf 236.663 524.099 Td [(x)]TJ/F60 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F62 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F60 9.9626 Tf 4.276 0 Td [(y)]TJ/F59 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ ET q -1 0 0 1 308.511 344.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 230.392 520.313 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F59 9.9626 Tf 311.649 344.147 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -232.676 -19.926 Td [(desc)]TJ +/F62 9.9626 Tf 236.369 511.745 Td [(Integer)-8983(psb)]TJ ET q -1 0 0 1 120.408 324.421 cm +1 0 0 1 373.603 511.945 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 324.221 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F62 9.9626 Tf 376.592 511.745 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 273.363 276.6 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 373.603 499.989 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 276.501 276.401 Td [(desc)]TJ +/F62 9.9626 Tf 376.592 499.79 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 298.05 276.6 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 373.603 488.034 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 301.189 276.401 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -19.926 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(in\002nity-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F52 9.9626 Tf 177.627 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -232.814 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -54.456 Td [(47)]TJ -0 g 0 G +/F62 9.9626 Tf 376.592 487.835 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET - -endstream -endobj -1163 0 obj -<< -/Length 7234 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 373.603 476.079 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.12)-1000(psb)]TJ +/F62 9.9626 Tf 376.592 475.88 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 373.603 464.124 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Product)]TJ/F54 9.9626 Tf -57.098 -19.303 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct:)]TJ/F52 9.9626 Tf 140.147 -24.611 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td [(a)]TJ/F52 9.9626 Tf 6.008 0 Td [(A)-42(x)]TJ/F85 10.3811 Tf 14.878 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 133.513 0 Td [(\0501\051)]TJ +/F62 9.9626 Tf 376.592 463.925 Td [(scatter)]TJ +ET +q +1 0 0 1 230.392 460.139 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q 0 g 0 G -/F52 9.9626 Tf -194.745 -20.13 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.398 0 Td [(a)]TJ/F52 9.9626 Tf 6.007 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 4.115 Td [(T)]TJ/F52 9.9626 Tf 5.694 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ +BT +/F62 9.9626 Tf 278.277 431.76 Td [(T)92(able)-250(20:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 130.715 0 Td [(\0502\051)]TJ 0 g 0 G -/F52 9.9626 Tf -195.482 -20.129 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td [(a)]TJ/F52 9.9626 Tf 6.008 0 Td [(A)]TJ/F52 7.5716 Tf 7.7 4.114 Td [(H)]TJ/F52 9.9626 Tf 6.981 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -112.628 -28.004 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 129.977 0 Td [(\0503\051)]TJ + [-525(psb_scatter\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -317.15 -18.633 Td [(wher)18(e:)]TJ + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F52 9.9626 Tf -14.65 -20.451 Td [(x)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.61 0 Td [(x)]TJ/F54 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F52 9.9626 Tf -131.167 -19.132 Td [(y)]TJ + [-525(root,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.441 0 Td [(y)]TJ/F54 7.5716 Tf 5.2 -1.494 Td [(:)-13(,)-12(:)]TJ + [-525(mold\051)]TJ 0 g 0 G -/F52 9.9626 Tf -130.23 -19.131 Td [(A)]TJ +/F59 9.9626 Tf -14.944 -24.806 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(is)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 118.41 0 Td [(A)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -22.813 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -22.813 Td [(glob)]TJ ET q -1 0 0 1 230.392 517.986 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.992 509.418 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 76.437 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 505.633 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 236.369 497.065 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 373.603 497.264 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 497.065 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 373.603 485.309 cm +1 0 0 1 171.786 333.523 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 485.11 Td [(spmm)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F59 9.9626 Tf 174.774 333.324 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(that)-250(must)-250(be)-250(scatter)18(ed)-250(into)-250(local)-250(pieces.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -22.814 Td [(desc)]TJ ET q -1 0 0 1 373.603 473.354 cm +1 0 0 1 171.218 262.89 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 473.154 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F59 9.9626 Tf 174.207 262.69 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 461.398 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 215.069 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 461.199 Td [(spmm)]TJ +/F67 9.9626 Tf 363.206 214.87 Td [(desc)]TJ ET q -1 0 0 1 230.392 457.413 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 384.755 215.069 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 278.277 429.035 Td [(T)92(able)-250(12:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -107.398 -24.261 Td [(call)]TJ +/F67 9.9626 Tf 387.893 214.87 Td [(type)]TJ 0 g 0 G - [-525(psb_spmm\050alpha,)-525(a,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -14.944 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_spmm\050alpha,)-525(a,)-525(x,)-525(beta,)-525(y,desc_a,)-525(info,)-525(trans,)-525(work\051)]TJ -0 g 0 G -/F51 9.9626 Tf -5.23 -22.618 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -258.11 -22.813 Td [(root)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.626 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 23.252 0 Td [(The)-218(pr)18(ocess)-218(that)-218(holds)-219(t)1(he)-219(global)-218(copy)111(.)-299(If)]TJ/F60 9.9626 Tf 179.982 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F93 10.3811 Tf 19.922 0 Td [(=)]TJ/F91 10.3811 Tf 11.086 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-218(all)-218(the)-218(pr)18(ocesses)-219(have)]TJ -217.53 -11.956 Td [(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-258(as:)-327(an)-258(integer)-259(variable)]TJ/F91 10.3811 Tf 142.917 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)]TJ/F91 10.3811 Tf 8.027 0 Td [(\024)]TJ/F60 9.9626 Tf 11.139 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F91 10.3811 Tf 20.077 0 Td [(\024)]TJ/F60 9.9626 Tf 11.239 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.534 0 Td [(\000)]TJ/F62 9.9626 Tf 10.162 0 Td [(1,)-260(default)]TJ/F67 9.9626 Tf 43.952 0 Td [(psb_root_)]TJ/F62 9.9626 Tf 47.073 0 Td [(,)]TJ -316.314 -11.955 Td [(i.e.)-310(pr)18(ocess)-250(0.)]TJ 0 g 0 G + 141.968 -29.888 Td [(68)]TJ 0 g 0 G - 0 -20.626 Td [(alpha)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(12)]TJ +ET + +endstream +endobj +1443 0 obj +<< +/Length 3984 +>> +stream 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.626 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 164.964 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -157.337 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.242 0 Td [(psb)]TJ -ET -q -1 0 0 1 324.173 212.882 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 327.311 212.682 Td [(Tspmat)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(mold)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F67 9.9626 Tf 199.086 0 Td [(psb)]TJ ET q -1 0 0 1 359.321 212.882 cm +1 0 0 1 340.207 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 362.459 212.682 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -232.675 -20.625 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F67 9.9626 Tf 343.345 658.308 Td [(T)]TJ ET q -1 0 0 1 436.673 144.435 cm +1 0 0 1 349.203 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 144.236 Td [(T)]TJ +/F67 9.9626 Tf 352.341 658.308 Td [(base)]TJ ET q -1 0 0 1 445.669 144.435 cm +1 0 0 1 373.89 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 144.236 Td [(vect)]TJ +/F67 9.9626 Tf 377.028 658.308 Td [(vect)]TJ ET q -1 0 0 1 470.356 144.435 cm +1 0 0 1 398.577 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 144.236 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-278(12)]TJ -0 g 0 G - [(.)-395(The)-278(rank)-279(of)]TJ/F52 9.9626 Tf 275.498 0 Td [(x)]TJ/F54 9.9626 Tf 7.978 0 Td [(must)-278(be)]TJ -283.476 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -0 g 0 G - 83.916 -29.888 Td [(48)]TJ -0 g 0 G +/F67 9.9626 Tf 401.716 658.308 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)-250(loc)]TJ ET - -endstream -endobj -1178 0 obj -<< -/Length 6532 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 234.988 646.552 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(beta)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.617 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(12)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 237.976 646.353 Td [(x)-250(is)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 50.53 0 Td [(psb)]TJ ET q -1 0 0 1 385.864 592.09 cm +1 0 0 1 304.825 646.552 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 591.891 Td [(T)]TJ +/F67 9.9626 Tf 307.963 646.353 Td [(T)]TJ ET q -1 0 0 1 394.86 592.09 cm +1 0 0 1 313.821 646.552 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 591.891 Td [(vect)]TJ +/F67 9.9626 Tf 316.959 646.353 Td [(vect)]TJ ET q -1 0 0 1 419.547 592.09 cm +1 0 0 1 338.508 646.552 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 591.891 Td [(type)]TJ +/F67 9.9626 Tf 341.646 646.353 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.956 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-280(12)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -262.673 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)-400(The)-280(rank)-280(of)]TJ/F52 9.9626 Tf 275.562 0 Td [(y)]TJ/F54 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.115 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -83.228 -18.597 Td [(desc)]TJ + 0 -19.926 Td [(loc)]TJ ET q -1 0 0 1 120.408 549.583 cm +1 0 0 1 113.773 606.702 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 549.383 Td [(a)]TJ +/F59 9.9626 Tf 116.762 606.502 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ +ET +q +1 0 0 1 321.043 606.702 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F60 9.9626 Tf 324.326 606.502 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -204.729 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-214(as:)-292(a)-215(rank)-214(one)-214(or)-214(two)-215(ALLOCA)74(T)74(ABLE)-214(array)-214(or)-214(an)-215(object)-214(of)-214(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 0 -11.955 Td [(psb)]TJ ET q -1 0 0 1 273.363 501.762 cm +1 0 0 1 141.121 546.926 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 501.563 Td [(desc)]TJ +/F67 9.9626 Tf 144.259 546.727 Td [(T)]TJ ET q -1 0 0 1 298.05 501.762 cm +1 0 0 1 150.117 546.926 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 501.563 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -18.597 Td [(trans)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(indicates)-250(what)-250(kind)-250(of)-250(operation)-250(to)-250(perform.)]TJ -0 g 0 G -/F51 9.9626 Tf -2.759 -18.597 Td [(trans)-250(=)-250(N)]TJ -0 g 0 G -/F54 9.9626 Tf 46.983 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -46.983 -14.612 Td [(trans)-250(=)-250(T)]TJ -0 g 0 G -/F54 9.9626 Tf 45.33 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -45.33 -14.612 Td [(trans)-250(=)-250(C)]TJ -0 g 0 G -/F54 9.9626 Tf 45.878 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ -0 g 0 G - -45.878 -18.597 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F85 10.3811 Tf 25.193 0 Td [(=)]TJ/F52 9.9626 Tf 11.434 0 Td [(N)]TJ/F54 9.9626 Tf -75.267 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.596 Td [(work)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-270(as:)-351(a)-270(rank)-270(one)-270(array)-271(of)-270(the)-270(same)-270(type)-271(of)]TJ/F52 9.9626 Tf 220.875 0 Td [(x)]TJ/F54 9.9626 Tf 7.898 0 Td [(and)]TJ/F52 9.9626 Tf 19.684 0 Td [(y)]TJ/F54 9.9626 Tf 7.798 0 Td [(with)-270(the)-270(T)74(AR-)]TJ -256.255 -11.955 Td [(GET)-250(attribute.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -18.597 Td [(y)]TJ +/F67 9.9626 Tf 153.255 546.727 Td [(vect)]TJ +ET +q +1 0 0 1 174.804 546.926 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 177.942 546.727 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 144.939 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -135.659 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 23.412 0 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(12)]TJ + [-250(20)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(info)]TJ +/F59 9.9626 Tf -101.459 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -36.529 Td [(49)]TJ + 141.968 -388.543 Td [(69)]TJ 0 g 0 G ET endstream endobj -1185 0 obj +1447 0 obj << -/Length 7154 +/Length 6319 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.13)-1000(psb)]TJ +/F59 14.3462 Tf 150.705 706.042 Td [(6)-1000(Data)-250(management)-250(routines)]TJ/F59 11.9552 Tf 0 -24.694 Td [(6.1)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 198.238 681.547 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(spsm)-250(\227)-250(T)111(riangular)-250(System)-250(Solve)]TJ/F54 9.9626 Tf -57.098 -19.83 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(T)90(riangular)-250(System)-250(Solve:)]TJ/F52 9.9626 Tf 122.724 -35.213 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F54 7.5716 Tf 6.228 0 Td [(1)]TJ/F52 9.9626 Tf 4.577 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -82.196 -16.139 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F54 7.5716 Tf 6.227 0 Td [(1)]TJ/F52 9.9626 Tf 4.578 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.139 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F54 7.5716 Tf 6.228 0 Td [(1)]TJ/F52 9.9626 Tf 4.607 -4.114 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.421 0 Td [(T)]TJ/F52 9.9626 Tf 5.694 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -83.506 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.42 0 Td [(T)]TJ/F52 9.9626 Tf 5.695 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.421 0 Td [(T)]TJ/F52 9.9626 Tf 5.724 -4.114 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.091 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.648 0 Td [(y)]TJ -84.982 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.504 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -93.281 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 7.012 -4.115 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.648 0 Td [(y)]TJ/F54 9.9626 Tf -201.061 -38.202 Td [(wher)18(e:)]TJ -0 g 0 G -/F52 9.9626 Tf -14.65 -21.265 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.61 0 Td [(x)]TJ/F54 7.5716 Tf 5.201 -1.495 Td [(:)-12(,)-13(:)]TJ -0 g 0 G -/F52 9.9626 Tf -131.167 -20.218 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.441 0 Td [(y)]TJ/F54 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ +/F59 11.9552 Tf 201.825 681.348 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G -/F52 9.9626 Tf -130.599 -20.218 Td [(T)]TJ 0 g 0 G -/F54 9.9626 Tf 11.432 0 Td [(is)-250(the)-250(global)-250(sparse)-250(block)-250(triangular)-250(submatrix)]TJ/F52 9.9626 Tf 206.797 0 Td [(T)]TJ +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,parts=parts\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vg=vg,[mg=mg,flag=flag]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,nl=nl\051)]TJ 0 -11.956 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,repl=.true.\051)]TJ/F62 9.9626 Tf 14.944 -19.771 Td [(This)-377(subr)18(outine)-378(initializes)-377(the)-378(communication)-377(descriptor)-378(associated)-377(with)]TJ -14.944 -11.956 Td [(an)-271(index)-271(space.)-373(One)-272(o)1(f)-272(the)-271(optional)-271(ar)18(guments)]TJ/F67 9.9626 Tf 209.77 0 Td [(parts)]TJ/F62 9.9626 Tf 26.152 0 Td [(,)]TJ/F67 9.9626 Tf 5.244 0 Td [(vg)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)]TJ/F67 9.9626 Tf 5.244 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)]TJ/F67 9.9626 Tf 5.244 0 Td [(nl)]TJ/F62 9.9626 Tf 13.161 0 Td [(or)]TJ/F67 9.9626 Tf 12.076 0 Td [(repl)]TJ/F62 9.9626 Tf 23.622 0 Td [(must)]TJ -321.435 -11.955 Td [(be)-250(speci\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(speci\002c)-250(initialization)-250(strategy)111(.)]TJ 0 g 0 G - -218.159 -21.712 Td [(D)]TJ -0 g 0 G -/F54 9.9626 Tf 12.956 0 Td [(is)-250(the)-250(scaling)-250(diagonal)-250(matrix.)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 6.894 -21.266 Td [(call)]TJ +/F59 9.9626 Tf 0 -18.208 Td [(On)-250(Entry)]TJ 0 g 0 G - [-525(psb_spsm\050alpha,)-525(t,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -14.944 -11.955 Td [(call)]TJ 0 g 0 G - [-525(psb_spsm\050alpha,)-525(t,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info,)-525(trans,)-525(unit,)-525(choice,)-525(diag,)-525(work\051)]TJ + 0 -19.067 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.067 Td [(icontxt)]TJ 0 g 0 G -ET -q -1 0 0 1 230.392 339.439 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.623 330.871 Td [(T)]TJ/F54 9.9626 Tf 6.451 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(,)]TJ/F52 9.9626 Tf 5.306 0 Td [(D)]TJ/F54 9.9626 Tf 7.975 0 Td [(,)]TJ/F60 9.9626 Tf 5.105 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 64.393 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 327.085 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 236.369 318.517 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 373.603 318.716 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 318.517 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 373.603 306.761 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 306.562 Td [(spsm)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 373.603 294.806 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 294.607 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 373.603 282.851 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 282.652 Td [(spsm)]TJ -ET -q -1 0 0 1 230.392 278.866 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q +/F62 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 278.277 250.487 Td [(T)92(able)-250(13:)-310(Data)-250(types)]TJ +/F59 9.9626 Tf -24.907 -19.066 Td [(vg)]TJ 0 g 0 G +/F62 9.9626 Tf 16.06 0 Td [(Data)-250(allocation:)-310(each)-250(index)]TJ/F60 9.9626 Tf 121.707 0 Td [(i)]TJ/F91 10.3811 Tf 5.856 0 Td [(2)-290(f)]TJ/F62 9.9626 Tf 15.245 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F91 10.3811 Tf 13.449 0 Td [(g)]TJ/F62 9.9626 Tf 7.806 0 Td [(is)-250(allocated)-250(to)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 98.454 0 Td [(v)-47(g)]TJ/F93 10.3811 Tf 10.68 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(.)]TJ -295.759 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.907 -19.067 Td [(\003ag)]TJ 0 g 0 G -/F51 9.9626 Tf -127.572 -38.916 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 21.589 0 Td [(Speci\002es)-250(whether)-250(entries)-250(in)]TJ/F60 9.9626 Tf 123.401 0 Td [(v)-47(g)]TJ/F62 9.9626 Tf 13.046 0 Td [(ar)18(e)-250(zer)18(o-)-250(or)-250(one-based.)]TJ -133.129 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0,)-167(1,)-250(default)-250(0.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -24.907 -19.067 Td [(mg)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.712 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 19.377 0 Td [(the)-250(\050global\051)-250(number)-250(of)-250(r)18(ows)-250(of)-250(the)-250(pr)18(oblem.)]TJ 5.53 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-262(as:)-335(an)-263(integer)-262(value.)-348(It)-262(is)-262(r)18(equir)18(ed)-263(if)]TJ/F67 9.9626 Tf 203.091 0 Td [(parts)]TJ/F62 9.9626 Tf 28.766 0 Td [(or)]TJ/F67 9.9626 Tf 11.99 0 Td [(repl)]TJ/F62 9.9626 Tf 23.536 0 Td [(is)-262(speci\002ed,)]TJ -267.383 -11.955 Td [(it)-250(is)-250(optional)-250(if)]TJ/F67 9.9626 Tf 66.141 0 Td [(vg)]TJ/F62 9.9626 Tf 12.951 0 Td [(is)-250(speci\002ed.)]TJ 0 g 0 G +/F59 9.9626 Tf -103.999 -19.067 Td [(parts)]TJ 0 g 0 G - 0 -21.713 Td [(alpha)]TJ +/F62 9.9626 Tf 27.666 0 Td [(the)-250(subr)18(outine)-250(that)-250(de\002nes)-250(the)-250(partitioning)-250(scheme.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(subr)18(outine.)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ +/F59 9.9626 Tf -24.907 -19.067 Td [(vl)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 13.838 0 Td [(Data)-293(allocation:)-395(the)-293(set)-292(of)-293(global)-293(i)1(ndices)]TJ/F60 9.9626 Tf 181.166 0 Td [(v)-25(l)]TJ/F93 10.3811 Tf 8.548 0 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(1)-369(:)]TJ/F60 9.9626 Tf 14.955 0 Td [(n)-25(l)]TJ/F93 10.3811 Tf 9.105 0 Td [(\051)]TJ/F62 9.9626 Tf 7.065 0 Td [(belonging)-293(to)-292(the)-293(calling)]TJ -213.919 -11.955 Td [(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G - 141.967 -29.888 Td [(50)]TJ + 141.967 -29.887 Td [(70)]TJ 0 g 0 G ET endstream endobj -1196 0 obj +1452 0 obj << -/Length 7295 +/Length 6337 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(t)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(nl)]TJ 0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(the)-250(global)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 171.221 0 Td [(T)]TJ/F54 9.9626 Tf 6.451 0 Td [(.)]TJ -161.064 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(type)-250(speci\002ed)-250(in)-250(\247)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ -0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 14.386 0 Td [(Data)-305(allocation:)-421(in)-305(a)-305(generalized)-305(block-r)18(ow)-305(distribution)-306(the)-305(number)-305(of)-305(in-)]TJ 10.521 -11.955 Td [(dices)-250(belonging)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(May)-250(be)-250(speci\002ed)-250(together)-250(with)]TJ/F67 9.9626 Tf 272.943 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(x)]TJ +/F59 9.9626 Tf -308.311 -20.135 Td [(repl)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.864 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 589.838 Td [(T)]TJ -ET -q -1 0 0 1 394.86 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 397.998 589.838 Td [(vect)]TJ -ET -q -1 0 0 1 419.547 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.685 589.838 Td [(type)]TJ +/F62 9.9626 Tf 23.243 0 Td [(Data)-288(allocation:)-385(build)-288(a)-288(r)18(eplicated)-287(index)-288(space)-288(\050i.e.)-423(all)-288(pr)18(ocesses)-287(own)-288(all)]TJ 1.664 -11.955 Td [(indices\051.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F67 9.9626 Tf 132.133 0 Td [(.true.)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-278(13)]TJ +/F59 9.9626 Tf -157.04 -20.135 Td [(globalcheck)]TJ 0 g 0 G - [(.)-395(The)-278(rank)-278(of)]TJ/F52 9.9626 Tf 275.498 0 Td [(x)]TJ/F54 9.9626 Tf 7.977 0 Td [(must)-278(be)]TJ -283.475 -11.956 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ +/F62 9.9626 Tf 59.766 0 Td [(Data)-250(allocation:)-310(do)-250(global)-250(checks)-250(on)-250(the)-250(local)-250(index)-250(lists)]TJ/F67 9.9626 Tf 247.788 0 Td [(vl)]TJ/F62 9.9626 Tf -282.647 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value,)-250(default:)]TJ/F67 9.9626 Tf 162.678 0 Td [(.false.)]TJ 0 g 0 G -/F51 9.9626 Tf -82.959 -20.649 Td [(beta)]TJ +/F59 9.9626 Tf -187.585 -20.135 Td [(lidx)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.617 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ +/F62 9.9626 Tf 22.685 0 Td [(Data)-308(allocation:)-425(the)-307(set)-308(of)-307(local)-308(indices)]TJ/F60 9.9626 Tf 175.731 0 Td [(l)-48(i)-32(d)-42(x)]TJ/F93 10.3811 Tf 17.065 0 Td [(\050)]TJ/F62 9.9626 Tf 4.15 0 Td [(1)-397(:)]TJ/F60 9.9626 Tf 15.505 0 Td [(n)-25(l)]TJ/F93 10.3811 Tf 9.105 0 Td [(\051)]TJ/F62 9.9626 Tf 7.214 0 Td [(to)-308(be)-307(assigned)-308(to)-307(the)]TJ -226.548 -11.955 Td [(global)-250(indices)]TJ/F60 9.9626 Tf 63.476 0 Td [(v)-25(l)]TJ/F62 9.9626 Tf 8.423 0 Td [(.)]TJ -71.899 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G - [(.)]TJ +/F59 9.9626 Tf -24.907 -22.127 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ + 0 -20.135 Td [(desc)]TJ ET q -1 0 0 1 385.864 429.186 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 376.512 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 389.002 428.986 Td [(T)]TJ +/F59 9.9626 Tf 123.397 376.313 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 394.86 429.186 cm +1 0 0 1 309.258 328.692 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 428.986 Td [(vect)]TJ +/F67 9.9626 Tf 312.397 328.492 Td [(desc)]TJ ET q -1 0 0 1 419.547 429.186 cm +1 0 0 1 333.945 328.692 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 428.986 Td [(type)]TJ +/F67 9.9626 Tf 337.084 328.492 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-280(13)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(.)-400(The)-280(rank)-280(of)]TJ/F52 9.9626 Tf 275.562 0 Td [(y)]TJ/F54 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.115 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ +/F59 9.9626 Tf -258.11 -20.135 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -83.228 -20.65 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 384.625 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 384.426 Td [(a)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -22.128 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 336.805 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 336.605 Td [(desc)]TJ -ET -q -1 0 0 1 298.05 336.805 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 336.605 Td [(type)]TJ +/F62 9.9626 Tf 12.454 -20.082 Td [(1.)]TJ +0 g 0 G + [-500(One)-309(of)-310(the)-309(optional)-310(ar)18(guments)]TJ/F67 9.9626 Tf 152.661 0 Td [(parts)]TJ/F62 9.9626 Tf 26.152 0 Td [(,)]TJ/F67 9.9626 Tf 5.723 0 Td [(vg)]TJ/F62 9.9626 Tf 10.46 0 Td [(,)]TJ/F67 9.9626 Tf 5.723 0 Td [(vl)]TJ/F62 9.9626 Tf 10.46 0 Td [(,)]TJ/F67 9.9626 Tf 5.723 0 Td [(nl)]TJ/F62 9.9626 Tf 13.544 0 Td [(or)]TJ/F67 9.9626 Tf 12.458 0 Td [(repl)]TJ/F62 9.9626 Tf 24.005 0 Td [(must)-310(be)-309(speci-)]TJ -254.456 -11.956 Td [(\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(initialization)-250(strategy)-250(as)-250(follows:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.135 Td [(parts)]TJ +0 g 0 G +/F62 9.9626 Tf 27.666 0 Td [(In)-242(this)-242(case)-242(we)-243(have)-242(a)-242(subr)18(outine)-242(specifying)-242(the)-242(mapping)-242(between)]TJ -5.748 -11.955 Td [(global)-312(indices)-312(and)-311(pr)18(ocess/local)-312(index)-312(pairs.)-496(If)-311(this)-312(optional)-312(ar)18(gu-)]TJ 0 -11.955 Td [(ment)-230(is)-230(speci\002ed,)-234(then)-230(it)-230(is)-230(mandatory)-230(to)-230(specify)-230(the)-230(ar)18(gument)]TJ/F67 9.9626 Tf 274.929 0 Td [(mg)]TJ/F62 9.9626 Tf 12.752 0 Td [(as)]TJ -287.681 -11.955 Td [(well.)-310(The)-250(subr)18(outine)-250(must)-250(conform)-250(to)-250(the)-250(following)-250(interface:)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -20.649 Td [(trans)]TJ +/F67 9.9626 Tf 10.46 -18.09 Td [(interface)]TJ 15.691 -11.955 Td [(subroutine)-525(psb_parts\050glob_index,mg,np,pv,nv\051)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(specify)-250(with)]TJ/F52 9.9626 Tf 56.398 0 Td [(unitd)]TJ/F54 9.9626 Tf 24.637 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ +/F62 9.9626 Tf 93.899 -29.888 Td [(71)]TJ 0 g 0 G -/F51 9.9626 Tf -83.794 -20.65 Td [(trans)-250(=)-250('N')]TJ +ET + +endstream +endobj +1458 0 obj +<< +/Length 9985 +>> +stream 0 g 0 G -/F54 9.9626 Tf 52.522 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(transposed)-250(matrix)]TJ 0 g 0 G -/F51 9.9626 Tf -52.522 -16.303 Td [(trans)-250(=)-250('T')]TJ +BT +/F67 9.9626 Tf 234.142 706.129 Td [(integer,)-525(intent)-525(\050in\051)-1050(::)-525(glob_index,np,mg)]TJ 0 -11.955 Td [(integer,)-525(intent)-525(\050out\051)-525(::)-525(nv,)-525(pv\050*\051)]TJ -10.461 -11.955 Td [(end)-525(subroutine)-525(psb_parts)]TJ -15.691 -11.956 Td [(end)-525(interface)]TJ/F62 9.9626 Tf -10.461 -17.586 Td [(The)-250(input)-250(ar)18(guments)-250(ar)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 50.869 0 Td [(the)-250(operation)-250(is)-250(with)-250(transposed)-250(matrix.)]TJ +/F59 9.9626 Tf 0 -15.594 Td [(glob)]TJ +ET +q +1 0 0 1 218.61 637.283 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 221.599 637.083 Td [(index)]TJ 0 g 0 G -/F51 9.9626 Tf -50.869 -16.302 Td [(trans)-250(=)-250('C')]TJ +/F62 9.9626 Tf 30.436 0 Td [(The)-250(global)-250(index)-250(to)-250(be)-250(mapped;)]TJ 0 g 0 G -/F54 9.9626 Tf 51.417 0 Td [(the)-250(operation)-250(is)-250(with)-250(conjugate)-250(transposed)-250(matrix.)]TJ -51.417 -20.65 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F85 10.3811 Tf 25.193 0 Td [(=)]TJ/F52 9.9626 Tf 11.434 0 Td [(N)]TJ/F54 9.9626 Tf -75.267 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +/F59 9.9626 Tf -54.506 -13.774 Td [(np)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(unitd)]TJ +/F62 9.9626 Tf 17.156 0 Td [(The)-250(number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(mapping;)]TJ 0 g 0 G -/F54 9.9626 Tf 29.878 0 Td [(specify)-250(with)]TJ/F52 9.9626 Tf 56.398 0 Td [(trans)]TJ/F54 9.9626 Tf 23.521 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ +/F59 9.9626 Tf -17.156 -13.774 Td [(mg)]TJ 0 g 0 G -/F51 9.9626 Tf -84.89 -20.649 Td [(unitd)-250(=)-250('U')]TJ +/F62 9.9626 Tf 19.377 0 Td [(The)-250(total)-250(number)-250(of)-250(global)-250(r)18(ows)-250(in)-250(the)-250(mapping;)]TJ -19.377 -15.594 Td [(The)-250(output)-250(ar)18(guments)-250(ar)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 54.186 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(scaling)]TJ +/F59 9.9626 Tf 0 -15.594 Td [(nv)]TJ 0 g 0 G -/F51 9.9626 Tf -54.186 -16.303 Td [(unitd)-250(=)-250('L)74(')]TJ +/F62 9.9626 Tf 16.608 0 Td [(The)-250(number)-250(of)-250(entries)-250(in)]TJ/F67 9.9626 Tf 111.052 0 Td [(pv)]TJ/F62 9.9626 Tf 10.461 0 Td [(;)]TJ 0 g 0 G -/F54 9.9626 Tf 51.785 0 Td [(the)-250(operation)-250(is)-250(with)-250(left)-250(scaling)]TJ +/F59 9.9626 Tf -138.121 -13.774 Td [(pv)]TJ 0 g 0 G -/F51 9.9626 Tf -51.785 -16.302 Td [(unitd)-250(=)-250('R')]TJ +/F62 9.9626 Tf 16.608 0 Td [(A)-393(vector)-394(containing)-393(the)-394(indices)-393(of)-394(the)-394(pr)18(ocesses)-393(to)-394(which)-393(the)]TJ 2.022 -11.955 Td [(global)-357(index)-357(should)-356(be)-357(assigend;)-410(each)-357(entry)-357(must)-357(satisfy)-357(0)]TJ/F91 10.3811 Tf 270.063 0 Td [(\024)]TJ/F60 9.9626 Tf -269.39 -11.956 Td [(p)-25(v)]TJ/F93 10.3811 Tf 10.461 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F69 10.3811 Tf 8.665 0 Td [(<)]TJ/F60 9.9626 Tf 12.71 0 Td [(n)-80(p)]TJ/F62 9.9626 Tf 11.442 0 Td [(;)-382(if)]TJ/F60 9.9626 Tf 16.006 0 Td [(n)-25(v)]TJ/F69 10.3811 Tf 15.409 0 Td [(>)]TJ/F62 9.9626 Tf 12.586 0 Td [(1)-338(we)-338(have)-338(an)-338(index)-338(assigned)-338(to)-338(multiple)]TJ -95.244 -11.955 Td [(pr)18(ocesses,)-250(i.e.)-310(we)-250(have)-250(an)-250(overlap)-250(among)-250(the)-250(subdomains.)]TJ 0 g 0 G -/F54 9.9626 Tf 53.628 0 Td [(the)-250(operation)-250(is)-250(with)-250(right)-250(scaling.)]TJ +/F59 9.9626 Tf -40.548 -15.594 Td [(vg)]TJ 0 g 0 G - 88.34 -29.888 Td [(51)]TJ +/F62 9.9626 Tf 16.06 0 Td [(In)-330(this)-330(case)-330(the)-330(association)-330(between)-331(an)-330(index)-330(and)-330(a)-330(pr)18(ocess)-330(is)-330(spec-)]TJ 5.858 -11.955 Td [(i\002ed)-371(via)-372(an)-371(integer)-372(vector)]TJ/F67 9.9626 Tf 120.986 0 Td [(vg\0501:mg\051)]TJ/F62 9.9626 Tf 41.842 0 Td [(;)-432(each)-372(index)]TJ/F60 9.9626 Tf 58.923 0 Td [(i)]TJ/F91 10.3811 Tf 8.096 0 Td [(2)-506(f)]TJ/F62 9.9626 Tf 17.485 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F91 10.3811 Tf 13.449 0 Td [(g)]TJ/F62 9.9626 Tf 9.016 0 Td [(is)]TJ -289.764 -11.955 Td [(assigned)-381(to)-381(pr)18(ocess)]TJ/F60 9.9626 Tf 91.547 0 Td [(v)-47(g)]TJ/F93 10.3811 Tf 10.68 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(.)-703(The)-380(vector)]TJ/F67 9.9626 Tf 61.203 0 Td [(vg)]TJ/F62 9.9626 Tf 14.255 0 Td [(must)-381(be)-381(identical)-381(on)-380(all)]TJ -189.127 -11.955 Td [(calling)-354(pr)18(ocesses;)-406(its)-355(entri)1(es)-355(may)-354(have)-354(the)-354(ranges)]TJ/F93 10.3811 Tf 226.209 0 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.888 0 Td [(\000)]TJ/F62 9.9626 Tf 10.515 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F62 9.9626 Tf 7.678 0 Td [(or)]TJ/F93 10.3811 Tf -287.387 -11.955 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F93 10.3811 Tf 11.566 0 Td [(\051)]TJ/F62 9.9626 Tf 6.984 0 Td [(accor)18(ding)-284(to)-285(the)-284(value)-285(of)]TJ/F67 9.9626 Tf 113.068 0 Td [(flag)]TJ/F62 9.9626 Tf 20.922 0 Td [(.)-413(The)-285(size)]TJ/F60 9.9626 Tf 45.955 0 Td [(m)-47(g)]TJ/F62 9.9626 Tf 16.159 0 Td [(may)-285(be)-284(spec-)]TJ -238.895 -11.955 Td [(i\002ed)-349(via)-349(the)-349(optional)-349(ar)18(gument)]TJ/F67 9.9626 Tf 144.092 0 Td [(mg)]TJ/F62 9.9626 Tf 10.46 0 Td [(;)-399(the)-349(default)-349(is)-349(to)-349(use)-349(the)-349(entir)18(e)]TJ -154.552 -11.956 Td [(vector)]TJ/F67 9.9626 Tf 29.937 0 Td [(vg)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)-250(thus)-250(having)]TJ/F67 9.9626 Tf 59.885 0 Td [(mg=size\050vg\051)]TJ/F62 9.9626 Tf 57.534 0 Td [(.)]TJ 0 g 0 G -ET - -endstream -endobj -1202 0 obj -<< -/Length 4541 ->> -stream +/F59 9.9626 Tf -179.735 -15.593 Td [(vl)]TJ 0 g 0 G +/F62 9.9626 Tf 13.838 0 Td [(In)-383(this)-382(case)-383(we)-383(ar)18(e)-382(specifying)-383(the)-383(list)-382(of)-383(indices)]TJ/F67 9.9626 Tf 220.787 0 Td [(vl\0501:nl\051)]TJ/F62 9.9626 Tf 45.655 0 Td [(assigned)]TJ -258.362 -11.955 Td [(to)-401(the)-400(curr)18(ent)-401(pr)18(ocess;)-476(thus,)-438(the)-400(global)-401(pr)18(oblem)-400(size)]TJ/F60 9.9626 Tf 243.82 0 Td [(m)-47(g)]TJ/F62 9.9626 Tf 17.316 0 Td [(is)-401(given)]TJ -261.136 -11.956 Td [(by)-435(the)-435(range)-435(of)-435(the)-435(aggr)18(egate)-435(of)-435(the)-435(individual)-435(vectors)]TJ/F67 9.9626 Tf 259.368 0 Td [(vl)]TJ/F62 9.9626 Tf 14.794 0 Td [(spec-)]TJ -274.162 -11.955 Td [(i\002ed)-429(in)-429(the)-429(calling)-428(pr)18(ocesses.)-847(The)-429(size)-429(may)-429(be)-428(speci\002ed)-429(via)-429(the)]TJ 0 -11.955 Td [(optional)-438(ar)18(gument)]TJ/F67 9.9626 Tf 88.319 0 Td [(nl)]TJ/F62 9.9626 Tf 10.46 0 Td [(;)-532(the)-438(default)-438(is)-438(to)-438(use)-438(the)-438(entir)18(e)-438(vector)]TJ/F67 9.9626 Tf 185.156 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)]TJ -294.396 -11.955 Td [(thus)-364(having)]TJ/F67 9.9626 Tf 57.178 0 Td [(nl=size\050vl\051)]TJ/F62 9.9626 Tf 57.534 0 Td [(.)-652(If)]TJ/F67 9.9626 Tf 19.294 0 Td [(globalcheck=.true.)]TJ/F62 9.9626 Tf 97.774 0 Td [(the)-364(subr)18(outine)]TJ -231.78 -11.955 Td [(will)-403(check)-403(how)-404(many)-403(times)-403(each)-403(entry)-403(in)-404(the)-403(global)-403(index)-403(space)]TJ/F93 10.3811 Tf 0.125 -11.955 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F93 10.3811 Tf 13.449 0 Td [(\051)]TJ/F62 9.9626 Tf 6.245 0 Td [(is)-210(speci\002ed)-211(in)-210(the)-210(input)-210(lists)]TJ/F67 9.9626 Tf 122.836 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)-218(thus)-211(allowin)1(g)-211(for)-210(the)-210(pr)18(es-)]TJ -177.232 -11.956 Td [(ence)-302(of)-302(overlap)-302(in)-302(the)-302(input,)-315(and)-302(checki)1(ng)-302(for)-302(\223orphan\224)-302(indices.)-466(If)]TJ/F67 9.9626 Tf 0 -11.955 Td [(globalcheck=.false.)]TJ/F62 9.9626 Tf 99.377 0 Td [(,)-437(the)-400(subr)18(outine)-400(will)-400(not)-400(check)-400(for)-400(overlap,)]TJ -99.377 -11.955 Td [(and)-255(may)-255(be)-255(signi\002cantly)-255(faster)74(,)-257(but)-255(the)-255(user)-255(is)-255(implicitly)-255(guarantee-)]TJ 0 -11.955 Td [(ing)-250(that)-250(ther)18(e)-250(ar)18(e)-250(neither)-250(orphan)-250(nor)-250(overlap)-250(indices.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 175.611 706.129 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(u)-25(n)-18(i)-32(t)-25(d)]TJ/F85 10.3811 Tf 26.159 0 Td [(=)]TJ/F52 9.9626 Tf 10.927 0 Td [(U)]TJ/F54 9.9626 Tf -75.726 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +/F59 9.9626 Tf -21.918 -15.594 Td [(lidx)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(choice)]TJ +/F62 9.9626 Tf 22.685 0 Td [(The)-377(optional)-376(ar)18(gument)]TJ/F67 9.9626 Tf 107.528 0 Td [(lidx)]TJ/F62 9.9626 Tf 24.674 0 Td [(is)-377(available)-376(for)-377(those)-377(cases)-377(in)-376(which)]TJ -132.969 -11.955 Td [(the)-361(user)-361(has)-361(alr)18(eady)-361(established)-361(a)-361(global-to-local)-361(mapping;)-416(if)-361(it)-361(is)]TJ 0 -11.955 Td [(speci\002ed,)-253(each)-253(index)-253(in)]TJ/F67 9.9626 Tf 105.128 0 Td [(vl\050i\051)]TJ/F62 9.9626 Tf 28.669 0 Td [(will)-253(be)-252(mapped)-253(to)-253(the)-252(corr)18(esponding)]TJ -133.797 -11.955 Td [(local)-317(index)]TJ/F67 9.9626 Tf 51.649 0 Td [(lidx\050i\051)]TJ/F62 9.9626 Tf 36.612 0 Td [(.)-510(When)-317(specifying)-316(the)-317(ar)18(gument)]TJ/F67 9.9626 Tf 148.638 0 Td [(lidx)]TJ/F62 9.9626 Tf 24.076 0 Td [(the)-317(user)]TJ -260.975 -11.956 Td [(would)-329(also)-330(likely)-329(employ)]TJ/F67 9.9626 Tf 117.394 0 Td [(lidx)]TJ/F62 9.9626 Tf 24.203 0 Td [(in)-329(calls)-330(to)]TJ/F67 9.9626 Tf 46.656 0 Td [(psb_cdins)]TJ/F62 9.9626 Tf 50.355 0 Td [(and)]TJ/F67 9.9626 Tf 20.148 0 Td [(local)]TJ/F62 9.9626 Tf 29.433 0 Td [(in)]TJ -288.189 -11.955 Td [(calls)-250(to)]TJ/F67 9.9626 Tf 33.095 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 49.564 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ/F62 9.9626 Tf 47.073 0 Td [(;)-250(see)-250(also)-250(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-250(2.3.1)]TJ 0 g 0 G -/F54 9.9626 Tf 33.753 0 Td [(speci\002es)-250(the)-250(update)-250(of)-250(overlap)-250(elements)-250(to)-250(be)-250(performed)-250(on)-250(exit:)]TJ + [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -171.008 -15.593 Td [(nl)]TJ 0 g 0 G -/F59 9.9626 Tf -3.865 -19.925 Td [(psb_none_)]TJ +/F62 9.9626 Tf 14.386 0 Td [(If)-411(this)-411(ar)18(gument)-411(is)-411(speci\002ed)-411(alone)-411(\050i.e.)-793(without)]TJ/F67 9.9626 Tf 223.432 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(\051)-411(the)-411(r)18(esult)-411(is)-411(a)]TJ -226.361 -11.956 Td [(generalized)-280(r)18(ow-block)-280(distribution)-280(in)-280(which)-281(each)-280(pr)18(ocess)]TJ/F60 9.9626 Tf 257.148 0 Td [(I)]TJ/F62 9.9626 Tf 6.642 0 Td [(gets)-280(as-)]TJ -263.79 -11.955 Td [(signed)-250(a)-250(consecutive)-250(chunk)-250(of)]TJ/F60 9.9626 Tf 135.186 0 Td [(N)]TJ/F60 7.5716 Tf 7.851 -1.808 Td [(I)]TJ/F93 10.3811 Tf 6.316 1.808 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-25(l)]TJ/F62 9.9626 Tf 11.472 0 Td [(global)-250(indices.)]TJ 0 g 0 G +/F59 9.9626 Tf -193.829 -15.593 Td [(repl)]TJ 0 g 0 G - 0 -15.941 Td [(psb_sum_)]TJ +/F62 9.9626 Tf 23.243 0 Td [(This)-418(ar)18(guments)-417(speci\002es)-418(to)-417(r)18(eplicate)-418(all)-417(indices)-418(on)-418(all)-417(pr)18(ocesses.)]TJ -1.325 -11.956 Td [(This)-366(is)-367(a)-366(special)-366(purpose)-366(data)-367(allocation)-366(that)-366(is)-366(useful)-367(in)-366(the)-366(con-)]TJ 0 -11.955 Td [(str)8(uction)-250(of)-250(some)-250(multilevel)-250(pr)18(econditioners.)]TJ 0 g 0 G + -34.371 -19.579 Td [(2.)]TJ 0 g 0 G - 0 -15.94 Td [(psb_avg_)]TJ + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ 0 g 0 G + 154.421 -29.888 Td [(72)]TJ 0 g 0 G - 0 -15.94 Td [(psb_square_root_)]TJ/F54 9.9626 Tf -4.982 -19.925 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td [(psb_avg_)]TJ/F54 9.9626 Tf -38.515 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +ET + +endstream +endobj +1464 0 obj +<< +/Length 2555 +>> +stream 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(diag)]TJ 0 g 0 G -/F54 9.9626 Tf 24.906 0 Td [(the)-250(diagonal)-250(scaling)-250(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(d)-18(i)-47(a)-47(g)]TJ/F85 10.3811 Tf 18.52 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)-289(=)]TJ/F54 9.9626 Tf 18.002 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)-25(o)-35(s)-25(c)-40(a)-25(l)-48(i)-32(n)-47(g)]TJ/F85 10.3811 Tf 41.384 0 Td [(\051)]TJ/F54 9.9626 Tf -135.182 -11.955 Td [(Speci\002ed)-293(as:)-395(a)-293(rank)-293(one)-293(array)-292(containing)-293(numbers)-293(of)-293(the)-292(type)-293(indicated)]TJ 0 -11.955 Td [(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ 0 g 0 G - [(.)]TJ +BT +/F62 9.9626 Tf 112.349 706.129 Td [(3.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(work)]TJ + [-500(Calling)-260(the)-260(r)18(outine)-260(with)]TJ/F67 9.9626 Tf 120.886 0 Td [(vg)]TJ/F62 9.9626 Tf 13.052 0 Td [(or)]TJ/F67 9.9626 Tf 11.965 0 Td [(parts)]TJ/F62 9.9626 Tf 28.742 0 Td [(implies)-260(that)-260(every)-260(pr)18(ocess)-260(will)-260(scan)]TJ -162.192 -11.955 Td [(the)-250(entir)18(e)-250(index)-250(space)-250(to)-250(\002gur)18(e)-250(out)-250(the)-250(local)-250(indices.)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(a)-250(work)-250(array)111(.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-344(as:)-498(a)-344(rank)-343(one)-344(array)-344(of)-344(the)-344(same)-344(type)-344(of)]TJ/F52 9.9626 Tf 229.679 0 Td [(x)]TJ/F54 9.9626 Tf 8.631 0 Td [(with)-344(the)-344(T)74(ARGET)]TJ -238.31 -11.955 Td [(attribute.)]TJ + -12.453 -19.926 Td [(4.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(On)-250(Return)]TJ + [-500(Overlapped)-250(indices)-250(ar)18(e)-250(possible)-250(with)-250(both)]TJ/F67 9.9626 Tf 201.093 0 Td [(parts)]TJ/F62 9.9626 Tf 28.643 0 Td [(and)]TJ/F67 9.9626 Tf 19.357 0 Td [(vl)]TJ/F62 9.9626 Tf 12.951 0 Td [(invocations.)]TJ 0 g 0 G + -262.044 -19.925 Td [(5.)]TJ 0 g 0 G - 0 -19.925 Td [(y)]TJ + [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F67 9.9626 Tf 175.161 0 Td [(vl)]TJ/F62 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F67 9.9626 Tf 86.235 0 Td [(globalcheck=.true.)]TJ/F62 9.9626 Tf 94.146 0 Td [(,)]TJ -355.443 -11.955 Td [(it)-280(will)-281(perform)-280(a)-280(scan)-281(of)-280(the)-281(index)-280(space)-280(to)-281(sear)18(ch)-280(for)-280(overlap)-281(or)-280(orphan)]TJ 0 -11.955 Td [(indices.)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ + -12.453 -19.925 Td [(6.)]TJ 0 g 0 G - [(.)]TJ + [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F67 9.9626 Tf 175.161 0 Td [(vl)]TJ/F62 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F67 9.9626 Tf 86.235 0 Td [(globalcheck=.false.)]TJ/F62 9.9626 Tf 99.377 0 Td [(,)]TJ -360.674 -11.956 Td [(no)-338(index)-337(space)-338(scan)-337(will)-338(take)-337(place.)-573(Thus)-337(it)-338(is)-338(the)-337(r)18(esponsibility)-338(of)-337(the)]TJ 0 -11.955 Td [(user)-328(to)-328(make)-328(sur)18(e)-328(that)-328(the)-328(indices)-328(speci\002ed)-328(in)]TJ/F67 9.9626 Tf 209.973 0 Td [(vl)]TJ/F62 9.9626 Tf 13.729 0 Td [(have)-328(neither)-328(orphans)]TJ -223.702 -11.955 Td [(nor)-250(overlaps;)-250(if)-250(this)-250(assumption)-250(fails,)-250(r)18(esults)-250(will)-250(be)-250(unpr)18(edictable.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ + -12.453 -19.925 Td [(7.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-500(Orphan)-417(and)-416(overlap)-417(indices)-416(ar)18(e)-417(impossible)-416(by)-417(constr)8(uction)-417(when)-416(the)]TJ 12.453 -11.955 Td [(subr)18(outine)-250(is)-250(invoked)-250(with)]TJ/F67 9.9626 Tf 121.164 0 Td [(nl)]TJ/F62 9.9626 Tf 12.951 0 Td [(\050alone\051,)-250(or)]TJ/F67 9.9626 Tf 47.372 0 Td [(vg)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)]TJ 0 g 0 G - 141.968 -73.723 Td [(52)]TJ + -49.98 -452.304 Td [(73)]TJ 0 g 0 G ET endstream endobj -1213 0 obj +1475 0 obj << -/Length 7400 +/Length 7006 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.14)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.2)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gemlt)-250(\227)-250(Entrywise)-250(Product)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 299.677 0 Td [(x)]TJ/F54 9.9626 Tf 7.697 0 Td [(and)]TJ/F52 9.9626 Tf 19.481 0 Td [(y)]TJ -187.918 -21.112 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.336 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F52 9.9626 Tf 4.274 0 Td [(y)]TJ/F85 10.3811 Tf 5.231 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ/F59 9.9626 Tf -187.465 -21.111 Td [(psb_gemlt\050x,)-525(y,)-525(desc_a,)-525(info\051)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(routine)]TJ 0 g 0 G 0 g 0 G +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdins\050nz,)-525(ia,)-525(ja,)-525(desc_a,)-525(info)-525([,ila,jla]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdins\050nz,ja,desc,info[,jla,mask,lidx]\051)]TJ/F62 9.9626 Tf 14.944 -20.366 Td [(This)-336(subr)18(outine)-335(examines)-336(the)-336(edges)-335(of)-336(the)-336(graph)-335(associated)-336(with)-335(the)-336(dis-)]TJ -14.944 -11.955 Td [(cr)18(etization)-260(mesh)-261(\050and)-260(isomorphic)-260(to)-261(the)-260(sparsity)-260(pattern)-261(of)-260(a)-260(linear)-261(system)-260(co-)]TJ 0 -11.955 Td [(ef)18(\002cient)-238(matrix\051,)-241(storing)-238(them)-239(as)-238(necessary)-238(into)-239(the)-238(communication)-238(descriptor)74(.)]TJ 0 -11.955 Td [(In)-259(the)-260(\002rst)-259(form)-260(the)-259(edges)-259(ar)18(e)-260(speci\002ed)-259(as)-260(pairs)-259(of)-260(indices)]TJ/F60 9.9626 Tf 255.974 0 Td [(i)-47(a)]TJ/F93 10.3811 Tf 7.91 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(,)]TJ/F60 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F93 10.3811 Tf 7.841 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(;)-264(the)-260(start)1(-)]TJ -299.233 -11.956 Td [(ing)-299(index)]TJ/F60 9.9626 Tf 44.948 0 Td [(i)-47(a)]TJ/F93 10.3811 Tf 7.91 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 7.13 0 Td [(should)-299(belong)-299(to)-299(the)-299(curr)18(ent)-299(pr)18(ocess.)-458(In)-299(the)-299(second)-299(form)-299(only)]TJ -67.28 -11.955 Td [(the)-250(r)18(emote)-250(indices)]TJ/F60 9.9626 Tf 83.65 0 Td [(j)-40(a)]TJ/F93 10.3811 Tf 7.841 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 6.64 0 Td [(ar)18(e)-250(speci\002ed.)]TJ 0 g 0 G -ET -q -1 0 0 1 183.035 630.896 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S -Q -BT -/F52 9.9626 Tf 189.137 622.328 Td [(d)-25(o)-35(t)]TJ/F54 9.9626 Tf 13.444 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 91.759 0 Td [(Function)]TJ -ET -q -1 0 0 1 183.035 618.542 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S -Q -BT -/F54 9.9626 Tf 189.012 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 326.246 610.173 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.235 609.974 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 326.246 598.218 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.235 598.019 Td [(gemlt)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 326.246 586.263 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.235 586.064 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 326.246 574.308 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.235 574.109 Td [(gemlt)]TJ -ET -q -1 0 0 1 183.035 570.323 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S -Q +/F59 9.9626 Tf -105.423 -20.366 Td [(T)90(ype:)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(14:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.304 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ + 0 -19.305 Td [(nz)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(points)-250(being)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -24.907 -19.305 Td [(ia)]TJ 0 g 0 G +/F62 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G - 0 -19.603 Td [(x)]TJ +/F59 9.9626 Tf -207.221 -19.304 Td [(ja)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 421.777 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 421.578 Td [(T)]TJ -ET -q -1 0 0 1 289.642 421.777 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 421.578 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 421.777 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 421.578 Td [(type)]TJ +/F62 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F59 9.9626 Tf -207.221 -19.304 Td [(mask)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 29.33 0 Td [(Mask)-247(entries)-248(in)]TJ/F67 9.9626 Tf 69.91 0 Td [(ja)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)-248(they)-247(ar)18(e)-248(inserted)-247(only)-248(when)-247(the)-247(corr)18(esponding)]TJ/F67 9.9626 Tf 213.089 0 Td [(mask)]TJ/F62 9.9626 Tf -297.883 -11.956 Td [(entries)-250(ar)18(e)]TJ/F67 9.9626 Tf 48.139 0 Td [(.true.)]TJ/F62 9.9626 Tf -48.139 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 164.297 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(,)-250(default)]TJ/F67 9.9626 Tf 38.784 0 Td [(.true.)]TJ/F62 9.9626 Tf 31.382 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(y)]TJ +/F59 9.9626 Tf -269.706 -19.305 Td [(lidx)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 173.89 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 342.199 Td [(T)]TJ -ET -q -1 0 0 1 289.642 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 342.199 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 342.199 Td [(type)]TJ +/F62 9.9626 Tf 22.685 0 Td [(User)-250(de\002ned)-250(local)-250(indices)-250(for)]TJ/F67 9.9626 Tf 131.117 0 Td [(ja)]TJ/F62 9.9626 Tf 10.46 0 Td [(.)]TJ -139.355 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F59 9.9626 Tf -207.221 -20.366 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ + 0 -19.305 Td [(desc)]TJ ET q -1 0 0 1 120.408 310.84 cm +1 0 0 1 171.218 168.346 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 310.641 Td [(a)]TJ +/F59 9.9626 Tf 174.207 168.146 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(updated)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 263.02 cm +1 0 0 1 360.068 120.525 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 262.82 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 120.326 Td [(desc)]TJ ET q -1 0 0 1 298.05 263.02 cm +1 0 0 1 384.755 120.525 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 262.82 Td [(type)]TJ +/F67 9.9626 Tf 387.893 120.326 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -19.602 Td [(On)-250(Return)]TJ + -91.236 -29.888 Td [(74)]TJ +0 g 0 G +ET + +endstream +endobj +1480 0 obj +<< +/Length 3007 +>> +stream 0 g 0 G 0 g 0 G - 0 -19.603 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 175.993 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 175.794 Td [(T)]TJ -ET -q -1 0 0 1 289.642 175.993 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 175.794 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 175.993 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 317.468 175.794 Td [(type)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(14)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - [(.)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(ila)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(info)]TJ +/F62 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ +/F59 9.9626 Tf -207.221 -19.925 Td [(jla)]TJ 0 g 0 G -/F54 9.9626 Tf 114.88 -29.888 Td [(53)]TJ +/F62 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ/F59 11.9552 Tf -207.221 -21.918 Td [(Notes)]TJ 0 g 0 G -ET - -endstream -endobj -1218 0 obj -<< -/Length 314 ->> -stream +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G + [-500(This)-250(r)18(outine)-250(may)-250(only)-250(be)-250(called)-250(if)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state;)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + 0 -19.925 Td [(2.)]TJ 0 g 0 G - 141.968 -603.736 Td [(54)]TJ + [-500(This)-370(r)18(outine)-370(automatically)-370(ignor)18(es)-370(edges)-370(that)-370(do)-370(not)-370(insist)-370(on)-370(the)-370(cur)18(-)]TJ 12.453 -11.955 Td [(r)18(ent)-288(pr)18(ocess,)-298(i.e.)-424(edges)-288(for)-288(which)-288(neither)-288(the)-288(starting)-288(nor)-288(the)-288(end)-288(vertex)]TJ 0 -11.955 Td [(belong)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ +0 g 0 G + -12.453 -19.926 Td [(3.)]TJ +0 g 0 G + [-500(The)-322(second)-323(form)-322(of)-323(this)-322(r)18(outine)-323(will)-322(be)-322(useful)-323(when)-322(dealing)-323(with)-322(user)18(-)]TJ 12.453 -11.955 Td [(speci\002ed)-250(index)-250(mappings;)-250(see)-250(also)]TJ +0 0 1 rg 0 0 1 RG + [-250(2.3.1)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G + 141.968 -314.819 Td [(75)]TJ 0 g 0 G ET endstream endobj -1229 0 obj +1489 0 obj << -/Length 7318 +/Length 5969 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.15)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.3)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(division)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 300.604 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf -188.347 -21.112 Td [(/)]TJ/F83 10.3811 Tf 9.054 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.274 0 Td [(/)]TJ/F52 9.9626 Tf 6.286 0 Td [(y)]TJ/F85 10.3811 Tf 5.231 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ/F59 9.9626 Tf -186.967 -21.111 Td [(psb_gediv\050x,)-525(y,)-525(desc_a,)-525(info,)-525([flag\051)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(routine)]TJ 0 g 0 G 0 g 0 G +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdasb\050desc_a,)-525(info)-525([,)-525(mold]\051)]TJ 0 g 0 G +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 183.199 630.896 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q -BT -/F54 9.9626 Tf 189.301 622.328 Td [(/)-13(,)]TJ/F52 9.9626 Tf 11.437 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.105 0 Td [(y)]TJ/F51 9.9626 Tf 99.043 0 Td [(Function)]TJ -ET -q -1 0 0 1 183.199 618.542 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q -BT -/F54 9.9626 Tf 189.177 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 326.41 610.173 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 609.974 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 326.41 598.218 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 598.019 Td [(gediv)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 326.41 586.263 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 586.064 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 326.41 574.308 cm +1 0 0 1 171.218 625.596 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 329.399 574.109 Td [(gediv)]TJ -ET -q -1 0 0 1 183.199 570.323 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(15:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.603 Td [(x)]TJ +/F59 9.9626 Tf 174.207 625.397 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 280.646 421.777 cm +1 0 0 1 360.068 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 283.785 421.578 Td [(T)]TJ +/F67 9.9626 Tf 363.206 577.576 Td [(desc)]TJ ET q -1 0 0 1 289.642 421.777 cm +1 0 0 1 384.755 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 421.578 Td [(vect)]TJ +/F67 9.9626 Tf 387.893 577.576 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -19.925 Td [(mold)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(index)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F67 9.9626 Tf 221.926 0 Td [(psb)]TJ ET q -1 0 0 1 314.33 421.777 cm +1 0 0 1 413.855 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 421.578 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 173.89 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ +/F67 9.9626 Tf 416.994 509.83 Td [(T)]TJ ET q -1 0 0 1 280.646 342.398 cm +1 0 0 1 422.851 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 283.785 342.199 Td [(T)]TJ +/F67 9.9626 Tf 425.99 509.83 Td [(base)]TJ ET q -1 0 0 1 289.642 342.398 cm +1 0 0 1 447.539 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 342.199 Td [(vect)]TJ +/F67 9.9626 Tf 450.677 509.83 Td [(vect)]TJ ET q -1 0 0 1 314.33 342.398 cm +1 0 0 1 472.226 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 342.199 Td [(type)]TJ +/F67 9.9626 Tf 475.364 509.83 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F59 9.9626 Tf -345.58 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 120.408 310.84 cm +1 0 0 1 171.218 468.186 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 310.641 Td [(a)]TJ +/F59 9.9626 Tf 174.207 467.987 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 263.02 cm +1 0 0 1 360.068 420.366 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 262.82 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 420.166 Td [(desc)]TJ ET q -1 0 0 1 298.05 263.02 cm +1 0 0 1 384.755 420.366 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 262.82 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -19.602 Td [(\003ag)]TJ +/F67 9.9626 Tf 387.893 420.166 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(check)-280(if)-280(any)-280(of)-280(the)]TJ/F52 9.9626 Tf 84.137 0 Td [(y)]TJ/F85 10.3811 Tf 5.23 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)-343(=)]TJ/F54 9.9626 Tf 19.108 0 Td [(0,)-287(and)-280(in)-280(case)-280(r)18(eturns)-280(err)18(or)-280(halting)-280(the)-280(compu-)]TJ -112.449 -11.956 Td [(tation.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(flag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.true.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -157.04 -19.603 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.917 Td [(Notes)]TJ 0 g 0 G - 0 -19.603 Td [(x)]TJ +/F62 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.849 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -151.111 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(assembled)-250(state.)]TJ -12.453 -19.925 Td [(This)-274(call)-275(will)-274(set)-275(up)-274(all)-275(the)-274(necessary)-275(information)-274(for)-275(the)-274(halo)-275(data)-274(exchanges.)]TJ 0 -11.955 Td [(In)-337(doing)-337(so,)-358(the)-337(library)-337(will)-337(need)-336(to)-337(identify)-337(the)-337(set)-337(of)-337(pr)18(ocesse)1(s)-337(owning)-337(the)]TJ 0 -11.955 Td [(halo)-381(indices)-381(thr)18(ough)-380(the)-381(use)-381(of)-381(the)]TJ/F67 9.9626 Tf 163.76 0 Td [(desc%fnd_owner\050\051)]TJ/F62 9.9626 Tf 87.479 0 Td [(method;)-446(the)-381(owning)]TJ -251.239 -11.956 Td [(pr)18(ocesses)-273(ar)18(e)-273(the)-273(topological)-272(neighbours)-273(of)-273(the)-273(calling)-273(pr)18(ocess.)-379(If)-272(the)-273(user)-273(has)]TJ 0 -11.955 Td [(some)-248(backgr)18(ound)-248(information)-248(on)-248(the)-248(pr)18(ocesses)-248(that)-248(ar)18(e)-248(neighbours)-248(of)-248(the)-248(cur)18(-)]TJ 0 -11.955 Td [(r)18(ent)-274(one,)-281(it)-274(is)-275(possible)-274(to)-274(specify)-275(explicitly)-274(the)-274(list)-275(of)-274(adjacent)-274(pr)18(ocesses)-275(with)-274(a)]TJ 0 -11.955 Td [(call)-327(to)]TJ/F67 9.9626 Tf 30.401 0 Td [(desc%set_p_adjcncy\050list\051)]TJ/F62 9.9626 Tf 125.529 0 Td [(;)-365(this)-327(will)-327(speed)-327(u)1(p)-327(the)-327(subsequent)-327(call)-327(to)]TJ/F67 9.9626 Tf -155.93 -11.955 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)]TJ 0 g 0 G - 85.819 -29.888 Td [(55)]TJ + 119.801 -116.528 Td [(76)]TJ 0 g 0 G ET endstream endobj -1122 0 obj +1377 0 obj << /Type /ObjStm /N 100 -/First 989 -/Length 12429 ->> -stream -252 0 1120 57 1116 114 1124 234 1126 352 1127 411 1128 470 1129 529 1130 588 1131 647 -1132 706 1123 765 1137 872 1133 1029 1134 1173 1135 1319 1139 1466 256 1524 1140 1581 1136 1639 -1144 1772 1141 1920 1142 2065 1146 2212 260 2271 1147 2329 1143 2387 1151 2507 1148 2655 1149 2800 -1153 2947 264 3005 1155 3062 1150 3119 1162 3253 1156 3419 1157 3566 1158 3711 1159 3853 1164 3999 -268 4058 1165 4116 1166 4174 1167 4233 1168 4292 1161 4351 1177 4498 1160 4700 1169 4847 1170 4991 -1171 5137 1172 5284 1173 5435 1174 5586 1175 5737 1179 5883 1176 5941 1184 6075 1181 6214 1186 6359 -272 6418 1187 6476 1183 6535 1195 6682 1182 6875 1188 7023 1189 7167 1190 7314 1191 7461 1192 7604 -1193 7751 1197 7896 1194 7954 1201 8088 1198 8236 1199 8383 1203 8530 1200 8589 1212 8709 1204 8902 -1205 9046 1206 9191 1207 9335 1208 9480 1209 9627 1210 9771 1214 9918 276 9976 1215 10033 1211 10091 -1217 10224 1219 10342 1216 10401 1228 10482 1220 10657 1221 10801 1222 10946 1223 11090 1224 11235 1230 11382 -% 252 0 obj +/First 984 +/Length 11356 +>> +stream +1367 0 1368 59 1364 118 1379 254 1381 372 1378 430 1387 498 1383 655 1384 799 1385 944 +1389 1091 296 1150 1390 1208 1386 1267 1396 1400 1391 1557 1393 1703 1394 1849 1398 1994 1399 2052 +1400 2110 1401 2168 1395 2226 1404 2333 1406 2451 1403 2510 1408 2578 1411 2696 1412 2823 1413 2866 +1414 3073 1415 3311 1416 3587 1410 3823 1402 3881 1407 3939 1423 4036 1419 4193 1420 4334 1421 4481 +1425 4628 300 4687 1426 4745 1422 4804 1428 4937 1430 5055 1427 5113 1435 5207 1432 5346 1437 5493 +304 5552 1438 5610 1434 5669 1442 5802 1433 5959 1439 6102 1440 6245 1444 6392 1441 6450 1446 6557 +1448 6675 308 6734 312 6792 1445 6849 1451 6982 1449 7121 1453 7268 1454 7326 1450 7384 1457 7504 +1455 7643 1459 7801 1461 7860 1456 7919 1463 8066 1465 8184 1466 8242 1467 8300 1468 8358 1469 8416 +1470 8474 1462 8532 1474 8613 1472 8752 1476 8897 316 8956 1473 9014 1479 9134 1477 9273 1481 9431 +1482 9489 1483 9547 1484 9605 1478 9663 1488 9757 1485 9905 1486 10050 1490 10196 320 10255 1491 10313 +% 1367 0 obj << -/D [1117 0 R /XYZ 99.895 716.092 null] +/D [1365 0 R /XYZ 149.705 753.953 null] >> -% 1120 0 obj +% 1368 0 obj << -/D [1117 0 R /XYZ 99.895 504.73 null] +/D [1365 0 R /XYZ 150.705 326.444 null] >> -% 1116 0 obj +% 1364 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R >> +/XObject << /Im4 1362 0 R >> /ProcSet [ /PDF /Text ] >> -% 1124 0 obj +% 1379 0 obj << /Type /Page -/Contents 1125 0 R -/Resources 1123 0 R +/Contents 1380 0 R +/Resources 1378 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R ->> -% 1126 0 obj -<< -/D [1124 0 R /XYZ 149.705 753.953 null] ->> -% 1127 0 obj -<< -/D [1124 0 R /XYZ 150.705 564.444 null] +/Parent 1382 0 R >> -% 1128 0 obj -<< -/D [1124 0 R /XYZ 150.705 504.067 null] ->> -% 1129 0 obj -<< -/D [1124 0 R /XYZ 175.611 506.876 null] ->> -% 1130 0 obj -<< -/D [1124 0 R /XYZ 175.611 494.921 null] ->> -% 1131 0 obj -<< -/D [1124 0 R /XYZ 175.611 482.966 null] ->> -% 1132 0 obj +% 1381 0 obj << -/D [1124 0 R /XYZ 175.611 471.011 null] +/D [1379 0 R /XYZ 98.895 753.953 null] >> -% 1123 0 obj +% 1378 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1137 0 obj +% 1387 0 obj << /Type /Page -/Contents 1138 0 R -/Resources 1136 0 R +/Contents 1388 0 R +/Resources 1386 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1133 0 R 1134 0 R 1135 0 R ] +/Parent 1382 0 R +/Annots [ 1383 0 R 1384 0 R 1385 0 R ] >> -% 1133 0 obj +% 1383 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 416.057 444.603 428.117] +/Rect [419.358 345.485 495.412 357.545] /A << /S /GoTo /D (vdata) >> >> -% 1134 0 obj +% 1384 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 404.102 333.626 416.161] -/A << /S /GoTo /D (table.9) >> +/Rect [377.462 333.53 389.417 345.59] +/A << /S /GoTo /D (table.18) >> >> -% 1135 0 obj +% 1385 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 336.356 323.106 348.415] +/Rect [342.753 266.146 409.811 278.205] /A << /S /GoTo /D (descdata) >> >> -% 1139 0 obj +% 1389 0 obj << -/D [1137 0 R /XYZ 98.895 753.953 null] +/D [1387 0 R /XYZ 149.705 753.953 null] >> -% 256 0 obj +% 296 0 obj << -/D [1137 0 R /XYZ 99.895 716.092 null] +/D [1387 0 R /XYZ 150.705 716.092 null] >> -% 1140 0 obj +% 1390 0 obj << -/D [1137 0 R /XYZ 99.895 560.219 null] +/D [1387 0 R /XYZ 150.705 510.975 null] >> -% 1136 0 obj +% 1386 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F67 913 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1144 0 obj +% 1396 0 obj << /Type /Page -/Contents 1145 0 R -/Resources 1143 0 R +/Contents 1397 0 R +/Resources 1395 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1141 0 R 1142 0 R ] +/Parent 1382 0 R +/Annots [ 1391 0 R 1393 0 R 1394 0 R ] >> -% 1141 0 obj +% 1391 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 340.341 384.376 352.401] -/A << /S /GoTo /D (spdata) >> +/Rect [202.52 554.876 214.475 566.936] +/A << /S /GoTo /D (table.18) >> >> -% 1142 0 obj +% 1393 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 272.595 373.916 284.655] -/A << /S /GoTo /D (descdata) >> ->> -% 1146 0 obj -<< -/D [1144 0 R /XYZ 149.705 753.953 null] ->> -% 260 0 obj -<< -/D [1144 0 R /XYZ 150.705 716.092 null] ->> -% 1147 0 obj -<< -/D [1144 0 R /XYZ 150.705 517.78 null] ->> -% 1143 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1151 0 obj -<< -/Type /Page -/Contents 1152 0 R -/Resources 1150 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1148 0 R 1149 0 R ] +/Rect [407.408 325.46 414.381 339.127] +/A << /S /GoTo /D (figure.4) >> >> -% 1148 0 obj +% 1394 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 340.341 333.567 352.401] -/A << /S /GoTo /D (spdata) >> +/Rect [309.226 301.825 316.2 313.885] +/A << /S /GoTo /D (figure.3) >> >> -% 1149 0 obj +% 1398 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 272.595 323.106 284.655] -/A << /S /GoTo /D (descdata) >> +/D [1396 0 R /XYZ 98.895 753.953 null] >> -% 1153 0 obj +% 1399 0 obj << -/D [1151 0 R /XYZ 98.895 753.953 null] +/D [1396 0 R /XYZ 99.895 464.818 null] >> -% 264 0 obj +% 1400 0 obj << -/D [1151 0 R /XYZ 99.895 716.092 null] +/D [1396 0 R /XYZ 99.895 430.343 null] >> -% 1155 0 obj +% 1401 0 obj << -/D [1151 0 R /XYZ 99.895 517.78 null] +/D [1396 0 R /XYZ 99.895 386.508 null] >> -% 1150 0 obj +% 1395 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F96 1154 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1162 0 obj +% 1404 0 obj << /Type /Page -/Contents 1163 0 R -/Resources 1161 0 R +/Contents 1405 0 R +/Resources 1403 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1156 0 R 1157 0 R 1158 0 R 1159 0 R ] +/Parent 1382 0 R >> -% 1156 0 obj +% 1406 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 277.323 440.924 289.383] -/A << /S /GoTo /D (table.12) >> +/D [1404 0 R /XYZ 149.705 753.953 null] >> -% 1157 0 obj +% 1403 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 208.877 384.376 220.936] -/A << /S /GoTo /D (spdata) >> +/Font << /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1158 0 obj +% 1408 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 140.43 495.412 152.49] -/A << /S /GoTo /D (vdata) >> +/Type /Page +/Contents 1409 0 R +/Resources 1407 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1382 0 R >> -% 1159 0 obj +% 1411 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [379.43 128.475 391.385 140.535] -/A << /S /GoTo /D (table.12) >> +/Producer (GPL Ghostscript 9.22) +/CreationDate (D:20180323100658Z00'00') +/ModDate (D:20180323100658Z00'00') >> -% 1164 0 obj +% 1412 0 obj << -/D [1162 0 R /XYZ 149.705 753.953 null] +/Type /ExtGState +/OPM 1 >> -% 268 0 obj +% 1413 0 obj << -/D [1162 0 R /XYZ 150.705 716.092 null] +/BaseFont /XYUGDR+Times-Roman +/FontDescriptor 1415 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 500 500 500 500 500] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1165 0 obj +% 1414 0 obj << -/D [1162 0 R /XYZ 290.728 674.17 null] +/BaseFont /XISTAL+Times-Bold +/FontDescriptor 1416 0 R +/Type /Font +/FirstChar 48 +/LastChar 80 +/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1166 0 obj +% 1415 0 obj << -/D [1162 0 R /XYZ 287.931 654.041 null] +/Type /FontDescriptor +/FontName /XYUGDR+Times-Roman +/FontBBox [ 0 -14 476 688] +/Flags 65568 +/Ascent 688 +/CapHeight 688 +/Descent -14 +/ItalicAngle 0 +/StemV 71 +/MissingWidth 250 +/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) +/FontFile3 1417 0 R >> -% 1167 0 obj +% 1416 0 obj +<< +/Type /FontDescriptor +/FontName /XISTAL+Times-Bold +/FontBBox [ 0 -13 600 688] +/Flags 65568 +/Ascent 688 +/CapHeight 676 +/Descent -13 +/ItalicAngle 0 +/StemV 90 +/MissingWidth 250 +/CharSet (/P/one/zero) +/FontFile3 1418 0 R +>> +% 1410 0 obj << -/D [1162 0 R /XYZ 287.193 633.911 null] +/D [1408 0 R /XYZ 98.895 753.953 null] >> -% 1168 0 obj +% 1402 0 obj << -/D [1162 0 R /XYZ 150.705 447.252 null] +/D [1408 0 R /XYZ 99.895 282.918 null] >> -% 1161 0 obj +% 1407 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F62 667 0 R >> +/XObject << /Im5 1392 0 R >> /ProcSet [ /PDF /Text ] >> -% 1177 0 obj +% 1423 0 obj << /Type /Page -/Contents 1178 0 R -/Resources 1176 0 R +/Contents 1424 0 R +/Resources 1422 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1160 0 R 1169 0 R 1170 0 R 1171 0 R 1172 0 R 1173 0 R 1174 0 R 1175 0 R ] +/Parent 1382 0 R +/Annots [ 1419 0 R 1420 0 R 1421 0 R ] >> -% 1160 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 654.503 390.114 666.562] -/A << /S /GoTo /D (table.12) >> ->> -% 1169 0 obj +% 1419 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 588.085 444.603 600.145] +/Rect [419.358 298.8 495.412 310.86] /A << /S /GoTo /D (vdata) >> >> -% 1170 0 obj +% 1420 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.746 576.13 340.701 588.189] -/A << /S /GoTo /D (table.12) >> +/Rect [255.331 289.495 267.287 298.905] +/A << /S /GoTo /D (table.19) >> >> -% 1171 0 obj +% 1421 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 497.757 323.106 509.817] +/Rect [342.753 218.115 409.811 230.175] /A << /S /GoTo /D (descdata) >> >> -% 1172 0 obj +% 1425 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [338.139 460.563 345.113 472.623] -/A << /S /GoTo /D (equation.4.1) >> +/D [1423 0 R /XYZ 149.705 753.953 null] >> -% 1173 0 obj +% 300 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [336.486 445.951 343.459 458.011] -/A << /S /GoTo /D (equation.4.2) >> +/D [1423 0 R /XYZ 150.705 716.092 null] >> -% 1174 0 obj +% 1426 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [337.034 431.339 344.007 443.399] -/A << /S /GoTo /D (equation.4.3) >> +/D [1423 0 R /XYZ 150.705 460.417 null] >> -% 1175 0 obj +% 1422 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [202.52 189.579 214.475 201.639] -/A << /S /GoTo /D (table.12) >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1179 0 obj +% 1428 0 obj +<< +/Type /Page +/Contents 1429 0 R +/Resources 1427 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1431 0 R +>> +% 1430 0 obj << -/D [1177 0 R /XYZ 98.895 753.953 null] +/D [1428 0 R /XYZ 98.895 753.953 null] >> -% 1176 0 obj +% 1427 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F60 1027 0 R /F52 585 0 R /F59 812 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1184 0 obj +% 1435 0 obj << /Type /Page -/Contents 1185 0 R -/Resources 1183 0 R +/Contents 1436 0 R +/Resources 1434 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1181 0 R ] +/Parent 1431 0 R +/Annots [ 1432 0 R ] >> -% 1181 0 obj +% 1432 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 116.52 440.924 128.58] -/A << /S /GoTo /D (table.13) >> +/Rect [342.753 211.064 409.811 223.124] +/A << /S /GoTo /D (descdata) >> >> -% 1186 0 obj +% 1437 0 obj << -/D [1184 0 R /XYZ 149.705 753.953 null] +/D [1435 0 R /XYZ 149.705 753.953 null] >> -% 272 0 obj +% 304 0 obj << -/D [1184 0 R /XYZ 150.705 716.092 null] +/D [1435 0 R /XYZ 150.705 716.092 null] >> -% 1187 0 obj +% 1438 0 obj << -/D [1184 0 R /XYZ 150.705 268.704 null] +/D [1435 0 R /XYZ 150.705 449.977 null] >> -% 1183 0 obj +% 1434 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1195 0 obj +% 1442 0 obj << /Type /Page -/Contents 1196 0 R -/Resources 1194 0 R +/Contents 1443 0 R +/Resources 1441 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1182 0 R 1188 0 R 1189 0 R 1190 0 R 1191 0 R 1192 0 R 1193 0 R ] +/Parent 1431 0 R +/Annots [ 1433 0 R 1439 0 R 1440 0 R ] >> -% 1182 0 obj +% 1433 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [305.144 654.503 312.117 666.562] -/A << /S /GoTo /D (section.3) >> +/Rect [287.51 642.547 363.564 654.607] +/A << /S /GoTo /D (vdata) >> >> -% 1188 0 obj +% 1439 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 586.032 444.603 598.092] +/Rect [123.806 542.921 199.86 554.981] /A << /S /GoTo /D (vdata) >> >> -% 1189 0 obj +% 1440 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.621 574.077 340.576 586.136] -/A << /S /GoTo /D (table.13) >> +/Rect [421.516 542.921 433.471 554.981] +/A << /S /GoTo /D (table.20) >> >> -% 1190 0 obj +% 1444 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 493.651 390.114 505.711] -/A << /S /GoTo /D (table.13) >> +/D [1442 0 R /XYZ 98.895 753.953 null] >> -% 1191 0 obj +% 1441 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 425.181 444.603 437.24] -/A << /S /GoTo /D (vdata) >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1192 0 obj +% 1446 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.746 413.225 340.701 425.285] -/A << /S /GoTo /D (table.13) >> +/Type /Page +/Contents 1447 0 R +/Resources 1445 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1431 0 R >> -% 1193 0 obj +% 1448 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 332.8 323.106 344.859] -/A << /S /GoTo /D (descdata) >> +/D [1446 0 R /XYZ 149.705 753.953 null] >> -% 1197 0 obj +% 308 0 obj +<< +/D [1446 0 R /XYZ 150.705 716.092 null] +>> +% 312 0 obj << -/D [1195 0 R /XYZ 98.895 753.953 null] +/D [1446 0 R /XYZ 150.705 691.48 null] >> -% 1194 0 obj +% 1445 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R /F60 1027 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1201 0 obj +% 1451 0 obj << /Type /Page -/Contents 1202 0 R -/Resources 1200 0 R +/Contents 1452 0 R +/Resources 1450 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1198 0 R 1199 0 R ] +/Parent 1431 0 R +/Annots [ 1449 0 R ] >> -% 1198 0 obj +% 1449 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [211.646 410.079 223.601 419.489] -/A << /S /GoTo /D (table.13) >> +/Rect [291.943 324.687 359.001 336.746] +/A << /S /GoTo /D (descdata) >> >> -% 1199 0 obj +% 1453 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [253.329 228.102 265.284 240.161] -/A << /S /GoTo /D (table.13) >> +/D [1451 0 R /XYZ 98.895 753.953 null] >> -% 1203 0 obj +% 1454 0 obj << -/D [1201 0 R /XYZ 149.705 753.953 null] +/D [1451 0 R /XYZ 99.895 234.157 null] >> -% 1200 0 obj +% 1450 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F52 585 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1212 0 obj +% 1457 0 obj << /Type /Page -/Contents 1213 0 R -/Resources 1211 0 R +/Contents 1458 0 R +/Resources 1456 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1204 0 R 1205 0 R 1206 0 R 1207 0 R 1208 0 R 1209 0 R 1210 0 R ] ->> -% 1204 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 417.772 339.385 429.832] -/A << /S /GoTo /D (vdata) >> ->> -% 1205 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 405.817 231.53 417.877] -/A << /S /GoTo /D (table.2) >> +/Parent 1431 0 R +/Annots [ 1455 0 R ] >> -% 1206 0 obj +% 1455 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 338.393 339.385 350.453] -/A << /S /GoTo /D (vdata) >> +/Rect [405.298 215.702 427.216 227.166] +/A << /S /GoTo /D (subsubsection.2.3.1) >> >> -% 1207 0 obj +% 1459 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 326.438 231.53 338.498] -/A << /S /GoTo /D (table.2) >> +/D [1457 0 R /XYZ 149.705 753.953 null] >> -% 1208 0 obj +% 1461 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 259.015 323.106 271.074] -/A << /S /GoTo /D (descdata) >> +/D [1457 0 R /XYZ 150.705 133.283 null] >> -% 1209 0 obj +% 1456 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 171.988 339.385 184.048] -/A << /S /GoTo /D (vdata) >> +/Font << /F67 913 0 R /F62 667 0 R /F59 665 0 R /F91 914 0 R /F60 666 0 R /F93 915 0 R /F69 1460 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1210 0 obj +% 1463 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [242.868 160.033 254.823 172.093] -/A << /S /GoTo /D (table.14) >> +/Type /Page +/Contents 1464 0 R +/Resources 1462 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1471 0 R >> -% 1214 0 obj +% 1465 0 obj << -/D [1212 0 R /XYZ 98.895 753.953 null] +/D [1463 0 R /XYZ 98.895 753.953 null] >> -% 276 0 obj +% 1466 0 obj << -/D [1212 0 R /XYZ 99.895 716.092 null] +/D [1463 0 R /XYZ 99.895 716.092 null] >> -% 1215 0 obj +% 1467 0 obj << -/D [1212 0 R /XYZ 99.895 560.161 null] +/D [1463 0 R /XYZ 99.895 687.379 null] >> -% 1211 0 obj +% 1468 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [1463 0 R /XYZ 99.895 667.454 null] >> -% 1217 0 obj +% 1469 0 obj << -/Type /Page -/Contents 1218 0 R -/Resources 1216 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R +/D [1463 0 R /XYZ 99.895 626.268 null] >> -% 1219 0 obj +% 1470 0 obj << -/D [1217 0 R /XYZ 149.705 753.953 null] +/D [1463 0 R /XYZ 99.895 567.828 null] >> -% 1216 0 obj +% 1462 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1228 0 obj +% 1474 0 obj << /Type /Page -/Contents 1229 0 R -/Resources 1227 0 R +/Contents 1475 0 R +/Resources 1473 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1220 0 R 1221 0 R 1222 0 R 1223 0 R 1224 0 R ] +/Parent 1471 0 R +/Annots [ 1472 0 R ] >> -% 1220 0 obj +% 1472 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 417.772 339.385 429.832] -/A << /S /GoTo /D (vdata) >> +/Rect [342.753 116.52 409.811 128.58] +/A << /S /GoTo /D (descdata) >> >> -% 1221 0 obj +% 1476 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 405.817 231.53 417.877] -/A << /S /GoTo /D (table.2) >> +/D [1474 0 R /XYZ 149.705 753.953 null] >> -% 1222 0 obj +% 316 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 338.393 339.385 350.453] -/A << /S /GoTo /D (vdata) >> +/D [1474 0 R /XYZ 150.705 716.092 null] >> -% 1223 0 obj +% 1473 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 326.438 231.53 338.498] -/A << /S /GoTo /D (table.2) >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1224 0 obj +% 1479 0 obj +<< +/Type /Page +/Contents 1480 0 R +/Resources 1478 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1471 0 R +/Annots [ 1477 0 R ] +>> +% 1477 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 259.015 323.106 271.074] -/A << /S /GoTo /D (descdata) >> ->> -% 1230 0 obj -<< -/D [1228 0 R /XYZ 98.895 753.953 null] +/Rect [278.165 401.451 300.083 413.511] +/A << /S /GoTo /D (subsubsection.2.3.1) >> >> - -endstream -endobj -1236 0 obj +% 1481 0 obj << -/Length 1288 +/D [1479 0 R /XYZ 98.895 753.953 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F54 9.9626 Tf 175.611 706.129 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 331.456 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 334.594 706.129 Td [(T)]TJ -ET -q -1 0 0 1 340.452 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 343.59 706.129 Td [(vect)]TJ -ET -q -1 0 0 1 365.139 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 368.277 706.129 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.453 0 Td [(containing)-354(numbers)-355(of)]TJ -217.119 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(14)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -535.99 Td [(56)]TJ -0 g 0 G -ET - -endstream -endobj -1245 0 obj +% 1482 0 obj << -/Length 7234 +/D [1479 0 R /XYZ 99.895 496.698 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.16)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(inverse)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 252.097 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)-250(puts)-250(it)-250(into)]TJ/F52 9.9626 Tf 69.951 0 Td [(y)]TJ/F54 9.9626 Tf -184.401 -18.334 Td [(/)]TJ/F83 10.3811 Tf 9.054 0 Td [(\040)]TJ/F54 9.9626 Tf 13.272 0 Td [(1)-13(/)]TJ/F52 9.9626 Tf 11.562 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ/F59 9.9626 Tf -181.059 -18.334 Td [(psb_geinv\050x,)-525(y,)-525(desc_a,)-525(info,)-525([flag\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 183.343 637.562 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S -Q -BT -/F54 9.9626 Tf 189.446 628.995 Td [(/)-12(,)]TJ/F52 9.9626 Tf 11.437 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 99.042 0 Td [(Function)]TJ -ET -q -1 0 0 1 183.343 625.209 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S -Q -BT -/F54 9.9626 Tf 189.321 616.641 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 326.555 616.84 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.544 616.641 Td [(geinv)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 326.555 604.885 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.544 604.686 Td [(geinv)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 326.555 592.93 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.544 592.731 Td [(geinv)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 326.555 580.975 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.544 580.775 Td [(geinv)]TJ -ET -q -1 0 0 1 183.343 576.99 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 227.467 548.611 Td [(T)92(able)-250(16:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -127.572 -29.451 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -18.492 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -18.491 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 434.555 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 434.356 Td [(T)]TJ -ET -q -1 0 0 1 289.642 434.555 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 434.356 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 434.555 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 434.356 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.492 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 404.108 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 403.909 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 356.288 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 356.088 Td [(desc)]TJ -ET -q -1 0 0 1 298.05 356.288 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 356.088 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -18.491 Td [(\003ag)]TJ -0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(check)-278(if)-279(any)-278(of)-278(the)]TJ/F52 9.9626 Tf 84.227 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)-340(=)]TJ/F54 9.9626 Tf 19.049 0 Td [(0,)-285(and)-279(in)-278(case)-279(r)18(eturns)-278(err)18(or)-278(halting)-279(the)-278(compu-)]TJ -112.58 -11.955 Td [(tation.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(flag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.true.)]TJ -0 g 0 G -/F51 9.9626 Tf -157.04 -18.492 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -18.491 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.849 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -151.669 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 204.972 Td [(T)]TJ -ET -q -1 0 0 1 289.642 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 204.972 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 204.972 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(16)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.492 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -36.266 Td [(57)]TJ -0 g 0 G -ET - -endstream -endobj -1251 0 obj +% 1483 0 obj << -/Length 623 +/D [1479 0 R /XYZ 99.895 474.179 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 706.042 Td [(5)-1000(Communication)-250(routines)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-303(r)18(outines)-302(in)-303(this)-303(chapter)-302(implement)-303(various)-303(global)-302(communication)-303(opera-)]TJ 0 -11.955 Td [(tors)-271(on)-271(vectors)-271(associated)-271(with)-271(a)-272(discr)18(etization)-271(mesh.)-373(For)-271(auxiliary)-271(communi-)]TJ 0 -11.955 Td [(cation)-250(r)18(outines)-250(not)-250(tied)-250(to)-250(a)-250(discr)18(etization)-250(space)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(6)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G - 166.874 -568.992 Td [(58)]TJ -0 g 0 G -ET - -endstream -endobj -1259 0 obj +% 1484 0 obj << -/Length 6340 +/D [1479 0 R /XYZ 99.895 430.343 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(5.1)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ/F54 9.9626 Tf -51.121 -19.15 Td [(These)-250(subr)18(outines)-250(gathers)-250(the)-250(values)-250(of)-250(the)-250(halo)-250(elements:)]TJ/F52 9.9626 Tf 158.568 -25.014 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F54 9.9626 Tf -180.232 -22.11 Td [(wher)18(e:)]TJ -0 g 0 G -/F52 9.9626 Tf 0.294 -20.212 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(a)-250(global)-250(dense)-250(submatrix.)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 179.582 596.326 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F60 9.9626 Tf 185.685 587.758 Td [(a)]TJ/F54 9.9626 Tf 5.384 0 Td [(,)]TJ/F52 9.9626 Tf 5.276 0 Td [(x)]TJ/F51 9.9626 Tf 110.13 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 179.582 583.972 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 185.56 575.404 Td [(Integer)-8983(psb)]TJ -ET -q -1 0 0 1 322.794 575.603 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 575.404 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 322.794 563.648 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 563.449 Td [(halo)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 322.794 551.693 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 551.494 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 322.794 539.738 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 539.539 Td [(halo)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 322.794 527.783 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +% 1478 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1488 0 obj +<< +/Type /Page +/Contents 1489 0 R +/Resources 1487 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1471 0 R +/Annots [ 1485 0 R 1486 0 R ] +>> +% 1485 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 573.77 409.811 585.83] +/A << /S /GoTo /D (descdata) >> +>> +% 1486 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 416.361 409.811 428.42] +/A << /S /GoTo /D (descdata) >> +>> +% 1490 0 obj +<< +/D [1488 0 R /XYZ 149.705 753.953 null] +>> +% 320 0 obj +<< +/D [1488 0 R /XYZ 150.705 716.092 null] +>> +% 1491 0 obj +<< +/D [1488 0 R /XYZ 150.705 326.302 null] +>> + +endstream +endobj +1497 0 obj +<< +/Length 3168 +>> +stream +0 g 0 G +0 g 0 G BT -/F54 9.9626 Tf 325.783 527.583 Td [(halo)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.4)-1000(psb)]TJ ET q -1 0 0 1 179.582 523.798 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 227.467 495.419 Td [(T)92(able)-250(17:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -24.102 Td [(call)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G - [-525(psb_halo\050x,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_halo\050x,)-525(desc_a,)-525(info,)-525(work,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(data)]TJ -0 g 0 G - [(\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.301 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdcpy\050desc_in,)-525(desc_out,)-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.308 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -20.309 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 89.688 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -79.949 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 385.864 348.823 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 389.002 348.623 Td [(T)]TJ +/F59 9.9626 Tf 123.397 625.397 Td [(in)]TJ +0 g 0 G +/F62 9.9626 Tf 14.386 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -12.981 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 394.86 348.823 cm +1 0 0 1 309.258 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 348.623 Td [(vect)]TJ +/F67 9.9626 Tf 312.397 577.576 Td [(desc)]TJ ET q -1 0 0 1 419.547 348.823 cm +1 0 0 1 333.945 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 348.623 Td [(type)]TJ +/F67 9.9626 Tf 337.084 577.576 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(17)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.308 Td [(desc)]TJ + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 120.408 316.559 cm +1 0 0 1 120.408 535.932 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 316.36 Td [(a)]TJ +/F59 9.9626 Tf 123.397 535.733 Td [(out)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 19.925 0 Td [(the)-250(communication)-250(descriptor)-250(copy)111(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 268.738 cm +1 0 0 1 309.258 488.112 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 268.539 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 487.912 Td [(desc)]TJ ET q -1 0 0 1 333.945 268.738 cm +1 0 0 1 333.945 488.112 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 268.539 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F67 9.9626 Tf 337.084 487.912 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.308 Td [(work)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F52 9.9626 Tf 218.454 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -248.566 -20.309 Td [(data)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(index)-250(list)-250(selector)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)74(.)-290(V)92(alues:)]TJ/F59 9.9626 Tf 136.507 0 Td [(psb_comm_halo_)]TJ/F54 9.9626 Tf 73.224 0 Td [(,)]TJ/F59 9.9626 Tf 2.491 0 Td [(psb_comm_mov_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_comm_ext_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ -352.714 -11.955 Td [(default:)]TJ/F59 9.9626 Tf 39.042 0 Td [(psb_comm_halo_)]TJ/F54 9.9626 Tf 73.225 0 Td [(.)-634(Chooses)-358(the)-358(index)-358(list)-358(on)-357(which)-358(to)-358(base)-358(the)]TJ -112.267 -11.955 Td [(data)-250(exchange.)]TJ -0 g 0 G - 141.968 -29.888 Td [(59)]TJ + 141.968 -329.728 Td [(77)]TJ 0 g 0 G ET endstream endobj -1266 0 obj +1502 0 obj << -/Length 3039 +/Length 2167 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(x)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.5)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(cdfree)-250(\227)-250(Frees)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 117.085 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Returned)-285(as:)-381(a)-285(rank)-285(one)-286(or)-285(two)-285(array)-285(containing)-285(numbers)-286(of)-285(type)-285(speci-)]TJ 0 -11.955 Td [(\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(17)]TJ 0 g 0 G - [(.)]TJ +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdfree\050desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.68 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value)-250(that)-250(contains)-250(an)-250(err)18(or)-250(code.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -19.925 Td [(desc)]TJ ET -1 0 0 1 210.511 336.406 cm q -.45 0 0 .45 0 0 cm -q -1 0 0 1 0 0 cm -/Im4 Do -Q +1 0 0 1 171.218 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G -1 0 0 1 -210.511 -336.406 cm BT -/F54 9.9626 Tf 240.086 304.526 Td [(Figur)18(e)-250(3:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ -0 g 0 G +/F59 9.9626 Tf 174.207 625.397 Td [(a)]TJ 0 g 0 G -/F51 11.9552 Tf -89.381 -23.91 Td [(Usage)-325(Example)]TJ/F54 9.9626 Tf 87.482 0 Td [(Consider)-325(the)-325(discr)18(etization)-324(mesh)-325(depicted)-325(in)-325(\002g.)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)-250(to)-250(be)-250(fr)18(eed.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG - [-325(3)]TJ -0 g 0 G - [(,)-343(parti-)]TJ -87.482 -11.956 Td [(tioned)-219(among)-220(two)-219(pr)18(ocesses)-220(as)-219(shown)-220(b)1(y)-220(the)-219(dashed)-220(line;)-229(the)-220(data)-219(distribution)]TJ 0 -11.955 Td [(is)-343(such)-342(that)-343(each)-343(pr)18(ocess)-343(will)-342(own)-343(32)-343(entries)-343(in)-342(the)-343(index)-343(space,)-366(with)-342(a)-343(halo)]TJ 0 -11.955 Td [(made)-355(of)-355(8)-355(entries)-355(place)1(d)-355(at)-355(local)-355(indices)-355(33)-355(thr)18(ough)-355(40.)-624(If)-355(pr)18(ocess)-355(0)-355(assigns)]TJ 0 -11.955 Td [(an)-280(initial)-280(value)-280(of)-281(1)-280(to)-280(its)-280(entries)-280(in)-280(the)]TJ/F52 9.9626 Tf 173.857 0 Td [(x)]TJ/F54 9.9626 Tf 7.997 0 Td [(vector)74(,)-288(and)-280(pr)18(ocess)-280(1)-280(assigns)-280(a)-280(value)]TJ -181.854 -11.955 Td [(of)-314(2,)-329(then)-313(after)-314(a)-314(c)1(a)-1(l)1(l)-314(to)]TJ/F59 9.9626 Tf 106.994 0 Td [(psb_halo)]TJ/F54 9.9626 Tf 44.966 0 Td [(the)-314(conten)1(ts)-314(of)-314(the)-313(local)-314(vectors)-313(will)-314(be)-313(the)]TJ -151.96 -11.955 Td [(following:)]TJ -0 g 0 G - 166.874 -118.447 Td [(60)]TJ -0 g 0 G +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET - -endstream -endobj -1262 0 obj -<< -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/try8x8.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 1269 0 R -/BBox [0 0 498 439] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 1270 0 R ->>/Font << /R8 1271 0 R/R10 1272 0 R>> ->> -/Length 3349 -/Filter /FlateDecode ->> -stream -xœ]›½Ž$¹„ý~ŠötkìÿªXtÒj½ÃY}Ð-q’¡×WWfDFr±Æ$ɬªo›1d%ç÷gù¨ÏrÿÃÏ×o¿Ìç¯ÿ}Œãù¿‡=¿üù1Žùœs<³hÔñü÷ã×^Ù½ŽÔ}ì®c©¿õÅþVzê_=ú©þ>⩽¦ÇŽí'ž»!ïˆw È;\)ònÒBBZÃ!-$äݤ…5=ÖOÈztRZHLk€Óâ•/©ÇDµX=&¬·œÖcâZ ¼× b!r+“ÈÙ@¶xå €ì1‘­d‰ì-Gö˜ÈÖ²Ç5ÙGù¬|‡|7ˆ|Ç+_@d‹ùnÙâ@¶-ä»Ed‹k ²¹×И…D¶-^ù {LdkÙc"{Ë‘=&²µ€ìqÍ@ö‘@>Cqò)ÍY¼òD>³ì¬Eä3 Ï[@>³ô¬Eä3‹ÏiŠy´Ÿ…aeMò³xå èf-ËÏZô³–åç-8ZËò³=­eù9M€>åyÝœÅFê@l#mÄH?Ki³h¤œy¤ž1ÒŽ–FêÙb¤®+Ô‘ ¬ü|cîÇÌ3FŒ9FŒ™#7³Fê#Æ#ÆŒcÖH1ÇÈ!€•ŸïÌ¥‰ùŽÅì#ÎÌgƈ1ÇH=cÄ™9âÌ>âÌ1RDàÌ9°òó¹+˜-fŒsŒ3GnfÔ3FŒ9FŒ#Ƭ‘"cŽ‘C+?ߘëÁlq0cĘcĘ9r3k¤ž1bÌ1bÌ1fsŒXùùÎ<¤A‹Å<¤Áqæ! j¤ž1âÌ#k#Î<²922Á!€•ŸoÌ—$xe^à•õwI~WVß%ñ]Y{—¤weå]Þ•tw…ì®MuIt›æ’ä6Å%ÁmzKrÛԖĶi-I-+MBK½ð9 Ë1>úó(çÇ,Ým;V¥¯Ç(ó£ñ7P?#¿Óz_þq"ûýmó»ÜÁëw¿›`ùÁòz|ÛíÌÚÑìwM,¤…fÍ~F~§Ã:Z?kF³ßE±Öš5í©ž, Í]ËÌ`ÍŒf¿ cí/4kÞÈï4R •¹¡U|ö$´»é3Ðü.ßvsò°ohSaAB»›†æù~éhíèÍ~ëÇŽFhÖ´§z~° dáµ2š-Eb‡%4kÞÈï´EG«kmh…Ÿ}éÚÝt€E4ÜåÛn5öM¶|Š=aBëò;Ýhc“­¯bß(4k:@Èwù¶¹‹Eï/õ}3^<1sÁ„˜Ýatε©Û³>øµI`AÌ&GR€0wªÆIØ¿ÿüúOQu}ýçN5h=u÷ |ù§¨z|ù·õœÅîOïžó¶û××cdg¶ÌóôXñ½°ø³y‰ìÛ¶Ö -;ãÍï,_0 Ž 7k‚3ãŠM·à¼¿Dz,íÙ·qž<à¬åõ|Ád8¬Ðlåã…ëͺâM༿Dz¬áçpŸ?,pw SpeŸV.Å<î®cB`jgý%Òc±æp0©È>ðùÃÔxó»åõ|Á$8®¹,vŸÂõf_ñNFpÞ_"=Ve/,ÐÖWøù»­în`\òµmqeqÏ‚0‹÷H ®CHåàÆ&3°x×pÖ€‘œ-¯§,„[áú‰ù˜›&ÔÀd.¹œŒfÅì…e1,jÙ’µ¤07®Æ©Ø•@!Lqu aî\ƒFT7¡ fâJî–÷±ÃÝŽ+ÜM/»Þ·×üÀ"Îw·ÈZh!öùA¾™¿ÂóùÄý­ß˜?´Cœõá¾/¬ƒÏûAúÈ~—ö•ÒïüúÉi™Y“âˆì 1 ÃÄ<qVÖÆ¢4"›,YIsCk1#›0B3¡õ¤‹¹¡ð§ºÙ -U1š\o{¯Wµçlµ…ëé}ùëÑJ“ë­´\ZŽ!¶Ybþín¨جòþÖ_µ«Oâã;¼ª'®7wÓ›þàs×[i¸´"óØ]Œù·»©|Þ_µµOæÃûºªÝ'®7wS"øÜõVZ .-ÊÏÌW9p=òYÕþV<‰ï檶 ä뜸ùÌõVZ .­ËïŸÚÿŠ'óÁõ¦6¨¸Þ]/ÊœÁçý‡Ö†ªp.¾Êy ëÏú§¶ÀâI|t½©=*ù:ç…®>ë?´6T5|t1䜺îoýS»`ñ$>ºÞÔ6×»ëEi9ø¼ÿÐÚP•‹«+󕘸ø¬j#,žÌ7¤ºø:ç…®¾N}0ÿÐ qÖ‡»[”̓Ïû§öÂâI|—ä®ç×ONËÌê˜Gd±J³0|Ìó'aem,J#²gÙ\O…Ÿõ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®·Õ}›v¬½•p=Uí_^K¸žWƒ‘_´FCl³ÄüÛÝpÁf•÷·þ¦±x+¹M;V\﵇ª¢žçý%%ŠÖh»‹1ÿv7H>ïoÚ‹'ó¡jÛ´cÅõ^€¨ªLàyÞ_Re¢h†xf¾Êy€ë‘Ïú›vÄâI|¬Ð6íXÉ×9/p=òYI剢5šÇîbÌ?8p=Þßú›vÄâI|¬Æ6íXq½—"ªjxž÷—T£(Z£!^™¯Ä<¸ë‘Ïú›vÄâÉ|Cú`á|ó×#_§>˜_´FCœõa7ÁçýM;bñ$¾Kòˆúƒ_?9-3«cR‘]b•f!} ù‹“°²6¥Ùmd×Ëe×–ö¬ñÕ‹oÞöÅã÷n&´žt17´þT7[¡*fBK®—ÎÁx ›ªZèàÏ]-r½SkC”yñÔŒÏòÍÝN.xëŸÚ‹'ñÑõ¦v¬¸WU.ð<ôkm¨S1࣋!ßÜ-5Ÿ÷OíˆÅ“ùàzS;V\²«Šxúµ6Ô òÁÅÀW9t½©ò¦øÊ>¿t½©+ù:ç…®>ï×ÚP§]ÀGCþÁy ëMU0ü¹ÈŸ×îzéÜŠÇp±© -†¸úµ6ÔÉ–O¹¬ÌWbàzSE 0¬àË®—Ψxܳ>P…Uƒ|úˆò¬Öhˆ³>ÜÝâð^ðyÿÔŽX<‰ï’<ÂÇfÔ1â@aÐMŠC5Ú¢"mZ½!qVÖÆ¢4"{^›ëéŠÏúÜÐZÌÈ&ŒÐÅLh=ébnh#ü©n¶BUÌ„&×]Õ ÝÅFS5Cg_QUÍðӂȯZ£!ö²{U5'"ýÈCS5 žßÛæzÖæI¿®+®÷ó'MÕ <Ïû«Ö†:%>A©ªfè\dðy׎X<™§úºv¬¸Þ¡4U3ð<ï¯ZêD ùp¥ªš¡â³þ®±xOðuíXÉ×9/<ŠRUÍÀ‰¿|p+q¥ªš¡³¨Áçý];bñ$>žÖëÚ±âz?ŽÒTÍÀó¼¿jm¨“~/žú[™¯Ä<¸ë‘Ïú»vÄâÉ|Cú€‹‘¯s^x(¥ªšá]ú€[!ÎúðS)MÕ ÞPÌïms=k_’} ×ONËÌê˜Gd×X¥YgSjT3âÔr°-J#²{Ë®ç_ŤŒ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®÷Ó[ÎÏŸŸåùËãý¤†¿õ8šÇ÷‰<ÿÓ”ÿüúøýQ=Æ×oÏ?~}üøåzÖöüúχÿeK}ŽãýÜò±ž_{üôCýô¹Ï£üÐ>}¥”:~ü<ðóÄωŸ×§Ÿ¿þåíÝíýŸšÇóë/Îñé뿟ߟvñvÿ®Ý¾k×ïÚeoë»öõ]{Þí·¯??·~¢ûCÞŸ#²ÚaYïá÷…–aXŽí)Ãüã—÷'W¯û³êíãýŸ¼žkà®3Ô{οþõÝ*ŸþðøÓ×Çßßÿþ{Ü -endstream -endobj -1275 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 13073 ->> -stream -xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs -ÏÇo”dz%!)&cÒ攤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ -¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ -ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– -Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ -Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 -«CÁ -»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w -v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7Š†%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ -ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ -èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ÎœG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøȪ‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) -sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/ÌåÆæªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²㶢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( -Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ -ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï -;žÓ¸‹FÄd4½…‚ïLûí“‹=wŽ°%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ -GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ×Ø.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïؤžAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ -‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ß•Õ¼Ò2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ -œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu -¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr -0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äó澞7á= -%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl -»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•Ñµa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçƱ€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 -ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ -ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( -½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η - Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi -õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø -¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ -Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*Ž†Ž7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@Ë©Ô·W‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥ÅgrŽ§t0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª -ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& -‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä -]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² - ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ -SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døѪ‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. -¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæ؇ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« -g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 -Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E -G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA -à= -®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ -è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞƇŒ¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ -endstream -endobj -1276 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 11578 ->> -stream -xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» -bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o -‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU -‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûĪ¾XËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ -´Õ= -«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið -ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É -|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajMè‚šÉm¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøЉ•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ -SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi -‹» xŸÃk\_¥OÌ -O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôÑ°€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| -ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#Š°6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ -·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²G᧊RjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä -„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy -GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØΓë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO -è ~nè¸Ó\“¡ÍÑeåf -Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QŠ¦Žmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW -å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊá¡žž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ -Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe -€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'ඊ - iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ -Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ -(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígÏ°çÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ -œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ -Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} -lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒÑ‘Ð IòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L -³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië -H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ -Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú -?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” -å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ -Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, -¬Á®¶QU¯Ž§_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm -êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ -ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ -Ê Å±»ò - Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄEéƶîÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG -mÙ_¨&òK - ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë -ó‹‹Ê„²êlŽG™ƒýUª€ÝWŽ¶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ -åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ -Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ -ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ -ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ -endstream -endobj -1279 0 obj -<< -/Length 3048 ->> -stream -0 g 0 G +q +1 0 0 1 360.068 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 387.893 577.576 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G -BT -/F54 8.9664 Tf 209.77 645.656 Td [(Pr)18(ocess)-250(0)-7729(Pr)18(ocess)-250(1)]TJ -31.696 -10.959 Td [(I)-1333(GLOB\050I\051)-1334(X\050I\051)-4663(I)-1333(GLOB\050I\051)-1333(X\050I\051)]TJ -1.462 -10.959 Td [(1)-4607(1)-1754(1.0)-4500(1)-4107(33)-1753(2.0)]TJ 0 -10.959 Td [(2)-4607(2)-1754(1.0)-4500(2)-4107(34)-1753(2.0)]TJ 0 -10.959 Td [(3)-4607(3)-1754(1.0)-4500(3)-4107(35)-1753(2.0)]TJ 0 -10.959 Td [(4)-4607(4)-1754(1.0)-4500(4)-4107(36)-1753(2.0)]TJ 0 -10.959 Td [(5)-4607(5)-1754(1.0)-4500(5)-4107(37)-1753(2.0)]TJ 0 -10.959 Td [(6)-4607(6)-1754(1.0)-4500(6)-4107(38)-1753(2.0)]TJ 0 -10.959 Td [(7)-4607(7)-1754(1.0)-4500(7)-4107(39)-1753(2.0)]TJ 0 -10.958 Td [(8)-4607(8)-1754(1.0)-4500(8)-4107(40)-1753(2.0)]TJ 0 -10.959 Td [(9)-4607(9)-1754(1.0)-4500(9)-4107(41)-1753(2.0)]TJ -4.483 -10.959 Td [(10)-4107(10)-1754(1.0)-4000(10)-4107(42)-1753(2.0)]TJ 0 -10.959 Td [(11)-4107(11)-1754(1.0)-4000(11)-4107(43)-1753(2.0)]TJ 0 -10.959 Td [(12)-4107(12)-1754(1.0)-4000(12)-4107(44)-1753(2.0)]TJ 0 -10.959 Td [(13)-4107(13)-1754(1.0)-4000(13)-4107(45)-1753(2.0)]TJ 0 -10.959 Td [(14)-4107(14)-1754(1.0)-4000(14)-4107(46)-1753(2.0)]TJ 0 -10.959 Td [(15)-4107(15)-1754(1.0)-4000(15)-4107(47)-1753(2.0)]TJ 0 -10.959 Td [(16)-4107(16)-1754(1.0)-4000(16)-4107(48)-1753(2.0)]TJ 0 -10.959 Td [(17)-4107(17)-1754(1.0)-4000(17)-4107(49)-1753(2.0)]TJ 0 -10.958 Td [(18)-4107(18)-1754(1.0)-4000(18)-4107(50)-1753(2.0)]TJ 0 -10.959 Td [(19)-4107(19)-1754(1.0)-4000(19)-4107(51)-1753(2.0)]TJ 0 -10.959 Td [(20)-4107(20)-1754(1.0)-4000(20)-4107(52)-1753(2.0)]TJ 0 -10.959 Td [(21)-4107(21)-1754(1.0)-4000(21)-4107(53)-1753(2.0)]TJ 0 -10.959 Td [(22)-4107(22)-1754(1.0)-4000(22)-4107(54)-1753(2.0)]TJ 0 -10.959 Td [(23)-4107(23)-1754(1.0)-4000(23)-4107(55)-1753(2.0)]TJ 0 -10.959 Td [(24)-4107(24)-1754(1.0)-4000(24)-4107(56)-1753(2.0)]TJ 0 -10.959 Td [(25)-4107(25)-1754(1.0)-4000(25)-4107(57)-1753(2.0)]TJ 0 -10.959 Td [(26)-4107(26)-1754(1.0)-4000(26)-4107(58)-1753(2.0)]TJ 0 -10.959 Td [(27)-4107(27)-1754(1.0)-4000(27)-4107(59)-1753(2.0)]TJ 0 -10.958 Td [(28)-4107(28)-1754(1.0)-4000(28)-4107(60)-1753(2.0)]TJ 0 -10.959 Td [(29)-4107(29)-1754(1.0)-4000(29)-4107(61)-1753(2.0)]TJ 0 -10.959 Td [(30)-4107(30)-1754(1.0)-4000(30)-4107(62)-1753(2.0)]TJ 0 -10.959 Td [(31)-4107(31)-1754(1.0)-4000(31)-4107(63)-1753(2.0)]TJ 0 -10.959 Td [(32)-4107(32)-1754(1.0)-4000(32)-4107(64)-1753(2.0)]TJ 0 -10.959 Td [(33)-4107(33)-1754(2.0)-4000(33)-4107(25)-1753(1.0)]TJ 0 -10.959 Td [(34)-4107(34)-1754(2.0)-4000(34)-4107(26)-1753(1.0)]TJ 0 -10.959 Td [(35)-4107(35)-1754(2.0)-4000(35)-4107(27)-1753(1.0)]TJ 0 -10.959 Td [(36)-4107(36)-1754(2.0)-4000(36)-4107(28)-1753(1.0)]TJ 0 -10.959 Td [(37)-4107(37)-1754(2.0)-4000(37)-4107(29)-1753(1.0)]TJ 0 -10.958 Td [(38)-4107(38)-1754(2.0)-4000(38)-4107(30)-1753(1.0)]TJ 0 -10.959 Td [(39)-4107(39)-1754(2.0)-4000(39)-4107(31)-1753(1.0)]TJ 0 -10.959 Td [(40)-4107(40)-1754(2.0)-4000(40)-4107(32)-1753(1.0)]TJ + 0 -19.925 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 94.641 -105.903 Td [(61)]TJ + 141.968 -397.474 Td [(78)]TJ 0 g 0 G ET endstream endobj -1287 0 obj +1508 0 obj << -/Length 7519 +/Length 5710 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.2)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.6)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(These)-250(subr)18(outines)-250(applies)-250(an)-250(overlap)-250(operator)-250(to)-250(the)-250(input)-250(vector:)]TJ/F52 9.9626 Tf 154.518 -23.824 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.497 0 Td [(Q)-42(x)]TJ/F54 9.9626 Tf -176.112 -21.014 Td [(wher)18(e:)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(cdbldext)-190(\227)-190(Build)-190(an)-190(extended)-190(communication)-190(descrip-)]TJ -24.221 -13.948 Td [(tor)]TJ 0 g 0 G -/F52 9.9626 Tf 0.294 -19.203 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(submatrix)]TJ/F52 9.9626 Tf 131.351 0 Td [(x)]TJ +/F67 9.9626 Tf -26.9 -19.693 Td [(call)-525(psb_cdbldext\050a,desc_a,nl,desc_out,)-525(info,)-525(extype\051)]TJ/F62 9.9626 Tf 14.944 -23.422 Td [(This)-379(subr)18(outine)-379(builds)-379(an)-379(extended)-379(communication)-379(descriptor)74(,)-411(based)-379(on)]TJ -14.944 -11.955 Td [(the)-428(input)-428(descriptor)]TJ/F67 9.9626 Tf 95.499 0 Td [(desc_a)]TJ/F62 9.9626 Tf 35.646 0 Td [(and)-428(on)-428(the)-428(stencil)-428(speci\002ed)-428(thr)18(ough)-428(the)-427(input)]TJ -131.145 -11.955 Td [(sparse)-250(matrix)]TJ/F67 9.9626 Tf 62.107 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G - -141.607 -19.564 Td [(Q)]TJ +/F59 9.9626 Tf -67.337 -21.054 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.857 0 Td [(is)-250(the)-250(overlap)-250(operator;)-250(it)-250(is)-250(the)-250(composition)-250(of)-250(two)-250(operators)]TJ/F52 9.9626 Tf 271.931 0 Td [(P)]TJ/F52 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F54 9.9626 Tf 6.445 1.494 Td [(and)]TJ/F52 9.9626 Tf 19.681 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ +/F59 9.9626 Tf -29.828 -21.429 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -21.43 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)-250(Scope:)]TJ/F59 9.9626 Tf 100.691 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -107.326 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(type.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.907 -21.429 Td [(desc)]TJ ET q -1 0 0 1 230.392 581.71 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 120.408 504.147 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 236.663 573.142 Td [(x)]TJ/F51 9.9626 Tf 120.622 0 Td [(Subroutine)]TJ +/F59 9.9626 Tf 123.397 503.948 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 230.392 569.356 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 309.258 456.326 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 236.369 560.788 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F67 9.9626 Tf 312.397 456.127 Td [(Tspmat)]TJ ET q -1 0 0 1 373.603 560.988 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 344.406 456.326 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 560.788 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F67 9.9626 Tf 347.544 456.127 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.571 -21.43 Td [(nl)]TJ +0 g 0 G +/F62 9.9626 Tf 14.386 0 Td [(the)-250(number)-250(of)-250(additional)-250(layers)-250(desir)18(ed.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F60 9.9626 Tf 131.102 0 Td [(n)-25(l)]TJ/F91 10.3811 Tf 11.873 0 Td [(\025)]TJ/F62 9.9626 Tf 10.961 0 Td [(0.)]TJ +0 g 0 G +/F59 9.9626 Tf -178.843 -21.43 Td [(extype)]TJ +0 g 0 G +/F62 9.9626 Tf 34.869 0 Td [(the)-250(kind)-250(of)-250(estension)-250(r)18(equir)18(ed.)]TJ -9.962 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 40.677 0 Td [(.)]TJ -64.677 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-345(as:)-501(an)-345(integer)-346(value)]TJ/F67 9.9626 Tf 136.676 0 Td [(psb_ovt_xhal_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)]TJ/F67 9.9626 Tf 6.169 0 Td [(psb_ovt_asov_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)-369(default:)]TJ/F67 9.9626 Tf -278.833 -11.955 Td [(psb_ovt_xhal_)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -23.422 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.43 Td [(desc)]TJ ET q -1 0 0 1 373.603 549.032 cm +1 0 0 1 120.408 261.018 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 548.833 Td [(ovrl)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F59 9.9626 Tf 123.397 260.819 Td [(out)]TJ +0 g 0 G +/F62 9.9626 Tf 19.925 0 Td [(the)-250(extended)-250(communication)-250(descriptor)74(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 537.077 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 309.258 213.198 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 536.878 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F67 9.9626 Tf 312.397 212.998 Td [(desc)]TJ ET q -1 0 0 1 373.603 525.122 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 333.945 213.198 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 524.923 Td [(ovrl)]TJ -ET -q -1 0 0 1 230.392 521.137 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q +/F67 9.9626 Tf 337.084 212.998 Td [(type)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 278.277 492.758 Td [(T)92(able)-250(18:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -258.11 -21.429 Td [(info)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -23.549 Td [(call)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -23.422 Td [(Notes)]TJ 0 g 0 G - [-525(psb_ovrl\050x,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +/F62 9.9626 Tf 166.875 -29.888 Td [(79)]TJ 0 g 0 G - [-525(psb_ovrl\050x,)-525(desc_a,)-525(info,)-525(update)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +ET + +endstream +endobj +1513 0 obj +<< +/Length 1484 +>> +stream 0 g 0 G - [(update_type,)-525(work)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(work\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.014 Td [(T)90(ype:)]TJ +BT +/F62 9.9626 Tf 163.158 706.129 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-500(Specifying)]TJ/F67 9.9626 Tf 61.745 0 Td [(psb_ovt_xhal_)]TJ/F62 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F67 9.9626 Tf 32.282 0 Td [(extype)]TJ/F62 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-400(descriptor)-400(for)-400(a)-400(domain)-400(partition)-400(in)-400(which)-400(the)-400(additional)-400(layers)-400(ar)18(e)]TJ 0 -11.955 Td [(fetched)-222(as)-221(part)-222(of)-221(an)-222(\050extended\051)-221(halo;)-232(however)-221(the)-222(index-to-pr)18(ocess)-221(map-)]TJ 0 -11.956 Td [(ping)-250(is)-250(identical)-250(to)-250(that)-250(of)-250(the)-250(base)-250(descriptor;)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.564 Td [(On)-250(Entry)]TJ + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G + [-500(Specifying)]TJ/F67 9.9626 Tf 61.745 0 Td [(psb_ovt_asov_)]TJ/F62 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F67 9.9626 Tf 32.282 0 Td [(extype)]TJ/F62 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-330(descriptor)-331(with)-330(an)-330(overlapped)-331(decomposition:)-470(the)-331(additional)-330(layer)-330(is)]TJ 0 -11.955 Td [(aggr)18(egated)-326(to)-326(the)-326(local)-326(subdomain)-326(\050and)-326(thus)-326(is)-325(an)-326(overlap\051,)-345(and)-326(a)-326(new)]TJ 0 -11.955 Td [(halo)-250(extending)-250(beyond)-250(the)-250(last)-250(additional)-250(layer)-250(is)-250(formed.)]TJ 0 g 0 G - 0 -19.564 Td [(x)]TJ + 141.968 -524.035 Td [(80)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 89.687 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -79.948 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 349.291 Td [(T)]TJ ET -q -1 0 0 1 445.669 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1521 0 obj +<< +/Length 5699 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 448.807 349.291 Td [(vect)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.7)-1000(psb)]TJ ET q -1 0 0 1 470.356 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 473.495 349.291 Td [(type)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(18)]TJ 0 g 0 G - [(.)]TJ +/F67 9.9626 Tf -51.121 -19.277 Td [(call)-525(psb_spall\050a,)-525(desc_a,)-525(info)-525([,)-525(nnz,)-525(dupl,)-525(bldmode]\051)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.564 Td [(desc)]TJ +/F59 9.9626 Tf 0 -22.403 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.571 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.572 Td [(desc)]TJ ET q -1 0 0 1 171.218 317.971 cm +1 0 0 1 120.408 623.505 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 317.772 Td [(a)]TJ +/F59 9.9626 Tf 123.397 623.306 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 270.151 cm +1 0 0 1 309.258 575.684 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 269.951 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 575.485 Td [(desc)]TJ ET q -1 0 0 1 384.755 270.151 cm +1 0 0 1 333.945 575.684 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 269.951 Td [(type)]TJ +/F67 9.9626 Tf 337.084 575.485 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -20.572 Td [(nnz)]TJ +0 g 0 G +/F62 9.9626 Tf 22.137 0 Td [(An)-230(estimate)-230(of)-230(the)-230(number)-230(of)-231(nonzer)18(oes)-230(in)-230(the)-230(local)-230(part)-230(of)-230(the)-230(assembled)]TJ 2.77 -11.955 Td [(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.572 Td [(dupl)]TJ +0 g 0 G +/F62 9.9626 Tf 26.561 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F67 9.9626 Tf 164.941 0 Td [(psb_dupl_ovwrt_)]TJ/F62 9.9626 Tf 78.455 0 Td [(,)]TJ/F67 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)]TJ/F67 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F62 9.9626 Tf 67.994 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 9.9626 Tf -92.901 -20.572 Td [(bldmode)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.564 Td [(update)]TJ +/F62 9.9626 Tf 45.938 0 Td [(Whether)-372(to)-372(kee)1(p)-372(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F67 9.9626 Tf 128.287 0 Td [(psb_matbld_noremote_)]TJ/F62 9.9626 Tf 104.607 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 94.146 0 Td [(.)]TJ -331.543 -11.955 Td [(Default:)]TJ/F67 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F62 9.9626 Tf 104.607 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 36.523 0 Td [(Update)-250(operator)74(.)]TJ +/F59 9.9626 Tf -168.029 -22.402 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -11.617 -31.519 Td [(update)-250(=)-250(psb)]TJ +0 g 0 G + 0 -20.572 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 235.367 219.067 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 309.258 231.892 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 238.356 218.868 Td [(none)]TJ +/F67 9.9626 Tf 312.397 231.692 Td [(Tspmat)]TJ ET q -1 0 0 1 261.648 219.067 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 344.406 231.892 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 269.619 218.868 Td [(Do)-250(nothing;)]TJ +/F67 9.9626 Tf 347.544 231.692 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.571 -20.571 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -22.564 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -20.41 Td [(1.)]TJ +0 g 0 G + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +0 g 0 G + 154.421 -29.888 Td [(81)]TJ 0 g 0 G -/F51 9.9626 Tf -94.008 -15.579 Td [(update)-250(=)-250(psb)]TJ ET -q -1 0 0 1 235.367 203.488 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1526 0 obj +<< +/Length 1141 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G BT -/F51 9.9626 Tf 238.356 203.289 Td [(add)]TJ +/F62 9.9626 Tf 163.158 706.129 Td [(2.)]TJ +0 g 0 G + [-500(The)-250(descriptor)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(assembled)-250(state.)]TJ +0 g 0 G + 0 -19.925 Td [(3.)]TJ +0 g 0 G + [-500(Pr)18(oviding)-219(a)-219(good)-219(estimate)-218(for)-219(the)-219(number)-219(of)-219(nonzer)18(oes)]TJ/F60 9.9626 Tf 255.761 0 Td [(n)-25(n)-25(z)]TJ/F62 9.9626 Tf 18.305 0 Td [(in)-219(the)-219(assem-)]TJ -261.613 -11.956 Td [(bled)-295(matri)1(x)-295(may)-294(substantially)-295(impr)18(ove)-294(performance)-295(in)-294(the)-295(matrix)-294(build)]TJ 0 -11.955 Td [(phase,)-370(as)-346(it)-346(will)-345(r)18(educe)-346(or)-346(eliminate)-346(the)-346(need)-346(for)-345(\050potentially)-346(multiple\051)]TJ 0 -11.955 Td [(data)-250(r)18(eallocations;)]TJ +0 g 0 G + -12.453 -19.925 Td [(4.)]TJ +0 g 0 G + [-500(Using)]TJ/F67 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 97.28 0 Td [(is)-315(likel)1(y)-315(to)-315(cause)-314(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ +0 g 0 G + 141.968 -528.02 Td [(82)]TJ +0 g 0 G ET -q -1 0 0 1 256.109 203.488 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1534 0 obj +<< +/Length 5375 +>> +stream 0 g 0 G -BT -/F54 9.9626 Tf 264.079 203.289 Td [(Sum)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F52 9.9626 Tf 137.239 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 5.4 -3.617 Td [(;)]TJ 0 g 0 G -/F51 9.9626 Tf -237.512 -15.579 Td [(update)-250(=)-250(psb)]TJ +BT +/F59 11.9552 Tf 99.895 706.129 Td [(6.8)-1000(psb)]TJ ET q -1 0 0 1 235.367 187.91 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 238.356 187.71 Td [(avg)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(spins)-233(\227)-233(Insert)-233(a)-233(set)-233(of)-234(coef)18(\002cients)-233(into)-233(a)-233(sparse)-233(matrix)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -51.121 -20.373 Td [(call)-525(psb_spins\050nz,)-525(ia,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ 0 -11.956 Td [(call)-525(psb_spins\050nr,)-525(irw,)-525(irp,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -24.099 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.835 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.834 Td [(nz)]TJ +0 g 0 G +/F62 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.834 Td [(nr)]TJ +0 g 0 G +/F62 9.9626 Tf 14.944 0 Td [(the)-250(number)-250(of)-250(r)18(ows)-250(to)-250(be)-250(inserted.)]TJ 9.963 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.834 Td [(irw)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(the)-250(\002rst)-250(r)18(ow)-250(to)-250(be)-250(inserted.)]TJ 4.434 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.834 Td [(ia)]TJ +0 g 0 G +/F62 9.9626 Tf 13.281 0 Td [(the)-250(r)18(ow)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F60 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -196.043 -22.834 Td [(irp)]TJ +0 g 0 G +/F62 9.9626 Tf 18.262 0 Td [(the)-250(r)18(ow)-250(pointers)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.645 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F60 9.9626 Tf 160.8 0 Td [(n)-15(r)]TJ/F93 10.3811 Tf 11.85 0 Td [(+)]TJ/F62 9.9626 Tf 10.131 0 Td [(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -207.688 -22.835 Td [(ja)]TJ +0 g 0 G +/F62 9.9626 Tf 13.28 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F60 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -196.043 -22.835 Td [(val)]TJ +0 g 0 G +/F62 9.9626 Tf 18.82 0 Td [(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-315(as:)-439(an)-314(array)-315(of)-315(size)]TJ/F60 9.9626 Tf 131.853 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.337 0 Td [(.)-504(Must)-314(be)-315(of)-315(the)-314(same)-315(type)-315(and)-314(kind)-315(of)]TJ -142.19 -11.956 Td [(the)-250(coef)18(\002cients)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 157.901 0 Td [(a)]TJ/F62 9.9626 Tf 4.548 0 Td [(.)]TJ +0 g 0 G + -20.481 -29.887 Td [(83)]TJ +0 g 0 G ET -q -1 0 0 1 255.013 187.91 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1540 0 obj +<< +/Length 6861 +>> +stream +0 g 0 G +0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 262.983 187.71 Td [(A)92(verage)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F52 9.9626 Tf 153.667 0 Td [(P)]TJ/F52 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F52 9.9626 Tf 4.278 1.494 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.617 Td [(;)]TJ -262.547 -19.564 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(u)-80(p)-25(d)-40(a)-25(t)-25(e)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(desc)]TJ ET q -1 0 0 1 244.034 144.435 cm +1 0 0 1 171.218 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 247.147 144.236 Td [(t)-25(y)-80(p)-25(e)]TJ/F85 10.3811 Tf 21.467 0 Td [(=)]TJ/F52 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ +/F59 9.9626 Tf 174.207 706.129 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 136.328 0 Td [(psb)]TJ ET q -1 0 0 1 294.938 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 328.257 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 298.201 144.236 Td [(a)-25(v)-47(g)]TJ +/F67 9.9626 Tf 331.395 658.308 Td [(desc)]TJ ET q -1 0 0 1 314.026 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 352.944 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 175.611 132.281 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(integer)-250(variable.)]TJ +/F67 9.9626 Tf 356.083 658.308 Td [(type)]TJ 0 g 0 G - 141.968 -29.888 Td [(62)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -ET - -endstream -endobj -1296 0 obj -<< -/Length 5447 ->> -stream +/F59 9.9626 Tf -226.299 -33.398 Td [(local)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(Whether)-207(the)-207(entries)-207(in)-207(the)-208(indices)-207(vectors)]TJ/F67 9.9626 Tf 181.487 0 Td [(ia)]TJ/F62 9.9626 Tf 10.46 0 Td [(,)]TJ/F67 9.9626 Tf 4.64 0 Td [(ja)]TJ/F62 9.9626 Tf 12.524 0 Td [(ar)18(e)-207(alr)18(eady)-207(in)-207(local)-208(num-)]TJ -210.765 -11.956 Td [(bering.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F67 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F62 9.9626 Tf 36.613 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -224.197 -23.056 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G + 0 -21.444 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(into)-250(which)-250(coef)18(\002cients)-250(will)-250(be)-250(inserted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 484.968 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 484.768 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 484.968 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(work)]TJ +/F67 9.9626 Tf 398.354 484.768 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(dimensional)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F52 9.9626 Tf 252.794 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -282.907 -19.925 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -268.57 -21.443 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 463.524 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 463.325 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 136.328 0 Td [(psb)]TJ +ET +q +1 0 0 1 328.257 415.704 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 331.395 415.504 Td [(desc)]TJ +ET +q +1 0 0 1 352.944 415.704 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 356.083 415.504 Td [(type)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 117.084 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(18)]TJ +/F59 9.9626 Tf -226.299 -33.398 Td [(info)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -23.436 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(info)]TJ +/F62 9.9626 Tf 12.453 -21.064 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ + [-500(On)-312(entry)-312(to)-312(this)-312(r)18(out)1(ine)-312(the)-312(descriptor)-312(may)-312(be)-312(in)-312(either)-312(the)-311(build)-312(or)-312(as-)]TJ 12.453 -11.955 Td [(sembled)-250(state.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ + -12.453 -21.443 Td [(2.)]TJ 0 g 0 G - [-500(If)-241(ther)18(e)-240(is)-241(no)-241(overlap)-240(in)-241(the)-241(data)-240(distribution)-241(associated)-241(with)-240(the)-241(descrip-)]TJ 12.453 -11.955 Td [(tor)74(,)-250(no)-250(operations)-250(ar)18(e)-250(performed;)]TJ + [-500(On)-314(entry)-315(to)-314(this)-315(r)18(ou)1(tine)-315(the)-314(sparse)-315(matrix)-314(may)-314(be)-315(in)-314(either)-314(the)-315(build)-314(or)]TJ 12.453 -11.955 Td [(update)-250(state.)]TJ 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ + -12.453 -21.444 Td [(3.)]TJ 0 g 0 G - [-500(The)-284(operator)]TJ/F52 9.9626 Tf 72.855 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 8.232 -3.617 Td [(performs)-284(the)-284(r)18(eduction)-285(sum)-284(of)-284(overlap)-284(elements;)-302(it)-284(is)-284(a)]TJ -75.039 -11.955 Td [(\223pr)18(olongation\224)-265(operator)]TJ/F52 9.9626 Tf 110.535 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F54 9.9626 Tf 8.044 -3.616 Td [(that)-265(r)18(eplicates)-266(ov)1(erlap)-266(elements,)-269(accounting)]TJ -124.984 -11.955 Td [(for)-250(the)-250(physical)-250(r)18(eplication)-250(of)-250(data;)]TJ + [-500(If)-263(the)-263(descriptor)-263(is)-262(in)-263(the)-263(build)-263(state,)-266(then)-263(the)-263(sparse)-263(matrix)-262(must)-263(also)-263(be)]TJ 12.453 -11.955 Td [(in)-212(the)-212(build)-213(state;)-224(the)-213(action)-212(of)-212(the)-212(r)18(outine)-212(is)-213(to)-212(\050implicitly\051)-212(call)]TJ/F67 9.9626 Tf 271.732 0 Td [(psb_cdins)]TJ/F62 9.9626 Tf -271.732 -11.955 Td [(to)-259(add)-259(entries)-259(to)-259(the)-259(sparsity)-259(pattern;)-263(each)-259(sparse)-259(matrix)-259(entry)-259(implicitly)]TJ 0 -11.955 Td [(de\002nes)-288(a)-288(graph)-288(edge,)-297(that)-288(is)-288(passed)-288(to)-288(the)-288(descriptor)-288(r)18(outine)-288(for)-288(the)-288(ap-)]TJ 0 -11.955 Td [(pr)18(opriate)-250(pr)18(ocessing;)]TJ 0 g 0 G - -12.453 -19.925 Td [(3.)]TJ + -12.453 -21.444 Td [(4.)]TJ 0 g 0 G - [-500(The)-190(operator)]TJ/F52 9.9626 Tf 70.978 0 Td [(P)]TJ/F52 7.5716 Tf 5.423 -1.495 Td [(a)]TJ/F54 9.9626 Tf 5.848 1.495 Td [(performs)-190(a)-190(scaling)-190(on)-190(the)-190(overlap)-190(elements)-190(by)-190(the)-190(amount)]TJ -69.796 -11.956 Td [(of)-325(r)18(eplication;)-363(thus,)-343(when)-325(combined)-325(with)-325(the)-325(r)18(eduction)-325(operator)74(,)-344(it)-325(im-)]TJ 0 -11.955 Td [(plements)-250(the)-250(average)-250(of)-250(r)18(eplicated)-250(elements)-250(over)-250(all)-250(of)-250(their)-250(instances.)]TJ/F51 11.9552 Tf -24.907 -19.925 Td [(Example)-320(of)-320(use)]TJ/F54 9.9626 Tf 87.879 0 Td [(Consider)-320(the)-320(discr)18(etization)-320(mesh)-320(depicted)-320(in)-320(\002g.)]TJ -0 0 1 rg 0 0 1 RG - [-320(4)]TJ + [-500(The)-250(input)-250(data)-250(can)-250(be)-250(passed)-250(in)-250(either)-250(COO)-250(or)-250(CSR)-250(formats;)]TJ 0 g 0 G - [(,)-337(parti-)]TJ -87.879 -11.955 Td [(tioned)-262(among)-262(two)-263(pr)18(ocesse)1(s)-263(as)-262(shown)-262(by)-262(the)-262(dashed)-263(li)1(nes,)-266(with)-262(an)-262(overlap)-262(of)]TJ 0 -11.955 Td [(1)-261(extr)1(a)-261(layer)-260(with)-261(r)18(espect)-260(to)-261(the)-260(partition)-261(of)-260(\002g.)]TJ -0 0 1 rg 0 0 1 RG - [-261(3)]TJ + 0 -21.443 Td [(5.)]TJ 0 g 0 G - [(;)-265(the)-261(data)-260(distribution)-261(is)-260(such)]TJ 0 -11.956 Td [(that)-267(each)-268(pr)18(ocess)-267(will)-267(own)-267(40)-268(entries)-267(in)-267(the)-267(index)-268(space,)-271(with)-267(an)-268(overlap)-267(of)-267(16)]TJ 0 -11.955 Td [(entries)-249(placed)-248(at)-249(local)-249(i)1(ndices)-249(25)-249(thr)18(ough)-248(40;)-249(the)-249(halo)-249(will)-248(r)8(un)-249(fr)18(om)-249(local)-248(index)]TJ 0 -11.955 Td [(41)-236(thr)18(ough)-237(local)-236(index)-237(48..)-305(If)-236(pr)18(ocess)-237(0)-236(assigns)-237(an)-236(initial)-236(value)-237(of)-236(1)-236(to)-237(its)-236(entries)]TJ 0 -11.955 Td [(in)-259(the)]TJ/F52 9.9626 Tf 27.963 0 Td [(x)]TJ/F54 9.9626 Tf 7.782 0 Td [(vector)74(,)-261(and)-258(pr)18(ocess)-259(1)-259(assigns)-258(a)-259(value)-259(of)-258(2,)-261(then)-259(after)-258(a)-259(call)-259(to)]TJ/F59 9.9626 Tf 266.124 0 Td [(psb_ovrl)]TJ/F54 9.9626 Tf -301.869 -11.955 Td [(with)]TJ/F59 9.9626 Tf 22.816 0 Td [(psb_avg_)]TJ/F54 9.9626 Tf 44.404 0 Td [(and)-257(a)-257(call)-257(to)]TJ/F59 9.9626 Tf 55.983 0 Td [(psb_halo_)]TJ/F54 9.9626 Tf 49.635 0 Td [(the)-257(contents)-257(of)-257(the)-257(local)-257(vectors)-257(will)-258(b)1(e)]TJ -172.838 -11.955 Td [(the)-250(following)-250(\050showing)-250(a)-250(transition)-250(among)-250(the)-250(two)-250(subdomains\051)]TJ + [-500(In)-307(COO)-307(format)-307(the)-306(coef)18(\002cients)-307(to)-307(be)-307(inserted)-307(ar)18(e)-307(r)18(epr)18(esented)-306(by)-307(the)-307(or)18(-)]TJ 12.453 -11.955 Td [(der)18(ed)-194(triples)]TJ/F60 9.9626 Tf 57.352 0 Td [(i)-47(a)]TJ/F93 10.3811 Tf 7.911 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(,)]TJ/F60 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F93 10.3811 Tf 7.84 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(,)]TJ/F60 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F93 10.3811 Tf 13.37 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(,)-205(for)]TJ/F60 9.9626 Tf 19.208 0 Td [(i)]TJ/F93 10.3811 Tf 5.856 0 Td [(=)]TJ/F62 9.9626 Tf 10.961 0 Td [(1,)-179(.)-192(.)-191(.)-180(,)]TJ/F60 9.9626 Tf 26.608 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.337 0 Td [(;)-212(these)-194(triples)-194(ar)18(e)-193(arbitrary;)]TJ 0 g 0 G - 166.875 -143.462 Td [(63)]TJ + -60.701 -29.888 Td [(84)]TJ 0 g 0 G ET endstream endobj -1304 0 obj +1549 0 obj << -/Length 3551 +/Length 4535 >> stream 0 g 0 G 0 g 0 G 0 g 0 G -0 g 0 G -0 g 0 G BT -/F54 7.9701 Tf 265.805 653.177 Td [(Pr)18(ocess)-250(0)-8396(Pr)18(ocess)-250(1)]TJ -31.163 -9.464 Td [(I)-1500(GLOB\050I\051)-1500(X\050I\051)-5163(I)-1500(GLOB\050I\051)-1500(X\050I\051)]TJ -1.299 -9.465 Td [(1)-4774(1)-1920(1.0)-5000(1)-4274(33)-1920(1.5)]TJ 0 -9.464 Td [(2)-4774(2)-1920(1.0)-5000(2)-4274(34)-1920(1.5)]TJ 0 -9.465 Td [(3)-4774(3)-1920(1.0)-5000(3)-4274(35)-1920(1.5)]TJ 0 -9.464 Td [(4)-4774(4)-1920(1.0)-5000(4)-4274(36)-1920(1.5)]TJ 0 -9.465 Td [(5)-4774(5)-1920(1.0)-5000(5)-4274(37)-1920(1.5)]TJ 0 -9.464 Td [(6)-4774(6)-1920(1.0)-5000(6)-4274(38)-1920(1.5)]TJ 0 -9.465 Td [(7)-4774(7)-1920(1.0)-5000(7)-4274(39)-1920(1.5)]TJ 0 -9.464 Td [(8)-4774(8)-1920(1.0)-5000(8)-4274(40)-1920(1.5)]TJ 0 -9.465 Td [(9)-4774(9)-1920(1.0)-5000(9)-4274(41)-1920(2.0)]TJ -3.985 -9.464 Td [(10)-4274(10)-1920(1.0)-4500(10)-4274(42)-1920(2.0)]TJ 0 -9.465 Td [(11)-4274(11)-1920(1.0)-4500(11)-4274(43)-1920(2.0)]TJ 0 -9.464 Td [(12)-4274(12)-1920(1.0)-4500(12)-4274(44)-1920(2.0)]TJ 0 -9.465 Td [(13)-4274(13)-1920(1.0)-4500(13)-4274(45)-1920(2.0)]TJ 0 -9.464 Td [(14)-4274(14)-1920(1.0)-4500(14)-4274(46)-1920(2.0)]TJ 0 -9.465 Td [(15)-4274(15)-1920(1.0)-4500(15)-4274(47)-1920(2.0)]TJ 0 -9.464 Td [(16)-4274(16)-1920(1.0)-4500(16)-4274(48)-1920(2.0)]TJ 0 -9.465 Td [(17)-4274(17)-1920(1.0)-4500(17)-4274(49)-1920(2.0)]TJ 0 -9.464 Td [(18)-4274(18)-1920(1.0)-4500(18)-4274(50)-1920(2.0)]TJ 0 -9.465 Td [(19)-4274(19)-1920(1.0)-4500(19)-4274(51)-1920(2.0)]TJ 0 -9.464 Td [(20)-4274(20)-1920(1.0)-4500(20)-4274(52)-1920(2.0)]TJ 0 -9.465 Td [(21)-4274(21)-1920(1.0)-4500(21)-4274(53)-1920(2.0)]TJ 0 -9.464 Td [(22)-4274(22)-1920(1.0)-4500(22)-4274(54)-1920(2.0)]TJ 0 -9.465 Td [(23)-4274(23)-1920(1.0)-4500(23)-4274(55)-1920(2.0)]TJ 0 -9.464 Td [(24)-4274(24)-1920(1.0)-4500(24)-4274(56)-1920(2.0)]TJ 0 -9.465 Td [(25)-4274(25)-1920(1.5)-4500(25)-4274(57)-1920(2.0)]TJ 0 -9.464 Td [(26)-4274(26)-1920(1.5)-4500(26)-4274(58)-1920(2.0)]TJ 0 -9.465 Td [(27)-4274(27)-1920(1.5)-4500(27)-4274(59)-1920(2.0)]TJ 0 -9.464 Td [(28)-4274(28)-1920(1.5)-4500(28)-4274(60)-1920(2.0)]TJ 0 -9.465 Td [(29)-4274(29)-1920(1.5)-4500(29)-4274(61)-1920(2.0)]TJ 0 -9.464 Td [(30)-4274(30)-1920(1.5)-4500(30)-4274(62)-1920(2.0)]TJ 0 -9.465 Td [(31)-4274(31)-1920(1.5)-4500(31)-4274(63)-1920(2.0)]TJ 0 -9.464 Td [(32)-4274(32)-1920(1.5)-4500(32)-4274(64)-1920(2.0)]TJ 0 -9.465 Td [(33)-4274(33)-1920(1.5)-4500(33)-4274(25)-1920(1.5)]TJ 0 -9.464 Td [(34)-4274(34)-1920(1.5)-4500(34)-4274(26)-1920(1.5)]TJ 0 -9.465 Td [(35)-4274(35)-1920(1.5)-4500(35)-4274(27)-1920(1.5)]TJ 0 -9.464 Td [(36)-4274(36)-1920(1.5)-4500(36)-4274(28)-1920(1.5)]TJ 0 -9.465 Td [(37)-4274(37)-1920(1.5)-4500(37)-4274(29)-1920(1.5)]TJ 0 -9.464 Td [(38)-4274(38)-1920(1.5)-4500(38)-4274(30)-1920(1.5)]TJ 0 -9.465 Td [(39)-4274(39)-1920(1.5)-4500(39)-4274(31)-1920(1.5)]TJ 0 -9.464 Td [(40)-4274(40)-1920(1.5)-4500(40)-4274(32)-1920(1.5)]TJ 0 -9.465 Td [(41)-4274(41)-1920(2.0)-4500(41)-4274(17)-1920(1.0)]TJ 0 -9.464 Td [(42)-4274(42)-1920(2.0)-4500(42)-4274(18)-1920(1.0)]TJ 0 -9.465 Td [(43)-4274(43)-1920(2.0)-4500(43)-4274(19)-1920(1.0)]TJ 0 -9.464 Td [(44)-4274(44)-1920(2.0)-4500(44)-4274(20)-1920(1.0)]TJ 0 -9.465 Td [(45)-4274(45)-1920(2.0)-4500(45)-4274(21)-1920(1.0)]TJ 0 -9.464 Td [(46)-4274(46)-1920(2.0)-4500(46)-4274(22)-1920(1.0)]TJ 0 -9.465 Td [(47)-4274(47)-1920(2.0)-4500(47)-4274(23)-1920(1.0)]TJ 0 -9.464 Td [(48)-4274(48)-1920(2.0)-4500(48)-4274(24)-1920(1.0)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 88.221 -98.979 Td [(64)]TJ -0 g 0 G -ET - -endstream -endobj -1308 0 obj -<< -/Length 321 ->> -stream +/F62 9.9626 Tf 112.349 706.129 Td [(6.)]TJ 0 g 0 G + [-500(In)-272(CSR)-271(format)-272(the)-271(coef)18(\002cients)-272(to)-271(be)-272(inserted)-272(for)-271(each)-272(input)-271(r)18(ow)]TJ/F60 9.9626 Tf 294.598 0 Td [(i)]TJ/F93 10.3811 Tf 6.254 0 Td [(=)]TJ/F62 9.9626 Tf 11.36 0 Td [(1,)]TJ/F60 9.9626 Tf 9.257 0 Td [(n)-15(r)]TJ/F62 9.9626 Tf -309.016 -11.955 Td [(ar)18(e)-311(r)18(epr)18(esented)-312(by)-311(the)-311(or)18(der)18(ed)-312(triples)]TJ/F93 10.3811 Tf 171.689 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 5.251 0 Td [(+)]TJ/F60 9.9626 Tf 10.413 0 Td [(i)-22(r)-35(w)]TJ/F91 10.3811 Tf 16.818 0 Td [(\000)]TJ/F62 9.9626 Tf 10.358 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(,)]TJ/F60 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F93 10.3811 Tf 7.841 0 Td [(\050)]TJ/F60 9.9626 Tf 4.622 0 Td [(j)]TJ/F93 10.3811 Tf 3.019 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(,)]TJ/F60 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F93 10.3811 Tf 13.37 0 Td [(\050)]TJ/F60 9.9626 Tf 4.622 0 Td [(j)]TJ/F93 10.3811 Tf 3.019 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(,)-327(for)]TJ/F60 9.9626 Tf 22.013 0 Td [(j)]TJ/F93 10.3811 Tf 6.917 0 Td [(=)]TJ/F60 9.9626 Tf -310.555 -11.955 Td [(i)-22(r)-90(p)]TJ/F93 10.3811 Tf 12.991 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(,)-179(.)-192(.)-191(.)-180(,)]TJ/F60 9.9626 Tf 21.557 0 Td [(i)-22(r)-90(p)]TJ/F93 10.3811 Tf 12.991 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 5.301 0 Td [(+)]TJ/F62 9.9626 Tf 10.407 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F91 10.3811 Tf 6.486 0 Td [(\000)]TJ/F62 9.9626 Tf 10.407 0 Td [(1;)-362(these)-325(triples)-324(should)-325(belong)-325(to)-324(the)-325(curr)18(ent)-325(pr)18(o-)]TJ -100.947 -11.956 Td [(cess,)-276(i.e.)]TJ/F60 9.9626 Tf 39.307 0 Td [(i)]TJ/F93 10.3811 Tf 5.103 0 Td [(+)]TJ/F60 9.9626 Tf 10.263 0 Td [(i)-22(r)-35(w)]TJ/F91 10.3811 Tf 16.669 0 Td [(\000)]TJ/F62 9.9626 Tf 10.209 0 Td [(1)-271(should)-271(be)-271(one)-271(of)-271(the)-271(local)-270(indices,)-277(but)-271(ar)18(e)-270(otherwise)]TJ -81.551 -11.955 Td [(arbitrary;)]TJ 0 g 0 G + -12.453 -19.925 Td [(7.)]TJ 0 g 0 G + [-500(Ther)18(e)-315(is)-314(no)-315(r)18(equir)18(ement)-314(that)-315(a)-315(given)-314(r)18(ow)-315(must)-315(be)-314(passed)-315(in)-315(its)-314(entir)18(ety)]TJ 12.453 -11.955 Td [(to)-298(a)-299(single)-298(call)-298(to)-299(thi)1(s)-299(r)18(outine:)-406(the)-299(buildup)-298(of)-298(a)-299(r)18(ow)-298(may)-298(be)-299(split)-298(into)-298(as)]TJ 0 -11.955 Td [(many)-250(calls)-250(as)-250(desir)18(ed)-250(\050even)-250(in)-250(the)-250(CSR)-250(format\051;)]TJ 0 g 0 G + -12.453 -19.926 Td [(8.)]TJ 0 g 0 G -1 0 0 1 104.053 292.88 cm -q -.65 0 0 .65 0 0 cm -q -1 0 0 1 0 0 cm -/Im5 Do -Q -Q + [-500(Coef)18(\002cients)-288(fr)18(om)-289(dif)18(fer)18(ent)-288(r)18(ows)-288(may)-289(also)-288(be)-288(mixed)-289(up)-288(fr)18(eely)-288(in)-289(a)-288(single)]TJ 12.453 -11.955 Td [(call,)-250(accor)18(ding)-250(to)-250(the)-250(application)-250(needs;)]TJ 0 g 0 G -1 0 0 1 -104.053 -292.88 cm -BT -/F54 9.9626 Tf 189.276 261 Td [(Figur)18(e)-250(4:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ + -12.453 -19.925 Td [(9.)]TJ 0 g 0 G + [-500(Coef)18(\002cients)-190(fr)18(om)-190(matrix)-190(r)18(ows)-190(not)-190(owned)-190(by)-190(the)-190(calling)-190(pr)18(ocess)-190(ar)18(e)-190(tr)18(eated)]TJ 12.453 -11.955 Td [(accor)18(ding)-254(to)-254(the)-253(value)-254(of)]TJ/F67 9.9626 Tf 111.539 0 Td [(bldmode)]TJ/F62 9.9626 Tf 39.141 0 Td [(speci\002ed)-254(at)-253(allocation)-254(time;)-256(if)]TJ/F67 9.9626 Tf 131.512 0 Td [(bldmode)]TJ/F62 9.9626 Tf -282.192 -11.956 Td [(was)-300(chosen)-300(as)]TJ/F67 9.9626 Tf 66.146 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 97.136 0 Td [(the)-300(library)-300(will)-300(keep)-300(track)-301(of)-300(them,)]TJ -163.282 -11.955 Td [(otherwise)-250(they)-250(ar)18(e)-250(silently)-250(ignor)18(ed;)]TJ 0 g 0 G + -17.435 -19.925 Td [(10.)]TJ 0 g 0 G - 77.494 -170.562 Td [(65)]TJ + [-500(If)-295(the)-294(descriptor)-295(is)-295(i)1(n)-295(the)-295(assembled)-294(state,)-306(then)-295(any)-294(entries)-295(in)-295(the)-294(sparse)]TJ 17.435 -11.955 Td [(matrix)-284(that)-284(would)-284(generate)-284(additional)-284(communication)-284(r)18(equir)18(ements)-284(ar)18(e)]TJ 0 -11.955 Td [(ignor)18(ed;)]TJ 0 g 0 G -ET - -endstream -endobj -1291 0 obj -<< -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/try8x8_ov.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 1310 0 R -/BBox [0 0 516 439] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 1311 0 R ->>/Font << /R8 1312 0 R/R10 1313 0 R>> ->> -/Length 3413 -/Filter /FlateDecode ->> -stream -xœ…›Aä¸ …ïõ+ê8s˜^Ë’-é SvÒ·Ážj‘éÃ&‡üýØä{|ÔI°‡¦(ÚõmÑ|%ZšßŸÛ[yn÷øûúxüôµ?û×£½Ï?lîùõO³ögïíùaV+íùÇ_Í.µÐ=äÝ¥Mù÷:éß·šü³†ÿèòןZKúضÅÇÖŸ[J'¥™Ä´8ÍžùºMTÕmÂúÈiÝ&®ÀëvÉ ö™@î%o3ï‘o{æ ˆlv ß#"›È6²Ù|ˆlvÉD¶"ï×Õ@6“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>È}òmò= òmÏ|‘Íä{Dd³ÙF@6;ï‘Í.€È6CäZ£âÌ$² €löÌÙm"ÛÈnÙGŽì6‘md·K²Ïòˆò33‡ÊÏì™/ òÈåg#"\~>òÈåg#"\~N³ [‹ò3“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>ßòãÐœqhÌtÌÔ3͇ÆL9óÌ.‡æL€CÚ ½·Ðf4f :f š37´fj‹ƒŽƒæL9óÌ.ƒŽ™.ƒÚ ]f h³33Í™Z3µÅŒAÇŒAs¦œyfAÇL€A Í › -ÑlA7bÌ8tS!j¦¶˜qè– ‘3åÌ3»º¥BäÄ– z¨G.á*¹‡jpäªÀ‘ p¨þF.¿¡ê©ø†jo,¥—*o)¼TwKÙ¥ª[Š.ÕÜRr©â–‚Kõ–Ë-U[rCïÌÜŽöVŸç8ßúV]¾c­úz£¿íü½?ZÄ”à+ÄÌ£¾õgD_œßå6^¸û=t‹–×ãûªjn–Œf¿9±¼š ñ…ÖÑêY2šý&Å -\h6´Oõø`Ih®]¦³g4û¡ŒŽ@h6¼!PN¶õ­à»7#¡ÝCèæwù¾*”›uA«H… íšÇMGÛšÑì×?ú¡ÙÐ>ÕãƒEhÐ!3ÇÌh¶$‰¾Kh6¼!P­Ì¹ müî·º ÝC˜DÃ]¾¯jcf]ÊÀ–QÑ)&´Š2@üAZ[ÊÀÖYÑM -͆e€»|_ôŬ롾oÆ‹;2Ñ—"è¨F:çš÷#}]Ëè‰/~.%0QŒ&Gª@_©v&a}þùøwQU=þ}¥j”ž²jþ.ªÿ²®3Ûõ©ìûu¹Ë–:Ù×ÕÉîý 2çË=ÄÇÒïÅe ¥†ñ·p¡?·TòþæwžÄÇ•šÛ%ñ™v©>÷ûÊ®ôÈm—)Æßê¥.=øÜoŸ‹øàÉ|X”Ù"È• -×›€éíAð¹ßÖCˆù\«ÈW˜hùÌï ]|Ûš_®¿Ü®™¯2/7ò™ß×k5ø Ln»`1þ` p¼¿ùís<‰K-³]³p½I™ÞØŸûm•ƒøX¦½¸d›™o‹<¸Ê‘ÏüÎ0ƒ/ÉܲÜ2»æú0=Ó«&ñUÖãcEF¾–ëÃMo£‚ÏýΠúHZ—×XfB¿p}gZz®ŽÎâˆh®ÃGCüdf®ÉÒˆh²äÒH•Ñ´=2²FÔEOh5ÕE_ÐZèSYd…UÑZR½Ôçº ë§T/^†]·=O©Þ©UŸ:á»bÏâMÝüŸg÷7¿3x|ð$>ªžÛ%ñ¹ºÅk¼àsÿ©¥Ÿš^ðQÅoê/ú‚Ïýö¹ˆžÌÕ³¦*æ×»ºÅ Èàsÿ©õŸú[òAÅÀW˜ªøÌï ]|Ûš_ªžÛ5óUæ…ª>óŸZª•U ñó@ÕÃýÍoŸ‹øàI|T=³¡b~½«[¼ô >÷ŸZ ªk}±ƒ™o‹<@õÀg~g˜Á—U/µŸn×\®nñ¶Z|•õÁøS+3ع>\Ýâ…vð¹ßTYõÔqºIóë;ÓÒsutGDŸ±@33tÌã'“0smL–FD“%—FªŒ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zË{¿ª^t¯[¨žÞ§¿û¾…êùÛ@Ä­Ñ`[–«v ,«¼¿ù«º]ñ$>¾È«jHq½©›v‚ÏýEkC½TŒñ·ºi¯ øÜ_ÕòŠ'óá]UWŠëMÝ´‡|î/Zê}ù\ÅÈW˜¨ùÌ_Õ÷Š'ññõ\UkJ¾Ê¼@õÈgþ¢µ¡^í*Æøƒy€êñþæ¯j~Å“øø&®ª?Åõ¦nÚ7 ->÷­ õïÅ7z3óm‘W=ò™¿ªOækª¨ù*óÕ#_e}0¾h;ׇ©›öÄ‚ÏýUm°xßPyPÇp}gZz®ŽÎâˆè«43©cˆŸLÂ̵1Y][V½üέ¦†5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½´â6Tl4©^lÉ]·íMª×µ6ÔNÉ‹»&ž%Ä›ºõ)ÕÃýÍ?Ô‹'ñQõ†:V\ïꛉÁ¿Ö†ÚU ñ¦n±Ý|îêˆÅ“ù zC+®wu‹mÐàƒ_kCí*¾Â\Ýb[=øÜ?Ô‹'ñ •Gè˜_ß™–ž«£³8"ºÇ*ÍÌÐ1ŸLÂ̵1Y=Æ¢zÚð¬÷mŒ,…uÑZMuÑ´úTYaUô„&Õ[ö…›:ÖZG¨žvõ_ºP=ß-Fü®5lËãouÃYË*ïoþ¦ŽX<‰½M+®7uÓy„àsÿ®µ¡6‰Ácü­n:±|îoêˆÅ“ù°§ÛÔ±âzS7¤>÷ïZj?˜|®bä+ÌT|æoêˆÅ“ø¸}ÛÔ±’¯2/P=ò™×ÚP[¿àƒŠ1þ` z¼¿ù›:bñ$>îÔ6u¬¸ÞÔM§W‚Ïý»Ö†Úå}qÇwf¾-òàªG>ó7uÄâÉ|Mõ#_e^ z䫬ÆïZ£ÁÎõaꦓ9Áçþ¦ŽX<‰o¨<¨c¸¾3-=WGgqDô«43©cˆŸLÂ̵1YÝö¬zy?¶¥ž5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½tNÆm¨ØÔn†]·ÚÍðÓ3ˆïZ£Áö, ífณguj7 ?ÏUõÒ‘Ø%ñ¹º ífàóÜßµ6Ô¡ðQņv3tè)øÜ?Õ‹'óAõ¦:V\ïê6´›Ïs×ÚPçcÈÚÍÐq-ñ™ª#Oâ£êMu¬ä«Ì Uoh7Ç_jðQõºÖnŒ?˜ªÞÔn†.â繪^:Óâ6Tlj7Cà‚Ïý]kCzyñÌÌ|[äª7µ›†|YõÒñ·k®W·¡Ý òUÖã»Öh°s}¸º ífðþõÁøy®ª§“*nRÇfìfÄàë,Žˆî±J33tlÄnFI ¶ÉÒˆèy.ª§Ó)žõ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zíÐn†Û®b­i7Cg _Vµ›á§ _µFƒmYbü­n81iYåýͨ#OâãAÀC+®7uÓ©ÈàsÕÚP‡Ácü­n:7|î?Ô‹'óáÌß¡Ž×›ºé®?Œ÷Æå‡ñ¶ŽùÃxü0î÷øúz~ÙëDõ¹žDí‡E]Ó×…aXŽí!Í>øïŸ×düoõ–‘¿Ö«\¯ç­ÜýÌ•çë»ýô—w-ÿ/Iÿõv×ï!o'ÈŸÿ`[G. -endstream -endobj -1316 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 13073 ->> -stream -xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs -ÏÇo”dz%!)&cÒ攤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ -¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ -ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– -Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ -Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 -«CÁ -»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w -v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7Š†%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ -ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ -èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ÎœG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøȪ‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) -sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/ÌåÆæªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²㶢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( -Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ -ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï -;žÓ¸‹FÄd4½…‚ïLûí“‹=wŽ°%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ -GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ×Ø.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïؤžAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ -‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ß•Õ¼Ò2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ -œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu -¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr -0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äó澞7á= -%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl -»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•Ñµa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçƱ€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 -ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ -ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( -½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η - Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi -õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø -¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ -Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*Ž†Ž7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@Ë©Ô·W‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥ÅgrŽ§t0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª -ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& -‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä -]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² - ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ -SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døѪ‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. -¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæ؇ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« -g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 -Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E -G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA -à= -®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ -è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞƇŒ¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ -endstream -endobj -1317 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 11578 ->> -stream -xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» -bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o -‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU -‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûĪ¾XËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ -´Õ= -«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið -ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É -|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajMè‚šÉm¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøЉ•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ -SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi -‹» xŸÃk\_¥OÌ -O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôÑ°€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| -ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#Š°6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ -·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²G᧊RjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä -„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy -GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØΓë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO -è ~nè¸Ó\“¡ÍÑeåf -Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QŠ¦Žmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW -å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊá¡žž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ -Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe -€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'ඊ - iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ -Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ -(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígÏ°çÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ -œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ -Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} -lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒÑ‘Ð IòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L -³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië -H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ -Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú -?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” -å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ -Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, -¬Á®¶QU¯Ž§_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm -êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ -ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ -Ê Å±»ò - Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄEéƶîÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG -mÙ_¨&òK - ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë -ó‹‹Ê„²êlŽG™ƒýUª€ÝWŽ¶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ -åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ -Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ -ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ -ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ -endstream -endobj -1323 0 obj -<< -/Length 8093 ->> -stream + -17.435 -19.926 Td [(11.)]TJ 0 g 0 G + [-500(If)-268(the)-268(matrix)-268(is)-268(in)-268(the)-268(update)-268(state,)-273(any)-268(entries)-268(in)-268(positions)-268(that)-268(wer)18(e)-268(not)]TJ 17.435 -11.955 Td [(pr)18(esent)-250(in)-250(the)-250(original)-250(matrix)-250(ar)18(e)-250(ignor)18(ed.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.3)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ/F54 9.9626 Tf -51.12 -19.441 Td [(These)-280(subr)18(outines)-280(collect)-280(the)-280(portions)-280(of)-280(g)1(lobal)-280(dense)-280(matrix)-280(distributed)-280(over)]TJ 0 -11.955 Td [(all)-250(pr)18(ocess)-250(into)-250(one)-250(single)-250(array)-250(stor)18(ed)-250(on)-250(one)-250(pr)18(ocess.)]TJ/F52 9.9626 Tf 120.161 -25.465 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 289.521 649.467 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 292.803 649.268 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.398 0 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ/F85 10.3811 Tf 27.705 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(l)-55(o)-35(c)]TJ -ET -q -1 0 0 1 359.144 649.467 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 362.427 649.268 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F54 9.9626 Tf -219.744 -22.41 Td [(wher)18(e:)]TJ + 141.968 -360.647 Td [(85)]TJ 0 g 0 G -/F52 9.9626 Tf 0.344 -20.664 Td [(g)-25(l)-55(o)-35(b)]TJ ET -q -1 0 0 1 169.703 606.393 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 172.986 606.194 Td [(x)]TJ + +endstream +endobj +1563 0 obj +<< +/Length 6789 +>> +stream 0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(submatrix)]TJ/F52 9.9626 Tf 103.256 0 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 305.084 606.393 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 308.366 606.194 Td [(x)]TJ/F54 7.5716 Tf 5.106 -1.858 Td [(1)-13(:)]TJ/F52 7.5716 Tf 5.963 0 Td [(m)]TJ/F54 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F52 7.5716 Tf 7.856 0 Td [(n)]TJ 0 g 0 G -/F52 9.9626 Tf -182.447 -19.051 Td [(l)-55(o)-35(c)]TJ +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.9)-1000(psb)]TJ ET q -1 0 0 1 163.696 585.484 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F52 9.9626 Tf 166.979 585.285 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(routine)]TJ 0 g 0 G -/F54 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 234.034 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(.)]TJ 0 g 0 G -/F52 9.9626 Tf -266.027 -20.91 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ +/F67 9.9626 Tf -51.12 -19.204 Td [(call)-525(psb_spasb\050a,)-525(desc_a,)-525(info)-525([,)-525(afmt,)-525(upd,)-1050(mold]\051)]TJ 0 g 0 G -/F54 9.9626 Tf 32.563 0 Td [(is)-250(the)-250(collect)-250(function.)]TJ +/F59 9.9626 Tf 0 -22.289 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -20.421 Td [(On)-250(Entry)]TJ 0 g 0 G +0 g 0 G + 0 -20.421 Td [(desc)]TJ ET q -1 0 0 1 230.392 543.107 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.663 534.539 Td [(x)]TJ/F52 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F54 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(y)]TJ/F51 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 530.753 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 236.369 522.185 Td [(Integer)-8983(psb)]TJ -ET -q -1 0 0 1 373.603 522.385 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 522.185 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 373.603 510.429 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 510.23 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 373.603 498.474 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 498.275 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 373.603 486.519 cm +1 0 0 1 171.218 623.994 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 486.32 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F59 9.9626 Tf 174.207 623.794 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F62 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 474.564 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 576.173 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 474.365 Td [(gather)]TJ +/F67 9.9626 Tf 363.206 575.974 Td [(desc)]TJ ET q -1 0 0 1 230.392 470.579 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 384.755 576.173 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 278.277 442.2 Td [(T)92(able)-250(19:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -27.052 Td [(call)]TJ +/F67 9.9626 Tf 387.893 575.974 Td [(type)]TJ 0 g 0 G - [-525(psb_gather\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-190(call)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - [-525(psb_gather\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root\051)]TJ +/F59 9.9626 Tf -258.11 -20.421 Td [(afmt)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.902 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 26.012 0 Td [(the)-250(storage)-250(format)-250(for)-250(the)-250(sparse)-250(matrix.)]TJ -1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(array)-250(of)-250(characters.)-310(Defalt:)-310('CSR'.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -24.906 -20.42 Td [(upd)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.91 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 23.243 0 Td [(Pr)18(ovide)-250(for)-250(updates)-250(to)-250(the)-250(matrix)-250(coef)18(\002cients.)]TJ 1.663 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integer)74(,)-250(possible)-250(values:)]TJ/F67 9.9626 Tf 165.219 0 Td [(psb_upd_srch_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)]TJ/F67 9.9626 Tf 4.981 0 Td [(psb_upd_perm_)]TJ 0 g 0 G +/F59 9.9626 Tf -263.1 -20.421 Td [(mold)]TJ 0 g 0 G - 0 -20.909 Td [(loc)]TJ +/F62 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F67 9.9626 Tf 201.393 0 Td [(psb)]TJ ET q -1 0 0 1 164.583 350.626 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 393.323 371.449 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 167.571 350.427 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ +/F67 9.9626 Tf 396.461 371.249 Td [(T)]TJ ET q -1 0 0 1 371.853 350.626 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 402.319 371.449 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 375.135 350.427 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -204.73 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 405.457 371.249 Td [(base)]TJ ET q -1 0 0 1 436.673 302.805 cm +1 0 0 1 427.006 371.449 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 302.606 Td [(T)]TJ +/F67 9.9626 Tf 430.144 371.249 Td [(sparse)]TJ ET q -1 0 0 1 445.669 302.805 cm +1 0 0 1 462.154 371.449 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 302.606 Td [(vect)]TJ +/F67 9.9626 Tf 465.292 371.249 Td [(mat)]TJ/F62 9.9626 Tf 15.691 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -330.278 -22.289 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.421 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 470.356 302.805 cm +1 0 0 1 360.068 280.918 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 302.606 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(19)]TJ +/F67 9.9626 Tf 363.206 280.719 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 280.918 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 398.354 280.719 Td [(type)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.91 Td [(desc)]TJ +/F59 9.9626 Tf -268.57 -20.421 Td [(desc)]TJ ET q -1 0 0 1 171.218 269.941 cm +1 0 0 1 171.218 260.497 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 269.741 Td [(a)]TJ +/F59 9.9626 Tf 174.207 260.298 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F62 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.956 Td [(Speci\002ed)-290(as:)-389(a)-290(str)8(uctur)18(ed)-290(data)-289(of)-290(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 171.305 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 222.12 cm +1 0 0 1 363.235 212.677 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 221.921 Td [(desc)]TJ +/F67 9.9626 Tf 366.373 212.477 Td [(desc)]TJ ET q -1 0 0 1 384.755 222.12 cm +1 0 0 1 387.922 212.677 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 221.921 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F67 9.9626 Tf 391.06 212.477 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.91 Td [(root)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)-429(If)-290(the)-290(matrix)-289(was)]TJ -236.371 -11.955 Td [(allocated)-209(with)]TJ/F67 9.9626 Tf 64.153 0 Td [(bldmode=psb_matbld_remote_)]TJ/F62 9.9626 Tf 135.988 0 Td [(,)-217(then)-210(the)-209(descriptor)-209(will)-209(be)]TJ -200.141 -11.955 Td [(r)18(eassembled.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.252 0 Td [(The)-253(pr)18(ocess)-254(that)-253(holds)-253(the)-253(global)-254(copy)111(.)-319(If)]TJ/F52 9.9626 Tf 182.635 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F85 10.3811 Tf 19.983 0 Td [(=)]TJ/F83 10.3811 Tf 11.147 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-253(all)-254(t)1(he)-254(pr)18(ocesses)-253(will)]TJ -220.305 -11.955 Td [(have)-250(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F83 10.3811 Tf 142.42 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F83 10.3811 Tf 19.923 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)]TJ/F83 10.3811 Tf 43.89 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1.)]TJ +/F59 9.9626 Tf -24.906 -20.421 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -301.107 -20.909 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(66)]TJ + 141.968 -29.888 Td [(86)]TJ 0 g 0 G ET endstream endobj -1328 0 obj +1567 0 obj << -/Length 1417 +/Length 3146 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(glob)]TJ -ET -q -1 0 0 1 120.976 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.965 706.129 Td [(x)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(wher)18(e)-250(the)-250(local)-250(parts)-250(must)-250(be)-250(gather)18(ed.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)-250(with)-250(the)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 202.459 0 Td [(ALLOCATABLE)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(On)-226(entry)-227(to)-226(this)-227(r)18(outine)-226(the)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F67 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ 0 g 0 G -/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ + -62.017 -19.925 Td [(2.)]TJ 0 g 0 G -/F51 9.9626 Tf -287.39 -19.925 Td [(info)]TJ + [-500(The)-250(sparse)-250(matrix)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(update)-250(state;)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + 0 -19.925 Td [(3.)]TJ 0 g 0 G - 141.968 -500.124 Td [(67)]TJ + [-500(Duplicate)-421(entries)-422(ar)18(e)-421(detected)-421(and)-422(handled)-421(in)-421(both)-421(build)-422(and)-421(update)]TJ 12.453 -11.955 Td [(state,)-244(wit)1(h)-242(the)-242(exception)-242(of)-242(the)-242(err)18(or)-242(action)-242(that)-242(is)-242(only)-241(taken)-242(in)-242(the)-242(build)]TJ 0 -11.955 Td [(state,)-250(i.e.)-310(on)-250(the)-250(\002rst)-250(assembly;)]TJ +0 g 0 G + -12.453 -19.926 Td [(4.)]TJ +0 g 0 G + [-500(If)-190(the)-190(update)-190(choice)-190(is)]TJ/F67 9.9626 Tf 108.372 0 Td [(psb_upd_perm_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)-202(then)-190(subsequent)-190(calls)-190(to)]TJ/F67 9.9626 Tf 109.946 0 Td [(psb_spins)]TJ/F62 9.9626 Tf -273.86 -11.955 Td [(to)-309(update)-309(the)-308(matrix)-309(must)-309(be)-309(arranged)-309(in)-308(such)-309(a)-309(way)-309(as)-309(to)-308(pr)18(oduce)-309(ex-)]TJ 0 -11.955 Td [(actly)-319(the)-320(same)-319(sequence)-320(of)-319(coef)18(\002cient)-319(values)-320(as)-319(encounter)18(ed)-319(at)-320(the)-319(\002rst)]TJ 0 -11.955 Td [(assembly;)]TJ +0 g 0 G + -12.453 -19.926 Td [(5.)]TJ +0 g 0 G + [-500(The)-250(output)-250(storage)-250(format)-250(need)-250(not)-250(be)-250(the)-250(same)-250(on)-250(all)-250(pr)18(ocesses;)]TJ +0 g 0 G + 0 -19.925 Td [(6.)]TJ +0 g 0 G + [-500(On)-249(exit)-249(fr)18(om)-250(this)-249(r)18(outine)-249(the)-249(matrix)-249(is)-250(in)-249(the)-249(assembled)-249(state,)-250(and)-249(thus)-249(is)]TJ 12.453 -11.955 Td [(suitable)-250(for)-250(the)-250(computational)-250(r)18(outines;)]TJ +0 g 0 G + -12.453 -19.925 Td [(7.)]TJ +0 g 0 G + [-500(If)-431(the)]TJ/F67 9.9626 Tf 41.543 0 Td [(bldmode=psb_matbld_remote_)]TJ/F62 9.9626 Tf 140.288 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-272(the)-273(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.956 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-288(element)-288(applications,)-297(with)]TJ/F67 9.9626 Tf 145.88 0 Td [(dupl=psb_dupl_add_)]TJ/F62 9.9626 Tf 94.147 0 Td [(;)-307(it)-288(is)-287(necessary)-288(to)]TJ -240.027 -11.955 Td [(check)-236(for)-235(possible)-236(updates)-236(needed)-235(in)-236(the)-236(descriptor)74(,)-238(hence)-236(ther)18(e)-236(will)-235(be)-236(a)]TJ 0 -11.955 Td [(r)8(untime)-250(over)18(head.)]TJ +0 g 0 G + 141.968 -332.752 Td [(87)]TJ 0 g 0 G ET endstream endobj -1335 0 obj +1580 0 obj << -/Length 7178 +/Length 2987 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.4)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.10)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ/F54 9.9626 Tf -51.12 -20.363 Td [(These)-223(subr)18(outines)-223(scatters)-224(the)-223(portions)-223(of)-224(global)-223(dense)-223(matrix)-223(owned)-224(by)-223(a)-223(pr)18(o-)]TJ 0 -11.955 Td [(cess)-250(to)-250(all)-250(the)-250(pr)18(ocesses)-250(in)-250(the)-250(pr)18(ocesses)-250(grid.)]TJ/F52 9.9626 Tf 119.478 -26.893 Td [(l)-55(o)-35(c)]TJ -ET -q -1 0 0 1 283.05 647.117 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 286.333 646.918 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 5.642 1.96 Td [(\040)]TJ/F52 9.9626 Tf 13.398 0 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ/F85 10.3811 Tf 28.632 0 Td [(\050)]TJ/F52 9.9626 Tf 4.493 0 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 362.3 647.117 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 365.583 646.918 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\051)]TJ/F54 9.9626 Tf -220.207 -23.362 Td [(wher)18(e:)]TJ -0 g 0 G -/F52 9.9626 Tf 0.344 -22.091 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 169.703 601.664 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 172.986 601.465 Td [(x)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(spfree)-250(\227)-250(Frees)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 87.515 0 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 289.343 601.664 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 292.626 601.465 Td [(x)]TJ/F54 7.5716 Tf 5.105 -1.858 Td [(1)-13(:)]TJ/F52 7.5716 Tf 5.963 0 Td [(m)]TJ/F54 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F52 7.5716 Tf 7.856 0 Td [(n)]TJ 0 g 0 G -/F52 9.9626 Tf -166.706 -20.955 Td [(l)-55(o)-35(c)]TJ -ET -q -1 0 0 1 163.696 578.851 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 166.979 578.652 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_spfree\050a,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 234.034 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F52 9.9626 Tf -266.027 -22.813 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 33.489 0 Td [(is)-250(the)-250(scatter)-250(function.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -19.925 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.137 0 Td [(psb)]TJ ET q -1 0 0 1 230.392 532.667 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.663 524.099 Td [(x)]TJ/F52 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F54 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(y)]TJ/F51 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 520.313 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 360.068 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 236.369 511.745 Td [(Integer)-8983(psb)]TJ +/F67 9.9626 Tf 363.206 577.576 Td [(Tspmat)]TJ ET q -1 0 0 1 373.603 511.945 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 395.216 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 511.745 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F67 9.9626 Tf 398.354 577.576 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ ET q -1 0 0 1 373.603 499.989 cm +1 0 0 1 171.218 557.85 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 499.79 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F59 9.9626 Tf 174.207 557.651 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 488.034 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 487.835 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F67 9.9626 Tf 363.206 509.83 Td [(desc)]TJ ET q -1 0 0 1 373.603 476.079 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 384.755 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 475.88 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F67 9.9626 Tf 387.893 509.83 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -329.728 Td [(88)]TJ +0 g 0 G ET -q -1 0 0 1 373.603 464.124 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1586 0 obj +<< +/Length 3858 +>> +stream +0 g 0 G +0 g 0 G BT -/F54 9.9626 Tf 376.592 463.925 Td [(scatter)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.11)-1000(psb)]TJ ET q -1 0 0 1 230.392 460.139 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 278.277 431.76 Td [(T)92(able)-250(20:)-310(Data)-250(types)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(sprn)-254(\227)-255(Reinit)-254(sparse)-255(matrix)-254(structure)-254(for)-255(psblas)-254(rou-)]TJ -24.221 -13.948 Td [(tines.)]TJ 0 g 0 G 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -112.628 -28.004 Td [(call)]TJ +/F67 9.9626 Tf -32.877 -18.964 Td [(call)-525(psb_sprn\050a,)-525(decsc_a,)-525(info,)-525(clear\051)]TJ 0 g 0 G - [-525(psb_scatter\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root,)-525(mold\051)]TJ +/F59 9.9626 Tf 0 -21.917 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -24.806 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -29.828 -19.926 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.813 Td [(On)-250(Entry)]TJ 0 g 0 G + 0 -19.925 Td [(a)]TJ 0 g 0 G - 0 -22.813 Td [(glob)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(r)18(einitialized.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 171.786 333.523 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 309.258 563.828 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 174.774 333.324 Td [(x)]TJ +/F67 9.9626 Tf 312.397 563.628 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 563.828 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 347.544 563.628 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(that)-250(must)-250(be)-250(scatter)18(ed)-250(into)-250(local)-250(pieces.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)111(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -22.814 Td [(desc)]TJ +/F59 9.9626 Tf -268.571 -19.925 Td [(desc)]TJ ET q -1 0 0 1 171.218 262.89 cm +1 0 0 1 120.408 543.902 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 262.69 Td [(a)]TJ +/F59 9.9626 Tf 123.397 543.703 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 215.069 cm +1 0 0 1 309.258 496.082 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 214.87 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 495.882 Td [(desc)]TJ ET q -1 0 0 1 384.755 215.069 cm +1 0 0 1 333.945 496.082 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 214.87 Td [(type)]TJ +/F67 9.9626 Tf 337.084 495.882 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -22.813 Td [(root)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(clear)]TJ 0 g 0 G -/F54 9.9626 Tf 23.252 0 Td [(The)-218(pr)18(ocess)-218(that)-218(holds)-219(t)1(he)-219(global)-218(copy)111(.)-299(If)]TJ/F52 9.9626 Tf 179.982 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F85 10.3811 Tf 19.922 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-218(all)-218(the)-218(pr)18(ocesses)-219(have)]TJ -217.53 -11.956 Td [(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-258(as:)-327(an)-258(integer)-259(variable)]TJ/F83 10.3811 Tf 142.917 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 10.3811 Tf 8.027 0 Td [(\024)]TJ/F52 9.9626 Tf 11.139 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F83 10.3811 Tf 20.077 0 Td [(\024)]TJ/F52 9.9626 Tf 11.239 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.534 0 Td [(\000)]TJ/F54 9.9626 Tf 10.162 0 Td [(1,)-260(default)]TJ/F59 9.9626 Tf 43.952 0 Td [(psb_root_)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)]TJ -316.314 -11.955 Td [(i.e.)-310(pr)18(ocess)-250(0.)]TJ +/F62 9.9626 Tf 26.561 0 Td [(Choose)-250(whether)-250(to)-250(zer)18(o)-250(out)-250(matrix)-250(coef)18(\002cients)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)-310(tr)8(ue.)]TJ 0 g 0 G - 141.968 -29.888 Td [(68)]TJ +/F59 9.9626 Tf -24.907 -21.917 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(update)-250(state.)]TJ +0 g 0 G + 154.421 -206.192 Td [(89)]TJ 0 g 0 G ET endstream endobj -1342 0 obj +1593 0 obj << -/Length 3984 +/Length 6166 >> stream 0 g 0 G 0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(mold)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F59 9.9626 Tf 199.086 0 Td [(psb)]TJ -ET -q -1 0 0 1 340.207 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 343.345 658.308 Td [(T)]TJ -ET -q -1 0 0 1 349.203 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 352.341 658.308 Td [(base)]TJ -ET -q -1 0 0 1 373.89 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 377.028 658.308 Td [(vect)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.12)-1000(psb)]TJ ET q -1 0 0 1 398.577 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 401.716 658.308 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)-250(loc)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geall\050x,)-525(desc_a,)-525(info[,)-525(dupl,)-525(bldmode,)-525(n,)-525(lb]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -19.627 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.01 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.009 Td [(desc)]TJ ET q -1 0 0 1 234.988 646.552 cm +1 0 0 1 171.218 629.719 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 237.976 646.353 Td [(x)-250(is)-250(of)-250(type)]TJ +/F59 9.9626 Tf 174.207 629.519 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 50.53 0 Td [(psb)]TJ -ET -q -1 0 0 1 304.825 646.552 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 307.963 646.353 Td [(T)]TJ +/F67 9.9626 Tf 136.328 0 Td [(psb)]TJ ET q -1 0 0 1 313.821 646.552 cm +1 0 0 1 328.257 581.898 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 316.959 646.353 Td [(vect)]TJ +/F67 9.9626 Tf 331.395 581.699 Td [(desc)]TJ ET q -1 0 0 1 338.508 646.552 cm +1 0 0 1 352.944 581.898 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 341.646 646.353 Td [(type)]TJ +/F67 9.9626 Tf 356.083 581.699 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -262.673 -19.925 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -226.299 -30.965 Td [(n)]TJ 0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(the)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F60 9.9626 Tf 295.578 0 Td [(x)]TJ/F62 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.956 Td [(rank-1)-250(array)111(.)]TJ 0 g 0 G - 0 -19.926 Td [(loc)]TJ -ET -q -1 0 0 1 113.773 606.702 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 116.762 606.502 Td [(x)]TJ +/F59 9.9626 Tf -24.906 -19.009 Td [(lb)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 321.043 606.702 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 324.326 606.502 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -204.729 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-214(as:)-292(a)-215(rank)-214(one)-214(or)-214(two)-215(ALLOCA)74(T)74(ABLE)-214(array)-214(or)-214(an)-215(object)-214(of)-214(type)]TJ +/F62 9.9626 Tf 14.386 0 Td [(The)-237(lower)-238(bound)-237(for)-238(the)-237(column)-238(index)-237(range)-237(of)-238(the)-237(dense)-238(matrix)-237(to)-238(be)-237(allo-)]TJ 10.52 -11.955 Td [(cated.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F60 9.9626 Tf 295.578 0 Td [(x)]TJ/F62 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.955 Td [(rank-1)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.009 Td [(dupl)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F67 9.9626 Tf 164.942 0 Td [(psb_dupl_ovwrt_)]TJ/F62 9.9626 Tf 78.455 0 Td [(,)]TJ/F67 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F62 9.9626 Tf 67.994 0 Td [(;)]TJ/F67 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F62 9.9626 Tf 70.485 0 Td [(has)-250(no)-250(ef)18(fect.)]TJ +0 g 0 G +/F59 9.9626 Tf -95.391 -19.009 Td [(bldmode)]TJ +0 g 0 G +/F62 9.9626 Tf 45.937 0 Td [(Whether)-372(to)-372(keep)-371(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F67 9.9626 Tf 128.288 0 Td [(psb_matbld_noremote_)]TJ/F62 9.9626 Tf 104.607 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 94.146 0 Td [(.)]TJ -331.544 -11.955 Td [(Default:)]TJ/F67 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F62 9.9626 Tf 104.607 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -168.028 -19.627 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.009 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-273(as:)-357(a)-273(rank)-274(one)-273(or)-274(two)-273(array)-273(with)-274(the)-273(ALLOCA)74(T)74(ABLE)-273(attribute)]TJ 0 -11.955 Td [(or)-250(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 0 -11.955 Td [(psb)]TJ +/F67 9.9626 Tf 86.634 0 Td [(psb)]TJ ET q -1 0 0 1 141.121 546.926 cm +1 0 0 1 278.564 132.48 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 144.259 546.727 Td [(T)]TJ +/F67 9.9626 Tf 281.702 132.281 Td [(T)]TJ ET q -1 0 0 1 150.117 546.926 cm +1 0 0 1 287.56 132.48 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 153.255 546.727 Td [(vect)]TJ +/F67 9.9626 Tf 290.699 132.281 Td [(vect)]TJ ET q -1 0 0 1 174.804 546.926 cm +1 0 0 1 312.247 132.48 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 177.942 546.727 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 23.412 0 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(20)]TJ +/F67 9.9626 Tf 315.386 132.281 Td [(type)]TJ 0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -101.459 -19.926 Td [(info)]TJ +/F62 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -388.543 Td [(69)]TJ + -18.728 -41.843 Td [(90)]TJ 0 g 0 G ET endstream endobj -1346 0 obj +1597 0 obj << -/Length 6319 +/Length 925 >> stream 0 g 0 G 0 g 0 G -BT -/F51 14.3462 Tf 150.705 706.042 Td [(6)-1000(Data)-250(management)-250(routines)]TJ/F51 11.9552 Tf 0 -24.694 Td [(6.1)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 681.547 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 681.348 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,parts=parts\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vg=vg,[mg=mg,flag=flag]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,nl=nl\051)]TJ 0 -11.956 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,repl=.true.\051)]TJ/F54 9.9626 Tf 14.944 -19.771 Td [(This)-377(subr)18(outine)-378(initializes)-377(the)-378(communication)-377(descriptor)-378(associated)-377(with)]TJ -14.944 -11.956 Td [(an)-271(index)-271(space.)-373(One)-272(o)1(f)-272(the)-271(optional)-271(ar)18(guments)]TJ/F59 9.9626 Tf 209.77 0 Td [(parts)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(nl)]TJ/F54 9.9626 Tf 13.161 0 Td [(or)]TJ/F59 9.9626 Tf 12.076 0 Td [(repl)]TJ/F54 9.9626 Tf 23.622 0 Td [(must)]TJ -321.435 -11.955 Td [(be)-250(speci\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(speci\002c)-250(initialization)-250(strategy)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -18.208 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.067 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.067 Td [(icontxt)]TJ -0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.066 Td [(vg)]TJ -0 g 0 G -/F54 9.9626 Tf 16.06 0 Td [(Data)-250(allocation:)-310(each)-250(index)]TJ/F52 9.9626 Tf 121.707 0 Td [(i)]TJ/F83 10.3811 Tf 5.856 0 Td [(2)-290(f)]TJ/F54 9.9626 Tf 15.245 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F83 10.3811 Tf 13.449 0 Td [(g)]TJ/F54 9.9626 Tf 7.806 0 Td [(is)-250(allocated)-250(to)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 98.454 0 Td [(v)-47(g)]TJ/F85 10.3811 Tf 10.68 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ -295.759 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td [(\003ag)]TJ -0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Speci\002es)-250(whether)-250(entries)-250(in)]TJ/F52 9.9626 Tf 123.401 0 Td [(v)-47(g)]TJ/F54 9.9626 Tf 13.046 0 Td [(ar)18(e)-250(zer)18(o-)-250(or)-250(one-based.)]TJ -133.129 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0,)-167(1,)-250(default)-250(0.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td [(mg)]TJ 0 g 0 G -/F54 9.9626 Tf 19.377 0 Td [(the)-250(\050global\051)-250(number)-250(of)-250(r)18(ows)-250(of)-250(the)-250(pr)18(oblem.)]TJ 5.53 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-262(as:)-335(an)-263(integer)-262(value.)-348(It)-262(is)-262(r)18(equir)18(ed)-263(if)]TJ/F59 9.9626 Tf 203.091 0 Td [(parts)]TJ/F54 9.9626 Tf 28.766 0 Td [(or)]TJ/F59 9.9626 Tf 11.99 0 Td [(repl)]TJ/F54 9.9626 Tf 23.536 0 Td [(is)-262(speci\002ed,)]TJ -267.383 -11.955 Td [(it)-250(is)-250(optional)-250(if)]TJ/F59 9.9626 Tf 66.141 0 Td [(vg)]TJ/F54 9.9626 Tf 12.951 0 Td [(is)-250(speci\002ed.)]TJ -0 g 0 G -/F51 9.9626 Tf -103.999 -19.067 Td [(parts)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(the)-250(subr)18(outine)-250(that)-250(de\002nes)-250(the)-250(partitioning)-250(scheme.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(subr)18(outine.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td [(vl)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Data)-293(allocation:)-395(the)-293(set)-292(of)-293(global)-293(i)1(ndices)]TJ/F52 9.9626 Tf 181.166 0 Td [(v)-25(l)]TJ/F85 10.3811 Tf 8.548 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-369(:)]TJ/F52 9.9626 Tf 14.955 0 Td [(n)-25(l)]TJ/F85 10.3811 Tf 9.105 0 Td [(\051)]TJ/F54 9.9626 Tf 7.065 0 Td [(belonging)-293(to)-292(the)-293(calling)]TJ -213.919 -11.955 Td [(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ + [-500(Using)]TJ/F67 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 97.28 0 Td [(is)-314(likely)-315(to)-315(cau)1(se)-315(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ 0 g 0 G - 141.967 -29.887 Td [(70)]TJ + 141.968 -514.072 Td [(91)]TJ 0 g 0 G ET endstream endobj -1233 0 obj +1492 0 obj << /Type /ObjStm /N 100 -/First 996 -/Length 12673 ->> -stream -280 0 1231 57 1227 115 1235 248 1225 396 1226 540 1237 687 1234 746 1244 840 1238 1015 -1239 1157 1240 1302 1241 1449 1242 1593 1246 1740 284 1798 1247 1855 1243 1913 1250 2046 1248 2185 -1252 2333 288 2392 1249 2450 1258 2531 1253 2688 1254 2832 1255 2979 1260 3126 292 3184 1261 3241 -1257 3299 1265 3433 1269 3581 1270 3708 1271 3751 1272 3958 1273 4196 1274 4472 1256 4708 1263 4855 -1267 5002 1268 5061 1264 5120 1278 5256 1280 5374 1277 5432 1286 5500 1282 5657 1283 5801 1284 5946 -1288 6093 296 6152 1289 6210 1285 6269 1295 6402 1290 6559 1292 6705 1293 6851 1297 6996 1298 7054 -1299 7112 1300 7170 1294 7228 1303 7335 1305 7453 1302 7512 1307 7580 1310 7698 1311 7825 1312 7868 -1313 8075 1314 8313 1315 8589 1309 8825 1301 8883 1306 8941 1322 9038 1318 9195 1319 9336 1320 9483 -1324 9630 300 9689 1325 9747 1321 9806 1327 9939 1329 10057 1326 10115 1334 10209 1331 10348 1336 10495 -304 10554 1337 10612 1333 10671 1341 10804 1332 10961 1338 11104 1339 11247 1343 11394 1340 11452 1345 11559 -% 280 0 obj -<< -/D [1228 0 R /XYZ 99.895 716.092 null] ->> -% 1231 0 obj -<< -/D [1228 0 R /XYZ 99.895 560.161 null] +/First 979 +/Length 10305 >> -% 1227 0 obj +stream +1487 0 1496 94 1493 242 1494 387 1498 534 324 592 1495 649 1501 743 1499 882 1503 1027 +328 1086 1500 1144 1507 1238 1504 1386 1505 1531 1509 1678 332 1736 1506 1793 1512 1913 1514 2031 +1515 2090 1516 2149 1511 2208 1520 2289 1517 2437 1518 2584 1522 2729 336 2787 1523 2844 1519 2902 +1525 2996 1527 3114 1528 3173 1529 3232 1530 3291 1524 3350 1533 3444 1535 3562 340 3620 1532 3677 +1539 3797 1531 3954 1536 4097 1537 4242 1541 4385 1542 4444 1543 4502 1544 4561 1545 4620 1546 4679 +1538 4738 1548 4858 1550 4976 1551 5034 1552 5092 1553 5150 1554 5208 1555 5266 1556 5324 1547 5382 +1562 5502 1558 5659 1559 5806 1560 5951 1564 6097 344 6156 1561 6214 1566 6308 1568 6426 1569 6484 +1570 6542 1571 6600 1572 6658 1573 6716 1574 6774 1575 6832 1565 6890 1579 6984 1576 7132 1577 7275 +1581 7422 348 7481 1578 7539 1585 7633 1582 7781 1583 7926 1587 8073 352 8131 1588 8188 1584 8246 +1592 8340 1589 8488 1590 8631 1594 8775 356 8834 1591 8892 1596 8999 1598 9117 1599 9175 1595 9232 +% 1487 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1235 0 obj +% 1496 0 obj << /Type /Page -/Contents 1236 0 R -/Resources 1234 0 R +/Contents 1497 0 R +/Resources 1495 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1225 0 R 1226 0 R ] +/Parent 1471 0 R +/Annots [ 1493 0 R 1494 0 R ] >> -% 1225 0 obj +% 1493 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [314.141 702.323 390.195 714.383] -/A << /S /GoTo /D (vdata) >> +/Rect [291.943 573.77 359.001 585.83] +/A << /S /GoTo /D (descdata) >> >> -% 1226 0 obj +% 1494 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [293.677 690.368 305.632 702.428] -/A << /S /GoTo /D (table.14) >> +/Rect [291.943 484.107 359.001 496.166] +/A << /S /GoTo /D (descdata) >> >> -% 1237 0 obj +% 1498 0 obj << -/D [1235 0 R /XYZ 149.705 753.953 null] +/D [1496 0 R /XYZ 98.895 753.953 null] >> -% 1234 0 obj +% 324 0 obj << -/Font << /F54 586 0 R /F59 812 0 R /F51 584 0 R >> +/D [1496 0 R /XYZ 99.895 716.092 null] +>> +% 1495 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1244 0 obj +% 1501 0 obj << /Type /Page -/Contents 1245 0 R -/Resources 1243 0 R +/Contents 1502 0 R +/Resources 1500 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1238 0 R 1239 0 R 1240 0 R 1241 0 R 1242 0 R ] ->> -% 1238 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 430.55 339.385 442.61] -/A << /S /GoTo /D (vdata) >> ->> -% 1239 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 418.595 231.53 430.655] -/A << /S /GoTo /D (table.2) >> +/Parent 1471 0 R +/Annots [ 1499 0 R ] >> -% 1240 0 obj +% 1499 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 352.283 323.106 364.342] +/Rect [342.753 573.77 409.811 585.83] /A << /S /GoTo /D (descdata) >> >> -% 1241 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 201.166 339.385 213.226] -/A << /S /GoTo /D (vdata) >> ->> -% 1242 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [242.868 189.211 254.823 201.271] -/A << /S /GoTo /D (table.16) >> ->> -% 1246 0 obj -<< -/D [1244 0 R /XYZ 98.895 753.953 null] ->> -% 284 0 obj +% 1503 0 obj << -/D [1244 0 R /XYZ 99.895 716.092 null] +/D [1501 0 R /XYZ 149.705 753.953 null] >> -% 1247 0 obj +% 328 0 obj << -/D [1244 0 R /XYZ 99.895 566.828 null] +/D [1501 0 R /XYZ 150.705 716.092 null] >> -% 1243 0 obj +% 1500 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1250 0 obj +% 1507 0 obj << /Type /Page -/Contents 1251 0 R -/Resources 1249 0 R +/Contents 1508 0 R +/Resources 1506 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1248 0 R ] +/Parent 1510 0 R +/Annots [ 1504 0 R 1505 0 R ] >> -% 1248 0 obj +% 1504 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.029 655.624 385.003 667.684] -/A << /S /GoTo /D (section.6) >> +/Rect [291.943 452.321 369.462 464.381] +/A << /S /GoTo /D (spdata) >> +>> +% 1505 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 209.193 359.001 221.252] +/A << /S /GoTo /D (descdata) >> >> -% 1252 0 obj +% 1509 0 obj << -/D [1250 0 R /XYZ 149.705 753.953 null] +/D [1507 0 R /XYZ 98.895 753.953 null] >> -% 288 0 obj +% 332 0 obj << -/D [1250 0 R /XYZ 150.705 716.092 null] +/D [1507 0 R /XYZ 99.895 716.092 null] >> -% 1249 0 obj +% 1506 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1258 0 obj +% 1512 0 obj << /Type /Page -/Contents 1259 0 R -/Resources 1257 0 R +/Contents 1513 0 R +/Resources 1511 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1253 0 R 1254 0 R 1255 0 R ] +/Parent 1510 0 R >> -% 1253 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 344.818 444.603 356.877] -/A << /S /GoTo /D (vdata) >> ->> -% 1254 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 332.863 338.608 344.922] -/A << /S /GoTo /D (table.17) >> ->> -% 1255 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 264.733 359.001 276.793] -/A << /S /GoTo /D (descdata) >> ->> -% 1260 0 obj +% 1514 0 obj << -/D [1258 0 R /XYZ 98.895 753.953 null] +/D [1512 0 R /XYZ 149.705 753.953 null] >> -% 292 0 obj +% 1515 0 obj << -/D [1258 0 R /XYZ 99.895 716.092 null] +/D [1512 0 R /XYZ 150.705 716.092 null] >> -% 1261 0 obj +% 1516 0 obj << -/D [1258 0 R /XYZ 99.895 513.636 null] +/D [1512 0 R /XYZ 150.705 663.469 null] >> -% 1257 0 obj +% 1511 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F59 812 0 R >> +/Font << /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1265 0 obj +% 1520 0 obj << /Type /Page -/Contents 1266 0 R -/Resources 1264 0 R +/Contents 1521 0 R +/Resources 1519 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1256 0 R 1263 0 R ] +/Parent 1510 0 R +/Annots [ 1517 0 R 1518 0 R ] >> -% 1269 0 obj +% 1517 0 obj << -/Producer (GPL Ghostscript 9.22) -/CreationDate (D:20180323100645Z00'00') -/ModDate (D:20180323100645Z00'00') +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 571.679 359.001 583.739] +/A << /S /GoTo /D (descdata) >> >> -% 1270 0 obj +% 1518 0 obj << -/Type /ExtGState -/OPM 1 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 227.887 369.462 239.946] +/A << /S /GoTo /D (spdata) >> >> -% 1271 0 obj +% 1522 0 obj << -/BaseFont /XYUGDR+Times-Roman -/FontDescriptor 1273 0 R -/Type /Font -/FirstChar 48 -/LastChar 57 -/Widths [ 500 500 500 500 500 500 500 500 500 500] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/D [1520 0 R /XYZ 98.895 753.953 null] >> -% 1272 0 obj +% 336 0 obj << -/BaseFont /XISTAL+Times-Bold -/FontDescriptor 1274 0 R -/Type /Font -/FirstChar 48 -/LastChar 80 -/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/D [1520 0 R /XYZ 99.895 716.092 null] >> -% 1273 0 obj +% 1523 0 obj << -/Type /FontDescriptor -/FontName /XYUGDR+Times-Roman -/FontBBox [ 0 -14 476 688] -/Flags 65568 -/Ascent 688 -/CapHeight 688 -/Descent -14 -/ItalicAngle 0 -/StemV 71 -/MissingWidth 250 -/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) -/FontFile3 1275 0 R +/D [1520 0 R /XYZ 99.895 136.374 null] >> -% 1274 0 obj +% 1519 0 obj << -/Type /FontDescriptor -/FontName /XISTAL+Times-Bold -/FontBBox [ 0 -13 600 688] -/Flags 65568 -/Ascent 688 -/CapHeight 676 -/Descent -13 -/ItalicAngle 0 -/StemV 90 -/MissingWidth 250 -/CharSet (/P/one/zero) -/FontFile3 1276 0 R +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1256 0 obj +% 1525 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [231.023 625.272 242.978 634.682] -/A << /S /GoTo /D (table.17) >> +/Type /Page +/Contents 1526 0 R +/Resources 1524 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1510 0 R >> -% 1263 0 obj +% 1527 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [458.157 276.439 465.131 290.202] -/A << /S /GoTo /D (figure.3) >> +/D [1525 0 R /XYZ 149.705 753.953 null] >> -% 1267 0 obj +% 1528 0 obj << -/D [1265 0 R /XYZ 149.705 753.953 null] +/D [1525 0 R /XYZ 150.705 716.092 null] >> -% 1268 0 obj +% 1529 0 obj << -/D [1265 0 R /XYZ 150.705 326.444 null] +/D [1525 0 R /XYZ 150.705 699.334 null] >> -% 1264 0 obj +% 1530 0 obj +<< +/D [1525 0 R /XYZ 150.705 644.819 null] +>> +% 1524 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> -/XObject << /Im4 1262 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1278 0 obj +% 1533 0 obj << /Type /Page -/Contents 1279 0 R -/Resources 1277 0 R +/Contents 1534 0 R +/Resources 1532 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Parent 1510 0 R >> -% 1280 0 obj +% 1535 0 obj << -/D [1278 0 R /XYZ 98.895 753.953 null] +/D [1533 0 R /XYZ 98.895 753.953 null] >> -% 1277 0 obj +% 340 0 obj +<< +/D [1533 0 R /XYZ 99.895 716.092 null] +>> +% 1532 0 obj << -/Font << /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1286 0 obj +% 1539 0 obj << /Type /Page -/Contents 1287 0 R -/Resources 1285 0 R +/Contents 1540 0 R +/Resources 1538 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1282 0 R 1283 0 R 1284 0 R ] +/Parent 1510 0 R +/Annots [ 1531 0 R 1536 0 R 1537 0 R ] >> -% 1282 0 obj +% 1531 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 345.485 495.412 357.545] -/A << /S /GoTo /D (vdata) >> +/Rect [310.942 654.503 378 666.562] +/A << /S /GoTo /D (descdata) >> >> -% 1283 0 obj +% 1536 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 333.53 389.417 345.59] -/A << /S /GoTo /D (table.18) >> +/Rect [342.753 480.963 420.271 493.022] +/A << /S /GoTo /D (spdata) >> >> -% 1284 0 obj +% 1537 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 266.146 409.811 278.205] +/Rect [310.942 411.699 378 423.758] /A << /S /GoTo /D (descdata) >> >> -% 1288 0 obj +% 1541 0 obj << -/D [1286 0 R /XYZ 149.705 753.953 null] +/D [1539 0 R /XYZ 149.705 753.953 null] >> -% 296 0 obj +% 1542 0 obj << -/D [1286 0 R /XYZ 150.705 716.092 null] +/D [1539 0 R /XYZ 150.705 306.27 null] >> -% 1289 0 obj +% 1543 0 obj << -/D [1286 0 R /XYZ 150.705 510.975 null] +/D [1539 0 R /XYZ 150.705 272.927 null] >> -% 1285 0 obj +% 1544 0 obj +<< +/D [1539 0 R /XYZ 150.705 236.878 null] +>> +% 1545 0 obj +<< +/D [1539 0 R /XYZ 150.705 167.614 null] +>> +% 1546 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R /F85 814 0 R >> +/D [1539 0 R /XYZ 150.705 146.171 null] +>> +% 1538 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1295 0 obj +% 1548 0 obj << /Type /Page -/Contents 1296 0 R -/Resources 1294 0 R +/Contents 1549 0 R +/Resources 1547 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1290 0 R 1292 0 R 1293 0 R ] +/Parent 1557 0 R >> -% 1290 0 obj +% 1550 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [202.52 554.876 214.475 566.936] -/A << /S /GoTo /D (table.18) >> +/D [1548 0 R /XYZ 98.895 753.953 null] >> -% 1292 0 obj +% 1551 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [407.408 325.46 414.381 339.127] -/A << /S /GoTo /D (figure.4) >> +/D [1548 0 R /XYZ 99.895 716.092 null] >> -% 1293 0 obj +% 1552 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [309.226 301.825 316.2 313.885] -/A << /S /GoTo /D (figure.3) >> +/D [1548 0 R /XYZ 99.895 651.514 null] >> -% 1297 0 obj +% 1553 0 obj << -/D [1295 0 R /XYZ 98.895 753.953 null] +/D [1548 0 R /XYZ 99.895 607.678 null] >> -% 1298 0 obj +% 1554 0 obj << -/D [1295 0 R /XYZ 99.895 464.818 null] +/D [1548 0 R /XYZ 99.895 575.798 null] >> -% 1299 0 obj +% 1555 0 obj << -/D [1295 0 R /XYZ 99.895 430.343 null] +/D [1548 0 R /XYZ 99.895 520.007 null] >> -% 1300 0 obj +% 1556 0 obj << -/D [1295 0 R /XYZ 99.895 386.508 null] +/D [1548 0 R /XYZ 99.895 476.171 null] >> -% 1294 0 obj +% 1547 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1303 0 obj +% 1562 0 obj << /Type /Page -/Contents 1304 0 R -/Resources 1302 0 R +/Contents 1563 0 R +/Resources 1561 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Parent 1557 0 R +/Annots [ 1558 0 R 1559 0 R 1560 0 R ] >> -% 1305 0 obj +% 1558 0 obj << -/D [1303 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 572.168 409.811 584.228] +/A << /S /GoTo /D (descdata) >> >> -% 1302 0 obj +% 1559 0 obj << -/Font << /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 276.913 420.271 288.973] +/A << /S /GoTo /D (spdata) >> >> -% 1307 0 obj +% 1560 0 obj << -/Type /Page -/Contents 1308 0 R -/Resources 1306 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [345.92 208.672 412.978 220.731] +/A << /S /GoTo /D (descdata) >> >> -% 1310 0 obj +% 1564 0 obj << -/Producer (GPL Ghostscript 9.22) -/CreationDate (D:20180323100658Z00'00') -/ModDate (D:20180323100658Z00'00') +/D [1562 0 R /XYZ 149.705 753.953 null] >> -% 1311 0 obj +% 344 0 obj << -/Type /ExtGState -/OPM 1 +/D [1562 0 R /XYZ 150.705 716.092 null] >> -% 1312 0 obj +% 1561 0 obj << -/BaseFont /XYUGDR+Times-Roman -/FontDescriptor 1314 0 R -/Type /Font -/FirstChar 48 -/LastChar 57 -/Widths [ 500 500 500 500 500 500 500 500 500 500] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1313 0 obj +% 1566 0 obj << -/BaseFont /XISTAL+Times-Bold -/FontDescriptor 1315 0 R -/Type /Font -/FirstChar 48 -/LastChar 80 -/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/Type /Page +/Contents 1567 0 R +/Resources 1565 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1557 0 R >> -% 1314 0 obj +% 1568 0 obj << -/Type /FontDescriptor -/FontName /XYUGDR+Times-Roman -/FontBBox [ 0 -14 476 688] -/Flags 65568 -/Ascent 688 -/CapHeight 688 -/Descent -14 -/ItalicAngle 0 -/StemV 71 -/MissingWidth 250 -/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) -/FontFile3 1316 0 R +/D [1566 0 R /XYZ 98.895 753.953 null] >> -% 1315 0 obj +% 1569 0 obj << -/Type /FontDescriptor -/FontName /XISTAL+Times-Bold -/FontBBox [ 0 -13 600 688] -/Flags 65568 -/Ascent 688 -/CapHeight 676 -/Descent -13 -/ItalicAngle 0 -/StemV 90 -/MissingWidth 250 -/CharSet (/P/one/zero) -/FontFile3 1317 0 R +/D [1566 0 R /XYZ 99.895 701.929 null] +>> +% 1570 0 obj +<< +/D [1566 0 R /XYZ 99.895 667.454 null] +>> +% 1571 0 obj +<< +/D [1566 0 R /XYZ 99.895 647.529 null] +>> +% 1572 0 obj +<< +/D [1566 0 R /XYZ 99.895 603.693 null] >> -% 1309 0 obj +% 1573 0 obj << -/D [1307 0 R /XYZ 98.895 753.953 null] +/D [1566 0 R /XYZ 99.895 547.902 null] >> -% 1301 0 obj +% 1574 0 obj << -/D [1307 0 R /XYZ 99.895 282.918 null] +/D [1566 0 R /XYZ 99.895 527.977 null] >> -% 1306 0 obj +% 1575 0 obj << -/Font << /F54 586 0 R >> -/XObject << /Im5 1291 0 R >> +/D [1566 0 R /XYZ 99.895 496.097 null] +>> +% 1565 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1322 0 obj +% 1579 0 obj << /Type /Page -/Contents 1323 0 R -/Resources 1321 0 R +/Contents 1580 0 R +/Resources 1578 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1318 0 R 1319 0 R 1320 0 R ] ->> -% 1318 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 298.8 495.412 310.86] -/A << /S /GoTo /D (vdata) >> +/Parent 1557 0 R +/Annots [ 1576 0 R 1577 0 R ] >> -% 1319 0 obj +% 1576 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [255.331 289.495 267.287 298.905] -/A << /S /GoTo /D (table.19) >> +/Rect [342.753 573.77 420.271 585.83] +/A << /S /GoTo /D (spdata) >> >> -% 1320 0 obj +% 1577 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 218.115 409.811 230.175] +/Rect [342.753 506.024 409.811 518.084] /A << /S /GoTo /D (descdata) >> >> -% 1324 0 obj -<< -/D [1322 0 R /XYZ 149.705 753.953 null] ->> -% 300 0 obj +% 1581 0 obj << -/D [1322 0 R /XYZ 150.705 716.092 null] +/D [1579 0 R /XYZ 149.705 753.953 null] >> -% 1325 0 obj +% 348 0 obj << -/D [1322 0 R /XYZ 150.705 460.417 null] +/D [1579 0 R /XYZ 150.705 716.092 null] >> -% 1321 0 obj +% 1578 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1327 0 obj +% 1585 0 obj << /Type /Page -/Contents 1328 0 R -/Resources 1326 0 R +/Contents 1586 0 R +/Resources 1584 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R +/Parent 1557 0 R +/Annots [ 1582 0 R 1583 0 R ] >> -% 1329 0 obj -<< -/D [1327 0 R /XYZ 98.895 753.953 null] ->> -% 1326 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1334 0 obj +% 1582 0 obj << -/Type /Page -/Contents 1335 0 R -/Resources 1333 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1331 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 559.823 369.462 571.882] +/A << /S /GoTo /D (spdata) >> >> -% 1331 0 obj +% 1583 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 211.064 409.811 223.124] +/Rect [291.943 492.077 359.001 504.136] /A << /S /GoTo /D (descdata) >> >> -% 1336 0 obj +% 1587 0 obj << -/D [1334 0 R /XYZ 149.705 753.953 null] +/D [1585 0 R /XYZ 98.895 753.953 null] >> -% 304 0 obj +% 352 0 obj << -/D [1334 0 R /XYZ 150.705 716.092 null] +/D [1585 0 R /XYZ 99.895 716.092 null] >> -% 1337 0 obj +% 1588 0 obj << -/D [1334 0 R /XYZ 150.705 449.977 null] +/D [1585 0 R /XYZ 99.895 312.355 null] >> -% 1333 0 obj +% 1584 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1341 0 obj +% 1592 0 obj << /Type /Page -/Contents 1342 0 R -/Resources 1340 0 R +/Contents 1593 0 R +/Resources 1591 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1332 0 R 1338 0 R 1339 0 R ] +/Parent 1557 0 R +/Annots [ 1589 0 R 1590 0 R ] >> -% 1332 0 obj +% 1589 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [287.51 642.547 363.564 654.607] -/A << /S /GoTo /D (vdata) >> +/Rect [310.942 577.893 378 589.953] +/A << /S /GoTo /D (descdata) >> >> -% 1338 0 obj +% 1590 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [123.806 542.921 199.86 554.981] +/Rect [261.249 128.475 337.303 140.535] /A << /S /GoTo /D (vdata) >> >> -% 1339 0 obj +% 1594 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [421.516 542.921 433.471 554.981] -/A << /S /GoTo /D (table.20) >> +/D [1592 0 R /XYZ 149.705 753.953 null] >> -% 1343 0 obj +% 356 0 obj << -/D [1341 0 R /XYZ 98.895 753.953 null] +/D [1592 0 R /XYZ 150.705 716.092 null] >> -% 1340 0 obj +% 1591 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1345 0 obj +% 1596 0 obj << /Type /Page -/Contents 1346 0 R -/Resources 1344 0 R +/Contents 1597 0 R +/Resources 1595 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R +/Parent 1600 0 R +>> +% 1598 0 obj +<< +/D [1596 0 R /XYZ 98.895 753.953 null] +>> +% 1599 0 obj +<< +/D [1596 0 R /XYZ 99.895 632.19 null] +>> +% 1595 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] >> endstream endobj -1352 0 obj +1605 0 obj << -/Length 6337 +/Length 6336 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.13)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(routine)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geins\050m,)-525(irw,)-525(val,)-525(x,)-525(desc_a,)-525(info)-525([,local]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.57 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.386 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.386 Td [(m)]TJ +0 g 0 G +/F62 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows)-250(in)]TJ/F60 9.9626 Tf 86.569 0 Td [(v)-40(a)-25(l)]TJ/F62 9.9626 Tf 15.736 0 Td [(to)-250(be)-250(inserted.)]TJ -91.237 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.386 Td [(irw)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Indices)-381(of)-382(the)-382(r)18(ows)-381(to)-382(be)-381(inserted.)-705(Speci\002cally)111(,)-414(r)18(ow)]TJ/F60 9.9626 Tf 239.84 0 Td [(i)]TJ/F62 9.9626 Tf 6.765 0 Td [(of)]TJ/F60 9.9626 Tf 12.683 0 Td [(v)-40(a)-25(l)]TJ/F62 9.9626 Tf 17.046 0 Td [(will)-381(be)-382(in-)]TJ -271.901 -11.955 Td [(serted)-344(into)-344(the)-344(local)-344(r)18(ow)-344(corr)18(esponding)-344(to)-344(the)-344(global)-344(r)18(ow)-344(index)]TJ/F60 9.9626 Tf 290.218 0 Td [(i)-22(r)-35(w)]TJ/F93 10.3811 Tf 14.654 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(.)]TJ -316.314 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.386 Td [(val)]TJ +0 g 0 G +/F62 9.9626 Tf 18.819 0 Td [(the)-250(dense)-250(submatrix)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)-310(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.386 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 414.446 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 414.247 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 366.626 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 366.426 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 366.626 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 387.893 366.426 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -19.386 Td [(local)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(Whether)-240(the)-240(entries)-241(in)-240(the)-240(index)-240(vector)]TJ/F67 9.9626 Tf 173.162 0 Td [(irw)]TJ/F62 9.9626 Tf 15.692 0 Td [(,)-242(ar)18(e)-240(alr)18(eady)-241(i)1(n)-241(local)-240(number)18(-)]TJ -190.508 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F67 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F62 9.9626 Tf 36.613 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -224.197 -20.57 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.387 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(output)-250(dense)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(rank)-190(one)-190(or)-190(two)-190(array)-190(or)-190(an)-190(object)-190(of)-190(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 242.569 0 Td [(psb)]TJ +ET +q +1 0 0 1 434.498 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 437.636 211.443 Td [(T)]TJ +ET +q +1 0 0 1 443.494 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 446.633 211.443 Td [(vect)]TJ +ET +q +1 0 0 1 468.182 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 471.32 211.443 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(,)]TJ -316.63 -11.955 Td [(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -31.342 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -29.888 Td [(92)]TJ +0 g 0 G +ET + +endstream +endobj +1610 0 obj +<< +/Length 539 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(Dense)-250(vectors/matrices)-250(do)-250(not)-250(have)-250(an)-250(associated)-250(state;)]TJ +0 g 0 G + 0 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(Duplicate)-326(entries)-326(ar)18(e)-325(either)-326(overwritten)-326(or)-326(added,)-345(ther)18(e)-325(is)-326(no)-326(pr)18(ovision)]TJ 12.453 -11.955 Td [(for)-250(raising)-250(an)-250(err)18(or)-250(condition.)]TJ +0 g 0 G + 141.968 -563.885 Td [(93)]TJ +0 g 0 G +ET + +endstream +endobj +1619 0 obj +<< +/Length 6120 >> stream 0 g 0 G 0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.14)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geasb\050x,)-525(desc_a,)-525(info,)-525(mold\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 625.397 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 136.328 0 Td [(psb)]TJ +ET +q +1 0 0 1 328.257 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 331.395 577.576 Td [(desc)]TJ +ET +q +1 0 0 1 352.944 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 356.083 577.576 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -226.299 -31.88 Td [(mold)]TJ 0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.876 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F67 9.9626 Tf 199.087 0 Td [(psb)]TJ +ET +q +1 0 0 1 391.016 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 394.155 497.875 Td [(T)]TJ +ET +q +1 0 0 1 400.012 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 403.151 497.875 Td [(base)]TJ +ET +q +1 0 0 1 424.7 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(nl)]TJ -0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(Data)-305(allocation:)-421(in)-305(a)-305(generalized)-305(block-r)18(ow)-305(distribution)-306(the)-305(number)-305(of)-305(in-)]TJ 10.521 -11.955 Td [(dices)-250(belonging)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(May)-250(be)-250(speci\002ed)-250(together)-250(with)]TJ/F59 9.9626 Tf 272.943 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -308.311 -20.135 Td [(repl)]TJ -0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(Data)-288(allocation:)-385(build)-288(a)-288(r)18(eplicated)-287(index)-288(space)-288(\050i.e.)-423(all)-288(pr)18(ocesses)-287(own)-288(all)]TJ 1.664 -11.955 Td [(indices\051.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(.true.)]TJ -0 g 0 G -/F51 9.9626 Tf -157.04 -20.135 Td [(globalcheck)]TJ -0 g 0 G -/F54 9.9626 Tf 59.766 0 Td [(Data)-250(allocation:)-310(do)-250(global)-250(checks)-250(on)-250(the)-250(local)-250(index)-250(lists)]TJ/F59 9.9626 Tf 247.788 0 Td [(vl)]TJ/F54 9.9626 Tf -282.647 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value,)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ +/F67 9.9626 Tf 427.838 497.875 Td [(vect)]TJ +ET +q +1 0 0 1 449.387 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 452.525 497.875 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)]TJ/F60 9.9626 Tf 97.12 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 42.899 0 Td [(psb)]TJ +ET +q +1 0 0 1 339.644 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 342.783 485.92 Td [(T)]TJ +ET +q +1 0 0 1 348.641 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 351.779 485.92 Td [(vect)]TJ +ET +q +1 0 0 1 373.328 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 376.466 485.92 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -187.585 -20.135 Td [(lidx)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(Data)-308(allocation:)-425(the)-307(set)-308(of)-307(local)-308(indices)]TJ/F52 9.9626 Tf 175.731 0 Td [(l)-48(i)-32(d)-42(x)]TJ/F85 10.3811 Tf 17.065 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(1)-397(:)]TJ/F52 9.9626 Tf 15.505 0 Td [(n)-25(l)]TJ/F85 10.3811 Tf 9.105 0 Td [(\051)]TJ/F54 9.9626 Tf 7.214 0 Td [(to)-308(be)-307(assigned)-308(to)-307(the)]TJ -226.548 -11.955 Td [(global)-250(indices)]TJ/F52 9.9626 Tf 63.476 0 Td [(v)-25(l)]TJ/F54 9.9626 Tf 8.423 0 Td [(.)]TJ -71.899 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ +/F59 9.9626 Tf -246.682 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.127 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.925 Td [(x)]TJ 0 g 0 G - 0 -20.135 Td [(desc)]TJ +/F62 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-259(rank)-258(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 50.55 0 Td [(psb)]TJ ET q -1 0 0 1 120.408 376.512 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 242.48 384.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 123.397 376.313 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 245.618 384.301 Td [(T)]TJ ET q -1 0 0 1 309.258 328.692 cm +1 0 0 1 251.476 384.5 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 328.492 Td [(desc)]TJ +/F67 9.9626 Tf 254.614 384.301 Td [(vect)]TJ ET q -1 0 0 1 333.945 328.692 cm +1 0 0 1 276.163 384.5 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 328.492 Td [(type)]TJ +/F67 9.9626 Tf 279.301 384.301 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.135 Td [(info)]TJ +/F59 9.9626 Tf -149.518 -31.881 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -22.128 Td [(Notes)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -20.082 Td [(1.)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(One)-309(of)-310(the)-309(optional)-310(ar)18(guments)]TJ/F59 9.9626 Tf 152.661 0 Td [(parts)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(vg)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(vl)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(nl)]TJ/F54 9.9626 Tf 13.544 0 Td [(or)]TJ/F59 9.9626 Tf 12.458 0 Td [(repl)]TJ/F54 9.9626 Tf 24.005 0 Td [(must)-310(be)-309(speci-)]TJ -254.456 -11.956 Td [(\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(initialization)-250(strategy)-250(as)-250(follows:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.135 Td [(parts)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(In)-242(this)-242(case)-242(we)-243(have)-242(a)-242(subr)18(outine)-242(specifying)-242(the)-242(mapping)-242(between)]TJ -5.748 -11.955 Td [(global)-312(indices)-312(and)-311(pr)18(ocess/local)-312(index)-312(pairs.)-496(If)-311(this)-312(optional)-312(ar)18(gu-)]TJ 0 -11.955 Td [(ment)-230(is)-230(speci\002ed,)-234(then)-230(it)-230(is)-230(mandatory)-230(to)-230(specify)-230(the)-230(ar)18(gument)]TJ/F59 9.9626 Tf 274.929 0 Td [(mg)]TJ/F54 9.9626 Tf 12.752 0 Td [(as)]TJ -287.681 -11.955 Td [(well.)-310(The)-250(subr)18(outine)-250(must)-250(conform)-250(to)-250(the)-250(following)-250(interface:)]TJ + [-500(On)-227(entry)-226(to)-227(this)-226(r)18(outine)-227(th)1(e)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F67 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ 0 g 0 G + -62.017 -19.925 Td [(2.)]TJ 0 g 0 G -/F59 9.9626 Tf 10.46 -18.09 Td [(interface)]TJ 15.691 -11.955 Td [(subroutine)-525(psb_parts\050glob_index,mg,np,pv,nv\051)]TJ + [-500(If)-431(the)]TJ/F67 9.9626 Tf 41.544 0 Td [(bldmode=psb_matbld_remote_)]TJ/F62 9.9626 Tf 140.287 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-273(the)-272(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.955 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-250(element)-250(applications,)-250(with)]TJ/F67 9.9626 Tf 144.277 0 Td [(dupl=psb_dupl_add_)]TJ/F62 9.9626 Tf 94.146 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 93.899 -29.888 Td [(71)]TJ + -96.455 -104.573 Td [(94)]TJ 0 g 0 G ET endstream endobj -1358 0 obj +1627 0 obj << -/Length 9985 +/Length 3224 >> stream 0 g 0 G 0 g 0 G BT -/F59 9.9626 Tf 234.142 706.129 Td [(integer,)-525(intent)-525(\050in\051)-1050(::)-525(glob_index,np,mg)]TJ 0 -11.955 Td [(integer,)-525(intent)-525(\050out\051)-525(::)-525(nv,)-525(pv\050*\051)]TJ -10.461 -11.955 Td [(end)-525(subroutine)-525(psb_parts)]TJ -15.691 -11.956 Td [(end)-525(interface)]TJ/F54 9.9626 Tf -10.461 -17.586 Td [(The)-250(input)-250(ar)18(guments)-250(ar)18(e:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -15.594 Td [(glob)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.15)-1000(psb)]TJ ET q -1 0 0 1 218.61 637.283 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 221.599 637.083 Td [(index)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(The)-250(global)-250(index)-250(to)-250(be)-250(mapped;)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(gefree)-250(\227)-250(Frees)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G -/F51 9.9626 Tf -54.506 -13.774 Td [(np)]TJ 0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(The)-250(number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(mapping;)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gefree\050x,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -17.156 -13.774 Td [(mg)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 19.377 0 Td [(The)-250(total)-250(number)-250(of)-250(global)-250(r)18(ows)-250(in)-250(the)-250(mapping;)]TJ -19.377 -15.594 Td [(The)-250(output)-250(ar)18(guments)-250(ar)18(e:)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -15.594 Td [(nv)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 16.608 0 Td [(The)-250(number)-250(of)-250(entries)-250(in)]TJ/F59 9.9626 Tf 111.052 0 Td [(pv)]TJ/F54 9.9626 Tf 10.461 0 Td [(;)]TJ 0 g 0 G -/F51 9.9626 Tf -138.121 -13.774 Td [(pv)]TJ -0 g 0 G -/F54 9.9626 Tf 16.608 0 Td [(A)-393(vector)-394(containing)-393(the)-394(indices)-393(of)-394(the)-394(pr)18(ocesses)-393(to)-394(which)-393(the)]TJ 2.022 -11.955 Td [(global)-357(index)-357(should)-356(be)-357(assigend;)-410(each)-357(entry)-357(must)-357(satisfy)-357(0)]TJ/F83 10.3811 Tf 270.063 0 Td [(\024)]TJ/F52 9.9626 Tf -269.39 -11.956 Td [(p)-25(v)]TJ/F85 10.3811 Tf 10.461 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F61 10.3811 Tf 8.665 0 Td [(<)]TJ/F52 9.9626 Tf 12.71 0 Td [(n)-80(p)]TJ/F54 9.9626 Tf 11.442 0 Td [(;)-382(if)]TJ/F52 9.9626 Tf 16.006 0 Td [(n)-25(v)]TJ/F61 10.3811 Tf 15.409 0 Td [(>)]TJ/F54 9.9626 Tf 12.586 0 Td [(1)-338(we)-338(have)-338(an)-338(index)-338(assigned)-338(to)-338(multiple)]TJ -95.244 -11.955 Td [(pr)18(ocesses,)-250(i.e.)-310(we)-250(have)-250(an)-250(overlap)-250(among)-250(the)-250(subdomains.)]TJ -0 g 0 G -/F51 9.9626 Tf -40.548 -15.594 Td [(vg)]TJ -0 g 0 G -/F54 9.9626 Tf 16.06 0 Td [(In)-330(this)-330(case)-330(the)-330(association)-330(between)-331(an)-330(index)-330(and)-330(a)-330(pr)18(ocess)-330(is)-330(spec-)]TJ 5.858 -11.955 Td [(i\002ed)-371(via)-372(an)-371(integer)-372(vector)]TJ/F59 9.9626 Tf 120.986 0 Td [(vg\0501:mg\051)]TJ/F54 9.9626 Tf 41.842 0 Td [(;)-432(each)-372(index)]TJ/F52 9.9626 Tf 58.923 0 Td [(i)]TJ/F83 10.3811 Tf 8.096 0 Td [(2)-506(f)]TJ/F54 9.9626 Tf 17.485 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F83 10.3811 Tf 13.449 0 Td [(g)]TJ/F54 9.9626 Tf 9.016 0 Td [(is)]TJ -289.764 -11.955 Td [(assigned)-381(to)-381(pr)18(ocess)]TJ/F52 9.9626 Tf 91.547 0 Td [(v)-47(g)]TJ/F85 10.3811 Tf 10.68 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)-703(The)-380(vector)]TJ/F59 9.9626 Tf 61.203 0 Td [(vg)]TJ/F54 9.9626 Tf 14.255 0 Td [(must)-381(be)-381(identical)-381(on)-380(all)]TJ -189.127 -11.955 Td [(calling)-354(pr)18(ocesses;)-406(its)-355(entri)1(es)-355(may)-354(have)-354(the)-354(ranges)]TJ/F85 10.3811 Tf 226.209 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.888 0 Td [(\000)]TJ/F54 9.9626 Tf 10.515 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 7.678 0 Td [(or)]TJ/F85 10.3811 Tf -287.387 -11.955 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F85 10.3811 Tf 11.566 0 Td [(\051)]TJ/F54 9.9626 Tf 6.984 0 Td [(accor)18(ding)-284(to)-285(the)-284(value)-285(of)]TJ/F59 9.9626 Tf 113.068 0 Td [(flag)]TJ/F54 9.9626 Tf 20.922 0 Td [(.)-413(The)-285(size)]TJ/F52 9.9626 Tf 45.955 0 Td [(m)-47(g)]TJ/F54 9.9626 Tf 16.159 0 Td [(may)-285(be)-284(spec-)]TJ -238.895 -11.955 Td [(i\002ed)-349(via)-349(the)-349(optional)-349(ar)18(gument)]TJ/F59 9.9626 Tf 144.092 0 Td [(mg)]TJ/F54 9.9626 Tf 10.46 0 Td [(;)-399(the)-349(default)-349(is)-349(to)-349(use)-349(the)-349(entir)18(e)]TJ -154.552 -11.956 Td [(vector)]TJ/F59 9.9626 Tf 29.937 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-250(thus)-250(having)]TJ/F59 9.9626 Tf 59.885 0 Td [(mg=size\050vg\051)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)]TJ + 0 -19.925 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf -179.735 -15.593 Td [(vl)]TJ +/F62 9.9626 Tf 9.963 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-258(rank)-259(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 50.55 0 Td [(psb)]TJ +ET +q +1 0 0 1 191.67 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 194.809 565.621 Td [(T)]TJ +ET +q +1 0 0 1 200.666 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 203.805 565.621 Td [(vect)]TJ +ET +q +1 0 0 1 225.354 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 228.492 565.621 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(In)-383(this)-382(case)-383(we)-383(ar)18(e)-382(specifying)-383(the)-383(list)-382(of)-383(indices)]TJ/F59 9.9626 Tf 220.787 0 Td [(vl\0501:nl\051)]TJ/F54 9.9626 Tf 45.655 0 Td [(assigned)]TJ -258.362 -11.955 Td [(to)-401(the)-400(curr)18(ent)-401(pr)18(ocess;)-476(thus,)-438(the)-400(global)-401(pr)18(oblem)-400(size)]TJ/F52 9.9626 Tf 243.82 0 Td [(m)-47(g)]TJ/F54 9.9626 Tf 17.316 0 Td [(is)-401(given)]TJ -261.136 -11.956 Td [(by)-435(the)-435(range)-435(of)-435(the)-435(aggr)18(egate)-435(of)-435(the)-435(individual)-435(vectors)]TJ/F59 9.9626 Tf 259.368 0 Td [(vl)]TJ/F54 9.9626 Tf 14.794 0 Td [(spec-)]TJ -274.162 -11.955 Td [(i\002ed)-429(in)-429(the)-429(calling)-428(pr)18(ocesses.)-847(The)-429(size)-429(may)-429(be)-428(speci\002ed)-429(via)-429(the)]TJ 0 -11.955 Td [(optional)-438(ar)18(gument)]TJ/F59 9.9626 Tf 88.319 0 Td [(nl)]TJ/F54 9.9626 Tf 10.46 0 Td [(;)-532(the)-438(default)-438(is)-438(to)-438(use)-438(the)-438(entir)18(e)-438(vector)]TJ/F59 9.9626 Tf 185.156 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ -294.396 -11.955 Td [(thus)-364(having)]TJ/F59 9.9626 Tf 57.178 0 Td [(nl=size\050vl\051)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)-652(If)]TJ/F59 9.9626 Tf 19.294 0 Td [(globalcheck=.true.)]TJ/F54 9.9626 Tf 97.774 0 Td [(the)-364(subr)18(outine)]TJ -231.78 -11.955 Td [(will)-403(check)-403(how)-404(many)-403(times)-403(each)-403(entry)-403(in)-404(the)-403(global)-403(index)-403(space)]TJ/F85 10.3811 Tf 0.125 -11.955 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F85 10.3811 Tf 13.449 0 Td [(\051)]TJ/F54 9.9626 Tf 6.245 0 Td [(is)-210(speci\002ed)-211(in)-210(the)-210(input)-210(lists)]TJ/F59 9.9626 Tf 122.836 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-218(thus)-211(allowin)1(g)-211(for)-210(the)-210(pr)18(es-)]TJ -177.232 -11.956 Td [(ence)-302(of)-302(overlap)-302(in)-302(the)-302(input,)-315(and)-302(checki)1(ng)-302(for)-302(\223orphan\224)-302(indices.)-466(If)]TJ/F59 9.9626 Tf 0 -11.955 Td [(globalcheck=.false.)]TJ/F54 9.9626 Tf 99.377 0 Td [(,)-437(the)-400(subr)18(outine)-400(will)-400(not)-400(check)-400(for)-400(overlap,)]TJ -99.377 -11.955 Td [(and)-255(may)-255(be)-255(signi\002cantly)-255(faster)74(,)-257(but)-255(the)-255(user)-255(is)-255(implicitly)-255(guarantee-)]TJ 0 -11.955 Td [(ing)-250(that)-250(ther)18(e)-250(ar)18(e)-250(neither)-250(orphan)-250(nor)-250(overlap)-250(indices.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -21.918 -15.594 Td [(lidx)]TJ +/F59 9.9626 Tf -149.518 -31.881 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 533.94 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 123.397 533.74 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(The)-377(optional)-376(ar)18(gument)]TJ/F59 9.9626 Tf 107.528 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.674 0 Td [(is)-377(available)-376(for)-377(those)-377(cases)-377(in)-376(which)]TJ -132.969 -11.955 Td [(the)-361(user)-361(has)-361(alr)18(eady)-361(established)-361(a)-361(global-to-local)-361(mapping;)-416(if)-361(it)-361(is)]TJ 0 -11.955 Td [(speci\002ed,)-253(each)-253(index)-253(in)]TJ/F59 9.9626 Tf 105.128 0 Td [(vl\050i\051)]TJ/F54 9.9626 Tf 28.669 0 Td [(will)-253(be)-252(mapped)-253(to)-253(the)-252(corr)18(esponding)]TJ -133.797 -11.955 Td [(local)-317(index)]TJ/F59 9.9626 Tf 51.649 0 Td [(lidx\050i\051)]TJ/F54 9.9626 Tf 36.612 0 Td [(.)-510(When)-317(specifying)-316(the)-317(ar)18(gument)]TJ/F59 9.9626 Tf 148.638 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.076 0 Td [(the)-317(user)]TJ -260.975 -11.956 Td [(would)-329(also)-330(likely)-329(employ)]TJ/F59 9.9626 Tf 117.394 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.203 0 Td [(in)-329(calls)-330(to)]TJ/F59 9.9626 Tf 46.656 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 50.355 0 Td [(and)]TJ/F59 9.9626 Tf 20.148 0 Td [(local)]TJ/F54 9.9626 Tf 29.433 0 Td [(in)]TJ -288.189 -11.955 Td [(calls)-250(to)]TJ/F59 9.9626 Tf 33.095 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)-250(see)-250(also)-250(sec.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG - [-250(2.3.1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -171.008 -15.593 Td [(nl)]TJ +/F67 9.9626 Tf 136.327 0 Td [(psb)]TJ +ET +q +1 0 0 1 277.448 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 280.586 485.92 Td [(desc)]TJ +ET +q +1 0 0 1 302.135 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 305.273 485.92 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(If)-411(this)-411(ar)18(gument)-411(is)-411(speci\002ed)-411(alone)-411(\050i.e.)-793(without)]TJ/F59 9.9626 Tf 223.432 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(\051)-411(the)-411(r)18(esult)-411(is)-411(a)]TJ -226.361 -11.956 Td [(generalized)-280(r)18(ow-block)-280(distribution)-280(in)-280(which)-281(each)-280(pr)18(ocess)]TJ/F52 9.9626 Tf 257.148 0 Td [(I)]TJ/F54 9.9626 Tf 6.642 0 Td [(gets)-280(as-)]TJ -263.79 -11.955 Td [(signed)-250(a)-250(consecutive)-250(chunk)-250(of)]TJ/F52 9.9626 Tf 135.186 0 Td [(N)]TJ/F52 7.5716 Tf 7.851 -1.808 Td [(I)]TJ/F85 10.3811 Tf 6.316 1.808 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-25(l)]TJ/F54 9.9626 Tf 11.472 0 Td [(global)-250(indices.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -193.829 -15.593 Td [(repl)]TJ +/F59 9.9626 Tf -226.3 -33.873 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(This)-418(ar)18(guments)-417(speci\002es)-418(to)-417(r)18(eplicate)-418(all)-417(indices)-418(on)-418(all)-417(pr)18(ocesses.)]TJ -1.325 -11.956 Td [(This)-366(is)-367(a)-366(special)-366(purpose)-366(data)-367(allocation)-366(that)-366(is)-366(useful)-367(in)-366(the)-366(con-)]TJ 0 -11.955 Td [(str)8(uction)-250(of)-250(some)-250(multilevel)-250(pr)18(econditioners.)]TJ 0 g 0 G - -34.371 -19.579 Td [(2.)]TJ + 0 -19.925 Td [(info)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 154.421 -29.888 Td [(72)]TJ + 141.968 -293.863 Td [(95)]TJ 0 g 0 G ET endstream endobj -1364 0 obj +1631 0 obj << -/Length 2555 +/Length 3218 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F54 9.9626 Tf 112.349 706.129 Td [(3.)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.16)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G - [-500(Calling)-260(the)-260(r)18(outine)-260(with)]TJ/F59 9.9626 Tf 120.886 0 Td [(vg)]TJ/F54 9.9626 Tf 13.052 0 Td [(or)]TJ/F59 9.9626 Tf 11.965 0 Td [(parts)]TJ/F54 9.9626 Tf 28.742 0 Td [(implies)-260(that)-260(every)-260(pr)18(ocess)-260(will)-260(scan)]TJ -162.192 -11.955 Td [(the)-250(entir)18(e)-250(index)-250(space)-250(to)-250(\002gur)18(e)-250(out)-250(the)-250(local)-250(indices.)]TJ 0 g 0 G - -12.453 -19.926 Td [(4.)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gelp\050trans,)-525(iperm,)-525(x,)-525(info\051)]TJ 0 g 0 G - [-500(Overlapped)-250(indices)-250(ar)18(e)-250(possible)-250(with)-250(both)]TJ/F59 9.9626 Tf 201.093 0 Td [(parts)]TJ/F54 9.9626 Tf 28.643 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(vl)]TJ/F54 9.9626 Tf 12.951 0 Td [(invocations.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - -262.044 -19.925 Td [(5.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F59 9.9626 Tf 175.161 0 Td [(vl)]TJ/F54 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F59 9.9626 Tf 86.235 0 Td [(globalcheck=.true.)]TJ/F54 9.9626 Tf 94.146 0 Td [(,)]TJ -355.443 -11.955 Td [(it)-280(will)-281(perform)-280(a)-280(scan)-281(of)-280(the)-281(index)-280(space)-280(to)-281(sear)18(ch)-280(for)-280(overlap)-281(or)-280(orphan)]TJ 0 -11.955 Td [(indices.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - -12.453 -19.925 Td [(6.)]TJ 0 g 0 G - [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F59 9.9626 Tf 175.161 0 Td [(vl)]TJ/F54 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F59 9.9626 Tf 86.235 0 Td [(globalcheck=.false.)]TJ/F54 9.9626 Tf 99.377 0 Td [(,)]TJ -360.674 -11.956 Td [(no)-338(index)-337(space)-338(scan)-337(will)-338(take)-337(place.)-573(Thus)-337(it)-338(is)-338(the)-337(r)18(esponsibility)-338(of)-337(the)]TJ 0 -11.955 Td [(user)-328(to)-328(make)-328(sur)18(e)-328(that)-328(the)-328(indices)-328(speci\002ed)-328(in)]TJ/F59 9.9626 Tf 209.973 0 Td [(vl)]TJ/F54 9.9626 Tf 13.729 0 Td [(have)-328(neither)-328(orphans)]TJ -223.702 -11.955 Td [(nor)-250(overlaps;)-250(if)-250(this)-250(assumption)-250(fails,)-250(r)18(esults)-250(will)-250(be)-250(unpr)18(edictable.)]TJ + 0 -19.925 Td [(trans)]TJ 0 g 0 G - -12.453 -19.925 Td [(7.)]TJ +/F62 9.9626 Tf 27.666 0 Td [(A)-250(character)-250(that)-250(speci\002es)-250(whether)-250(to)-250(permute)]TJ/F60 9.9626 Tf 203.748 0 Td [(A)]TJ/F62 9.9626 Tf 9.808 0 Td [(or)]TJ/F60 9.9626 Tf 12.488 0 Td [(A)]TJ/F60 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F62 9.9626 Tf 5.401 -3.616 Td [(.)]TJ -241.716 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(single)-250(character)-250(with)-250(value)-250('N')-250(for)]TJ/F60 9.9626 Tf 218.195 0 Td [(A)]TJ/F62 9.9626 Tf 9.808 0 Td [(or)-250('T')-250(for)]TJ/F60 9.9626 Tf 41.807 0 Td [(A)]TJ/F60 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F62 9.9626 Tf 5.401 -3.616 Td [(.)]TJ 0 g 0 G - [-500(Orphan)-417(and)-416(overlap)-417(indices)-416(ar)18(e)-417(impossible)-416(by)-417(constr)8(uction)-417(when)-416(the)]TJ 12.453 -11.955 Td [(subr)18(outine)-250(is)-250(invoked)-250(with)]TJ/F59 9.9626 Tf 121.164 0 Td [(nl)]TJ/F54 9.9626 Tf 12.951 0 Td [(\050alone\051,)-250(or)]TJ/F59 9.9626 Tf 47.372 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)]TJ +/F59 9.9626 Tf -307.628 -31.88 Td [(iperm)]TJ 0 g 0 G - -49.98 -452.304 Td [(73)]TJ +/F62 9.9626 Tf 32.099 0 Td [(An)-250(integer)-250(array)-250(containing)-250(permutation)-250(information.)]TJ -7.193 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(one-dimensional)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -31.881 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(permuted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(or)-250(two)-250(dimensional)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -33.873 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -226.117 Td [(96)]TJ 0 g 0 G ET endstream endobj -1375 0 obj +1636 0 obj << -/Length 7006 +/Length 6238 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.2)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.17)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(routine)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdins\050nz,)-525(ia,)-525(ja,)-525(desc_a,)-525(info)-525([,ila,jla]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdins\050nz,ja,desc,info[,jla,mask,lidx]\051)]TJ/F54 9.9626 Tf 14.944 -20.366 Td [(This)-336(subr)18(outine)-335(examines)-336(the)-336(edges)-335(of)-336(the)-336(graph)-335(associated)-336(with)-335(the)-336(dis-)]TJ -14.944 -11.955 Td [(cr)18(etization)-260(mesh)-261(\050and)-260(isomorphic)-260(to)-261(the)-260(sparsity)-260(pattern)-261(of)-260(a)-260(linear)-261(system)-260(co-)]TJ 0 -11.955 Td [(ef)18(\002cient)-238(matrix\051,)-241(storing)-238(them)-239(as)-238(necessary)-238(into)-239(the)-238(communication)-238(descriptor)74(.)]TJ 0 -11.955 Td [(In)-259(the)-260(\002rst)-259(form)-260(the)-259(edges)-259(ar)18(e)-260(speci\002ed)-259(as)-260(pairs)-259(of)-260(indices)]TJ/F52 9.9626 Tf 255.974 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.91 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-264(the)-260(start)1(-)]TJ -299.233 -11.956 Td [(ing)-299(index)]TJ/F52 9.9626 Tf 44.948 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.91 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 7.13 0 Td [(should)-299(belong)-299(to)-299(the)-299(curr)18(ent)-299(pr)18(ocess.)-458(In)-299(the)-299(second)-299(form)-299(only)]TJ -67.28 -11.955 Td [(the)-250(r)18(emote)-250(indices)]TJ/F52 9.9626 Tf 83.65 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(ar)18(e)-250(speci\002ed.)]TJ -0 g 0 G -/F51 9.9626 Tf -105.423 -20.366 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.304 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.305 Td [(nz)]TJ -0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(points)-250(being)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.305 Td [(ia)]TJ -0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(glob)]TJ +ET +q +1 0 0 1 182.29 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 185.877 706.129 Td [(to)]TJ +ET +q +1 0 0 1 197.222 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 200.809 706.129 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -19.304 Td [(ja)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F67 9.9626 Tf -100.914 -18.964 Td [(call)-525(psb_glob_to_loc\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ 0 -11.955 Td [(call)-525(psb_glob_to_loc\050x,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -19.304 Td [(mask)]TJ +/F59 9.9626 Tf 0 -21.109 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.33 0 Td [(Mask)-247(entries)-248(in)]TJ/F59 9.9626 Tf 69.91 0 Td [(ja)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-248(they)-247(ar)18(e)-248(inserted)-247(only)-248(when)-247(the)-247(corr)18(esponding)]TJ/F59 9.9626 Tf 213.089 0 Td [(mask)]TJ/F54 9.9626 Tf -297.883 -11.956 Td [(entries)-250(ar)18(e)]TJ/F59 9.9626 Tf 48.139 0 Td [(.true.)]TJ/F54 9.9626 Tf -48.139 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 164.297 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(,)-250(default)]TJ/F59 9.9626 Tf 38.784 0 Td [(.true.)]TJ/F54 9.9626 Tf 31.382 0 Td [(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -269.706 -19.305 Td [(lidx)]TJ +/F59 9.9626 Tf -29.828 -19.602 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(User)-250(de\002ned)-250(local)-250(indices)-250(for)]TJ/F59 9.9626 Tf 131.117 0 Td [(ja)]TJ/F54 9.9626 Tf 10.46 0 Td [(.)]TJ -139.355 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -20.366 Td [(On)-250(Return)]TJ + 0 -19.601 Td [(x)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F62 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G - 0 -19.305 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -31.557 Td [(desc)]TJ ET q -1 0 0 1 171.218 168.346 cm +1 0 0 1 120.408 535.72 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 168.146 Td [(a)]TJ +/F59 9.9626 Tf 123.397 535.52 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(updated)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 120.525 cm +1 0 0 1 309.258 487.899 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 120.326 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 487.7 Td [(desc)]TJ ET q -1 0 0 1 384.755 120.525 cm +1 0 0 1 333.945 487.899 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 120.326 Td [(type)]TJ +/F67 9.9626 Tf 337.084 487.7 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - -91.236 -29.888 Td [(74)]TJ +/F59 9.9626 Tf -258.11 -19.602 Td [(iact)]TJ 0 g 0 G -ET - -endstream -endobj -1380 0 obj -<< -/Length 3007 ->> -stream +/F62 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F59 9.9626 Tf 253.796 0 Td [(global)]TJ/F62 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F67 9.9626 Tf 143.341 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F67 9.9626 Tf 29.808 0 Td [(W)]TJ/F62 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F67 9.9626 Tf 42.111 0 Td [(A)]TJ/F62 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F67 9.9626 Tf 55.839 0 Td [(I)]TJ/F62 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ 0 g 0 G +/F59 9.9626 Tf -316.929 -19.601 Td [(owned)]TJ 0 g 0 G +/F62 9.9626 Tf 35.975 0 Td [(Spec\002es)-250(valid)-250(range)-250(of)-250(input)-250(Scope:)]TJ/F59 9.9626 Tf 159.54 0 Td [(global)]TJ/F62 9.9626 Tf -170.608 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(If)-320(tr)8(ue,)-337(then)-320(only)-320(indices)-320(strictly)-320(owned)-320(by)-320(the)-319(curr)18(ent)-320(pr)18(ocess)-320(ar)18(e)-320(con-)]TJ 0 -11.955 Td [(sider)18(ed)-250(valid,)-250(if)-250(false)-250(then)-250(halo)-250(indices)-250(ar)18(e)-250(also)-250(accepted.)-310(Default:)-310(false.)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F59 9.9626 Tf -24.907 -21.109 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(ila)]TJ + 0 -19.601 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(If)]TJ/F60 9.9626 Tf 9.727 0 Td [(y)]TJ/F62 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F60 9.9626 Tf 88.385 0 Td [(x)]TJ/F62 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -19.925 Td [(jla)]TJ +/F59 9.9626 Tf -24.907 -19.602 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ/F51 11.9552 Tf -207.221 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 10.521 0 Td [(If)]TJ/F60 9.9626 Tf 9.521 0 Td [(y)]TJ/F62 9.9626 Tf 7.827 0 Td [(is)-273(pr)18(esent,)-279(then)]TJ/F60 9.9626 Tf 70.133 0 Td [(y)]TJ/F62 9.9626 Tf 7.827 0 Td [(is)-273(overwritten)-273(with)-273(the)-273(translated)-274(integer)-273(indices,)-279(and)]TJ/F60 9.9626 Tf -80.628 -11.955 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F59 9.9626 Tf 112.557 0 Td [(global)]TJ/F62 9.9626 Tf -120.547 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -24.907 -19.602 Td [(info)]TJ 0 g 0 G - [-500(This)-250(r)18(outine)-250(may)-250(only)-250(be)-250(called)-250(if)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state;)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.108 Td [(Notes)]TJ 0 g 0 G - 0 -19.925 Td [(2.)]TJ +/F62 9.9626 Tf 166.875 -29.888 Td [(97)]TJ 0 g 0 G - [-500(This)-370(r)18(outine)-370(automatically)-370(ignor)18(es)-370(edges)-370(that)-370(do)-370(not)-370(insist)-370(on)-370(the)-370(cur)18(-)]TJ 12.453 -11.955 Td [(r)18(ent)-288(pr)18(ocess,)-298(i.e.)-424(edges)-288(for)-288(which)-288(neither)-288(the)-288(starting)-288(nor)-288(the)-288(end)-288(vertex)]TJ 0 -11.955 Td [(belong)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ +ET + +endstream +endobj +1641 0 obj +<< +/Length 672 +>> +stream 0 g 0 G - -12.453 -19.926 Td [(3.)]TJ 0 g 0 G - [-500(The)-322(second)-323(form)-322(of)-323(this)-322(r)18(outine)-323(will)-322(be)-322(useful)-323(when)-322(dealing)-323(with)-322(user)18(-)]TJ 12.453 -11.955 Td [(speci\002ed)-250(index)-250(mappings;)-250(see)-250(also)]TJ -0 0 1 rg 0 0 1 RG - [-250(2.3.1)]TJ 0 g 0 G - [(.)]TJ +BT +/F62 9.9626 Tf 163.158 706.129 Td [(1.)]TJ 0 g 0 G - 141.968 -314.819 Td [(75)]TJ + [-500(If)-272(an)-273(input)-272(index)-272(is)-273(out)-272(of)-273(range)1(,)-278(then)-273(the)-272(corr)18(esponding)-272(output)-273(index)-272(is)]TJ 12.453 -11.955 Td [(set)-250(to)-250(a)-250(negative)-250(number;)]TJ +0 g 0 G + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(The)-416(default)]TJ/F67 9.9626 Tf 68.74 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e)-416(means)-417(that)-416(the)-417(negative)-416(output)-416(is)-417(the)-416(only)-416(action)]TJ -61.517 -11.955 Td [(taken)-250(on)-250(an)-250(out-of-range)-250(input.)]TJ +0 g 0 G + 141.968 -571.855 Td [(98)]TJ 0 g 0 G ET endstream endobj -1389 0 obj +1648 0 obj << -/Length 5969 +/Length 5458 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.3)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.18)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(loc)]TJ +ET +q +1 0 0 1 173.646 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(routine)]TJ +/F59 11.9552 Tf 177.233 706.129 Td [(to)]TJ +ET +q +1 0 0 1 188.578 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 192.165 706.129 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdasb\050desc_a,)-525(info)-525([,)-525(mold]\051)]TJ +/F67 9.9626 Tf -92.27 -18.964 Td [(call)-525(psb_loc_to_glob\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact\051)]TJ 0 -11.955 Td [(call)-525(psb_loc_to_glob\050x,)-525(desc_a,)-525(info,)-525(iact\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F62 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -31.881 Td [(desc)]TJ ET q -1 0 0 1 171.218 625.596 cm +1 0 0 1 120.408 533.94 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ +/F59 9.9626 Tf 123.397 533.74 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 577.775 cm +1 0 0 1 309.258 486.119 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 485.92 Td [(desc)]TJ ET q -1 0 0 1 384.755 577.775 cm +1 0 0 1 333.945 486.119 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ +/F67 9.9626 Tf 337.084 485.92 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(mold)]TJ +/F59 9.9626 Tf -258.11 -19.926 Td [(iact)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F59 9.9626 Tf 253.796 0 Td [(global)]TJ/F62 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F67 9.9626 Tf 143.341 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F67 9.9626 Tf 29.808 0 Td [(W)]TJ/F62 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F67 9.9626 Tf 42.111 0 Td [(A)]TJ/F62 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F67 9.9626 Tf 55.839 0 Td [(I)]TJ/F62 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ +0 g 0 G +/F59 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(If)]TJ/F60 9.9626 Tf 9.727 0 Td [(y)]TJ/F62 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F60 9.9626 Tf 88.385 0 Td [(x)]TJ/F62 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(If)]TJ/F60 9.9626 Tf 9.705 0 Td [(y)]TJ/F62 9.9626 Tf 8.011 0 Td [(is)-292(not)-291(pr)18(esent,)-302(then)]TJ/F60 9.9626 Tf 88.122 0 Td [(y)]TJ/F62 9.9626 Tf 8.011 0 Td [(is)-292(overwritten)-291(with)-292(the)-291(translated)-292(integer)-292(indice)1(s,)]TJ -99.463 -11.955 Td [(and)]TJ/F60 9.9626 Tf 19.651 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F59 9.9626 Tf 112.557 0 Td [(global)]TJ/F62 9.9626 Tf -139.904 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -114.535 Td [(99)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(index)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 221.926 0 Td [(psb)]TJ -ET -q -1 0 0 1 413.855 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 416.994 509.83 Td [(T)]TJ ET -q -1 0 0 1 422.851 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1653 0 obj +<< +/Length 3169 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 425.99 509.83 Td [(base)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.19)-1000(psb)]TJ ET q -1 0 0 1 447.539 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 450.677 509.83 Td [(vect)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(is)]TJ ET q -1 0 0 1 472.226 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 217.809 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 475.364 509.83 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F59 11.9552 Tf 221.396 706.129 Td [(owned)-250(\227)]TJ 0 g 0 G -/F51 9.9626 Tf -345.58 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G +/F67 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_owned\050x,)-525(desc_a\051)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 171.218 468.186 cm +1 0 0 1 171.218 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 467.987 Td [(a)]TJ +/F59 9.9626 Tf 174.207 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 420.366 cm +1 0 0 1 360.068 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 420.166 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 497.875 Td [(desc)]TJ ET q -1 0 0 1 384.755 420.366 cm +1 0 0 1 384.755 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 420.166 Td [(type)]TJ +/F67 9.9626 Tf 387.893 497.875 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.917 Td [(Notes)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(A)-261(logical)-260(mask)-261(which)-261(is)-261(tr)8(ue)-261(if)]TJ/F60 9.9626 Tf 137.304 0 Td [(x)]TJ/F62 9.9626 Tf 7.803 0 Td [(is)-261(owned)-261(by)-260(the)-261(curr)18(ent)-261(pr)18(o-)]TJ -192.978 -11.955 Td [(cess)-250(Scope:)]TJ/F59 9.9626 Tf 51.567 0 Td [(local)]TJ/F62 9.9626 Tf -51.567 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ/F59 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(assembled)-250(state.)]TJ -12.453 -19.925 Td [(This)-274(call)-275(will)-274(set)-275(up)-274(all)-275(the)-274(necessary)-275(information)-274(for)-275(the)-274(halo)-275(data)-274(exchanges.)]TJ 0 -11.955 Td [(In)-337(doing)-337(so,)-358(the)-337(library)-337(will)-337(need)-336(to)-337(identify)-337(the)-337(set)-337(of)-337(pr)18(ocesse)1(s)-337(owning)-337(the)]TJ 0 -11.955 Td [(halo)-381(indices)-381(thr)18(ough)-380(the)-381(use)-381(of)-381(the)]TJ/F59 9.9626 Tf 163.76 0 Td [(desc%fnd_owner\050\051)]TJ/F54 9.9626 Tf 87.479 0 Td [(method;)-446(the)-381(owning)]TJ -251.239 -11.956 Td [(pr)18(ocesses)-273(ar)18(e)-273(the)-273(topological)-272(neighbours)-273(of)-273(the)-273(calling)-273(pr)18(ocess.)-379(If)-272(the)-273(user)-273(has)]TJ 0 -11.955 Td [(some)-248(backgr)18(ound)-248(information)-248(on)-248(the)-248(pr)18(ocesses)-248(that)-248(ar)18(e)-248(neighbours)-248(of)-248(the)-248(cur)18(-)]TJ 0 -11.955 Td [(r)18(ent)-274(one,)-281(it)-274(is)-275(possible)-274(to)-274(specify)-275(explicitly)-274(the)-274(list)-275(of)-274(adjacent)-274(pr)18(ocesses)-275(with)-274(a)]TJ 0 -11.955 Td [(call)-327(to)]TJ/F59 9.9626 Tf 30.401 0 Td [(desc%set_p_adjcncy\050list\051)]TJ/F54 9.9626 Tf 125.529 0 Td [(;)-365(this)-327(will)-327(speed)-327(u)1(p)-327(the)-327(subsequent)-327(call)-327(to)]TJ/F59 9.9626 Tf -155.93 -11.955 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - 119.801 -116.528 Td [(76)]TJ + [-500(This)-240(r)18(outine)-239(r)18(eturns)-240(a)]TJ/F67 9.9626 Tf 109.67 0 Td [(.true.)]TJ/F62 9.9626 Tf 33.769 0 Td [(value)-240(for)-239(an)-240(index)-239(that)-240(is)-239(strictly)-240(owned)-239(by)]TJ -130.986 -11.955 Td [(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.477 -263.975 Td [(100)]TJ 0 g 0 G ET endstream endobj -1396 0 obj +1659 0 obj << -/Length 3168 +/Length 4795 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.4)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.20)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(owned)]TJ +ET +q +1 0 0 1 194.903 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ +/F59 11.9552 Tf 198.489 706.129 Td [(index)-250(\227)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdcpy\050desc_in,)-525(desc_out,)-525(info\051)]TJ +/F67 9.9626 Tf -98.594 -18.964 Td [(call)-525(psb_owned_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F62 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 120.408 625.596 cm +1 0 0 1 120.408 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 625.397 Td [(in)]TJ +/F59 9.9626 Tf 123.397 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -12.981 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 577.775 cm +1 0 0 1 309.258 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 577.576 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 497.875 Td [(desc)]TJ ET q -1 0 0 1 333.945 577.775 cm +1 0 0 1 333.945 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 577.576 Td [(type)]TJ +/F67 9.9626 Tf 337.084 497.875 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ 0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F59 9.9626 Tf 253.796 0 Td [(global)]TJ/F62 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F67 9.9626 Tf 143.341 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F67 9.9626 Tf 29.808 0 Td [(W)]TJ/F62 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F67 9.9626 Tf 42.111 0 Td [(A)]TJ/F62 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F67 9.9626 Tf 55.839 0 Td [(I)]TJ/F62 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 535.932 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 535.733 Td [(out)]TJ +/F59 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 19.925 0 Td [(the)-250(communication)-250(descriptor)-250(copy)111(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 488.112 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 487.912 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 488.112 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 487.912 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + 0 -19.925 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(info)]TJ +/F62 9.9626 Tf 10.521 0 Td [(A)-200(logical)-200(mask)-200(which)-201(is)-200(tr)8(ue)-200(for)-200(all)-200(corr)18(esponding)-200(entries)-200(of)]TJ/F60 9.9626 Tf 260.812 0 Td [(x)]TJ/F62 9.9626 Tf 7.2 0 Td [(that)-200(ar)18(e)-200(owned)]TJ -253.626 -11.955 Td [(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F59 9.9626 Tf 131.027 0 Td [(local)]TJ/F62 9.9626 Tf -131.027 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G - 141.968 -329.728 Td [(77)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ +0 g 0 G + [-500(This)-429(r)18(outine)-428(r)18(eturns)-429(a)]TJ/F67 9.9626 Tf 117.209 0 Td [(.true.)]TJ/F62 9.9626 Tf 35.654 0 Td [(value)-429(for)-428(those)-429(indices)-429(that)-429(ar)18(e)-428(strictly)]TJ -140.41 -11.955 Td [(owned)-250(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.477 -140.438 Td [(101)]TJ 0 g 0 G ET endstream endobj -1401 0 obj +1665 0 obj << -/Length 2167 +/Length 3147 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.5)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.21)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(is)]TJ +ET +q +1 0 0 1 217.809 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(cdfree)-250(\227)-250(Frees)-250(a)-250(communication)-250(descriptor)]TJ +/F59 11.9552 Tf 221.396 706.129 Td [(local)-250(\227)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdfree\050desc_a,)-525(info\051)]TJ +/F67 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_local\050x,)-525(desc_a\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 171.218 625.596 cm +1 0 0 1 171.218 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ +/F59 9.9626 Tf 174.207 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)-250(to)-250(be)-250(fr)18(eed.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 577.775 cm +1 0 0 1 360.068 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 497.875 Td [(desc)]TJ ET q -1 0 0 1 384.755 577.775 cm +1 0 0 1 384.755 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ +/F67 9.9626 Tf 387.893 497.875 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(info)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(A)-244(logical)-244(mask)-243(which)-244(is)-244(tr)8(ue)-244(if)]TJ/F60 9.9626 Tf 136.118 0 Td [(x)]TJ/F62 9.9626 Tf 7.635 0 Td [(is)-244(local)-244(to)-243(the)-244(curr)18(ent)-244(pr)18(ocess)]TJ -191.623 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ/F59 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ 0 g 0 G - 141.968 -397.474 Td [(78)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(This)-199(r)18(outine)-200(r)18(eturns)-199(a)]TJ/F67 9.9626 Tf 108.069 0 Td [(.true.)]TJ/F62 9.9626 Tf 33.369 0 Td [(value)-199(for)-200(an)-199(index)-199(that)-200(is)-199(local)-200(to)-199(the)-199(curr)18(ent)]TJ -128.984 -11.955 Td [(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.476 -263.975 Td [(102)]TJ 0 g 0 G ET endstream endobj -1407 0 obj +1671 0 obj << -/Length 5710 +/Length 4785 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.6)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.22)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(local)]TJ +ET +q +1 0 0 1 183.605 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(cdbldext)-190(\227)-190(Build)-190(an)-190(extended)-190(communication)-190(descrip-)]TJ -24.221 -13.948 Td [(tor)]TJ +/F59 11.9552 Tf 187.192 706.129 Td [(index)-250(\227)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -26.9 -19.693 Td [(call)-525(psb_cdbldext\050a,desc_a,nl,desc_out,)-525(info,)-525(extype\051)]TJ/F54 9.9626 Tf 14.944 -23.422 Td [(This)-379(subr)18(outine)-379(builds)-379(an)-379(extended)-379(communication)-379(descriptor)74(,)-411(based)-379(on)]TJ -14.944 -11.955 Td [(the)-428(input)-428(descriptor)]TJ/F59 9.9626 Tf 95.499 0 Td [(desc_a)]TJ/F54 9.9626 Tf 35.646 0 Td [(and)-428(on)-428(the)-428(stencil)-428(speci\002ed)-428(thr)18(ough)-428(the)-427(input)]TJ -131.145 -11.955 Td [(sparse)-250(matrix)]TJ/F59 9.9626 Tf 62.107 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +/F67 9.9626 Tf -87.297 -18.964 Td [(call)-525(psb_local_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -67.337 -21.054 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.429 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -21.43 Td [(a)]TJ + 0 -19.925 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)-250(Scope:)]TJ/F51 9.9626 Tf 100.691 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -107.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(type.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F62 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.429 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 120.408 504.147 cm +1 0 0 1 120.408 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 503.948 Td [(a)]TJ +/F59 9.9626 Tf 123.397 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 456.326 cm +1 0 0 1 309.258 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 456.127 Td [(Tspmat)]TJ +/F67 9.9626 Tf 312.397 497.875 Td [(desc)]TJ ET q -1 0 0 1 344.406 456.326 cm +1 0 0 1 333.945 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 347.544 456.127 Td [(type)]TJ +/F67 9.9626 Tf 337.084 497.875 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -21.43 Td [(nl)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(the)-250(number)-250(of)-250(additional)-250(layers)-250(desir)18(ed.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F52 9.9626 Tf 131.102 0 Td [(n)-25(l)]TJ/F83 10.3811 Tf 11.873 0 Td [(\025)]TJ/F54 9.9626 Tf 10.961 0 Td [(0.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F59 9.9626 Tf 253.796 0 Td [(global)]TJ/F62 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F67 9.9626 Tf 143.341 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F67 9.9626 Tf 29.808 0 Td [(W)]TJ/F62 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F67 9.9626 Tf 42.111 0 Td [(A)]TJ/F62 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F67 9.9626 Tf 55.839 0 Td [(I)]TJ/F62 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ 0 g 0 G -/F51 9.9626 Tf -178.843 -21.43 Td [(extype)]TJ +/F59 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 34.869 0 Td [(the)-250(kind)-250(of)-250(estension)-250(r)18(equir)18(ed.)]TJ -9.962 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(.)]TJ -64.677 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-345(as:)-501(an)-345(integer)-346(value)]TJ/F59 9.9626 Tf 136.676 0 Td [(psb_ovt_xhal_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 6.169 0 Td [(psb_ovt_asov_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-369(default:)]TJ/F59 9.9626 Tf -278.833 -11.955 Td [(psb_ovt_xhal_)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -23.422 Td [(On)-250(Return)]TJ + 0 -19.925 Td [(y)]TJ 0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(A)-270(logical)-270(mask)-270(which)-271(is)-270(tr)8(ue)-270(for)-270(all)-270(corr)18(esponding)-270(entries)-270(of)]TJ/F60 9.9626 Tf 268.484 0 Td [(x)]TJ/F62 9.9626 Tf 7.897 0 Td [(that)-270(ar)18(e)-270(local)]TJ -261.995 -11.955 Td [(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F59 9.9626 Tf 128.666 0 Td [(local)]TJ/F62 9.9626 Tf -128.666 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ 0 g 0 G - 0 -21.43 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ +0 g 0 G + [-500(This)-264(r)18(outine)-265(r)18(eturns)-264(a)]TJ/F67 9.9626 Tf 110.663 0 Td [(.true.)]TJ/F62 9.9626 Tf 34.017 0 Td [(value)-264(for)-265(those)-264(indices)-265(that)-264(ar)18(e)-265(local)-264(to)-265(the)]TJ -132.227 -11.955 Td [(curr)18(ent)-250(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices.)]TJ +0 g 0 G + 139.477 -140.438 Td [(103)]TJ +0 g 0 G +ET + +endstream +endobj +1678 0 obj +<< +/Length 3647 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.23)-1000(psb)]TJ ET q -1 0 0 1 120.408 261.018 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(get)]TJ +ET +q +1 0 0 1 225.126 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 123.397 260.819 Td [(out)]TJ +/F59 11.9552 Tf 228.712 706.129 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -78.007 -18.964 Td [(call)-525(psb_get_boundary\050bndel,)-525(desc,)-525(info\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 19.925 0 Td [(the)-250(extended)-250(communication)-250(descriptor)74(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 213.198 cm +1 0 0 1 360.068 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 212.998 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 577.576 Td [(desc)]TJ ET q -1 0 0 1 333.945 213.198 cm +1 0 0 1 384.755 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 212.998 Td [(type)]TJ +/F67 9.9626 Tf 387.893 577.576 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.429 Td [(info)]TJ +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -23.422 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(79)]TJ + 0 -19.925 Td [(bndel)]TJ 0 g 0 G -ET - -endstream -endobj -1412 0 obj -<< -/Length 1484 ->> -stream +/F62 9.9626 Tf 31.541 0 Td [(The)-307(list)-307(of)-307(boundary)-307(elements)-307(on)-306(the)-307(calling)-307(pr)18(ocess,)-321(in)-307(local)-307(number)18(-)]TJ -6.635 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(a)-1(r)1(ray)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.906 -31.881 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 163.158 706.129 Td [(1.)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(Specifying)]TJ/F59 9.9626 Tf 61.745 0 Td [(psb_ovt_xhal_)]TJ/F54 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F59 9.9626 Tf 32.282 0 Td [(extype)]TJ/F54 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-400(descriptor)-400(for)-400(a)-400(domain)-400(partition)-400(in)-400(which)-400(the)-400(additional)-400(layers)-400(ar)18(e)]TJ 0 -11.955 Td [(fetched)-222(as)-221(part)-222(of)-221(an)-222(\050extended\051)-221(halo;)-232(however)-221(the)-222(index-to-pr)18(ocess)-221(map-)]TJ 0 -11.956 Td [(ping)-250(is)-250(identical)-250(to)-250(that)-250(of)-250(the)-250(base)-250(descriptor;)]TJ + [-500(If)-219(ther)18(e)-220(ar)18(e)-219(no)-220(boundary)-219(elements)-219(\050i.e.,)-226(if)-219(the)-220(local)-219(part)-219(of)-220(the)-219(connectivity)]TJ 12.453 -11.956 Td [(graph)-391(is)-392(self-contained\051)-391(the)-392(output)-391(vector)-391(is)-392(set)-391(to)-391(the)-392(\223not)-391(allocated\224)]TJ 0 -11.955 Td [(state.)]TJ 0 g 0 G -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - [-500(Specifying)]TJ/F59 9.9626 Tf 61.745 0 Td [(psb_ovt_asov_)]TJ/F54 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F59 9.9626 Tf 32.282 0 Td [(extype)]TJ/F54 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-330(descriptor)-331(with)-330(an)-330(overlapped)-331(decomposition:)-470(the)-331(additional)-330(layer)-330(is)]TJ 0 -11.955 Td [(aggr)18(egated)-326(to)-326(the)-326(local)-326(subdomain)-326(\050and)-326(thus)-326(is)-325(an)-326(overlap\051,)-345(and)-326(a)-326(new)]TJ 0 -11.955 Td [(halo)-250(extending)-250(beyond)-250(the)-250(last)-250(additional)-250(layer)-250(is)-250(formed.)]TJ + [-500(Otherwise)-206(the)-205(size)-206(of)]TJ/F67 9.9626 Tf 105.891 0 Td [(bndel)]TJ/F62 9.9626 Tf 28.201 0 Td [(will)-206(be)-205(exactly)-206(equal)-206(to)-206(the)-205(number)-206(of)-206(bound-)]TJ -121.639 -11.955 Td [(ary)-250(elements.)]TJ 0 g 0 G - 141.968 -524.035 Td [(80)]TJ + 139.477 -196.229 Td [(104)]TJ 0 g 0 G ET endstream endobj -1420 0 obj +1685 0 obj << -/Length 5699 +/Length 3458 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.7)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.24)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(get)]TJ +ET +q +1 0 0 1 174.316 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ +/F59 11.9552 Tf 177.903 706.129 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.121 -19.277 Td [(call)-525(psb_spall\050a,)-525(desc_a,)-525(info)-525([,)-525(nnz,)-525(dupl,)-525(bldmode]\051)]TJ +/F67 9.9626 Tf -78.008 -18.964 Td [(call)-525(psb_get_overlap\050ovrel,)-525(desc,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.403 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.571 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.572 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 623.505 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 623.306 Td [(a)]TJ + 0 -19.925 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 575.684 cm +1 0 0 1 309.258 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 575.485 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 577.576 Td [(desc)]TJ ET q -1 0 0 1 333.945 575.684 cm +1 0 0 1 333.945 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 575.485 Td [(type)]TJ +/F67 9.9626 Tf 337.084 577.576 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.572 Td [(nnz)]TJ +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(An)-230(estimate)-230(of)-230(the)-230(number)-230(of)-231(nonzer)18(oes)-230(in)-230(the)-230(local)-230(part)-230(of)-230(the)-230(assembled)]TJ 2.77 -11.955 Td [(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.572 Td [(dupl)]TJ + 0 -19.925 Td [(ovrel)]TJ 0 g 0 G -/F54 9.9626 Tf 26.561 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F59 9.9626 Tf 164.941 0 Td [(psb_dupl_ovwrt_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F54 9.9626 Tf 67.994 0 Td [(.)]TJ +/F62 9.9626 Tf 28.234 0 Td [(The)-250(list)-250(of)-250(overlap)-250(elements)-250(on)-250(the)-250(calling)-250(pr)18(ocess,)-250(in)-250(local)-250(numbering.)]TJ -3.327 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(array)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -92.901 -20.572 Td [(bldmode)]TJ +/F59 9.9626 Tf -24.907 -31.88 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 45.938 0 Td [(Whether)-372(to)-372(kee)1(p)-372(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.287 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ -331.543 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -168.029 -22.402 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G + [-500(If)-287(ther)18(e)-287(ar)18(e)-287(no)-287(overlap)-287(elements)-287(the)-287(output)-287(vector)-287(is)-287(set)-287(to)-287(the)-287(\223not)-287(allo-)]TJ 12.453 -11.955 Td [(cated\224)-250(state.)]TJ 0 g 0 G - 0 -20.572 Td [(a)]TJ + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(Otherwise)-194(the)-194(size)-195(of)]TJ/F67 9.9626 Tf 105.434 0 Td [(ovrel)]TJ/F62 9.9626 Tf 28.087 0 Td [(will)-194(be)-194(exactly)-195(equal)-194(to)-194(the)-194(number)-195(of)-194(overlap)]TJ -121.068 -11.955 Td [(elements.)]TJ +0 g 0 G + 139.477 -220.139 Td [(105)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET + +endstream +endobj +1692 0 obj +<< +/Length 5480 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.25)-1000(psb)]TJ ET q -1 0 0 1 309.258 231.892 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 312.397 231.692 Td [(Tspmat)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(sp)]TJ ET q -1 0 0 1 344.406 231.892 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 221.133 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 347.544 231.692 Td [(type)]TJ +/F59 11.9552 Tf 224.719 706.129 Td [(getrow)-250(\227)-250(Extract)-250(row\050s\051)-250(from)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -20.571 Td [(info)]TJ +/F67 9.9626 Tf -74.014 -19.204 Td [(call)-525(psb_sp_getrow\050row,)-525(a,)-525(nz,)-525(ia,)-525(ja,)-525(val,)-525(info,)-525(&)]TJ 73.225 -11.955 Td [(&)-525(append,)-525(nzin,)-525(lrw\051)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -22.564 Td [(Notes)]TJ +/F59 9.9626 Tf -73.225 -22.29 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -20.41 Td [(1.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +/F59 9.9626 Tf -29.828 -20.42 Td [(On)-250(Entry)]TJ 0 g 0 G - 154.421 -29.888 Td [(81)]TJ 0 g 0 G + 0 -20.421 Td [(row)]TJ +0 g 0 G +/F62 9.9626 Tf 22.695 0 Td [(The)-250(\050\002rst\051)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 2.212 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F69 10.3811 Tf 104.322 0 Td [(>)]TJ/F62 9.9626 Tf 10.962 0 Td [(0.)]TJ +0 g 0 G +/F59 9.9626 Tf -140.191 -20.42 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(fr)18(om)-250(which)-250(to)-250(get)-250(r)18(ows.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.137 0 Td [(psb)]TJ ET - -endstream -endobj -1425 0 obj -<< -/Length 1141 ->> -stream +q +1 0 0 1 360.068 495.976 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 495.777 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 495.976 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 398.354 495.777 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.57 -20.421 Td [(append)]TJ +0 g 0 G +/F62 9.9626 Tf 39.292 0 Td [(Whether)-250(to)-250(append)-250(or)-250(overwrite)-250(existing)-250(output.)]TJ -14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value)-250(default:)-310(false)-250(\050overwrite\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -20.421 Td [(nzin)]TJ 0 g 0 G +/F62 9.9626 Tf 25.454 0 Td [(Input)-250(size)-250(to)-250(be)-250(appended)-250(to.)]TJ -0.548 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-304(as:)-417(an)-303(integer)]TJ/F69 10.3811 Tf 106.988 0 Td [(>)]TJ/F62 9.9626 Tf 11.949 0 Td [(0.)-471(When)-303(append)-303(is)-304(tr)8(ue,)-317(speci\002es)-303(how)-304(many)]TJ -118.937 -11.955 Td [(entries)-250(in)-250(the)-250(output)-250(vectors)-250(ar)18(e)-250(alr)18(eady)-250(\002lled.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.906 -20.421 Td [(lrw)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 163.158 706.129 Td [(2.)]TJ +/F62 9.9626 Tf 20.473 0 Td [(The)-250(last)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F69 10.3811 Tf 104.323 0 Td [(>)]TJ/F62 9.9626 Tf 10.962 0 Td [(0,)-250(default:)]TJ/F60 9.9626 Tf 46.878 0 Td [(r)-17(o)-35(w)]TJ/F62 9.9626 Tf 16.134 0 Td [(.)]TJ 0 g 0 G - [-500(The)-250(descriptor)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(assembled)-250(state.)]TJ +/F59 9.9626 Tf -203.203 -22.29 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -19.925 Td [(3.)]TJ 0 g 0 G - [-500(Pr)18(oviding)-219(a)-219(good)-219(estimate)-218(for)-219(the)-219(number)-219(of)-219(nonzer)18(oes)]TJ/F52 9.9626 Tf 255.761 0 Td [(n)-25(n)-25(z)]TJ/F54 9.9626 Tf 18.305 0 Td [(in)-219(the)-219(assem-)]TJ -261.613 -11.956 Td [(bled)-295(matri)1(x)-295(may)-294(substantially)-295(impr)18(ove)-294(performance)-295(in)-294(the)-295(matrix)-294(build)]TJ 0 -11.955 Td [(phase,)-370(as)-346(it)-346(will)-345(r)18(educe)-346(or)-346(eliminate)-346(the)-346(need)-346(for)-345(\050potentially)-346(multiple\051)]TJ 0 -11.955 Td [(data)-250(r)18(eallocations;)]TJ + 0 -20.42 Td [(nz)]TJ +0 g 0 G +/F62 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(elements)-250(r)18(eturned)-250(by)-250(this)-250(call.)]TJ 8.856 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ +/F59 9.9626 Tf -24.906 -20.421 Td [(ia)]TJ 0 g 0 G - [-500(Using)]TJ/F59 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.28 0 Td [(is)-315(likel)1(y)-315(to)-315(cause)-314(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ +/F62 9.9626 Tf 13.28 0 Td [(the)-250(r)18(ow)-250(indices.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F67 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F62 9.9626 Tf 60.025 0 Td [(attribute.)]TJ 0 g 0 G - 141.968 -528.02 Td [(82)]TJ + -89.662 -29.887 Td [(106)]TJ 0 g 0 G ET endstream endobj -1433 0 obj +1696 0 obj << -/Length 5375 +/Length 3529 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.8)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(spins)-233(\227)-233(Insert)-233(a)-233(set)-233(of)-234(coef)18(\002cients)-233(into)-233(a)-233(sparse)-233(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.121 -20.373 Td [(call)-525(psb_spins\050nz,)-525(ia,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ 0 -11.956 Td [(call)-525(psb_spins\050nr,)-525(irw,)-525(irp,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.099 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.835 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.834 Td [(nz)]TJ -0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(nr)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(ja)]TJ 0 g 0 G -/F54 9.9626 Tf 14.944 0 Td [(the)-250(number)-250(of)-250(r)18(ows)-250(to)-250(be)-250(inserted.)]TJ 9.963 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +/F62 9.9626 Tf 13.281 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F67 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F62 9.9626 Tf 60.024 0 Td [(attribute.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(irw)]TJ +/F59 9.9626 Tf -254.045 -19.925 Td [(val)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(the)-250(\002rst)-250(r)18(ow)-250(to)-250(be)-250(inserted.)]TJ 4.434 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +/F62 9.9626 Tf 18.82 0 Td [(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(array)-250(with)-250(the)]TJ/F67 9.9626 Tf 148.761 0 Td [(ALLOCATABLE)]TJ/F62 9.9626 Tf 60.024 0 Td [(attribute.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(ia)]TJ +/F59 9.9626 Tf -233.692 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(the)-250(r)18(ow)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -196.043 -22.834 Td [(irp)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(the)-250(r)18(ow)-250(pointers)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.645 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-15(r)]TJ/F85 10.3811 Tf 11.85 0 Td [(+)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ + [-500(The)-307(output)]TJ/F60 9.9626 Tf 65.308 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 13.394 0 Td [(is)-307(always)-307(the)-307(size)-307(of)-307(the)-307(output)-307(generated)-307(by)-306(the)-307(curr)18(ent)]TJ -66.249 -11.955 Td [(call;)-283(thus,)-277(if)]TJ/F67 9.9626 Tf 53.971 0 Td [(append=.true.)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)-278(the)-271(total)-272(output)-272(size)-272(will)-272(be)]TJ/F60 9.9626 Tf 129.372 0 Td [(n)-25(z)-18(i)-32(n)]TJ/F93 10.3811 Tf 21.286 0 Td [(+)]TJ/F60 9.9626 Tf 10.336 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.337 0 Td [(,)-277(with)]TJ -293.296 -11.955 Td [(the)-292(newly)-293(extracted)-292(coef)18(\002cients)-293(stor)18(ed)-292(in)-293(entries)]TJ/F67 9.9626 Tf 217.177 0 Td [(nzin+1:nzin+nz)]TJ/F62 9.9626 Tf 76.139 0 Td [(of)-292(the)]TJ -293.316 -11.955 Td [(array)-250(ar)18(guments;)]TJ 0 g 0 G -/F51 9.9626 Tf -207.688 -22.835 Td [(ja)]TJ + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ + [-500(When)]TJ/F67 9.9626 Tf 41.275 0 Td [(append=.true.)]TJ/F62 9.9626 Tf 70.485 0 Td [(the)-250(output)-250(arrays)-250(ar)18(e)-250(r)18(eallocated)-250(as)-250(necessary;)]TJ 0 g 0 G -/F51 9.9626 Tf -196.043 -22.835 Td [(val)]TJ + -111.76 -19.925 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 18.82 0 Td [(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-315(as:)-439(an)-314(array)-315(of)-315(size)]TJ/F52 9.9626 Tf 131.853 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(.)-504(Must)-314(be)-315(of)-315(the)-314(same)-315(type)-315(and)-314(kind)-315(of)]TJ -142.19 -11.956 Td [(the)-250(coef)18(\002cients)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 157.901 0 Td [(a)]TJ/F54 9.9626 Tf 4.548 0 Td [(.)]TJ + [-500(The)-218(r)18(ow)-218(and)-219(column)-218(indices)-218(ar)18(e)-218(r)18(eturned)-218(in)-219(the)-218(local)-218(numbering)-218(scheme;)]TJ 12.453 -11.955 Td [(if)-190(the)-190(global)-190(numbering)-190(is)-190(desir)18(ed,)-202(the)-190(user)-190(may)-190(employ)-190(the)]TJ/F67 9.9626 Tf 258.836 0 Td [(psb_loc_to_glob)]TJ/F62 9.9626 Tf -258.836 -11.955 Td [(r)18(outine)-250(on)-250(the)-250(output.)]TJ 0 g 0 G - -20.481 -29.887 Td [(83)]TJ + 139.477 -290.909 Td [(107)]TJ 0 g 0 G ET endstream endobj -1439 0 obj +1706 0 obj << -/Length 6861 +/Length 3995 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(desc)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.26)-1000(psb)]TJ ET q -1 0 0 1 171.218 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 174.207 706.129 Td [(a)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(memory)-250(occupation)-250(of)-250(a)-250(PSBLAS)-250(object.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ -ET -q -1 0 0 1 328.257 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 331.395 658.308 Td [(desc)]TJ -ET -q -1 0 0 1 352.944 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 356.083 658.308 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F67 9.9626 Tf 0 -21.918 Td [(isz)-525(=)-525(psb_sizeof\050a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050desc_a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050prec\051)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -33.398 Td [(local)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Whether)-207(the)-207(entries)-207(in)-207(the)-208(indices)-207(vectors)]TJ/F59 9.9626 Tf 181.487 0 Td [(ia)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 4.64 0 Td [(ja)]TJ/F54 9.9626 Tf 12.524 0 Td [(ar)18(e)-207(alr)18(eady)-207(in)-207(local)-208(num-)]TJ -210.765 -11.956 Td [(bering.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F54 9.9626 Tf 36.613 0 Td [(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -224.197 -23.056 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -21.444 Td [(a)]TJ + 0 -19.925 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(into)-250(which)-250(coef)18(\002cients)-250(will)-250(be)-250(inserted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 72.97 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(.)]TJ -65.344 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.137 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 484.968 cm +1 0 0 1 360.068 531.947 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 484.768 Td [(Tspmat)]TJ +/F67 9.9626 Tf 363.206 531.748 Td [(Tspmat)]TJ ET q -1 0 0 1 395.216 484.968 cm +1 0 0 1 395.216 531.947 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 398.354 484.768 Td [(type)]TJ +/F67 9.9626 Tf 398.354 531.748 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -21.443 Td [(desc)]TJ +/F59 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ ET q -1 0 0 1 171.218 463.524 cm +1 0 0 1 171.218 512.022 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 463.325 Td [(a)]TJ +/F59 9.9626 Tf 174.207 511.823 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(Communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 328.257 415.704 cm +1 0 0 1 360.068 464.201 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 331.395 415.504 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 464.002 Td [(desc)]TJ ET q -1 0 0 1 352.944 415.704 cm +1 0 0 1 384.755 464.201 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 356.083 415.504 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F67 9.9626 Tf 387.893 464.002 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -33.398 Td [(info)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -23.436 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -21.064 Td [(1.)]TJ -0 g 0 G - [-500(On)-312(entry)-312(to)-312(this)-312(r)18(out)1(ine)-312(the)-312(descriptor)-312(may)-312(be)-312(in)-312(either)-312(the)-311(build)-312(or)-312(as-)]TJ 12.453 -11.955 Td [(sembled)-250(state.)]TJ -0 g 0 G - -12.453 -21.443 Td [(2.)]TJ -0 g 0 G - [-500(On)-314(entry)-315(to)-314(this)-315(r)18(ou)1(tine)-315(the)-314(sparse)-315(matrix)-314(may)-314(be)-315(in)-314(either)-314(the)-315(build)-314(or)]TJ 12.453 -11.955 Td [(update)-250(state.)]TJ -0 g 0 G - -12.453 -21.444 Td [(3.)]TJ -0 g 0 G - [-500(If)-263(the)-263(descriptor)-263(is)-262(in)-263(the)-263(build)-263(state,)-266(then)-263(the)-263(sparse)-263(matrix)-262(must)-263(also)-263(be)]TJ 12.453 -11.955 Td [(in)-212(the)-212(build)-213(state;)-224(the)-213(action)-212(of)-212(the)-212(r)18(outine)-212(is)-213(to)-212(\050implicitly\051)-212(call)]TJ/F59 9.9626 Tf 271.732 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf -271.732 -11.955 Td [(to)-259(add)-259(entries)-259(to)-259(the)-259(sparsity)-259(pattern;)-263(each)-259(sparse)-259(matrix)-259(entry)-259(implicitly)]TJ 0 -11.955 Td [(de\002nes)-288(a)-288(graph)-288(edge,)-297(that)-288(is)-288(passed)-288(to)-288(the)-288(descriptor)-288(r)18(outine)-288(for)-288(the)-288(ap-)]TJ 0 -11.955 Td [(pr)18(opriate)-250(pr)18(ocessing;)]TJ -0 g 0 G - -12.453 -21.444 Td [(4.)]TJ -0 g 0 G - [-500(The)-250(input)-250(data)-250(can)-250(be)-250(passed)-250(in)-250(either)-250(COO)-250(or)-250(CSR)-250(formats;)]TJ -0 g 0 G - 0 -21.443 Td [(5.)]TJ -0 g 0 G - [-500(In)-307(COO)-307(format)-307(the)-306(coef)18(\002cients)-307(to)-307(be)-307(inserted)-307(ar)18(e)-307(r)18(epr)18(esented)-306(by)-307(the)-307(or)18(-)]TJ 12.453 -11.955 Td [(der)18(ed)-194(triples)]TJ/F52 9.9626 Tf 57.352 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.911 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.84 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F85 10.3811 Tf 13.37 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)-205(for)]TJ/F52 9.9626 Tf 19.208 0 Td [(i)]TJ/F85 10.3811 Tf 5.856 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(1,)-179(.)-192(.)-191(.)-180(,)]TJ/F52 9.9626 Tf 26.608 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(;)-212(these)-194(triples)-194(ar)18(e)-193(arbitrary;)]TJ -0 g 0 G - -60.701 -29.888 Td [(84)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(prec)]TJ 0 g 0 G +/F62 9.9626 Tf 24.348 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ ET - -endstream -endobj -1448 0 obj -<< -/Length 4535 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 388.441 408.41 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F54 9.9626 Tf 112.349 706.129 Td [(6.)]TJ -0 g 0 G - [-500(In)-272(CSR)-271(format)-272(the)-271(coef)18(\002cients)-272(to)-271(be)-272(inserted)-272(for)-271(each)-272(input)-271(r)18(ow)]TJ/F52 9.9626 Tf 294.598 0 Td [(i)]TJ/F85 10.3811 Tf 6.254 0 Td [(=)]TJ/F54 9.9626 Tf 11.36 0 Td [(1,)]TJ/F52 9.9626 Tf 9.257 0 Td [(n)-15(r)]TJ/F54 9.9626 Tf -309.016 -11.955 Td [(ar)18(e)-311(r)18(epr)18(esented)-312(by)-311(the)-311(or)18(der)18(ed)-312(triples)]TJ/F85 10.3811 Tf 171.689 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 5.251 0 Td [(+)]TJ/F52 9.9626 Tf 10.413 0 Td [(i)-22(r)-35(w)]TJ/F83 10.3811 Tf 16.818 0 Td [(\000)]TJ/F54 9.9626 Tf 10.358 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.622 0 Td [(j)]TJ/F85 10.3811 Tf 3.019 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F85 10.3811 Tf 13.37 0 Td [(\050)]TJ/F52 9.9626 Tf 4.622 0 Td [(j)]TJ/F85 10.3811 Tf 3.019 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)-327(for)]TJ/F52 9.9626 Tf 22.013 0 Td [(j)]TJ/F85 10.3811 Tf 6.917 0 Td [(=)]TJ/F52 9.9626 Tf -310.555 -11.955 Td [(i)-22(r)-90(p)]TJ/F85 10.3811 Tf 12.991 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)-179(.)-192(.)-191(.)-180(,)]TJ/F52 9.9626 Tf 21.557 0 Td [(i)-22(r)-90(p)]TJ/F85 10.3811 Tf 12.991 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 5.301 0 Td [(+)]TJ/F54 9.9626 Tf 10.407 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F83 10.3811 Tf 6.486 0 Td [(\000)]TJ/F54 9.9626 Tf 10.407 0 Td [(1;)-362(these)-325(triples)-324(should)-325(belong)-325(to)-324(the)-325(curr)18(ent)-325(pr)18(o-)]TJ -100.947 -11.956 Td [(cess,)-276(i.e.)]TJ/F52 9.9626 Tf 39.307 0 Td [(i)]TJ/F85 10.3811 Tf 5.103 0 Td [(+)]TJ/F52 9.9626 Tf 10.263 0 Td [(i)-22(r)-35(w)]TJ/F83 10.3811 Tf 16.669 0 Td [(\000)]TJ/F54 9.9626 Tf 10.209 0 Td [(1)-271(should)-271(be)-271(one)-271(of)-271(the)-271(local)-270(indices,)-277(but)-271(ar)18(e)-270(otherwise)]TJ -81.551 -11.955 Td [(arbitrary;)]TJ -0 g 0 G - -12.453 -19.925 Td [(7.)]TJ -0 g 0 G - [-500(Ther)18(e)-315(is)-314(no)-315(r)18(equir)18(ement)-314(that)-315(a)-315(given)-314(r)18(ow)-315(must)-315(be)-314(passed)-315(in)-315(its)-314(entir)18(ety)]TJ 12.453 -11.955 Td [(to)-298(a)-299(single)-298(call)-298(to)-299(thi)1(s)-299(r)18(outine:)-406(the)-299(buildup)-298(of)-298(a)-299(r)18(ow)-298(may)-298(be)-299(split)-298(into)-298(as)]TJ 0 -11.955 Td [(many)-250(calls)-250(as)-250(desir)18(ed)-250(\050even)-250(in)-250(the)-250(CSR)-250(format\051;)]TJ -0 g 0 G - -12.453 -19.926 Td [(8.)]TJ -0 g 0 G - [-500(Coef)18(\002cients)-288(fr)18(om)-289(dif)18(fer)18(ent)-288(r)18(ows)-288(may)-289(also)-288(be)-288(mixed)-289(up)-288(fr)18(eely)-288(in)-289(a)-288(single)]TJ 12.453 -11.955 Td [(call,)-250(accor)18(ding)-250(to)-250(the)-250(application)-250(needs;)]TJ -0 g 0 G - -12.453 -19.925 Td [(9.)]TJ +/F67 9.9626 Tf 391.579 408.211 Td [(prec)]TJ +ET +q +1 0 0 1 413.128 408.41 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 416.266 408.211 Td [(type)]TJ 0 g 0 G - [-500(Coef)18(\002cients)-190(fr)18(om)-190(matrix)-190(r)18(ows)-190(not)-190(owned)-190(by)-190(the)-190(calling)-190(pr)18(ocess)-190(ar)18(e)-190(tr)18(eated)]TJ 12.453 -11.955 Td [(accor)18(ding)-254(to)-254(the)-253(value)-254(of)]TJ/F59 9.9626 Tf 111.539 0 Td [(bldmode)]TJ/F54 9.9626 Tf 39.141 0 Td [(speci\002ed)-254(at)-253(allocation)-254(time;)-256(if)]TJ/F59 9.9626 Tf 131.512 0 Td [(bldmode)]TJ/F54 9.9626 Tf -282.192 -11.956 Td [(was)-300(chosen)-300(as)]TJ/F59 9.9626 Tf 66.146 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.136 0 Td [(the)-300(library)-300(will)-300(keep)-300(track)-301(of)-300(them,)]TJ -163.282 -11.955 Td [(otherwise)-250(they)-250(ar)18(e)-250(silently)-250(ignor)18(ed;)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - -17.435 -19.925 Td [(10.)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(If)-295(the)-294(descriptor)-295(is)-295(i)1(n)-295(the)-295(assembled)-294(state,)-306(then)-295(any)-294(entries)-295(in)-295(the)-294(sparse)]TJ 17.435 -11.955 Td [(matrix)-284(that)-284(would)-284(generate)-284(additional)-284(communication)-284(r)18(equir)18(ements)-284(ar)18(e)]TJ 0 -11.955 Td [(ignor)18(ed;)]TJ 0 g 0 G - -17.435 -19.926 Td [(11.)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G - [-500(If)-268(the)-268(matrix)-268(is)-268(in)-268(the)-268(update)-268(state,)-273(any)-268(entries)-268(in)-268(positions)-268(that)-268(wer)18(e)-268(not)]TJ 17.435 -11.955 Td [(pr)18(esent)-250(in)-250(the)-250(original)-250(matrix)-250(ar)18(e)-250(ignor)18(ed.)]TJ +/F62 9.9626 Tf 72.776 0 Td [(The)-322(memory)-322(occupation)-322(of)-323(the)-322(object)-322(speci\002ed)-322(in)-322(the)-322(calling)]TJ -47.87 -11.956 Td [(sequence,)-250(in)-250(bytes.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Returned)-250(as:)-310(an)]TJ/F67 9.9626 Tf 71.82 0 Td [(integer\050psb_long_int_k_\051)]TJ/F62 9.9626 Tf 128.019 0 Td [(number)74(.)]TJ 0 g 0 G - 141.968 -360.647 Td [(85)]TJ + -60.362 -242.057 Td [(108)]TJ 0 g 0 G ET endstream endobj -1348 0 obj +1607 0 obj << /Type /ObjStm /N 100 -/First 977 -/Length 10176 ->> -stream -1347 0 308 59 312 117 1344 174 1351 307 1349 446 1353 593 1354 651 1350 709 1357 829 -1355 968 1359 1126 1361 1185 1356 1244 1363 1391 1365 1509 1366 1567 1367 1625 1368 1683 1369 1741 -1370 1799 1362 1857 1374 1938 1372 2077 1376 2222 316 2281 1373 2339 1379 2459 1377 2598 1381 2756 -1382 2814 1383 2872 1384 2930 1378 2988 1388 3082 1385 3230 1386 3375 1390 3521 320 3580 1391 3638 -1387 3697 1395 3791 1392 3939 1393 4084 1397 4231 324 4289 1394 4346 1400 4440 1398 4579 1402 4724 -328 4783 1399 4841 1406 4935 1403 5083 1404 5228 1408 5375 332 5433 1405 5490 1411 5610 1413 5728 -1414 5787 1415 5846 1410 5905 1419 5986 1416 6134 1417 6281 1421 6426 336 6484 1422 6541 1418 6599 -1424 6693 1426 6811 1427 6870 1428 6929 1429 6988 1423 7047 1432 7141 1434 7259 340 7317 1431 7374 -1438 7494 1430 7651 1435 7794 1436 7939 1440 8082 1441 8141 1442 8199 1443 8258 1444 8317 1445 8376 -1437 8435 1447 8555 1449 8673 1450 8731 1451 8789 1452 8847 1453 8905 1454 8963 1455 9021 1446 9079 -% 1347 0 obj -<< -/D [1345 0 R /XYZ 149.705 753.953 null] ->> -% 308 0 obj -<< -/D [1345 0 R /XYZ 150.705 716.092 null] ->> -% 312 0 obj -<< -/D [1345 0 R /XYZ 150.705 691.48 null] ->> -% 1344 0 obj -<< -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/First 973 +/Length 10526 >> -% 1351 0 obj +stream +1604 0 1601 148 1602 294 1606 438 360 497 1603 555 1609 675 1611 793 1612 851 1613 909 +1608 967 1618 1048 1614 1205 1615 1346 1616 1489 1620 1633 364 1692 1621 1750 1622 1809 1617 1868 +1626 1975 1623 2123 1624 2266 1628 2413 368 2471 1625 2528 1630 2622 1632 2740 372 2799 1629 2857 +1635 2964 1633 3103 1637 3250 376 3308 1634 3365 1640 3472 1642 3590 1643 3649 1644 3708 1639 3767 +1647 3848 1645 3987 1649 4134 380 4192 1646 4249 1652 4356 1650 4495 1654 4642 384 4701 1655 4759 +1651 4818 1658 4925 1656 5064 1660 5211 388 5269 1661 5326 1657 5384 1664 5491 1662 5630 1666 5777 +392 5836 1667 5894 1663 5953 1670 6060 1668 6199 1672 6346 396 6404 1673 6461 1669 6519 1677 6626 +1675 6765 1679 6910 400 6969 1680 7027 1681 7086 1676 7145 1684 7239 1682 7378 1686 7523 405 7581 +1687 7638 1688 7696 1683 7754 1691 7848 1689 7987 1693 8132 409 8191 1690 8249 1695 8370 1697 8488 +1698 8546 1699 8604 1700 8662 1694 8720 1705 8840 1701 8997 1702 9142 1703 9289 1707 9436 413 9495 +% 1604 0 obj << /Type /Page -/Contents 1352 0 R -/Resources 1350 0 R +/Contents 1605 0 R +/Resources 1603 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1349 0 R ] +/Parent 1600 0 R +/Annots [ 1601 0 R 1602 0 R ] >> -% 1349 0 obj +% 1601 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 324.687 359.001 336.746] +/Rect [342.753 362.621 409.811 374.68] /A << /S /GoTo /D (descdata) >> >> -% 1353 0 obj +% 1602 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [417.183 207.637 493.237 219.697] +/A << /S /GoTo /D (vdata) >> +>> +% 1606 0 obj << -/D [1351 0 R /XYZ 98.895 753.953 null] +/D [1604 0 R /XYZ 149.705 753.953 null] >> -% 1354 0 obj +% 360 0 obj << -/D [1351 0 R /XYZ 99.895 234.157 null] +/D [1604 0 R /XYZ 150.705 716.092 null] >> -% 1350 0 obj +% 1603 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1357 0 obj +% 1609 0 obj << /Type /Page -/Contents 1358 0 R -/Resources 1356 0 R +/Contents 1610 0 R +/Resources 1608 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1355 0 R ] +/Parent 1600 0 R >> -% 1355 0 obj +% 1611 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [405.298 215.702 427.216 227.166] -/A << /S /GoTo /D (subsubsection.2.3.1) >> +/D [1609 0 R /XYZ 98.895 753.953 null] >> -% 1359 0 obj +% 1612 0 obj << -/D [1357 0 R /XYZ 149.705 753.953 null] +/D [1609 0 R /XYZ 99.895 701.929 null] >> -% 1361 0 obj +% 1613 0 obj << -/D [1357 0 R /XYZ 150.705 133.283 null] +/D [1609 0 R /XYZ 99.895 680.684 null] >> -% 1356 0 obj +% 1608 0 obj << -/Font << /F59 812 0 R /F54 586 0 R /F51 584 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R /F61 1360 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1363 0 obj +% 1618 0 obj << /Type /Page -/Contents 1364 0 R -/Resources 1362 0 R +/Contents 1619 0 R +/Resources 1617 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R +/Parent 1600 0 R +/Annots [ 1614 0 R 1615 0 R 1616 0 R ] >> -% 1365 0 obj +% 1614 0 obj << -/D [1363 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [310.942 573.77 378 585.83] +/A << /S /GoTo /D (descdata) >> >> -% 1366 0 obj +% 1615 0 obj << -/D [1363 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [322.33 482.114 398.384 494.174] +/A << /S /GoTo /D (vdata) >> >> -% 1367 0 obj +% 1616 0 obj << -/D [1363 0 R /XYZ 99.895 687.379 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [225.165 380.495 301.219 392.555] +/A << /S /GoTo /D (vdata) >> >> -% 1368 0 obj +% 1620 0 obj << -/D [1363 0 R /XYZ 99.895 667.454 null] +/D [1618 0 R /XYZ 149.705 753.953 null] >> -% 1369 0 obj +% 364 0 obj << -/D [1363 0 R /XYZ 99.895 626.268 null] +/D [1618 0 R /XYZ 150.705 716.092 null] >> -% 1370 0 obj +% 1621 0 obj +<< +/D [1618 0 R /XYZ 150.705 278.482 null] +>> +% 1622 0 obj << -/D [1363 0 R /XYZ 99.895 567.828 null] +/D [1618 0 R /XYZ 150.705 244.007 null] >> -% 1362 0 obj +% 1617 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1374 0 obj +% 1626 0 obj << /Type /Page -/Contents 1375 0 R -/Resources 1373 0 R +/Contents 1627 0 R +/Resources 1625 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1372 0 R ] +/Parent 1600 0 R +/Annots [ 1623 0 R 1624 0 R ] >> -% 1372 0 obj +% 1623 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 116.52 409.811 128.58] +/Rect [174.355 561.815 250.41 573.875] +/A << /S /GoTo /D (vdata) >> +>> +% 1624 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [260.133 482.114 327.191 494.174] /A << /S /GoTo /D (descdata) >> >> -% 1376 0 obj +% 1628 0 obj << -/D [1374 0 R /XYZ 149.705 753.953 null] +/D [1626 0 R /XYZ 98.895 753.953 null] >> -% 316 0 obj +% 368 0 obj << -/D [1374 0 R /XYZ 150.705 716.092 null] +/D [1626 0 R /XYZ 99.895 716.092 null] >> -% 1373 0 obj +% 1625 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1379 0 obj +% 1630 0 obj << /Type /Page -/Contents 1380 0 R -/Resources 1378 0 R +/Contents 1631 0 R +/Resources 1629 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1600 0 R +>> +% 1632 0 obj +<< +/D [1630 0 R /XYZ 149.705 753.953 null] +>> +% 372 0 obj +<< +/D [1630 0 R /XYZ 150.705 716.092 null] +>> +% 1629 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1635 0 obj +<< +/Type /Page +/Contents 1636 0 R +/Resources 1634 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1377 0 R ] +/Parent 1638 0 R +/Annots [ 1633 0 R ] >> -% 1377 0 obj +% 1633 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [278.165 401.451 300.083 413.511] -/A << /S /GoTo /D (subsubsection.2.3.1) >> +/Rect [291.943 483.894 359.001 495.954] +/A << /S /GoTo /D (descdata) >> >> -% 1381 0 obj +% 1637 0 obj << -/D [1379 0 R /XYZ 98.895 753.953 null] +/D [1635 0 R /XYZ 98.895 753.953 null] +>> +% 376 0 obj +<< +/D [1635 0 R /XYZ 99.895 716.092 null] +>> +% 1634 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1640 0 obj +<< +/Type /Page +/Contents 1641 0 R +/Resources 1639 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1638 0 R >> -% 1382 0 obj +% 1642 0 obj << -/D [1379 0 R /XYZ 99.895 496.698 null] +/D [1640 0 R /XYZ 149.705 753.953 null] >> -% 1383 0 obj +% 1643 0 obj << -/D [1379 0 R /XYZ 99.895 474.179 null] +/D [1640 0 R /XYZ 150.705 716.092 null] >> -% 1384 0 obj +% 1644 0 obj << -/D [1379 0 R /XYZ 99.895 430.343 null] +/D [1640 0 R /XYZ 150.705 687.379 null] >> -% 1378 0 obj +% 1639 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1388 0 obj +% 1647 0 obj << /Type /Page -/Contents 1389 0 R -/Resources 1387 0 R +/Contents 1648 0 R +/Resources 1646 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1385 0 R 1386 0 R ] ->> -% 1385 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 409.811 585.83] -/A << /S /GoTo /D (descdata) >> +/Parent 1638 0 R +/Annots [ 1645 0 R ] >> -% 1386 0 obj +% 1645 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 416.361 409.811 428.42] +/Rect [291.943 482.114 359.001 494.174] /A << /S /GoTo /D (descdata) >> >> -% 1390 0 obj -<< -/D [1388 0 R /XYZ 149.705 753.953 null] ->> -% 320 0 obj +% 1649 0 obj << -/D [1388 0 R /XYZ 150.705 716.092 null] +/D [1647 0 R /XYZ 98.895 753.953 null] >> -% 1391 0 obj +% 380 0 obj << -/D [1388 0 R /XYZ 150.705 326.302 null] +/D [1647 0 R /XYZ 99.895 716.092 null] >> -% 1387 0 obj +% 1646 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1395 0 obj +% 1652 0 obj << /Type /Page -/Contents 1396 0 R -/Resources 1394 0 R +/Contents 1653 0 R +/Resources 1651 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1392 0 R 1393 0 R ] +/Parent 1638 0 R +/Annots [ 1650 0 R ] >> -% 1392 0 obj +% 1650 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 573.77 359.001 585.83] +/Rect [342.753 494.069 409.811 506.129] /A << /S /GoTo /D (descdata) >> >> -% 1393 0 obj +% 1654 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 484.107 359.001 496.166] -/A << /S /GoTo /D (descdata) >> +/D [1652 0 R /XYZ 149.705 753.953 null] >> -% 1397 0 obj +% 384 0 obj << -/D [1395 0 R /XYZ 98.895 753.953 null] +/D [1652 0 R /XYZ 150.705 716.092 null] >> -% 324 0 obj +% 1655 0 obj << -/D [1395 0 R /XYZ 99.895 716.092 null] +/D [1652 0 R /XYZ 150.705 382.093 null] >> -% 1394 0 obj +% 1651 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1400 0 obj +% 1658 0 obj << /Type /Page -/Contents 1401 0 R -/Resources 1399 0 R +/Contents 1659 0 R +/Resources 1657 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1398 0 R ] +/Parent 1638 0 R +/Annots [ 1656 0 R ] >> -% 1398 0 obj +% 1656 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 409.811 585.83] +/Rect [291.943 494.069 359.001 506.129] /A << /S /GoTo /D (descdata) >> >> -% 1402 0 obj +% 1660 0 obj << -/D [1400 0 R /XYZ 149.705 753.953 null] +/D [1658 0 R /XYZ 98.895 753.953 null] >> -% 328 0 obj +% 388 0 obj << -/D [1400 0 R /XYZ 150.705 716.092 null] +/D [1658 0 R /XYZ 99.895 716.092 null] >> -% 1399 0 obj +% 1661 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/D [1658 0 R /XYZ 99.895 258.556 null] +>> +% 1657 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1406 0 obj +% 1664 0 obj << /Type /Page -/Contents 1407 0 R -/Resources 1405 0 R +/Contents 1665 0 R +/Resources 1663 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1403 0 R 1404 0 R ] +/Parent 1638 0 R +/Annots [ 1662 0 R ] >> -% 1403 0 obj +% 1662 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 452.321 369.462 464.381] -/A << /S /GoTo /D (spdata) >> +/Rect [342.753 494.069 409.811 506.129] +/A << /S /GoTo /D (descdata) >> >> -% 1404 0 obj +% 1666 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 209.193 359.001 221.252] -/A << /S /GoTo /D (descdata) >> +/D [1664 0 R /XYZ 149.705 753.953 null] >> -% 1408 0 obj +% 392 0 obj << -/D [1406 0 R /XYZ 98.895 753.953 null] +/D [1664 0 R /XYZ 150.705 716.092 null] >> -% 332 0 obj +% 1667 0 obj << -/D [1406 0 R /XYZ 99.895 716.092 null] +/D [1664 0 R /XYZ 150.705 382.093 null] >> -% 1405 0 obj +% 1663 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1411 0 obj +% 1670 0 obj << /Type /Page -/Contents 1412 0 R -/Resources 1410 0 R +/Contents 1671 0 R +/Resources 1669 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R +/Parent 1674 0 R +/Annots [ 1668 0 R ] >> -% 1413 0 obj +% 1668 0 obj << -/D [1411 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 494.069 359.001 506.129] +/A << /S /GoTo /D (descdata) >> >> -% 1414 0 obj +% 1672 0 obj << -/D [1411 0 R /XYZ 150.705 716.092 null] +/D [1670 0 R /XYZ 98.895 753.953 null] >> -% 1415 0 obj +% 396 0 obj << -/D [1411 0 R /XYZ 150.705 663.469 null] +/D [1670 0 R /XYZ 99.895 716.092 null] >> -% 1410 0 obj +% 1673 0 obj +<< +/D [1670 0 R /XYZ 99.895 258.556 null] +>> +% 1669 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1419 0 obj +% 1677 0 obj << /Type /Page -/Contents 1420 0 R -/Resources 1418 0 R +/Contents 1678 0 R +/Resources 1676 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1416 0 R 1417 0 R ] +/Parent 1674 0 R +/Annots [ 1675 0 R ] >> -% 1416 0 obj +% 1675 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 571.679 359.001 583.739] +/Rect [342.753 573.77 409.811 585.83] /A << /S /GoTo /D (descdata) >> >> -% 1417 0 obj +% 1679 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 227.887 369.462 239.946] -/A << /S /GoTo /D (spdata) >> +/D [1677 0 R /XYZ 149.705 753.953 null] >> -% 1421 0 obj +% 400 0 obj << -/D [1419 0 R /XYZ 98.895 753.953 null] +/D [1677 0 R /XYZ 150.705 716.092 null] >> -% 336 0 obj +% 1680 0 obj << -/D [1419 0 R /XYZ 99.895 716.092 null] +/D [1677 0 R /XYZ 150.705 358.183 null] >> -% 1422 0 obj +% 1681 0 obj << -/D [1419 0 R /XYZ 99.895 136.374 null] +/D [1677 0 R /XYZ 150.705 314.403 null] >> -% 1418 0 obj +% 1676 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1424 0 obj +% 1684 0 obj << /Type /Page -/Contents 1425 0 R -/Resources 1423 0 R +/Contents 1685 0 R +/Resources 1683 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R ->> -% 1426 0 obj -<< -/D [1424 0 R /XYZ 149.705 753.953 null] ->> -% 1427 0 obj -<< -/D [1424 0 R /XYZ 150.705 716.092 null] ->> -% 1428 0 obj -<< -/D [1424 0 R /XYZ 150.705 699.334 null] +/Parent 1674 0 R +/Annots [ 1682 0 R ] >> -% 1429 0 obj +% 1682 0 obj << -/D [1424 0 R /XYZ 150.705 644.819 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 573.77 359.001 585.83] +/A << /S /GoTo /D (descdata) >> >> -% 1423 0 obj +% 1686 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [1684 0 R /XYZ 98.895 753.953 null] >> -% 1432 0 obj +% 405 0 obj << -/Type /Page -/Contents 1433 0 R -/Resources 1431 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R +/D [1684 0 R /XYZ 99.895 716.092 null] >> -% 1434 0 obj +% 1687 0 obj << -/D [1432 0 R /XYZ 98.895 753.953 null] +/D [1684 0 R /XYZ 99.895 370.138 null] >> -% 340 0 obj +% 1688 0 obj << -/D [1432 0 R /XYZ 99.895 716.092 null] +/D [1684 0 R /XYZ 99.895 338.313 null] >> -% 1431 0 obj +% 1683 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1438 0 obj +% 1691 0 obj << /Type /Page -/Contents 1439 0 R -/Resources 1437 0 R +/Contents 1692 0 R +/Resources 1690 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1430 0 R 1435 0 R 1436 0 R ] +/Parent 1674 0 R +/Annots [ 1689 0 R ] >> -% 1430 0 obj +% 1689 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 654.503 378 666.562] -/A << /S /GoTo /D (descdata) >> +/Rect [342.753 491.971 420.271 504.031] +/A << /S /GoTo /D (spdata) >> >> -% 1435 0 obj +% 1693 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 480.963 420.271 493.022] -/A << /S /GoTo /D (spdata) >> +/D [1691 0 R /XYZ 149.705 753.953 null] >> -% 1436 0 obj +% 409 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 411.699 378 423.758] -/A << /S /GoTo /D (descdata) >> +/D [1691 0 R /XYZ 150.705 716.092 null] >> -% 1440 0 obj +% 1690 0 obj << -/D [1438 0 R /XYZ 149.705 753.953 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F69 1460 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1441 0 obj +% 1695 0 obj << -/D [1438 0 R /XYZ 150.705 306.27 null] +/Type /Page +/Contents 1696 0 R +/Resources 1694 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1674 0 R >> -% 1442 0 obj +% 1697 0 obj << -/D [1438 0 R /XYZ 150.705 272.927 null] +/D [1695 0 R /XYZ 98.895 753.953 null] >> -% 1443 0 obj +% 1698 0 obj << -/D [1438 0 R /XYZ 150.705 236.878 null] +/D [1695 0 R /XYZ 99.895 496.698 null] >> -% 1444 0 obj +% 1699 0 obj << -/D [1438 0 R /XYZ 150.705 167.614 null] +/D [1695 0 R /XYZ 99.895 438.313 null] >> -% 1445 0 obj +% 1700 0 obj << -/D [1438 0 R /XYZ 150.705 146.171 null] +/D [1695 0 R /XYZ 99.895 418.388 null] >> -% 1437 0 obj +% 1694 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1447 0 obj +% 1705 0 obj << /Type /Page -/Contents 1448 0 R -/Resources 1446 0 R +/Contents 1706 0 R +/Resources 1704 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R +/Parent 1674 0 R +/Annots [ 1701 0 R 1702 0 R 1703 0 R ] >> -% 1449 0 obj -<< -/D [1447 0 R /XYZ 98.895 753.953 null] ->> -% 1450 0 obj +% 1701 0 obj << -/D [1447 0 R /XYZ 99.895 716.092 null] ->> -% 1451 0 obj -<< -/D [1447 0 R /XYZ 99.895 651.514 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 527.942 420.271 540.002] +/A << /S /GoTo /D (spdata) >> >> -% 1452 0 obj +% 1702 0 obj << -/D [1447 0 R /XYZ 99.895 607.678 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 460.196 409.811 472.256] +/A << /S /GoTo /D (descdata) >> >> -% 1453 0 obj +% 1703 0 obj << -/D [1447 0 R /XYZ 99.895 575.798 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 404.405 438.184 416.465] +/A << /S /GoTo /D (precdata) >> >> -% 1454 0 obj +% 1707 0 obj << -/D [1447 0 R /XYZ 99.895 520.007 null] +/D [1705 0 R /XYZ 149.705 753.953 null] >> -% 1455 0 obj +% 413 0 obj << -/D [1447 0 R /XYZ 99.895 476.171 null] +/D [1705 0 R /XYZ 150.705 716.092 null] >> -% 1446 0 obj + +endstream +endobj +1711 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Length 5626 >> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 99.895 706.129 Td [(6.27)-1000(Sorting)-250(utilities)-250(\227)]TJ 0 -20.164 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 686.164 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 685.965 Td [(msort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Merge-sort)-250(algorithm)]TJ -24.221 -12.574 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 673.59 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 673.391 Td [(qsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Quicksort)-250(algorithm)]TJ -24.221 -12.575 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 661.016 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 660.816 Td [(hsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Heapsort)-250(algorithm)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -24.221 -22.402 Td [(call)-525(psb_msort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_qsort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_hsort\050x,ix,dir,flag\051)]TJ/F62 9.9626 Tf 14.944 -21.783 Td [(These)-236(serial)-235(r)18(outines)-236(sort)-236(a)-235(sequence)]TJ/F60 9.9626 Tf 162.066 0 Td [(X)]TJ/F62 9.9626 Tf 9.884 0 Td [(into)-236(ascending)-235(or)-236(descending)-236(or)18(der)74(.)]TJ -186.894 -11.955 Td [(The)-243(ar)18(gument)-243(meaning)-243(is)-242(identical)-243(for)-243(the)-243(thr)18(ee)-243(calls;)-245(the)-243(only)-243(dif)18(fer)18(ence)-242(is)-243(the)]TJ 0 -11.955 Td [(algorithm)-250(used)-250(to)-250(accomplish)-250(the)-250(task)-250(\050see)-250(Usage)-250(Notes)-250(below\051.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.783 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.402 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.402 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(to)-250(be)-250(sorted.)]TJ 14.944 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +0 g 0 G +/F62 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(\050at)-250(least\051)-250(the)-250(same)-250(size)-250(as)]TJ/F60 9.9626 Tf 254.189 0 Td [(X)]TJ/F62 9.9626 Tf 7.537 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -286.633 -22.402 Td [(dir)]TJ +0 g 0 G +/F62 9.9626 Tf 18.262 0 Td [(The)-250(desir)18(ed)-250(or)18(dering.)]TJ 6.645 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.402 Td [(Integer)-250(and)-250(real)-250(data:)]TJ +0 g 0 G +/F67 9.9626 Tf 101.28 0 Td [(psb_sort_up_)]TJ/F62 9.9626 Tf 62.764 0 Td [(,)]TJ/F67 9.9626 Tf 5.525 0 Td [(psb_sort_down_)]TJ/F62 9.9626 Tf 73.225 0 Td [(,)]TJ/F67 9.9626 Tf 5.525 0 Td [(psb_asort_up_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)]TJ/F67 9.9626 Tf -294.396 -11.955 Td [(psb_asort_down_)]TJ/F62 9.9626 Tf 78.455 0 Td [(;)-250(default)]TJ/F67 9.9626 Tf 38.784 0 Td [(psb_sort_up_)]TJ/F62 9.9626 Tf 62.764 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -201.921 -17.178 Td [(Complex)-250(data:)]TJ +0 g 0 G +/F67 9.9626 Tf 70.286 0 Td [(psb_lsort_up_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_lsort_down_)]TJ/F62 9.9626 Tf 78.455 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_asort_up_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)]TJ/F67 9.9626 Tf 4.504 0 Td [(psb_asort_down_)]TJ/F62 9.9626 Tf 78.455 0 Td [(;)]TJ -354.777 -11.956 Td [(default)]TJ/F67 9.9626 Tf 33.803 0 Td [(psb_lsort_up_)]TJ/F62 9.9626 Tf 67.994 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -148.622 -22.402 Td [(\003ag)]TJ +0 g 0 G +/F62 9.9626 Tf 21.589 0 Td [(Whether)-250(to)-250(keep)-250(the)-250(original)-250(values)-250(in)]TJ/F60 9.9626 Tf 171.52 0 Td [(I)-81(X)]TJ/F62 9.9626 Tf 11.661 0 Td [(.)]TJ -179.863 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F67 9.9626 Tf 128.287 0 Td [(psb_sort_ovw_idx_)]TJ/F62 9.9626 Tf 90.809 0 Td [(or)]TJ/F67 9.9626 Tf 11.268 0 Td [(psb_sort_keep_idx_)]TJ/F62 9.9626 Tf 94.146 0 Td [(;)]TJ -324.51 -11.955 Td [(default)]TJ/F67 9.9626 Tf 33.803 0 Td [(psb_sort_ovw_idx_)]TJ/F62 9.9626 Tf 88.916 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -147.626 -24.395 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.402 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(of)-250(values,)-250(in)-250(the)-250(chosen)-250(or)18(dering.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +0 g 0 G +/F62 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-238(integer)-237(array)-238(of)-237(rank)-238(1,)-240(whose)-238(entries)-237(ar)18(e)-238(moved)-237(to)-238(the)-238(same)-237(position)]TJ 0 -11.955 Td [(as)-250(the)-250(corr)18(esponding)-250(entries)-250(in)]TJ/F60 9.9626 Tf 138.215 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ +0 g 0 G + -3.943 -44.517 Td [(109)]TJ +0 g 0 G +ET endstream endobj -1462 0 obj +1716 0 obj << -/Length 6789 +/Length 7300 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.9)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(routine)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G + [-500(For)-370(integer)-370(or)-370(r)18(eal)-370(data)-370(the)-370(sorting)-370(can)-370(be)-370(performed)-370(in)-370(the)-370(up/down)]TJ 12.453 -11.956 Td [(dir)18(ection,)-250(on)-250(the)-250(natural)-250(or)-250(absolute)-250(values;)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -19.204 Td [(call)-525(psb_spasb\050a,)-525(desc_a,)-525(info)-525([,)-525(afmt,)-525(upd,)-1050(mold]\051)]TJ + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.289 Td [(T)90(ype:)]TJ + [-500(For)-329(complex)-330(data)-329(the)-330(sorting)-329(can)-330(be)-329(done)-329(in)-330(a)-329(lexicographic)-330(or)18(der)-329(\050i.e.:)]TJ 12.453 -11.955 Td [(sort)-263(on)-263(the)-263(r)18(eal)-263(part)-263(with)-263(ties)-263(br)18(oken)-263(accor)18(ding)-263(to)-263(the)-263(imaginary)-263(part\051)-263(or)]TJ 0 -11.955 Td [(on)-250(the)-250(absolute)-250(values;)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + -12.453 -19.925 Td [(3.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.421 Td [(On)-250(Entry)]TJ + [-500(The)-325(r)18(outines)-325(r)18(eturn)-325(the)-325(items)-324(in)-325(the)-325(chosen)-325(or)18(dering;)-362(the)-325(output)-325(dif)18(fer)18(-)]TJ 12.453 -11.956 Td [(ence)-244(is)-245(the)-244(handling)-244(of)-244(ties)-244(\050i.e.)-309(items)-244(with)-244(an)-244(equal)-245(v)1(alue\051)-245(in)-244(the)-244(original)]TJ 0 -11.955 Td [(input.)-316(W)55(ith)-252(the)-252(mer)18(ge-sort)-252(algorithm)-252(ties)-252(ar)18(e)-252(pr)18(eserved)-252(in)-252(the)-252(same)-252(r)18(ela-)]TJ 0 -11.955 Td [(tive)-278(or)18(der)-278(as)-278(they)-278(had)-278(in)-278(the)-278(original)-278(sequence,)-285(while)-278(this)-278(is)-278(not)-278(guaran-)]TJ 0 -11.955 Td [(teed)-250(for)-250(quicksort)-250(or)-250(heapsort;)]TJ 0 g 0 G + -12.453 -19.925 Td [(4.)]TJ 0 g 0 G - 0 -20.421 Td [(desc)]TJ + [-500(If)]TJ/F60 9.9626 Tf 22.66 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F93 10.3811 Tf 20.72 0 Td [(=)]TJ/F60 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ ET q -1 0 0 1 171.218 623.994 cm +1 0 0 1 232.862 542.941 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 623.794 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F54 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F60 9.9626 Tf 235.975 542.742 Td [(s)-25(o)-25(r)-35(t)]TJ ET q -1 0 0 1 360.068 576.173 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 253.036 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 363.206 575.974 Td [(desc)]TJ +/F60 9.9626 Tf 256.149 542.742 Td [(o)-35(v)-25(w)]TJ ET q -1 0 0 1 384.755 576.173 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 274.067 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 387.893 575.974 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -20.421 Td [(afmt)]TJ -0 g 0 G -/F54 9.9626 Tf 26.012 0 Td [(the)-250(storage)-250(format)-250(for)-250(the)-250(sparse)-250(matrix.)]TJ -1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(array)-250(of)-250(characters.)-310(Defalt:)-310('CSR'.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.42 Td [(upd)]TJ -0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(Pr)18(ovide)-250(for)-250(updates)-250(to)-250(the)-250(matrix)-250(coef)18(\002cients.)]TJ 1.663 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integer)74(,)-250(possible)-250(values:)]TJ/F59 9.9626 Tf 165.219 0 Td [(psb_upd_srch_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(psb_upd_perm_)]TJ +/F60 9.9626 Tf 277.11 542.742 Td [(i)-32(d)-42(x)]TJ +ET +q +1 0 0 1 291.402 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 296.503 542.742 Td [(then)-212(the)-212(entries)-212(in)]TJ/F60 9.9626 Tf 80.283 0 Td [(i)-49(x)]TJ/F93 10.3811 Tf 8.588 0 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(1)-290(:)]TJ/F60 9.9626 Tf 13.381 0 Td [(n)]TJ/F93 10.3811 Tf 5.788 0 Td [(\051)]TJ/F62 9.9626 Tf 6.262 0 Td [(wher)18(e)]TJ/F60 9.9626 Tf 29.644 0 Td [(n)]TJ/F62 9.9626 Tf 7.776 0 Td [(is)-212(the)-212(size)]TJ -276.762 -11.956 Td [(of)]TJ/F60 9.9626 Tf 12.255 0 Td [(x)]TJ/F62 9.9626 Tf 8.411 0 Td [(ar)18(e)-322(initialized)-321(to)]TJ/F60 9.9626 Tf 76.228 0 Td [(i)-49(x)]TJ/F93 10.3811 Tf 8.588 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 8.364 0 Td [(\040)]TJ/F60 9.9626 Tf 14.651 0 Td [(i)]TJ/F62 9.9626 Tf 2.963 0 Td [(;)-358(thus,)-339(upon)-322(r)18(eturn)-321(fr)18(om)-322(the)-322(subr)18(outine,)]TJ -138.753 -11.955 Td [(for)-270(each)-271(index)]TJ/F60 9.9626 Tf 65.501 0 Td [(i)]TJ/F62 9.9626 Tf 5.657 0 Td [(we)-270(have)-271(in)]TJ/F60 9.9626 Tf 51.095 0 Td [(i)-49(x)]TJ/F93 10.3811 Tf 8.587 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 6.843 0 Td [(the)-270(position)-271(that)-270(the)-270(item)]TJ/F60 9.9626 Tf 114.324 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 6.844 0 Td [(occupied)]TJ -278.766 -11.955 Td [(in)-250(the)-250(original)-250(data)-250(sequence;)]TJ 0 g 0 G -/F51 9.9626 Tf -263.1 -20.421 Td [(mold)]TJ + -12.453 -19.925 Td [(5.)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ + [-500(If)]TJ/F60 9.9626 Tf 24.08 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F93 10.3811 Tf 22.648 0 Td [(=)]TJ/F60 9.9626 Tf 13.563 0 Td [(p)-25(s)-25(b)]TJ ET q -1 0 0 1 393.323 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 238.138 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 396.461 371.249 Td [(T)]TJ +/F60 9.9626 Tf 241.252 486.951 Td [(s)-25(o)-25(r)-35(t)]TJ ET q -1 0 0 1 402.319 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 258.312 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 405.457 371.249 Td [(base)]TJ +/F60 9.9626 Tf 261.426 486.951 Td [(k)-30(e)-25(e)-80(p)]TJ ET q -1 0 0 1 427.006 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 280.648 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 430.144 371.249 Td [(sparse)]TJ +/F60 9.9626 Tf 283.692 486.951 Td [(i)-32(d)-42(x)]TJ ET q -1 0 0 1 462.154 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 297.983 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 465.292 371.249 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ +/F62 9.9626 Tf 304.504 486.951 Td [(the)-355(r)18(ou)1(tine)-355(will)-354(assume)-355(that)-355(th)1(e)-355(entries)-355(i)1(n)]TJ/F60 9.9626 Tf -128.838 -11.955 Td [(i)-49(x)]TJ/F93 10.3811 Tf 8.588 0 Td [(\050)]TJ/F62 9.9626 Tf 4.274 0 Td [(:)]TJ/F93 10.3811 Tf 2.74 0 Td [(\051)]TJ/F62 9.9626 Tf 6.64 0 Td [(have)-250(alr)18(eady)-250(been)-250(initialized)-250(by)-250(the)-250(user;)]TJ +0 g 0 G + -34.75 -19.926 Td [(6.)]TJ 0 g 0 G -/F51 9.9626 Tf -330.278 -22.289 Td [(On)-250(Return)]TJ + [-500(The)-270(thr)18(ee)-269(sorting)-270(algorithms)-269(have)-270(a)-269(similar)]TJ/F60 9.9626 Tf 205.79 0 Td [(O)]TJ/F93 10.3811 Tf 8 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(n)]TJ/F62 9.9626 Tf 7.324 0 Td [(log)]TJ/F60 9.9626 Tf 15.663 0 Td [(n)]TJ/F93 10.3811 Tf 5.788 0 Td [(\051)]TJ/F62 9.9626 Tf 6.835 0 Td [(expected)-270(r)8(unning)]TJ -241.221 -11.955 Td [(time;)-278(in)-268(the)-268(average)-269(case)-268(quicksort)-268(will)-269(be)-268(the)-268(fastest)-269(and)-268(mer)18(ge-sort)-268(the)]TJ 0 -11.955 Td [(slowest.)-310(However)-250(note)-250(that:)]TJ 0 g 0 G + 5.321 -19.925 Td [(\050a\051)]TJ 0 g 0 G - 0 -20.421 Td [(a)]TJ + [-500(The)-336(worst)-336(case)-336(r)8(unning)-336(time)-336(for)-337(quicksort)-336(is)]TJ/F60 9.9626 Tf 220.017 0 Td [(O)]TJ/F93 10.3811 Tf 8 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(n)]TJ/F62 7.5716 Tf 5.664 3.616 Td [(2)]TJ/F93 10.3811 Tf 4.408 -3.616 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(;)-379(the)-336(algorithm)]TJ -229.916 -11.955 Td [(implemented)-293(her)18(e)-293(follows)-293(the)-292(well-known)-293(median-of-thr)18(ee)-293(heuris-)]TJ 0 -11.956 Td [(tics,)-250(but)-250(the)-250(worst)-250(case)-250(may)-250(still)-250(apply;)]TJ +0 g 0 G + -17.125 -15.94 Td [(\050b\051)]TJ +0 g 0 G + [-500(The)-190(worst)-190(case)-190(r)8(unning)-190(time)-190(for)-190(mer)18(ge-sort)-190(and)-190(heap-sort)-190(is)]TJ/F60 9.9626 Tf 277.76 0 Td [(O)]TJ/F93 10.3811 Tf 8 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(n)]TJ/F62 9.9626 Tf 7.324 0 Td [(log)]TJ/F60 9.9626 Tf 15.663 0 Td [(n)]TJ/F93 10.3811 Tf 5.788 0 Td [(\051)]TJ/F62 9.9626 Tf -301.684 -11.955 Td [(as)-250(the)-250(average)-250(case;)]TJ +0 g 0 G + -16.04 -15.94 Td [(\050c\051)]TJ +0 g 0 G + [-500(The)-244(mer)18(ge-sort)-244(algorithm)-243(is)-244(implemented)-244(to)-244(take)-244(advantage)-243(of)-244(sub-)]TJ 16.04 -11.955 Td [(sequences)-314(that)-313(may)-314(be)-314(alr)18(eady)-314(in)-313(the)-314(desir)18(ed)-314(or)18(dering)-314(prior)-313(to)-314(the)]TJ 0 -11.956 Td [(subr)18(outine)-390(call;)-459(this)-390(situation)-389(is)-390(r)18(elatively)-390(common)-389(when)-390(dealing)]TJ 0 -11.955 Td [(with)-335(gr)18(oups)-335(of)-335(indices)-335(of)-335(sparse)-336(matrix)-335(entries,)-356(thus)-335(mer)18(ge-sort)-335(is)]TJ 0 -11.955 Td [(the)-319(pr)18(eferr)18(ed)-318(choice)-319(when)-319(a)-318(sorting)-319(is)-319(needed)-318(by)-319(other)-319(r)18(outi)1(nes)-319(in)]TJ 0 -11.955 Td [(the)-250(library)111(.)]TJ +0 g 0 G + 117.559 -193.275 Td [(110)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ ET -q -1 0 0 1 360.068 280.918 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1729 0 obj +<< +/Length 171 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 363.206 280.719 Td [(Tspmat)]TJ +/F59 14.3462 Tf 99.895 705.784 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 164.384 -615.346 Td [(111)]TJ +0 g 0 G ET -q -1 0 0 1 395.216 280.918 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 280.719 Td [(type)]TJ + +endstream +endobj +1733 0 obj +<< +/Length 5510 +>> +stream 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -20.421 Td [(desc)]TJ +BT +/F59 11.9552 Tf 150.705 706.129 Td [(7.1)-1000(psb)]TJ ET q -1 0 0 1 171.218 260.497 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 174.207 260.298 Td [(a)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(environment)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F54 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.956 Td [(Speci\002ed)-290(as:)-389(a)-290(str)8(uctur)18(ed)-290(data)-289(of)-290(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 171.305 0 Td [(psb)]TJ +0 g 0 G +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_init\050ctxt,)-525(np,)-525(basectxt,)-525(ids\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-214(subr)18(outine)-215(initializes)-214(the)-215(PSBLAS)-214(parallel)-215(envir)18(onment,)-221(de\002ning)-215(a)-214(vir)18(-)]TJ -14.944 -11.955 Td [(tual)-250(parallel)-250(machine.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(np)]TJ +0 g 0 G +/F62 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(all)-250(available)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.926 Td [(basectxt)]TJ +0 g 0 G +/F62 9.9626 Tf 41.494 0 Td [(the)-321(initial)-321(communication)-321(context.)-524(The)-321(new)-321(context)-321(will)-321(be)-321(de\002ned)]TJ -16.587 -11.955 Td [(fr)18(om)-250(the)-250(pr)18(ocesses)-250(participating)-250(in)-250(the)-250(initial)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(MPI)]TJ ET q -1 0 0 1 363.235 212.677 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 387.574 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 366.373 212.477 Td [(desc)]TJ +/F62 9.9626 Tf 390.563 465.994 Td [(COMM)]TJ ET q -1 0 0 1 387.922 212.677 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 424.904 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 391.06 212.477 Td [(type)]TJ +/F62 9.9626 Tf 427.893 465.994 Td [(WORLD.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)-429(If)-290(the)-290(matrix)-289(was)]TJ -236.371 -11.955 Td [(allocated)-209(with)]TJ/F59 9.9626 Tf 64.153 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 135.988 0 Td [(,)-217(then)-210(the)-209(descriptor)-209(will)-209(be)]TJ -200.141 -11.955 Td [(r)18(eassembled.)]TJ +/F59 9.9626 Tf -277.188 -19.925 Td [(ids)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(info)]TJ +/F62 9.9626 Tf 18.809 0 Td [(Identities)-306(of)-307(the)-306(pr)18(ocesses)-307(to)-306(use)-306(for)-307(the)-306(new)-306(context;)-335(the)-306(ar)18(gument)-307(is)-306(ig-)]TJ 6.097 -11.955 Td [(nor)18(ed)-388(when)]TJ/F67 9.9626 Tf 58.258 0 Td [(np)]TJ/F62 9.9626 Tf 14.324 0 Td [(is)-388(not)-388(speci\002ed.)-723(This)-388(allows)-388(the)-387(pr)18(ocesses)-388(in)-388(the)-388(new)]TJ -72.582 -11.955 Td [(envir)18(onment)-250(to)-250(be)-250(in)-250(an)-250(or)18(der)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(original)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)-560(Default:)-310(use)-250(the)-250(indices)]TJ/F93 10.3811 Tf 240.57 0 Td [(\050)]TJ/F62 9.9626 Tf 4.15 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.966 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F59 9.9626 Tf -322.483 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - 141.968 -29.888 Td [(86)]TJ +0 g 0 G + 0 -19.925 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-285(communication)-284(context)-285(identifying)-285(the)-284(virtual)-285(parallel)-285(machine,)-293(type)]TJ/F67 9.9626 Tf 3.885 -11.955 Td [(psb_ctxt_type)]TJ/F62 9.9626 Tf 67.995 0 Td [(.)-327(Note)-256(that)-256(this)-256(is)-256(always)-256(a)-255(duplicate)-256(of)]TJ/F67 9.9626 Tf 174.426 0 Td [(basectxt)]TJ/F62 9.9626 Tf 41.842 0 Td [(,)-257(so)-256(that)]TJ -284.263 -11.955 Td [(library)-296(communications)-297(ar)18(e)-296(completely)-297(separated)-296(fr)18(om)-297(other)-296(communi-)]TJ 0 -11.955 Td [(cation)-250(operations.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS)-250(call.)]TJ +0 g 0 G + 0 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(It)-194(is)-195(an)-194(err)18(or)-194(to)-194(specify)-195(a)-194(value)-194(for)]TJ/F60 9.9626 Tf 158.156 0 Td [(n)-80(p)]TJ/F62 9.9626 Tf 13.378 0 Td [(gr)18(eater)-194(than)-195(the)-194(number)-194(of)-194(pr)18(ocesses)]TJ -159.081 -11.955 Td [(available)-250(in)-250(the)-250(underlying)-250(base)-250(parallel)-250(envir)18(onment.)]TJ +0 g 0 G + 139.477 -84.647 Td [(112)]TJ 0 g 0 G ET endstream endobj -1467 0 obj +1739 0 obj << -/Length 3146 +/Length 4457 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.2)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 151.016 706.129 Td [(info)-211(\227)-211(Return)-211(information)-210(about)-211(PSBLAS)-211(parallel)-211(en-)]TJ -24.221 -13.948 Td [(vironment)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(On)-226(entry)-227(to)-226(this)-227(r)18(outine)-226(the)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F59 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ +/F67 9.9626 Tf -26.9 -18.964 Td [(call)-525(psb_info\050ctxt,)-525(iam,)-525(np\051)]TJ/F62 9.9626 Tf 14.944 -21.917 Td [(This)-397(subr)18(outine)-396(r)18(eturns)-397(information)-397(about)-396(the)-397(PSBLAS)-397(parallel)-396(envir)18(on-)]TJ -14.944 -11.956 Td [(ment,)-250(de\002ning)-250(a)-250(virtual)-250(parallel)-250(machine.)]TJ 0 g 0 G - -62.017 -19.925 Td [(2.)]TJ +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(The)-250(sparse)-250(matrix)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(update)-250(state;)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -19.925 Td [(3.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Duplicate)-421(entries)-422(ar)18(e)-421(detected)-421(and)-422(handled)-421(in)-421(both)-421(build)-422(and)-421(update)]TJ 12.453 -11.955 Td [(state,)-244(wit)1(h)-242(the)-242(exception)-242(of)-242(the)-242(err)18(or)-242(action)-242(that)-242(is)-242(only)-241(taken)-242(in)-242(the)-242(build)]TJ 0 -11.955 Td [(state,)-250(i.e.)-310(on)-250(the)-250(\002rst)-250(assembly;)]TJ 0 g 0 G - -12.453 -19.926 Td [(4.)]TJ + 0 -19.925 Td [(ctxt)]TJ 0 g 0 G - [-500(If)-190(the)-190(update)-190(choice)-190(is)]TJ/F59 9.9626 Tf 108.372 0 Td [(psb_upd_perm_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)-202(then)-190(subsequent)-190(calls)-190(to)]TJ/F59 9.9626 Tf 109.946 0 Td [(psb_spins)]TJ/F54 9.9626 Tf -273.86 -11.955 Td [(to)-309(update)-309(the)-308(matrix)-309(must)-309(be)-309(arranged)-309(in)-308(such)-309(a)-309(way)-309(as)-309(to)-308(pr)18(oduce)-309(ex-)]TJ 0 -11.955 Td [(actly)-319(the)-320(same)-319(sequence)-320(of)-319(coef)18(\002cient)-319(values)-320(as)-319(encounter)18(ed)-319(at)-320(the)-319(\002rst)]TJ 0 -11.955 Td [(assembly;)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - -12.453 -19.926 Td [(5.)]TJ +/F59 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(The)-250(output)-250(storage)-250(format)-250(need)-250(not)-250(be)-250(the)-250(same)-250(on)-250(all)-250(pr)18(ocesses;)]TJ 0 g 0 G - 0 -19.925 Td [(6.)]TJ + 0 -19.925 Td [(iam)]TJ 0 g 0 G - [-500(On)-249(exit)-249(fr)18(om)-250(this)-249(r)18(outine)-249(the)-249(matrix)-249(is)-250(in)-249(the)-249(assembled)-249(state,)-250(and)-249(thus)-249(is)]TJ 12.453 -11.955 Td [(suitable)-250(for)-250(the)-250(computational)-250(r)18(outines;)]TJ +/F62 9.9626 Tf 22.137 0 Td [(Identi\002er)-250(of)-250(curr)18(ent)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 2.77 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ/F91 10.3811 Tf 134.19 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)]TJ/F91 10.3811 Tf 7.873 0 Td [(\024)]TJ/F60 9.9626 Tf 11.017 0 Td [(i)-47(a)-25(m)]TJ/F91 10.3811 Tf 18.677 0 Td [(\024)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1)]TJ 0 g 0 G - -12.453 -19.925 Td [(7.)]TJ +/F59 9.9626 Tf -239.579 -19.925 Td [(np)]TJ 0 g 0 G - [-500(If)-431(the)]TJ/F59 9.9626 Tf 41.543 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 140.288 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-272(the)-273(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.956 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-288(element)-288(applications,)-297(with)]TJ/F59 9.9626 Tf 145.88 0 Td [(dupl=psb_dupl_add_)]TJ/F54 9.9626 Tf 94.147 0 Td [(;)-307(it)-288(is)-287(necessary)-288(to)]TJ -240.027 -11.955 Td [(check)-236(for)-235(possible)-236(updates)-236(needed)-235(in)-236(the)-236(descriptor)74(,)-238(hence)-236(ther)18(e)-236(will)-235(be)-236(a)]TJ 0 -11.955 Td [(r)8(untime)-250(over)18(head.)]TJ +/F62 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G - 141.968 -332.752 Td [(87)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(For)-396(pr)18(ocesses)-396(in)-395(the)-396(virtual)-396(parallel)-396(machine)-396(the)-396(ident)1(i\002er)-396(will)-396(satisfy)]TJ 12.453 -11.955 Td [(0)]TJ/F91 10.3811 Tf 7.873 0 Td [(\024)]TJ/F60 9.9626 Tf 11.016 0 Td [(i)-47(a)-25(m)]TJ/F91 10.3811 Tf 18.678 0 Td [(\024)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1;)]TJ +0 g 0 G + -84.741 -19.925 Td [(2.)]TJ +0 g 0 G + [-500(If)-349(the)-349(user)-350(has)-349(r)18(equested)-349(on)]TJ/F67 9.9626 Tf 142.217 0 Td [(psb_init)]TJ/F62 9.9626 Tf 45.321 0 Td [(a)-349(number)-349(of)-350(pr)18(ocesses)-349(less)-349(than)]TJ -175.085 -11.956 Td [(the)-321(total)-322(available)-321(in)-321(the)-322(parallel)-321(execution)-322(envir)18(onment,)-339(the)-321(r)18(emaining)]TJ 0 -11.955 Td [(pr)18(ocesses)-229(will)-228(have)-229(on)-229(r)18(eturn)]TJ/F60 9.9626 Tf 130.21 0 Td [(i)-47(a)-25(m)]TJ/F93 10.3811 Tf 18.678 0 Td [(=)]TJ/F91 10.3811 Tf 11.086 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1;)-236(the)-229(on)1(ly)-229(call)-229(involving)]TJ/F67 9.9626 Tf 110.162 0 Td [(ctxt)]TJ/F62 9.9626 Tf 23.2 0 Td [(that)]TJ -301.53 -11.955 Td [(any)-250(such)-250(pr)18(ocess)-250(may)-250(execute)-250(is)-250(to)]TJ/F67 9.9626 Tf 155.296 0 Td [(psb_exit)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ +0 g 0 G + -57.662 -174.311 Td [(113)]TJ 0 g 0 G ET endstream endobj -1480 0 obj +1745 0 obj << -/Length 2987 +/Length 4180 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.10)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.3)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(spfree)-250(\227)-250(Frees)-250(a)-250(sparse)-250(matrix)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS)-250(parallel)-250(environment)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_spfree\050a,)-525(desc_a,)-525(info\051)]TJ +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_exit\050ctxt\051)]TJ 0 -11.955 Td [(call)-525(psb_exit\050ctxt,close\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(a)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 577.576 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 577.576 Td [(type)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F59 9.9626 Tf -24.907 -19.926 Td [(close)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 557.85 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 557.651 Td [(a)]TJ +/F62 9.9626 Tf 27.666 0 Td [(Whether)-369(to)-368(close)-369(all)-369(data)-368(str)8(uctur)18(es)-369(r)18(elated)-369(to)-368(the)-369(virtual)-369(parall)1(el)-369(ma-)]TJ -2.759 -11.955 Td [(chine,)-250(besides)-250(those)-250(associated)-250(with)-250(ctxt.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(variable,)-250(default)-250(value:)-310(tr)8(ue.)]TJ/F59 11.9552 Tf -24.907 -19.925 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 509.83 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 509.83 Td [(type)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-500(This)-376(r)18(outine)-376(may)-377(be)-376(called)-376(even)-376(if)-377(a)-376(pr)18(evious)-376(call)-376(to)]TJ/F67 9.9626 Tf 255.069 0 Td [(psb_info)]TJ/F62 9.9626 Tf 45.591 0 Td [(has)-376(r)18(e-)]TJ -288.206 -11.955 Td [(turned)-251(with)]TJ/F60 9.9626 Tf 55.156 0 Td [(i)-47(a)-25(m)]TJ/F93 10.3811 Tf 18.696 0 Td [(=)]TJ/F91 10.3811 Tf 11.104 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1;)-251(indeed,)-252(it)-251(it)-251(i)1(s)-251(the)-251(only)-251(r)18(outine)-251(that)-251(may)-251(be)-251(called)]TJ -93.15 -11.955 Td [(with)-250(ar)18(gument)]TJ/F67 9.9626 Tf 68.133 0 Td [(ctxt)]TJ/F62 9.9626 Tf 23.412 0 Td [(in)-250(this)-250(situation.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ + -103.999 -19.926 Td [(2.)]TJ 0 g 0 G + [-500(A)-269(call)-269(to)-268(this)-269(r)18(outine)-269(with)]TJ/F67 9.9626 Tf 128.502 0 Td [(close=.true.)]TJ/F62 9.9626 Tf 65.442 0 Td [(implies)-269(a)-268(call)-269(to)]TJ/F67 9.9626 Tf 72.059 0 Td [(MPI_Finalize)]TJ/F62 9.9626 Tf 62.764 0 Td [(,)]TJ -316.313 -11.955 Td [(after)-250(which)-250(no)-250(parallel)-250(r)18(outine)-250(may)-250(be)-250(called.)]TJ 0 g 0 G - 0 -19.925 Td [(info)]TJ + -12.454 -19.925 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-500(If)-289(the)-288(user)-288(whishes)-289(to)-288(use)-289(multiple)-288(communication)-289(contexts)-288(in)-289(the)-288(same)]TJ 12.454 -11.955 Td [(pr)18(ogram,)-401(or)-371(to)-371(enter)-371(and)-371(exit)-371(multiple)-371(times)-370(into)-371(the)-371(parallel)-371(envir)18(on-)]TJ 0 -11.956 Td [(ment,)-425(this)-389(r)18(outine)-390(may)-390(be)-390(called)-389(to)-390(selectively)-390(close)-390(the)-389(contexts)-390(with)]TJ/F67 9.9626 Tf 0 -11.955 Td [(close=.false.)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)-202(while)-190(on)-190(the)-190(last)-190(call)-190(it)-190(should)-190(be)-190(called)-190(with)]TJ/F67 9.9626 Tf 196.919 0 Td [(close=.true.)]TJ/F62 9.9626 Tf -264.913 -11.955 Td [(to)-250(shutdown)-250(in)-250(a)-250(clean)-250(way)-250(the)-250(entir)18(e)-250(parallel)-250(envir)18(onment.)]TJ 0 g 0 G - 141.968 -329.728 Td [(88)]TJ + 139.477 -212.169 Td [(114)]TJ 0 g 0 G ET endstream endobj -1486 0 obj +1752 0 obj << -/Length 3858 +/Length 2476 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.11)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.4)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(sprn)-254(\227)-255(Reinit)-254(sparse)-255(matrix)-254(structure)-254(for)-255(psblas)-254(rou-)]TJ -24.221 -13.948 Td [(tines.)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -32.877 -18.964 Td [(call)-525(psb_sprn\050a,)-525(decsc_a,)-525(info,)-525(clear\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.917 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.926 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(r)18(einitialized.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(get)]TJ ET q -1 0 0 1 309.258 563.828 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 168.338 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 312.397 563.628 Td [(Tspmat)]TJ +/F59 11.9552 Tf 171.925 706.129 Td [(mpi)]TJ ET q -1 0 0 1 344.406 563.828 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 194.556 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 347.544 563.628 Td [(type)]TJ +/F59 11.9552 Tf 198.143 706.129 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 543.902 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 543.703 Td [(a)]TJ +/F67 9.9626 Tf -98.248 -18.964 Td [(icomm)-525(=)-525(psb_get_mpi_comm\050ctxt\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-417(func)1(tion)-417(r)18(eturns)-416(the)-417(MPI)-416(communicator)-417(associated)-416(with)-417(a)-416(PSBLAS)]TJ -14.944 -11.955 Td [(context)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 496.082 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 495.882 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 496.082 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 495.882 Td [(type)]TJ +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(clear)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 26.561 0 Td [(Choose)-250(whether)-250(to)-250(zer)18(o)-250(out)-250(matrix)-250(coef)18(\002cients)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)-310(tr)8(ue.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.917 Td [(On)-250(Return)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - 0 -19.926 Td [(info)]TJ +/F59 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ + 0 -19.926 Td [(Function)-250(value)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(update)-250(state.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-372(MPI)-371(communicator)-372(associated)-371(with)-372(the)-372(PSBLAS)-371(virtual)]TJ -47.87 -11.955 Td [(parallel)-250(machine.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ/F59 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F62 9.9626 Tf 34.165 0 Td [(The)-230(subr)18(outine)-230(version)]TJ/F67 9.9626 Tf 103.913 0 Td [(psb_get_mpicomm)]TJ/F62 9.9626 Tf 80.748 0 Td [(is)-230(still)-230(available)-230(but)-230(is)-230(depr)18(e-)]TJ -218.826 -11.955 Td [(cated.)]TJ 0 g 0 G - 154.421 -206.192 Td [(89)]TJ + 164.384 -319.766 Td [(115)]TJ 0 g 0 G ET endstream endobj -1493 0 obj +1757 0 obj << -/Length 6166 +/Length 3337 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.12)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.5)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geall\050x,)-525(desc_a,)-525(info[,)-525(dupl,)-525(bldmode,)-525(n,)-525(lb]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.627 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.01 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.009 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 629.719 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 629.519 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(get)]TJ ET q -1 0 0 1 328.257 581.898 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 219.148 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 331.395 581.699 Td [(desc)]TJ +/F59 11.9552 Tf 222.735 706.129 Td [(mpi)]TJ ET q -1 0 0 1 352.944 581.898 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 245.365 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 356.083 581.699 Td [(type)]TJ +/F59 11.9552 Tf 248.952 706.129 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -30.965 Td [(n)]TJ +/F67 9.9626 Tf -98.247 -18.964 Td [(rank)-525(=)-525(psb_get_mpi_rank\050ctxt,)-525(id\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(function)-250(r)18(eturns)-250(the)-250(MPI)-250(rank)-250(of)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 257.337 0 Td [(i)-32(d)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(the)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F52 9.9626 Tf 295.578 0 Td [(x)]TJ/F54 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.956 Td [(rank-1)-250(array)111(.)]TJ +/F59 9.9626 Tf -272.281 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.009 Td [(lb)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(The)-237(lower)-238(bound)-237(for)-238(the)-237(column)-238(index)-237(range)-237(of)-238(the)-237(dense)-238(matrix)-237(to)-238(be)-237(allo-)]TJ 10.52 -11.955 Td [(cated.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F52 9.9626 Tf 295.578 0 Td [(x)]TJ/F54 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.955 Td [(rank-1)-250(array)111(.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.009 Td [(dupl)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F59 9.9626 Tf 164.942 0 Td [(psb_dupl_ovwrt_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F54 9.9626 Tf 67.994 0 Td [(;)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F54 9.9626 Tf 70.485 0 Td [(has)-250(no)-250(ef)18(fect.)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F51 9.9626 Tf -95.391 -19.009 Td [(bldmode)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 45.937 0 Td [(Whether)-372(to)-372(keep)-371(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.288 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ -331.544 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(.)]TJ +/F59 9.9626 Tf -24.907 -19.926 Td [(id)]TJ 0 g 0 G -/F51 9.9626 Tf -168.028 -19.627 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 14.386 0 Td [(Identi\002er)-250(of)-250(a)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(0)]TJ/F91 10.3811 Tf 141.938 0 Td [(\024)]TJ/F60 9.9626 Tf 11.017 0 Td [(i)-32(d)]TJ/F91 10.3811 Tf 11.086 0 Td [(\024)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.503 0 Td [(\000)]TJ/F62 9.9626 Tf 10.132 0 Td [(1)]TJ 0 g 0 G +/F59 9.9626 Tf -223.669 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -19.009 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-273(as:)-357(a)-273(rank)-274(one)-273(or)-274(two)-273(array)-273(with)-274(the)-273(ALLOCA)74(T)74(ABLE)-273(attribute)]TJ 0 -11.955 Td [(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 86.634 0 Td [(psb)]TJ -ET -q -1 0 0 1 278.564 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 281.702 132.281 Td [(T)]TJ -ET -q -1 0 0 1 287.56 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 290.699 132.281 Td [(vect)]TJ -ET -q -1 0 0 1 312.247 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 315.386 132.281 Td [(type)]TJ + 0 -19.925 Td [(Funciton)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(MPI)-250(rank)-250(associated)-250(with)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 224.291 0 Td [(i)-32(d)]TJ/F62 9.9626 Tf 8.195 0 Td [(.)]TJ -280.356 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ/F59 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F62 9.9626 Tf 35.734 0 Td [(The)-388(subr)18(outine)-387(version)]TJ/F67 9.9626 Tf 108.62 0 Td [(psb_get_rank)]TJ/F62 9.9626 Tf 66.626 0 Td [(is)-388(still)-387(available)-388(but)-387(is)-388(depr)18(e-)]TJ -210.98 -11.955 Td [(cated.)]TJ 0 g 0 G - -18.728 -41.843 Td [(90)]TJ + 164.384 -275.93 Td [(116)]TJ 0 g 0 G ET endstream endobj -1497 0 obj +1761 0 obj << -/Length 925 +/Length 1155 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.6)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 151.016 706.129 Td [(wtime)-250(\227)-250(W)74(all)-250(clock)-250(timing)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F67 9.9626 Tf -51.121 -18.964 Td [(time)-525(=)-525(psb_wtime\050\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-298(function)-298(r)18(eturns)-299(a)-298(wall)-298(clock)-298(timer)74(.)-455(The)-298(r)18(esolution)-298(of)-299(the)-298(timer)-298(is)-298(de-)]TJ -14.944 -11.955 Td [(pendent)-250(on)-250(the)-250(underlying)-250(parallel)-250(envir)18(onment)-250(implementation.)]TJ 0 g 0 G - [-500(Using)]TJ/F59 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.28 0 Td [(is)-314(likely)-315(to)-315(cau)1(se)-315(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G - 141.968 -514.072 Td [(91)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Exit)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(the)-250(elapsed)-250(time)-250(in)-250(seconds.)]TJ -47.87 -11.955 Td [(Returned)-250(as:)-310(a)]TJ/F67 9.9626 Tf 66.022 0 Td [(real\050psb_dpk_\051)]TJ/F62 9.9626 Tf 75.715 0 Td [(variable.)]TJ +0 g 0 G + -2.26 -491.123 Td [(117)]TJ 0 g 0 G ET endstream endobj -1505 0 obj +1765 0 obj << -/Length 6336 +/Length 1388 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.13)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.7)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(routine)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geins\050m,)-525(irw,)-525(val,)-525(x,)-525(desc_a,)-525(info)-525([,local]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.57 Td [(T)90(ype:)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(barrier)-240(\227)-240(Sinchronization)-239(point)-240(parallel)-240(environment)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.386 Td [(On)-250(Entry)]TJ +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_barrier\050ctxt\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-358(subr)18(outine)-357(acts)-358(as)-358(an)-358(explicit)-357(synchr)18(onization)-358(point)-358(for)-357(the)-358(PSBLAS)]TJ -14.944 -11.955 Td [(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -19.386 Td [(m)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows)-250(in)]TJ/F52 9.9626 Tf 86.569 0 Td [(v)-40(a)-25(l)]TJ/F54 9.9626 Tf 15.736 0 Td [(to)-250(be)-250(inserted.)]TJ -91.237 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(irw)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Indices)-381(of)-382(the)-382(r)18(ows)-381(to)-382(be)-381(inserted.)-705(Speci\002cally)111(,)-414(r)18(ow)]TJ/F52 9.9626 Tf 239.84 0 Td [(i)]TJ/F54 9.9626 Tf 6.765 0 Td [(of)]TJ/F52 9.9626 Tf 12.683 0 Td [(v)-40(a)-25(l)]TJ/F54 9.9626 Tf 17.046 0 Td [(will)-381(be)-382(in-)]TJ -271.901 -11.955 Td [(serted)-344(into)-344(the)-344(local)-344(r)18(ow)-344(corr)18(esponding)-344(to)-344(the)-344(global)-344(r)18(ow)-344(index)]TJ/F52 9.9626 Tf 290.218 0 Td [(i)-22(r)-35(w)]TJ/F85 10.3811 Tf 14.654 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ -316.314 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(val)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 18.819 0 Td [(the)-250(dense)-250(submatrix)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)-310(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ + 139.477 -455.258 Td [(118)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(desc)]TJ ET -q -1 0 0 1 171.218 414.446 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 414.247 Td [(a)]TJ + +endstream +endobj +1769 0 obj +<< +/Length 1283 +>> +stream +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 366.626 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 363.206 366.426 Td [(desc)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.8)-1000(psb)]TJ ET q -1 0 0 1 384.755 366.626 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 387.893 366.426 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.386 Td [(local)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Whether)-240(the)-240(entries)-241(in)-240(the)-240(index)-240(vector)]TJ/F59 9.9626 Tf 173.162 0 Td [(irw)]TJ/F54 9.9626 Tf 15.692 0 Td [(,)-242(ar)18(e)-240(alr)18(eady)-241(i)1(n)-241(local)-240(number)18(-)]TJ -190.508 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F54 9.9626 Tf 36.613 0 Td [(.)]TJ +/F67 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_abort\050ctxt\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(aborts)-250(computation)-250(on)-250(the)-250(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G -/F51 9.9626 Tf -224.197 -20.57 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -19.387 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(output)-250(dense)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(rank)-190(one)-190(or)-190(two)-190(array)-190(or)-190(an)-190(object)-190(of)-190(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 242.569 0 Td [(psb)]TJ -ET -q -1 0 0 1 434.498 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 437.636 211.443 Td [(T)]TJ -ET -q -1 0 0 1 443.494 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 446.633 211.443 Td [(vect)]TJ -ET -q -1 0 0 1 468.182 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 471.32 211.443 Td [(type)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)]TJ -316.63 -11.955 Td [(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.342 Td [(info)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - 141.968 -29.888 Td [(92)]TJ + 139.477 -467.213 Td [(119)]TJ 0 g 0 G ET endstream endobj -1509 0 obj +1773 0 obj << -/Length 539 +/Length 5526 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.9)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(bcast)-250(\227)-250(Broadcast)-250(data)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(Dense)-250(vectors/matrices)-250(do)-250(not)-250(have)-250(an)-250(associated)-250(state;)]TJ +/F67 9.9626 Tf -51.12 -20.269 Td [(call)-525(psb_bcast\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -24.611 Td [(This)-221(subr)18(outine)-222(implements)-221(a)-221(br)18(oadcast)-222(operation)-221(based)-221(on)-222(the)-221(underlying)]TJ -14.944 -11.955 Td [(communication)-250(library)111(.)]TJ 0 g 0 G - 0 -19.926 Td [(2.)]TJ +/F59 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Duplicate)-326(entries)-326(ar)18(e)-325(either)-326(overwritten)-326(or)-326(added,)-345(ther)18(e)-325(is)-326(no)-326(pr)18(ovision)]TJ 12.453 -11.955 Td [(for)-250(raising)-250(an)-250(err)18(or)-250(condition.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - 141.968 -563.885 Td [(93)]TJ +/F59 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.619 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(On)-250(the)-250(r)18(oot)-250(pr)18(ocess,)-250(the)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-344(a)-344(rank)-344(1)-344(or)-344(2)-344(array)111(,)-367(or)-344(a)-344(character)-344(or)-344(logical)-344(variable,)-367(which)-344(may)-344(be)]TJ 0 -11.955 Td [(a)-377(scalar)-377(or)-377(rank)-377(1)-377(array)111(.)-1067(T)90(ype,)-409(kind,)-409(rank)-377(and)-376(size)-377(must)-377(agr)18(ee)-377(on)-377(all)]TJ 0 -11.955 Td [(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Root)-250(pr)18(ocess)-250(holding)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 1.654 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.503 0 Td [(\000)]TJ/F62 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(0)]TJ +0 g 0 G +/F59 9.9626 Tf -246.025 -22.618 Td [(mode)]TJ +0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 164.384 -29.887 Td [(120)]TJ 0 g 0 G ET endstream endobj -1518 0 obj +1777 0 obj << -/Length 6120 +/Length 5329 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.14)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(dat)]TJ 0 g 0 G +/F62 9.9626 Tf 19.368 0 Td [(On)-250(all)-250(pr)18(ocesses)-250(other)-250(than)-250(r)18(oot,)-250(the)-250(br)18(oadcasted)-250(data.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-315(a)-314(rank)-315(1)-315(or)-314(2)-315(array)111(,)-331(or)-314(a)-315(character)-315(or)-314(logical)-315(scalar)74(.)-819(T)90(ype,)-330(kind,)-331(rank)]TJ 0 -11.955 Td [(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geasb\050x,)-525(desc_a,)-525(info,)-525(mold\051)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 171.218 625.596 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 124.802 417.212 cm +0 0 318.804 27.895 re f Q -BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ -ET -q -1 0 0 1 328.257 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 331.395 577.576 Td [(desc)]TJ -ET -q -1 0 0 1 352.944 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 356.083 577.576 Td [(type)]TJ +/F102 8.9664 Tf 137.205 434.448 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [-525(psb_bcast\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -226.299 -31.88 Td [(mold)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.876 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F59 9.9626 Tf 199.087 0 Td [(psb)]TJ -ET -q -1 0 0 1 391.016 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 394.155 497.875 Td [(T)]TJ -ET -q -1 0 0 1 400.012 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 403.151 497.875 Td [(base)]TJ -ET -q -1 0 0 1 424.7 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 427.838 497.875 Td [(vect)]TJ -ET -q -1 0 0 1 449.387 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 452.525 497.875 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)]TJ/F52 9.9626 Tf 97.12 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 42.899 0 Td [(psb)]TJ -ET -q -1 0 0 1 339.644 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 342.783 485.92 Td [(T)]TJ -ET -q -1 0 0 1 348.641 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 351.779 485.92 Td [(vect)]TJ -ET -q -1 0 0 1 373.328 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 376.466 485.92 Td [(type)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G -/F51 9.9626 Tf -246.682 -21.918 Td [(On)-250(Return)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G - 0 -19.925 Td [(x)]TJ +/F62 9.9626 Tf -48.393 -36.165 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-259(rank)-258(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 50.55 0 Td [(psb)]TJ -ET -q -1 0 0 1 242.48 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 245.618 384.301 Td [(T)]TJ -ET -q -1 0 0 1 251.476 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 254.614 384.301 Td [(vect)]TJ + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 276.163 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 302.642 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 279.301 384.301 Td [(type)]TJ +/F102 8.9664 Tf 137.205 352.754 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ + [-525(psb_bcast\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -149.518 -31.881 Td [(info)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(On)-227(entry)-226(to)-227(this)-226(r)18(outine)-227(th)1(e)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F59 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ + [(bcast_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -62.017 -19.925 Td [(2.)]TJ + -23.537 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(If)-431(the)]TJ/F59 9.9626 Tf 41.544 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 140.287 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-273(the)-272(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.955 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-250(element)-250(applications,)-250(with)]TJ/F59 9.9626 Tf 144.277 0 Td [(dupl=psb_dupl_add_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G - -96.455 -104.573 Td [(94)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_bcast\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(bcast_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -218.48 Td [(121)]TJ 0 g 0 G ET endstream endobj -1526 0 obj +1786 0 obj << -/Length 3224 +/Length 5829 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.15)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.10)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gefree)-250(\227)-250(Frees)-250(a)-250(dense)-250(matrix)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gefree\050x,)-525(desc_a,)-525(info\051)]TJ +/F67 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_sum\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -22.401 Td [(This)-353(subr)18(outine)-353(implements)-354(a)-353(sum)-353(r)18(eduction)-353(operation)-353(based)-354(on)-353(the)-353(un-)]TJ -14.944 -11.955 Td [(derlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(x)]TJ + 0 -20.408 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-258(rank)-259(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 50.55 0 Td [(psb)]TJ -ET -q -1 0 0 1 191.67 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 194.809 565.621 Td [(T)]TJ -ET -q -1 0 0 1 200.666 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 203.805 565.621 Td [(vect)]TJ -ET -q -1 0 0 1 225.354 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 228.492 565.621 Td [(type)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +/F59 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ 0 g 0 G -/F51 9.9626 Tf -149.518 -31.881 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 533.94 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 533.74 Td [(a)]TJ +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(sum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.327 0 Td [(psb)]TJ -ET -q -1 0 0 1 277.448 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 280.586 485.92 Td [(desc)]TJ -ET -q -1 0 0 1 302.135 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 305.273 485.92 Td [(type)]TJ +/F59 9.9626 Tf -24.907 -20.408 Td [(root)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(to)-250(hold)-250(the)-250(\002nal)-250(sum,)-250(or)]TJ/F91 10.3811 Tf 143.744 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-250(to)-250(make)-250(it)-250(available)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -150.284 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ 0 g 0 G -/F51 9.9626 Tf -226.3 -33.873 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -254.343 -20.409 Td [(mode)]TJ 0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G - 0 -19.925 Td [(info)]TJ +/F59 9.9626 Tf -24.907 -32.364 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ 0 g 0 G - 141.968 -293.863 Td [(95)]TJ +0 g 0 G + 0 -20.408 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(sum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.781 0 Td [(.)]TJ +0 g 0 G + 79.264 -29.887 Td [(122)]TJ 0 g 0 G ET endstream endobj -1530 0 obj +1790 0 obj << -/Length 3218 +/Length 4964 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.16)-1000(psb)]TJ +/F62 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 124.802 441.123 cm +0 0 318.804 27.895 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F51 11.9552 Tf 207.803 706.129 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ +/F102 8.9664 Tf 137.205 458.358 Td [(call)]TJ 0 g 0 G + [-525(psb_sum\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gelp\050trans,)-525(iperm,)-525(x,)-525(info\051)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G - 0 -19.925 Td [(trans)]TJ +/F62 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(A)-250(character)-250(that)-250(speci\002es)-250(whether)-250(to)-250(permute)]TJ/F52 9.9626 Tf 203.748 0 Td [(A)]TJ/F54 9.9626 Tf 9.808 0 Td [(or)]TJ/F52 9.9626 Tf 12.488 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ -241.716 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(single)-250(character)-250(with)-250(value)-250('N')-250(for)]TJ/F52 9.9626 Tf 218.195 0 Td [(A)]TJ/F54 9.9626 Tf 9.808 0 Td [(or)-250('T')-250(for)]TJ/F52 9.9626 Tf 41.807 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 326.552 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -307.628 -31.88 Td [(iperm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(An)-250(integer)-250(array)-250(containing)-250(permutation)-250(information.)]TJ -7.193 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(one-dimensional)-250(array)111(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 376.664 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(x)]TJ + [-525(psb_sum\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(permuted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(or)-250(two)-250(dimensional)-250(array)111(.)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -33.873 Td [(On)-250(Return)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 0 -19.926 Td [(info)]TJ + [(sum_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + -23.537 -10.958 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -226.117 Td [(96)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_sum\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(sum_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -242.391 Td [(123)]TJ 0 g 0 G ET endstream endobj -1535 0 obj +1797 0 obj << -/Length 6238 +/Length 5548 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.17)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(glob)]TJ -ET -q -1 0 0 1 182.29 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 185.877 706.129 Td [(to)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.11)-1000(psb)]TJ ET q -1 0 0 1 197.222 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 200.809 706.129 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -100.914 -18.964 Td [(call)-525(psb_glob_to_loc\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ 0 -11.955 Td [(call)-525(psb_glob_to_loc\050x,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.109 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.602 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.601 Td [(x)]TJ +/F67 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_max\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -24.611 Td [(This)-354(subr)18(outine)-354(implements)-354(a)-354(maximum)-354(valuer)18(eduction)-354(operation)-354(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +/F59 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.557 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 535.72 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 535.52 Td [(a)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 487.899 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 487.7 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 487.899 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 487.7 Td [(type)]TJ +/F59 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.602 Td [(iact)]TJ + 0 -22.619 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 -19.601 Td [(owned)]TJ +/F59 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 35.975 0 Td [(Spec\002es)-250(valid)-250(range)-250(of)-250(input)-250(Scope:)]TJ/F51 9.9626 Tf 159.54 0 Td [(global)]TJ/F54 9.9626 Tf -170.608 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(If)-320(tr)8(ue,)-337(then)-320(only)-320(indices)-320(strictly)-320(owned)-320(by)-320(the)-319(curr)18(ent)-320(pr)18(ocess)-320(ar)18(e)-320(con-)]TJ 0 -11.955 Td [(sider)18(ed)-250(valid,)-250(if)-250(false)-250(then)-250(halo)-250(indices)-250(ar)18(e)-250(also)-250(accepted.)-310(Default:)-310(false.)]TJ +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.109 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.907 -22.619 Td [(root)]TJ 0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-255(to)-255(hold)-255(the)-255(\002nal)-255(maximum,)-257(or)]TJ/F91 10.3811 Tf 170.502 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-255(to)-255(make)-255(it)-255(available)-255(on)-255(all)-255(pr)18(o-)]TJ -177.042 -11.955 Td [(cesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ 0 g 0 G - 0 -19.601 Td [(x)]TJ +/F59 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(If)]TJ/F52 9.9626 Tf 9.727 0 Td [(y)]TJ/F54 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F52 9.9626 Tf 88.385 0 Td [(x)]TJ/F54 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.602 Td [(y)]TJ +/F59 9.9626 Tf -24.907 -34.573 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(If)]TJ/F52 9.9626 Tf 9.521 0 Td [(y)]TJ/F54 9.9626 Tf 7.827 0 Td [(is)-273(pr)18(esent,)-279(then)]TJ/F52 9.9626 Tf 70.133 0 Td [(y)]TJ/F54 9.9626 Tf 7.827 0 Td [(is)-273(overwritten)-273(with)-273(the)-273(translated)-274(integer)-273(indices,)-279(and)]TJ/F52 9.9626 Tf -80.628 -11.955 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F51 9.9626 Tf 112.557 0 Td [(global)]TJ/F54 9.9626 Tf -120.547 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.602 Td [(info)]TJ +/F59 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.108 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(97)]TJ +/F62 9.9626 Tf 164.384 -29.887 Td [(124)]TJ 0 g 0 G ET endstream endobj -1540 0 obj +1801 0 obj << -/Length 672 +/Length 5227 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 163.158 706.129 Td [(1.)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(dat)]TJ 0 g 0 G - [-500(If)-272(an)-273(input)-272(index)-272(is)-273(out)-272(of)-273(range)1(,)-278(then)-273(the)-272(corr)18(esponding)-272(output)-273(index)-272(is)]TJ 12.453 -11.955 Td [(set)-250(to)-250(a)-250(negative)-250(number;)]TJ +/F62 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ 0 g 0 G - [-500(The)-416(default)]TJ/F59 9.9626 Tf 68.74 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e)-416(means)-417(that)-416(the)-417(negative)-416(output)-416(is)-417(the)-416(only)-416(action)]TJ -61.517 -11.955 Td [(taken)-250(on)-250(an)-250(out-of-range)-250(input.)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G - 141.968 -571.855 Td [(98)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -ET - -endstream -endobj -1547 0 obj -<< -/Length 5458 ->> -stream + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.18)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(loc)]TJ -ET -q -1 0 0 1 173.646 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.233 706.129 Td [(to)]TJ + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 188.578 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 124.802 429.167 cm +0 0 318.804 27.895 re f Q -BT -/F51 11.9552 Tf 192.165 706.129 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -92.27 -18.964 Td [(call)-525(psb_loc_to_glob\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact\051)]TJ 0 -11.955 Td [(call)-525(psb_loc_to_glob\050x,)-525(desc_a,)-525(info,)-525(iact\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 446.403 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(psb_max\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.881 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 533.94 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 533.74 Td [(a)]TJ +/F62 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 485.92 Td [(desc)]TJ + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.956 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 333.945 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 314.597 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 337.084 485.92 Td [(type)]TJ +/F102 8.9664 Tf 137.205 364.709 Td [(call)]TJ +0 g 0 G + [-525(psb_max\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -19.926 Td [(iact)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ + [(max_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + -23.537 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(x)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(If)]TJ/F52 9.9626 Tf 9.727 0 Td [(y)]TJ/F54 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F52 9.9626 Tf 88.385 0 Td [(x)]TJ/F54 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(y)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.958 Td [(call)]TJ +0 g 0 G + [-525(psb_max\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(If)]TJ/F52 9.9626 Tf 9.705 0 Td [(y)]TJ/F54 9.9626 Tf 8.011 0 Td [(is)-292(not)-291(pr)18(esent,)-302(then)]TJ/F52 9.9626 Tf 88.122 0 Td [(y)]TJ/F54 9.9626 Tf 8.011 0 Td [(is)-292(overwritten)-291(with)-292(the)-291(translated)-292(integer)-292(indice)1(s,)]TJ -99.463 -11.955 Td [(and)]TJ/F52 9.9626 Tf 19.651 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F51 9.9626 Tf 112.557 0 Td [(global)]TJ/F54 9.9626 Tf -139.904 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 141.968 -114.535 Td [(99)]TJ + [(max_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -230.436 Td [(125)]TJ 0 g 0 G ET endstream endobj -1552 0 obj +1808 0 obj << -/Length 3169 +/Length 5813 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.19)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.12)-1000(psb)]TJ ET q 1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(is)]TJ -ET -q -1 0 0 1 217.809 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 221.396 706.129 Td [(owned)-250(\227)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_owned\050x,)-525(desc_a\051)]TJ +/F67 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_min\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -22.401 Td [(This)-328(subr)18(outine)-327(implements)-328(a)-328(minimum)-327(value)-328(r)18(eduction)-328(o)1(peration)-328(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(x)]TJ + 0 -20.408 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 545.696 Td [(a)]TJ +/F59 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 497.875 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 497.875 Td [(type)]TJ +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 9.9626 Tf -24.907 -20.408 Td [(root)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F91 10.3811 Tf 147.052 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ 0 g 0 G +/F59 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-261(logical)-260(mask)-261(which)-261(is)-261(tr)8(ue)-261(if)]TJ/F52 9.9626 Tf 137.304 0 Td [(x)]TJ/F54 9.9626 Tf 7.803 0 Td [(is)-261(owned)-261(by)-260(the)-261(curr)18(ent)-261(pr)18(o-)]TJ -192.978 -11.955 Td [(cess)-250(Scope:)]TJ/F51 9.9626 Tf 51.567 0 Td [(local)]TJ/F54 9.9626 Tf -51.567 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ +/F59 9.9626 Tf -24.907 -32.364 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ 0 g 0 G - [-500(This)-240(r)18(outine)-239(r)18(eturns)-240(a)]TJ/F59 9.9626 Tf 109.67 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.769 0 Td [(value)-240(for)-239(an)-240(index)-239(that)-240(is)-239(strictly)-240(owned)-239(by)]TJ -130.986 -11.955 Td [(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +/F59 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ 0 g 0 G - 139.477 -263.975 Td [(100)]TJ +0 g 0 G + 0 -20.408 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.781 0 Td [(.)]TJ +0 g 0 G + 79.264 -29.887 Td [(126)]TJ 0 g 0 G ET endstream endobj -1558 0 obj +1812 0 obj << -/Length 4795 +/Length 4946 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.20)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(owned)]TJ -ET -q -1 0 0 1 194.903 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 198.489 706.129 Td [(index)-250(\227)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -98.594 -18.964 Td [(call)-525(psb_owned_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 545.696 Td [(a)]TJ + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 497.875 Td [(desc)]TJ + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 333.945 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 441.123 cm +0 0 318.804 27.895 re f Q -BT -/F59 9.9626 Tf 337.084 497.875 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ -0 g 0 G -/F51 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ -0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - 0 -19.925 Td [(y)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-200(logical)-200(mask)-200(which)-201(is)-200(tr)8(ue)-200(for)-200(all)-200(corr)18(esponding)-200(entries)-200(of)]TJ/F52 9.9626 Tf 260.812 0 Td [(x)]TJ/F54 9.9626 Tf 7.2 0 Td [(that)-200(ar)18(e)-200(owned)]TJ -253.626 -11.955 Td [(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F51 9.9626 Tf 131.027 0 Td [(local)]TJ/F54 9.9626 Tf -131.027 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 458.358 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ + [-525(psb_min\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(This)-429(r)18(outine)-428(r)18(eturns)-429(a)]TJ/F59 9.9626 Tf 117.209 0 Td [(.true.)]TJ/F54 9.9626 Tf 35.654 0 Td [(value)-429(for)-428(those)-429(indices)-429(that)-429(ar)18(e)-428(strictly)]TJ -140.41 -11.955 Td [(owned)-250(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G - 139.477 -140.438 Td [(101)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -ET - -endstream -endobj -1564 0 obj -<< -/Length 3147 ->> -stream 0 g 0 G +/F62 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.21)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(is)]TJ + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 217.809 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 124.802 326.552 cm +0 0 318.804 60.772 re f Q -BT -/F51 11.9552 Tf 221.396 706.129 Td [(local)-250(\227)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_local\050x,)-525(desc_a\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 376.664 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(psb_min\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ + [(min_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ + -23.537 -10.958 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 545.696 Td [(a)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 497.875 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 497.875 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ + [-525(psb_min\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-244(logical)-244(mask)-243(which)-244(is)-244(tr)8(ue)-244(if)]TJ/F52 9.9626 Tf 136.118 0 Td [(x)]TJ/F54 9.9626 Tf 7.635 0 Td [(is)-244(local)-244(to)-243(the)-244(curr)18(ent)-244(pr)18(ocess)]TJ -191.623 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + [(min_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(This)-199(r)18(outine)-200(r)18(eturns)-199(a)]TJ/F59 9.9626 Tf 108.069 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.369 0 Td [(value)-199(for)-200(an)-199(index)-199(that)-200(is)-199(local)-200(to)-199(the)-199(curr)18(ent)]TJ -128.984 -11.955 Td [(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices)]TJ 0 g 0 G - 139.476 -263.975 Td [(102)]TJ +/F62 9.9626 Tf 103.537 -242.391 Td [(127)]TJ 0 g 0 G ET endstream endobj -1464 0 obj +1708 0 obj << /Type /ObjStm /N 100 -/First 976 -/Length 10636 ->> -stream -1461 0 1457 157 1458 304 1459 449 1463 595 344 654 1460 712 1466 806 1468 924 1469 982 -1470 1040 1471 1098 1472 1156 1473 1214 1474 1272 1475 1330 1465 1388 1479 1482 1476 1630 1477 1773 -1481 1920 348 1979 1478 2037 1485 2131 1482 2279 1483 2424 1487 2571 352 2629 1488 2686 1484 2744 -1492 2838 1489 2986 1490 3129 1494 3273 356 3332 1491 3390 1496 3497 1498 3615 1499 3673 1495 3730 -1504 3824 1501 3972 1502 4118 1506 4262 360 4321 1503 4379 1508 4499 1510 4617 1511 4675 1512 4733 -1507 4791 1517 4872 1513 5029 1514 5170 1515 5313 1519 5457 364 5516 1520 5574 1521 5633 1516 5692 -1525 5799 1522 5947 1523 6090 1527 6237 368 6295 1524 6352 1529 6446 1531 6564 372 6623 1528 6681 -1534 6788 1532 6927 1536 7074 376 7132 1533 7189 1539 7296 1541 7414 1542 7473 1543 7532 1538 7591 -1546 7672 1544 7811 1548 7958 380 8016 1545 8073 1551 8180 1549 8319 1553 8466 384 8525 1554 8583 -1550 8642 1557 8749 1555 8888 1559 9035 388 9093 1560 9150 1556 9208 1563 9315 1561 9454 1565 9601 -% 1461 0 obj +/First 970 +/Length 9209 +>> +stream +1704 0 1710 107 1712 225 417 283 1709 340 1715 447 1717 565 1718 624 1719 683 1720 742 +1721 801 1722 860 1723 919 1724 978 1725 1037 1726 1096 1714 1155 1728 1275 1730 1393 421 1451 +1727 1508 1732 1589 1734 1707 425 1766 1735 1824 1736 1883 1731 1942 1738 2075 1740 2193 429 2251 +1741 2308 1742 2365 1737 2421 1744 2554 1746 2672 433 2731 1747 2789 1748 2848 1749 2907 1743 2966 +1751 3099 1753 3217 437 3275 1750 3332 1756 3426 1758 3544 441 3603 1755 3661 1760 3781 1762 3899 +445 3957 1759 4014 1764 4108 1766 4226 449 4285 1763 4343 1768 4437 1770 4555 453 4613 1767 4670 +1772 4764 1774 4882 457 4941 1771 4999 1776 5146 1778 5264 1779 5322 1780 5380 1781 5438 1775 5496 +1785 5633 1787 5751 461 5810 1784 5868 1789 6015 1791 6133 1792 6191 1793 6249 1794 6307 1788 6365 +1796 6502 1798 6620 465 6679 1795 6737 1800 6884 1802 7002 1803 7060 1804 7118 1805 7176 1799 7233 +1807 7370 1809 7488 469 7547 1806 7605 1811 7752 1813 7870 1814 7928 1815 7986 1816 8044 1810 8102 +% 1704 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1710 0 obj << /Type /Page -/Contents 1462 0 R -/Resources 1460 0 R +/Contents 1711 0 R +/Resources 1709 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1457 0 R 1458 0 R 1459 0 R ] +/Parent 1713 0 R >> -% 1457 0 obj +% 1712 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 572.168 409.811 584.228] -/A << /S /GoTo /D (descdata) >> +/D [1710 0 R /XYZ 98.895 753.953 null] >> -% 1458 0 obj +% 417 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 276.913 420.271 288.973] -/A << /S /GoTo /D (spdata) >> +/D [1710 0 R /XYZ 99.895 716.092 null] >> -% 1459 0 obj +% 1709 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [345.92 208.672 412.978 220.731] -/A << /S /GoTo /D (descdata) >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1463 0 obj +% 1715 0 obj << -/D [1461 0 R /XYZ 149.705 753.953 null] +/Type /Page +/Contents 1716 0 R +/Resources 1714 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1713 0 R >> -% 344 0 obj +% 1717 0 obj << -/D [1461 0 R /XYZ 150.705 716.092 null] +/D [1715 0 R /XYZ 149.705 753.953 null] >> -% 1460 0 obj +% 1718 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1715 0 R /XYZ 150.705 701.929 null] >> -% 1466 0 obj +% 1719 0 obj << -/Type /Page -/Contents 1467 0 R -/Resources 1465 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R +/D [1715 0 R /XYZ 150.705 668.729 null] >> -% 1468 0 obj +% 1720 0 obj << -/D [1466 0 R /XYZ 98.895 753.953 null] +/D [1715 0 R /XYZ 150.705 624.894 null] >> -% 1469 0 obj +% 1721 0 obj << -/D [1466 0 R /XYZ 99.895 701.929 null] +/D [1715 0 R /XYZ 150.705 555.872 null] >> -% 1470 0 obj +% 1722 0 obj << -/D [1466 0 R /XYZ 99.895 667.454 null] +/D [1715 0 R /XYZ 150.705 500.082 null] >> -% 1471 0 obj +% 1723 0 obj << -/D [1466 0 R /XYZ 99.895 647.529 null] +/D [1715 0 R /XYZ 150.705 468.201 null] >> -% 1472 0 obj +% 1724 0 obj << -/D [1466 0 R /XYZ 99.895 603.693 null] +/D [1715 0 R /XYZ 150.705 425.023 null] >> -% 1473 0 obj +% 1725 0 obj << -/D [1466 0 R /XYZ 99.895 547.902 null] +/D [1715 0 R /XYZ 150.705 382.522 null] >> -% 1474 0 obj +% 1726 0 obj << -/D [1466 0 R /XYZ 99.895 527.977 null] +/D [1715 0 R /XYZ 150.705 354.627 null] >> -% 1475 0 obj +% 1714 0 obj << -/D [1466 0 R /XYZ 99.895 496.097 null] +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1465 0 obj +% 1728 0 obj +<< +/Type /Page +/Contents 1729 0 R +/Resources 1727 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1713 0 R +>> +% 1730 0 obj +<< +/D [1728 0 R /XYZ 98.895 753.953 null] +>> +% 421 0 obj +<< +/D [1728 0 R /XYZ 99.895 716.092 null] +>> +% 1727 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1479 0 obj +% 1732 0 obj << /Type /Page -/Contents 1480 0 R -/Resources 1478 0 R +/Contents 1733 0 R +/Resources 1731 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1476 0 R 1477 0 R ] +/Parent 1713 0 R >> -% 1476 0 obj +% 1734 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 420.271 585.83] -/A << /S /GoTo /D (spdata) >> +/D [1732 0 R /XYZ 149.705 753.953 null] >> -% 1477 0 obj +% 425 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 506.024 409.811 518.084] -/A << /S /GoTo /D (descdata) >> +/D [1732 0 R /XYZ 150.705 716.092 null] >> -% 1481 0 obj +% 1735 0 obj << -/D [1479 0 R /XYZ 149.705 753.953 null] +/D [1732 0 R /XYZ 150.705 222.691 null] >> -% 348 0 obj +% 1736 0 obj << -/D [1479 0 R /XYZ 150.705 716.092 null] +/D [1732 0 R /XYZ 150.705 200.171 null] >> -% 1478 0 obj +% 1731 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F93 915 0 R /F60 666 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1485 0 obj +% 1738 0 obj << /Type /Page -/Contents 1486 0 R -/Resources 1484 0 R +/Contents 1739 0 R +/Resources 1737 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1482 0 R 1483 0 R ] ->> -% 1482 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 559.823 369.462 571.882] -/A << /S /GoTo /D (spdata) >> +/Parent 1713 0 R >> -% 1483 0 obj +% 1740 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 492.077 359.001 504.136] -/A << /S /GoTo /D (descdata) >> +/D [1738 0 R /XYZ 98.895 753.953 null] >> -% 1487 0 obj +% 429 0 obj << -/D [1485 0 R /XYZ 98.895 753.953 null] +/D [1738 0 R /XYZ 99.895 716.092 null] >> -% 352 0 obj +% 1741 0 obj << -/D [1485 0 R /XYZ 99.895 716.092 null] +/D [1738 0 R /XYZ 99.895 348.22 null] >> -% 1488 0 obj +% 1742 0 obj << -/D [1485 0 R /XYZ 99.895 312.355 null] +/D [1738 0 R /XYZ 99.895 313.8 null] >> -% 1484 0 obj +% 1737 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1492 0 obj +% 1744 0 obj << /Type /Page -/Contents 1493 0 R -/Resources 1491 0 R +/Contents 1745 0 R +/Resources 1743 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1489 0 R 1490 0 R ] +/Parent 1713 0 R >> -% 1489 0 obj +% 1746 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 577.893 378 589.953] -/A << /S /GoTo /D (descdata) >> +/D [1744 0 R /XYZ 149.705 753.953 null] >> -% 1490 0 obj +% 433 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [261.249 128.475 337.303 140.535] -/A << /S /GoTo /D (vdata) >> +/D [1744 0 R /XYZ 150.705 716.092 null] >> -% 1494 0 obj +% 1747 0 obj << -/D [1492 0 R /XYZ 149.705 753.953 null] +/D [1744 0 R /XYZ 150.705 441.869 null] >> -% 356 0 obj +% 1748 0 obj << -/D [1492 0 R /XYZ 150.705 716.092 null] +/D [1744 0 R /XYZ 150.705 395.439 null] >> -% 1491 0 obj +% 1749 0 obj +<< +/D [1744 0 R /XYZ 150.705 363.559 null] +>> +% 1743 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1496 0 obj +% 1751 0 obj << /Type /Page -/Contents 1497 0 R -/Resources 1495 0 R +/Contents 1752 0 R +/Resources 1750 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R +/Parent 1754 0 R >> -% 1498 0 obj +% 1753 0 obj << -/D [1496 0 R /XYZ 98.895 753.953 null] +/D [1751 0 R /XYZ 98.895 753.953 null] >> -% 1499 0 obj +% 437 0 obj << -/D [1496 0 R /XYZ 99.895 632.19 null] +/D [1751 0 R /XYZ 99.895 716.092 null] >> -% 1495 0 obj +% 1750 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1504 0 obj +% 1756 0 obj << /Type /Page -/Contents 1505 0 R -/Resources 1503 0 R +/Contents 1757 0 R +/Resources 1755 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1501 0 R 1502 0 R ] +/Parent 1754 0 R >> -% 1501 0 obj +% 1758 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 362.621 409.811 374.68] -/A << /S /GoTo /D (descdata) >> +/D [1756 0 R /XYZ 149.705 753.953 null] >> -% 1502 0 obj +% 441 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [417.183 207.637 493.237 219.697] -/A << /S /GoTo /D (vdata) >> +/D [1756 0 R /XYZ 150.705 716.092 null] >> -% 1506 0 obj +% 1755 0 obj << -/D [1504 0 R /XYZ 149.705 753.953 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R >> +/ProcSet [ /PDF /Text ] >> -% 360 0 obj +% 1760 0 obj +<< +/Type /Page +/Contents 1761 0 R +/Resources 1759 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1754 0 R +>> +% 1762 0 obj << -/D [1504 0 R /XYZ 150.705 716.092 null] +/D [1760 0 R /XYZ 98.895 753.953 null] >> -% 1503 0 obj +% 445 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/D [1760 0 R /XYZ 99.895 716.092 null] +>> +% 1759 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1508 0 obj +% 1764 0 obj << /Type /Page -/Contents 1509 0 R -/Resources 1507 0 R +/Contents 1765 0 R +/Resources 1763 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R ->> -% 1510 0 obj -<< -/D [1508 0 R /XYZ 98.895 753.953 null] +/Parent 1754 0 R >> -% 1511 0 obj +% 1766 0 obj << -/D [1508 0 R /XYZ 99.895 701.929 null] +/D [1764 0 R /XYZ 149.705 753.953 null] >> -% 1512 0 obj +% 449 0 obj << -/D [1508 0 R /XYZ 99.895 680.684 null] +/D [1764 0 R /XYZ 150.705 716.092 null] >> -% 1507 0 obj +% 1763 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1517 0 obj +% 1768 0 obj << /Type /Page -/Contents 1518 0 R -/Resources 1516 0 R +/Contents 1769 0 R +/Resources 1767 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1513 0 R 1514 0 R 1515 0 R ] ->> -% 1513 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 573.77 378 585.83] -/A << /S /GoTo /D (descdata) >> +/Parent 1754 0 R >> -% 1514 0 obj +% 1770 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [322.33 482.114 398.384 494.174] -/A << /S /GoTo /D (vdata) >> +/D [1768 0 R /XYZ 98.895 753.953 null] >> -% 1515 0 obj +% 453 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [225.165 380.495 301.219 392.555] -/A << /S /GoTo /D (vdata) >> +/D [1768 0 R /XYZ 99.895 716.092 null] >> -% 1519 0 obj +% 1767 0 obj << -/D [1517 0 R /XYZ 149.705 753.953 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 364 0 obj +% 1772 0 obj << -/D [1517 0 R /XYZ 150.705 716.092 null] +/Type /Page +/Contents 1773 0 R +/Resources 1771 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1754 0 R >> -% 1520 0 obj +% 1774 0 obj << -/D [1517 0 R /XYZ 150.705 278.482 null] +/D [1772 0 R /XYZ 149.705 753.953 null] >> -% 1521 0 obj +% 457 0 obj << -/D [1517 0 R /XYZ 150.705 244.007 null] +/D [1772 0 R /XYZ 150.705 716.092 null] >> -% 1516 0 obj +% 1771 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1525 0 obj +% 1776 0 obj << /Type /Page -/Contents 1526 0 R -/Resources 1524 0 R +/Contents 1777 0 R +/Resources 1775 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1522 0 R 1523 0 R ] +/Parent 1783 0 R >> -% 1522 0 obj +% 1778 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [174.355 561.815 250.41 573.875] -/A << /S /GoTo /D (vdata) >> +/D [1776 0 R /XYZ 98.895 753.953 null] >> -% 1523 0 obj +% 1779 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [260.133 482.114 327.191 494.174] -/A << /S /GoTo /D (descdata) >> +/D [1776 0 R /XYZ 99.895 528.579 null] >> -% 1527 0 obj +% 1780 0 obj << -/D [1525 0 R /XYZ 98.895 753.953 null] +/D [1776 0 R /XYZ 99.895 494.104 null] >> -% 368 0 obj +% 1781 0 obj << -/D [1525 0 R /XYZ 99.895 716.092 null] +/D [1776 0 R /XYZ 99.895 403.265 null] >> -% 1524 0 obj +% 1775 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1529 0 obj +% 1785 0 obj << /Type /Page -/Contents 1530 0 R -/Resources 1528 0 R +/Contents 1786 0 R +/Resources 1784 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R +/Parent 1783 0 R >> -% 1531 0 obj +% 1787 0 obj << -/D [1529 0 R /XYZ 149.705 753.953 null] +/D [1785 0 R /XYZ 149.705 753.953 null] >> -% 372 0 obj +% 461 0 obj << -/D [1529 0 R /XYZ 150.705 716.092 null] +/D [1785 0 R /XYZ 150.705 716.092 null] >> -% 1528 0 obj +% 1784 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1534 0 obj +% 1789 0 obj << /Type /Page -/Contents 1535 0 R -/Resources 1533 0 R +/Contents 1790 0 R +/Resources 1788 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1532 0 R ] +/Parent 1783 0 R >> -% 1532 0 obj +% 1791 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 483.894 359.001 495.954] -/A << /S /GoTo /D (descdata) >> +/D [1789 0 R /XYZ 98.895 753.953 null] >> -% 1536 0 obj +% 1792 0 obj << -/D [1534 0 R /XYZ 98.895 753.953 null] +/D [1789 0 R /XYZ 99.895 552.489 null] >> -% 376 0 obj +% 1793 0 obj << -/D [1534 0 R /XYZ 99.895 716.092 null] +/D [1789 0 R /XYZ 99.895 518.014 null] >> -% 1533 0 obj +% 1794 0 obj +<< +/D [1789 0 R /XYZ 99.895 427.175 null] +>> +% 1788 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1539 0 obj +% 1796 0 obj << /Type /Page -/Contents 1540 0 R -/Resources 1538 0 R +/Contents 1797 0 R +/Resources 1795 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R ->> -% 1541 0 obj -<< -/D [1539 0 R /XYZ 149.705 753.953 null] +/Parent 1783 0 R >> -% 1542 0 obj +% 1798 0 obj << -/D [1539 0 R /XYZ 150.705 716.092 null] +/D [1796 0 R /XYZ 149.705 753.953 null] >> -% 1543 0 obj +% 465 0 obj << -/D [1539 0 R /XYZ 150.705 687.379 null] +/D [1796 0 R /XYZ 150.705 716.092 null] >> -% 1538 0 obj +% 1795 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1546 0 obj +% 1800 0 obj << /Type /Page -/Contents 1547 0 R -/Resources 1545 0 R +/Contents 1801 0 R +/Resources 1799 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1544 0 R ] +/Parent 1783 0 R >> -% 1544 0 obj +% 1802 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 482.114 359.001 494.174] -/A << /S /GoTo /D (descdata) >> +/D [1800 0 R /XYZ 98.895 753.953 null] >> -% 1548 0 obj +% 1803 0 obj << -/D [1546 0 R /XYZ 98.895 753.953 null] +/D [1800 0 R /XYZ 99.895 540.534 null] >> -% 380 0 obj +% 1804 0 obj << -/D [1546 0 R /XYZ 99.895 716.092 null] +/D [1800 0 R /XYZ 99.895 506.059 null] >> -% 1545 0 obj +% 1805 0 obj +<< +/D [1800 0 R /XYZ 99.895 415.22 null] +>> +% 1799 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1551 0 obj +% 1807 0 obj << /Type /Page -/Contents 1552 0 R -/Resources 1550 0 R +/Contents 1808 0 R +/Resources 1806 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1549 0 R ] ->> -% 1549 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 494.069 409.811 506.129] -/A << /S /GoTo /D (descdata) >> ->> -% 1553 0 obj -<< -/D [1551 0 R /XYZ 149.705 753.953 null] +/Parent 1783 0 R >> -% 384 0 obj +% 1809 0 obj << -/D [1551 0 R /XYZ 150.705 716.092 null] +/D [1807 0 R /XYZ 149.705 753.953 null] >> -% 1554 0 obj +% 469 0 obj << -/D [1551 0 R /XYZ 150.705 382.093 null] +/D [1807 0 R /XYZ 150.705 716.092 null] >> -% 1550 0 obj +% 1806 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1557 0 obj +% 1811 0 obj << /Type /Page -/Contents 1558 0 R -/Resources 1556 0 R +/Contents 1812 0 R +/Resources 1810 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1555 0 R ] +/Parent 1817 0 R >> -% 1555 0 obj +% 1813 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 494.069 359.001 506.129] -/A << /S /GoTo /D (descdata) >> +/D [1811 0 R /XYZ 98.895 753.953 null] >> -% 1559 0 obj +% 1814 0 obj << -/D [1557 0 R /XYZ 98.895 753.953 null] +/D [1811 0 R /XYZ 99.895 552.489 null] >> -% 388 0 obj +% 1815 0 obj << -/D [1557 0 R /XYZ 99.895 716.092 null] +/D [1811 0 R /XYZ 99.895 518.014 null] >> -% 1560 0 obj +% 1816 0 obj << -/D [1557 0 R /XYZ 99.895 258.556 null] +/D [1811 0 R /XYZ 99.895 427.175 null] >> -% 1556 0 obj +% 1810 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1563 0 obj + +endstream +endobj +1820 0 obj << -/Type /Page -/Contents 1564 0 R -/Resources 1562 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1561 0 R ] +/Length 5616 >> -% 1561 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(7.13)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amx\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -24.611 Td [(This)-342(subr)18(outine)-342(implements)-342(a)-342(maximum)-341(absolute)-342(value)-342(r)18(eduction)-342(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.619 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F91 10.3811 Tf 147.052 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F59 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 164.384 -29.887 Td [(128)]TJ +0 g 0 G +ET + +endstream +endobj +1825 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 494.069 409.811 506.129] -/A << /S /GoTo /D (descdata) >> +/Length 5225 >> -% 1565 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 417.212 cm +0 0 318.804 27.895 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 434.448 Td [(call)]TJ +0 g 0 G + [-525(psb_amx\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf -48.393 -36.165 Td [(3.)]TJ +0 g 0 G + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 302.642 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 127.791 352.754 Td [(call)]TJ +0 g 0 G + [-525(psb_amx\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amx_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + -14.122 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_amx\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amx_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -218.48 Td [(129)]TJ +0 g 0 G +ET + +endstream +endobj +1832 0 obj +<< +/Length 5619 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(7.14)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amn\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -24.611 Td [(This)-360(subr)18(outine)-360(impl)1(ements)-360(a)-360(minimum)-360(absolute)-360(value)-359(r)18(eduction)-360(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.619 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F91 10.3811 Tf 147.052 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F59 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 164.384 -29.887 Td [(130)]TJ +0 g 0 G +ET + +endstream +endobj +1836 0 obj << -/D [1563 0 R /XYZ 149.705 753.953 null] +/Length 5262 >> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 417.212 cm +0 0 318.804 27.895 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 434.448 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf -48.393 -36.165 Td [(3.)]TJ +0 g 0 G + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 302.642 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 352.754 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amn_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + -23.537 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amn_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -218.48 Td [(131)]TJ +0 g 0 G +ET endstream endobj -1571 0 obj +1843 0 obj << -/Length 4785 +/Length 5776 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.22)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(local)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.15)-1000(psb)]TJ ET q -1 0 0 1 183.605 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 187.192 706.129 Td [(index)-250(\227)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(reduction)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -87.297 -18.964 Td [(call)-525(psb_local_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ +/F67 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_nrm2\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -22.401 Td [(This)-297(subr)18(outine)-296(implements)-297(a)-297(2-norm)-296(value)-297(r)18(eduction)-297(operation)-296(based)-297(on)]TJ -14.944 -11.955 Td [(the)-250(underlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(x)]TJ + 0 -20.408 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-370(as:)-551(a)-371(r)18(eal)-370(variable,)-401(which)-370(may)-371(be)-370(a)-371(scalar)74(,)-400(or)-371(a)-370(rank)-371(1)-370(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.408 Td [(root)]TJ +0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F91 10.3811 Tf 147.052 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F59 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ +0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.408 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(2-norm)-250(r)18(eduction.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.781 0 Td [(.)]TJ +0 g 0 G + 79.264 -29.887 Td [(132)]TJ +0 g 0 G +ET + +endstream +endobj +1847 0 obj +<< +/Length 6252 +>> +stream +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(variable,)-250(which)-250(may)-250(be)-250(a)-250(scalar)74(,)-250(or)-250(a)-250(rank)-250(1)-250(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(This)-345(r)18(eduction)-345(is)-346(appr)18(opriate)-345(to)-345(compute)-345(the)-345(r)18(esults)-346(of)-345(multiple)-345(\050local\051)]TJ 12.453 -11.955 Td [(NRM2)-250(operations)-250(at)-250(the)-250(same)-250(time.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ + [-500(Denoting)-249(by)]TJ/F60 9.9626 Tf 69.789 0 Td [(d)-40(a)-25(t)]TJ/F60 7.5716 Tf 13.536 -1.96 Td [(i)]TJ/F62 9.9626 Tf 5.23 1.96 Td [(the)-249(value)-249(of)-248(the)-249(variable)]TJ/F60 9.9626 Tf 108.808 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 15.973 0 Td [(on)-249(pr)18(ocess)]TJ/F60 9.9626 Tf 49.078 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(,)-249(the)-249(output)]TJ/F60 9.9626 Tf 53.71 0 Td [(r)-17(e)-25(s)]TJ/F62 9.9626 Tf -306.635 -11.955 Td [(is)-250(equivalent)-250(to)-250(the)-250(computation)-250(of)]TJ/F60 9.9626 Tf 124.796 -25.468 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 15.061 0 Td [(=)]TJ/F17 9.9626 Tf 11.086 10.922 Td [(r)]TJ ET q -1 0 0 1 120.408 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 285.832 490.532 cm +[]0 d 0 J 0.389 w 0 0 m 30.512 0 l S Q BT -/F51 9.9626 Tf 123.397 545.696 Td [(a)]TJ +/F104 13.9477 Tf 285.957 477.344 Td [(\345)]TJ/F60 7.5716 Tf 4.245 -8.764 Td [(i)]TJ/F60 9.9626 Tf 8.364 10.836 Td [(d)-40(a)-25(t)]TJ/F62 7.5716 Tf 13.495 3.473 Td [(2)]TJ/F60 7.5716 Tf 0.041 -7.027 Td [(i)]TJ/F62 9.9626 Tf 4.243 3.554 Td [(,)]TJ -191.543 -30.806 Td [(with)-250(car)18(e)-250(taken)-250(to)-250(avoid)-250(unnecessary)-250(over\003ow)92(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ + -12.453 -19.926 Td [(3.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(4.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 309.258 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 333.043 cm +0 0 318.804 27.895 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 312.397 497.875 Td [(desc)]TJ +/F102 8.9664 Tf 137.205 350.279 Td [(call)]TJ +0 g 0 G + [-525(psb_nrm2\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf -48.393 -36.165 Td [(5.)]TJ +0 g 0 G + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 333.945 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 218.473 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 337.084 497.875 Td [(type)]TJ +/F102 8.9664 Tf 127.791 268.585 Td [(call)]TJ +0 g 0 G + [-525(psb_nrm2\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ + [(nrm2_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + -14.122 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(y)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-270(logical)-270(mask)-270(which)-271(is)-270(tr)8(ue)-270(for)-270(all)-270(corr)18(esponding)-270(entries)-270(of)]TJ/F52 9.9626 Tf 268.484 0 Td [(x)]TJ/F54 9.9626 Tf 7.897 0 Td [(that)-270(ar)18(e)-270(local)]TJ -261.995 -11.955 Td [(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F51 9.9626 Tf 128.666 0 Td [(local)]TJ/F54 9.9626 Tf -128.666 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ + [-525(psb_nrm2\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(This)-264(r)18(outine)-265(r)18(eturns)-264(a)]TJ/F59 9.9626 Tf 110.663 0 Td [(.true.)]TJ/F54 9.9626 Tf 34.017 0 Td [(value)-264(for)-265(those)-264(indices)-265(that)-264(ar)18(e)-265(local)-264(to)-265(the)]TJ -132.227 -11.955 Td [(curr)18(ent)-250(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices.)]TJ + [(nrm2_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - 139.477 -140.438 Td [(103)]TJ +0 g 0 G +/F62 9.9626 Tf 103.537 -134.311 Td [(133)]TJ 0 g 0 G ET endstream endobj -1578 0 obj +1858 0 obj << -/Length 3647 +/Length 5352 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.23)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.16)-1000(psb)]TJ ET q 1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(get)]TJ -ET -q -1 0 0 1 225.126 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 228.712 706.129 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ -0 g 0 G +/F59 11.9552 Tf 207.803 706.129 Td [(snd)-250(\227)-250(Send)-250(data)]TJ 0 g 0 G -/F59 9.9626 Tf -78.007 -18.964 Td [(call)-525(psb_get_boundary\050bndel,)-525(desc,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_snd\050ctxt,)-525(dat,)-525(dst,)-525(m\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(sends)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.907 -19.926 Td [(dat)]TJ 0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(The)-250(data)-250(to)-250(be)-250(sent.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.956 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F60 9.9626 Tf 197.687 0 Td [(m)]TJ/F62 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ 0 g 0 G - 0 -19.925 Td [(bndel)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(dst)]TJ 0 g 0 G -/F54 9.9626 Tf 31.541 0 Td [(The)-307(list)-307(of)-307(boundary)-307(elements)-307(on)-306(the)-307(calling)-307(pr)18(ocess,)-321(in)-307(local)-307(number)18(-)]TJ -6.635 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(a)-1(r)1(ray)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ +/F62 9.9626 Tf 18.809 0 Td [(Destination)-250(pr)18(ocess.)]TJ 6.098 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(d)-25(s)-25(t)]TJ/F69 10.3811 Tf 15.689 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(info)]TJ +/F59 9.9626 Tf -241.89 -31.88 Td [(m)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(m)]TJ/F69 10.3811 Tf 10.767 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 15.94 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F93 10.3811 Tf 9.257 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.955 Td [(When)]TJ/F60 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F60 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 15.94 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F93 10.3811 Tf 9.257 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(If)-219(ther)18(e)-220(ar)18(e)-219(no)-220(boundary)-219(elements)-219(\050i.e.,)-226(if)-219(the)-220(local)-219(part)-219(of)-220(the)-219(connectivity)]TJ 12.453 -11.956 Td [(graph)-391(is)-392(self-contained\051)-391(the)-392(output)-391(vector)-391(is)-392(set)-391(to)-391(the)-392(\223not)-391(allocated\224)]TJ 0 -11.955 Td [(state.)]TJ +/F59 11.9552 Tf 0 -21.918 Td [(Notes)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(Otherwise)-206(the)-205(size)-206(of)]TJ/F59 9.9626 Tf 105.891 0 Td [(bndel)]TJ/F54 9.9626 Tf 28.201 0 Td [(will)-206(be)-205(exactly)-206(equal)-206(to)-206(the)-205(number)-206(of)-206(bound-)]TJ -121.639 -11.955 Td [(ary)-250(elements.)]TJ + [-500(This)-292(subr)18(outine)-292(impl)1(ies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.454 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(destination)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 158.309 0 Td [(d)-25(s)-25(t)]TJ/F62 9.9626 Tf 12.797 0 Td [(.)]TJ 0 g 0 G - 139.477 -196.229 Td [(104)]TJ + -31.629 -104.573 Td [(134)]TJ 0 g 0 G ET endstream endobj -1585 0 obj +1863 0 obj << -/Length 3458 +/Length 5356 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.24)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.17)-1000(psb)]TJ ET q 1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(get)]TJ -ET -q -1 0 0 1 174.316 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.903 706.129 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(rcv)-250(\227)-250(Receive)-250(data)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -78.008 -18.964 Td [(call)-525(psb_get_overlap\050ovrel,)-525(desc,)-525(info\051)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_rcv\050ctxt,)-525(dat,)-525(src,)-525(m\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(r)18(eceives)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ + 0 -19.926 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.926 Td [(src)]TJ +0 g 0 G +/F62 9.9626 Tf 17.704 0 Td [(Sour)18(ce)-250(pr)18(ocess.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(s)-15(r)-17(c)]TJ/F69 10.3811 Tf 15.141 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -241.342 -31.881 Td [(m)]TJ +0 g 0 G +/F62 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(m)]TJ/F69 10.3811 Tf 10.767 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 15.94 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F93 10.3811 Tf 9.257 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.956 Td [(When)]TJ/F60 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F60 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 15.94 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F93 10.3811 Tf 9.257 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.368 0 Td [(The)-250(data)-250(to)-250(be)-250(r)18(eceived.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.955 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F60 9.9626 Tf 197.687 0 Td [(m)]TJ/F62 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(This)-292(subr)18(outine)-291(implies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.453 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(sour)18(ce)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 137.538 0 Td [(s)-15(r)-17(c)]TJ/F62 9.9626 Tf 12.249 0 Td [(.)]TJ +0 g 0 G + -10.31 -104.573 Td [(135)]TJ 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ ET -q -1 0 0 1 309.258 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1870 0 obj +<< +/Length 6241 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 312.397 577.576 Td [(desc)]TJ +/F59 14.3462 Tf 150.705 705.784 Td [(8)-1000(Error)-250(handling)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-382(PSBLAS)-382(library)-381(err)18(or)-382(handling)-382(policy)-382(has)-382(been)-382(comple)1(tely)-382(r)18(ewritten)-382(in)]TJ 0 -11.955 Td [(version)-359(2.0.)-638(The)-359(idea)-359(behind)-360(the)-359(design)-359(of)-359(this)-359(new)-360(err)18(or)-359(handling)-359(strategy)]TJ 0 -11.955 Td [(is)-303(to)-303(keep)-303(err)18(or)-303(messages)-303(on)-303(a)-303(stack)-303(allowing)-303(the)-303(user)-303(to)-303(trace)-303(back)-303(up)-303(to)-303(the)]TJ 0 -11.955 Td [(point)-317(wher)18(e)-318(the)-317(\002rst)-317(err)18(or)-318(message)-317(has)-318(been)-317(generated.)-512(Every)-317(r)18(outine)-318(in)-317(the)]TJ 0 -11.956 Td [(PSBLAS-2.0)-336(library)-336(has,)-358(as)-336(last)-337(non-optional)-336(ar)18(gument,)-358(an)-336(integer)]TJ/F67 9.9626 Tf 298.678 0 Td [(info)]TJ/F62 9.9626 Tf 24.271 0 Td [(vari-)]TJ -322.949 -11.955 Td [(able;)-364(whenever)74(,)-344(inside)-326(the)-326(r)18(outine,)-345(an)-326(err)18(or)-325(is)-326(detected,)-345(this)-326(variable)-326(is)-325(set)-326(to)]TJ 0 -11.955 Td [(a)-384(value)-384(corr)18(esponding)-384(to)-384(a)-384(speci\002c)-384(err)18(or)-384(code.)-711(Then)-384(this)-384(err)18(or)-384(code)-384(is)-384(also)]TJ 0 -11.955 Td [(pushed)-274(on)-273(the)-274(err)18(or)-274(stack)-274(and)-273(then)-274(either)-274(contr)18(ol)-274(is)-273(r)18(eturned)-274(to)-274(the)-273(caller)-274(r)18(ou-)]TJ 0 -11.955 Td [(tine)-342(or)-342(the)-342(execution)-343(is)-342(aborted,)-365(depending)-342(on)-342(the)-342(users)-342(choice.)-587(At)-342(the)-342(time)]TJ 0 -11.955 Td [(when)-243(the)-242(execution)-243(is)-242(aborted,)-244(an)-243(err)18(or)-242(message)-243(is)-243(p)1(rinted)-243(on)-243(standar)18(d)-242(output)]TJ 0 -11.956 Td [(with)-257(a)-256(level)-257(of)-256(verbosity)-257(than)-256(can)-257(be)-256(chosen)-257(by)-257(the)-256(user)74(.)-330(If)-256(the)-257(execution)-257(is)-256(not)]TJ 0 -11.955 Td [(aborted,)-259(then,)-259(the)-257(caller)-257(r)18(outine)-258(checks)-257(the)-257(value)-257(r)18(eturned)-257(in)-258(the)]TJ/F67 9.9626 Tf 284.621 0 Td [(info)]TJ/F62 9.9626 Tf 23.484 0 Td [(variable)]TJ -308.105 -11.955 Td [(and,)-290(if)-282(not)-282(zer)18(o,)-291(an)-282(err)18(or)-282(condition)-282(is)-282(raised.)-407(This)-282(pr)18(ocess)-282(continues)-282(on)-282(all)-282(the)]TJ 0 -11.955 Td [(levels)-203(of)-203(nested)-203(calls)-203(until)-203(the)-203(level)-203(wher)18(e)-202(the)-203(user)-203(decides)-203(to)-203(abort)-203(the)-203(pr)18(ogram)]TJ 0 -11.955 Td [(execution.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-286(5)]TJ +0 g 0 G + [-285(shows)-286(the)-286(layou)1(t)-286(of)-286(a)-285(generic)]TJ/F67 9.9626 Tf 172.064 0 Td [(psb_foo)]TJ/F62 9.9626 Tf 39.458 0 Td [(r)18(outine)-286(with)-285(r)18(espect)-286(to)-286(the)]TJ -226.466 -11.956 Td [(PSBLAS-2.0)-258(err)18(or)-259(handling)-258(policy)111(.)-335(It)-258(is)-258(possible)-259(to)-258(see)-258(how)92(,)-261(whenever)-258(an)-258(err)18(or)]TJ 0 -11.955 Td [(condition)-298(is)-298(detected,)-311(the)]TJ/F67 9.9626 Tf 114.879 0 Td [(info)]TJ/F62 9.9626 Tf 23.893 0 Td [(variable)-298(is)-299(set)-298(to)-298(the)-298(corr)18(esponding)-299(err)18(or)-298(code)]TJ -138.772 -11.955 Td [(which)-309(is,)-324(then,)-324(pushed)-310(on)-309(top)-309(of)-310(the)-309(stack)-309(by)-309(means)-310(of)-309(the)]TJ/F67 9.9626 Tf 265.277 0 Td [(psb_errpush)]TJ/F62 9.9626 Tf 57.534 0 Td [(.)-488(An)]TJ -322.811 -11.955 Td [(err)18(or)-325(condition)-326(may)-325(be)-326(dir)18(ectl)1(y)-326(detected)-325(inside)-326(a)-325(r)18(outine)-325(or)-326(indir)18(ectly)-325(check-)]TJ 0 -11.955 Td [(ing)-331(the)-331(err)18(or)-331(code)-331(r)18(eturned)-331(r)18(eturned)-331(by)-331(a)-331(called)-331(r)18(outine.)-553(Whenever)-331(an)-331(err)18(or)]TJ 0 -11.956 Td [(is)-253(encounter)18(ed,)-255(after)-253(it)-254(has)-253(been)-254(pushed)-253(on)-254(st)1(ack,)-255(the)-253(pr)18(ogram)-254(execution)-253(skips)]TJ 0 -11.955 Td [(to)-264(a)-265(point)-264(wher)18(e)-264(the)-265(err)18(or)-264(condition)-264(is)-264(handled;)-272(the)-264(err)18(or)-265(condition)-264(is)-264(handled)]TJ 0 -11.955 Td [(either)-336(by)-336(r)18(eturning)-336(contr)18(ol)-336(to)-336(the)-336(caller)-335(r)17(o)1(utine)-336(or)-336(by)-336(calling)-336(the)]TJ/F67 9.9626 Tf 291.408 0 Td [(psb\134_error)]TJ/F62 9.9626 Tf -291.408 -11.955 Td [(r)18(outine)-273(which)-274(prints)-273(the)-274(content)-273(of)-273(the)-274(err)18(or)-273(stack)-274(and)-273(aborts)-273(the)-274(pr)18(ogram)-273(ex-)]TJ 0 -11.955 Td [(ecution,)-373(accor)18(ding)-348(to)-348(the)-348(choice)-348(made)-348(by)-348(the)-348(user)-348(with)]TJ/F67 9.9626 Tf 252.305 0 Td [(psb_set_erraction)]TJ/F62 9.9626 Tf 88.915 0 Td [(.)]TJ -341.22 -11.955 Td [(The)-297(default)-296(is)-297(to)-296(print)-297(the)-297(err)18(or)-296(and)-297(terminate)-296(the)-297(pr)18(ogram,)-308(but)-297(the)-297(user)-296(may)]TJ 0 -11.956 Td [(choose)-250(to)-250(handle)-250(the)-250(err)18(or)-250(explicitly)111(.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-347(6)]TJ +0 g 0 G + [-348(r)18(eports)-347(a)-347(sample)-347(err)18(or)-348(message)-347(generated)-347(by)-348(the)-347(PSBLAS-2.0)-347(li-)]TJ -14.944 -11.955 Td [(brary)111(.)-539(This)-327(err)18(or)-326(has)-327(been)-326(generated)-327(by)-326(the)-326(fact)-327(that)-326(the)-327(user)-326(has)-327(chosen)-326(the)]TJ 0 -11.955 Td [(invalid)-379(\223FOO\224)-380(stor)1(a)-1(g)1(e)-380(format)-379(to)-379(r)18(epr)18(esent)-380(the)-379(sparse)-379(matrix.)-698(Fr)18(om)-380(this)-379(er)18(-)]TJ 0 -11.955 Td [(r)18(or)-394(message)-393(it)-394(is)-393(possible)-394(to)-394(se)1(e)-394(that)-394(the)-393(err)18(or)-394(has)-393(been)-394(detected)-394(inside)-393(the)]TJ/F67 9.9626 Tf 0 -11.955 Td [(psb_cest)]TJ/F62 9.9626 Tf 45.361 0 Td [(subr)18(outine)-353(called)-353(by)]TJ/F67 9.9626 Tf 95.326 0 Td [(psb_spasb)]TJ/F62 9.9626 Tf 50.591 0 Td [(...)-619(by)-354(pr)18(ocess)-353(0)-353(\050i.e.)-619(the)-353(r)18(oot)-354(pr)18(o-)]TJ -191.278 -11.956 Td [(cess\051.)]TJ +0 g 0 G + 164.384 -198.123 Td [(136)]TJ +0 g 0 G ET + +endstream +endobj +1876 0 obj +<< +/Length 10302 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG q -1 0 0 1 333.945 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 99.895 417.212 cm +0 0 343.711 292.902 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 337.084 577.576 Td [(type)]TJ +/F67 9.9626 Tf 102.884 698.757 Td [(subroutine)]TJ +0 g 0 G + [-525(psb_foo\050some)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(args,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 9.9626 Tf 15.691 -11.956 Td [(!...)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 0 -11.955 Td [(if)]TJ +0 g 0 G + [(\050error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(detected\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +0 g 0 G + 15.691 -11.955 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(errcode1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-525(psb_errpush\050)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [(\015psb_foo\015)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(errcode1\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(goto)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 9.9626 Tf 0 -11.956 Td [(!...)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [-525(psb_bar\050some)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ + [-525(args,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(ovrel)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ 0 g 0 G -/F54 9.9626 Tf 28.234 0 Td [(The)-250(list)-250(of)-250(overlap)-250(elements)-250(on)-250(the)-250(calling)-250(pr)18(ocess,)-250(in)-250(local)-250(numbering.)]TJ -3.327 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(array)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ + [(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(info)]TJ + [-525(.ne.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ + [-525(zero\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G - [-500(If)-287(ther)18(e)-287(ar)18(e)-287(no)-287(overlap)-287(elements)-287(the)-287(output)-287(vector)-287(is)-287(set)-287(to)-287(the)-287(\223not)-287(allo-)]TJ 12.453 -11.955 Td [(cated\224)-250(state.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ + 15.691 -11.955 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(Otherwise)-194(the)-194(size)-195(of)]TJ/F59 9.9626 Tf 105.434 0 Td [(ovrel)]TJ/F54 9.9626 Tf 28.087 0 Td [(will)-194(be)-194(exactly)-195(equal)-194(to)-194(the)-194(number)-195(of)-194(overlap)]TJ -121.068 -11.955 Td [(elements.)]TJ + [(errcode2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -220.139 Td [(105)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -ET - -endstream -endobj -1592 0 obj -<< -/Length 5480 ->> -stream + [-525(psb_errpush\050)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [(\015psb_foo\015)]TJ 0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.25)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(sp)]TJ -ET -q -1 0 0 1 221.133 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 224.719 706.129 Td [(getrow)-250(\227)-250(Extract)-250(row\050s\051)-250(from)-250(a)-250(sparse)-250(matrix)]TJ + [-525(errcode2\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(goto)]TJ 0 g 0 G -/F59 9.9626 Tf -74.014 -19.204 Td [(call)-525(psb_sp_getrow\050row,)-525(a,)-525(nz,)-525(ia,)-525(ja,)-525(val,)-525(info,)-525(&)]TJ 73.225 -11.955 Td [(&)-525(append,)-525(nzin,)-525(lrw\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -73.225 -22.29 Td [(T)90(ype:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.42 Td [(On)-250(Entry)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.421 Td [(row)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 9.9626 Tf 0 -11.955 Td [(!...)]TJ 0 g 0 G -/F54 9.9626 Tf 22.695 0 Td [(The)-250(\050\002rst\051)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 2.212 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F61 10.3811 Tf 104.322 0 Td [(>)]TJ/F54 9.9626 Tf 10.962 0 Td [(0.)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F67 9.9626 Tf -15.691 -11.955 Td [(9999)]TJ 0 g 0 G -/F51 9.9626 Tf -140.191 -20.42 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(fr)18(om)-250(which)-250(to)-250(get)-250(r)18(ows.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 495.976 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 495.777 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 495.976 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 495.777 Td [(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(continue)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 15.691 -11.955 Td [(if)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -20.421 Td [(append)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 39.292 0 Td [(Whether)-250(to)-250(append)-250(or)-250(overwrite)-250(existing)-250(output.)]TJ -14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value)-250(default:)-310(false)-250(\050overwrite\051.)]TJ + [-525(\050err_act)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(nzin)]TJ + [-525(.eq.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 25.454 0 Td [(Input)-250(size)-250(to)-250(be)-250(appended)-250(to.)]TJ -0.548 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-304(as:)-417(an)-303(integer)]TJ/F61 10.3811 Tf 106.988 0 Td [(>)]TJ/F54 9.9626 Tf 11.949 0 Td [(0.)-471(When)-303(append)-303(is)-304(tr)8(ue,)-317(speci\002es)-303(how)-304(many)]TJ -118.937 -11.955 Td [(entries)-250(in)-250(the)-250(output)-250(vectors)-250(ar)18(e)-250(alr)18(eady)-250(\002lled.)]TJ + [-525(act_abort\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(lrw)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 15.691 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(last)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F61 10.3811 Tf 104.323 0 Td [(>)]TJ/F54 9.9626 Tf 10.962 0 Td [(0,)-250(default:)]TJ/F52 9.9626 Tf 46.878 0 Td [(r)-17(o)-35(w)]TJ/F54 9.9626 Tf 16.134 0 Td [(.)]TJ + [-525(psb_error\050icontxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -203.203 -22.29 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(return)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(else)]TJ 0 g 0 G - 0 -20.42 Td [(nz)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(return)]TJ 0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(elements)-250(r)18(eturned)-250(by)-250(this)-250(call.)]TJ 8.856 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(ia)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -23.91 Td [(end)-525(subroutine)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(r)18(ow)-250(indices.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.025 0 Td [(attribute.)]TJ + [-525(psb_foo)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - -89.662 -29.887 Td [(106)]TJ 0 g 0 G +/F62 9.9626 Tf -2.989 -41.729 Td [(Listing)-289(5:)-387(The)-289(layout)-289(of)-289(a)-289(generic)]TJ/F67 9.9626 Tf 149.96 0 Td [(psb)]TJ ET - -endstream -endobj -1596 0 obj -<< -/Length 3529 ->> -stream +q +1 0 0 1 266.174 382.258 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 269.312 382.059 Td [(foo)]TJ/F62 9.9626 Tf 18.568 0 Td [(r)18(outine)-289(with)-289(r)18(espect)-288(to)-289(PSBLAS-2.0)]TJ -187.985 -11.955 Td [(err)18(or)-250(handling)-250(policy)111(.)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG +/F67 9.9626 Tf 0 -19.609 Td [(==========================================================)]TJ 0 g 0 G + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(ja)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -254.045 -19.925 Td [(val)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 18.82 0 Td [(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 148.761 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -233.692 -19.925 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(4010)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G - [-500(The)-307(output)]TJ/F52 9.9626 Tf 65.308 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 13.394 0 Td [(is)-307(always)-307(the)-307(size)-307(of)-307(the)-307(output)-307(generated)-307(by)-306(the)-307(curr)18(ent)]TJ -66.249 -11.955 Td [(call;)-283(thus,)-277(if)]TJ/F59 9.9626 Tf 53.971 0 Td [(append=.true.)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-278(the)-271(total)-272(output)-272(size)-272(will)-272(be)]TJ/F52 9.9626 Tf 129.372 0 Td [(n)-25(z)-18(i)-32(n)]TJ/F85 10.3811 Tf 21.286 0 Td [(+)]TJ/F52 9.9626 Tf 10.336 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(,)-277(with)]TJ -293.296 -11.955 Td [(the)-292(newly)-293(extracted)-292(coef)18(\002cients)-293(stor)18(ed)-292(in)-293(entries)]TJ/F59 9.9626 Tf 217.177 0 Td [(nzin+1:nzin+nz)]TJ/F54 9.9626 Tf 76.139 0 Td [(of)-292(the)]TJ -293.316 -11.955 Td [(array)-250(ar)18(guments;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G - [-500(When)]TJ/F59 9.9626 Tf 41.275 0 Td [(append=.true.)]TJ/F54 9.9626 Tf 70.485 0 Td [(the)-250(output)-250(arrays)-250(ar)18(e)-250(r)18(eallocated)-250(as)-250(necessary;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -111.76 -19.925 Td [(3.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(The)-218(r)18(ow)-218(and)-219(column)-218(indices)-218(ar)18(e)-218(r)18(eturned)-218(in)-219(the)-218(local)-218(numbering)-218(scheme;)]TJ 12.453 -11.955 Td [(if)-190(the)-190(global)-190(numbering)-190(is)-190(desir)18(ed,)-202(the)-190(user)-190(may)-190(employ)-190(the)]TJ/F59 9.9626 Tf 258.836 0 Td [(psb_loc_to_glob)]TJ/F54 9.9626 Tf -258.836 -11.955 Td [(r)18(outine)-250(on)-250(the)-250(output.)]TJ + [-525(df_sample)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -290.909 Td [(107)]TJ + [-525(from)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1606 0 obj -<< -/Length 3995 ->> -stream + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.26)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(memory)-250(occupation)-250(of)-250(a)-250(PSBLAS)-250(object.)]TJ + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 0 -21.918 Td [(isz)-525(=)-525(psb_sizeof\050a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050desc_a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050prec\051)]TJ +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(dist)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.955 Td [(==========================================================)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(a)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 72.97 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(.)]TJ -65.344 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 531.947 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 531.748 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 531.947 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 531.748 Td [(type)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 512.022 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 511.823 Td [(a)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(4010)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(Communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 464.201 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 464.002 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 464.201 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 464.002 Td [(type)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(prec)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 388.441 408.41 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 391.579 408.211 Td [(prec)]TJ -ET -q -1 0 0 1 413.128 408.41 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 416.266 408.211 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(On)-250(Return)]TJ + [-525(mat_distv)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(from)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(The)-322(memory)-322(occupation)-322(of)-323(the)-322(object)-322(speci\002ed)-322(in)-322(the)-322(calling)]TJ -47.87 -11.956 Td [(sequence,)-250(in)-250(bytes.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Returned)-250(as:)-310(an)]TJ/F59 9.9626 Tf 71.82 0 Td [(integer\050psb_long_int_k_\051)]TJ/F54 9.9626 Tf 128.019 0 Td [(number)74(.)]TJ + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -60.362 -242.057 Td [(108)]TJ + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1610 0 obj -<< -/Length 5626 ->> -stream +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(psb_spasb)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.956 Td [(==========================================================)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.27)-1000(Sorting)-250(utilities)-250(\227)]TJ 0 -20.164 Td [(psb)]TJ -ET -q -1 0 0 1 120.53 686.164 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 124.116 685.965 Td [(msort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Merge-sort)-250(algorithm)]TJ -24.221 -12.574 Td [(psb)]TJ -ET -q -1 0 0 1 120.53 673.59 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 124.116 673.391 Td [(qsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Quicksort)-250(algorithm)]TJ -24.221 -12.575 Td [(psb)]TJ -ET -q -1 0 0 1 120.53 661.016 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 124.116 660.816 Td [(hsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Heapsort)-250(algorithm)]TJ + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -24.221 -22.402 Td [(call)-525(psb_msort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_qsort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_hsort\050x,ix,dir,flag\051)]TJ/F54 9.9626 Tf 14.944 -21.783 Td [(These)-236(serial)-235(r)18(outines)-236(sort)-236(a)-235(sequence)]TJ/F52 9.9626 Tf 162.066 0 Td [(X)]TJ/F54 9.9626 Tf 9.884 0 Td [(into)-236(ascending)-235(or)-236(descending)-236(or)18(der)74(.)]TJ -186.894 -11.955 Td [(The)-243(ar)18(gument)-243(meaning)-243(is)-242(identical)-243(for)-243(the)-243(thr)18(ee)-243(calls;)-245(the)-243(only)-243(dif)18(fer)18(ence)-242(is)-243(the)]TJ 0 -11.955 Td [(algorithm)-250(used)-250(to)-250(accomplish)-250(the)-250(task)-250(\050see)-250(Usage)-250(Notes)-250(below\051.)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.783 Td [(T)90(ype:)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.402 Td [(On)-250(Entry)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(4010)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G - 0 -22.402 Td [(x)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(to)-250(be)-250(sorted.)]TJ 14.944 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(\050at)-250(least\051)-250(the)-250(same)-250(size)-250(as)]TJ/F52 9.9626 Tf 254.189 0 Td [(X)]TJ/F54 9.9626 Tf 7.537 0 Td [(.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -286.633 -22.402 Td [(dir)]TJ + [-525(psb_spasb)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(The)-250(desir)18(ed)-250(or)18(dering.)]TJ 6.645 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value:)]TJ + [-525(from)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -22.402 Td [(Integer)-250(and)-250(real)-250(data:)]TJ + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 101.28 0 Td [(psb_sort_up_)]TJ/F54 9.9626 Tf 62.764 0 Td [(,)]TJ/F59 9.9626 Tf 5.525 0 Td [(psb_sort_down_)]TJ/F54 9.9626 Tf 73.225 0 Td [(,)]TJ/F59 9.9626 Tf 5.525 0 Td [(psb_asort_up_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf -294.396 -11.955 Td [(psb_asort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(;)-250(default)]TJ/F59 9.9626 Tf 38.784 0 Td [(psb_sort_up_)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -201.921 -17.178 Td [(Complex)-250(data:)]TJ + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 70.286 0 Td [(psb_lsort_up_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_lsort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_asort_up_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 4.504 0 Td [(psb_asort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(;)]TJ -354.777 -11.956 Td [(default)]TJ/F59 9.9626 Tf 33.803 0 Td [(psb_lsort_up_)]TJ/F54 9.9626 Tf 67.994 0 Td [(.)]TJ +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(psb_cest)]TJ 0 g 0 G -/F51 9.9626 Tf -148.622 -22.402 Td [(\003ag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.955 Td [(==========================================================)]TJ 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Whether)-250(to)-250(keep)-250(the)-250(original)-250(values)-250(in)]TJ/F52 9.9626 Tf 171.52 0 Td [(I)-81(X)]TJ/F54 9.9626 Tf 11.661 0 Td [(.)]TJ -179.863 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.287 0 Td [(psb_sort_ovw_idx_)]TJ/F54 9.9626 Tf 90.809 0 Td [(or)]TJ/F59 9.9626 Tf 11.268 0 Td [(psb_sort_keep_idx_)]TJ/F54 9.9626 Tf 94.146 0 Td [(;)]TJ -324.51 -11.955 Td [(default)]TJ/F59 9.9626 Tf 33.803 0 Td [(psb_sort_ovw_idx_)]TJ/F54 9.9626 Tf 88.916 0 Td [(.)]TJ + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -147.626 -24.395 Td [(On)-250(Return)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -22.402 Td [(x)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(of)-250(values,)-250(in)-250(the)-250(chosen)-250(or)18(dering.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-238(integer)-237(array)-238(of)-237(rank)-238(1,)-240(whose)-238(entries)-237(ar)18(e)-238(moved)-237(to)-238(the)-238(same)-237(position)]TJ 0 -11.955 Td [(as)-250(the)-250(corr)18(esponding)-250(entries)-250(in)]TJ/F52 9.9626 Tf 138.215 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(136)]TJ 0 g 0 G - -3.943 -44.517 Td [(109)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G -ET - -endstream -endobj -1615 0 obj -<< -/Length 7300 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(Notes)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(For)-370(integer)-370(or)-370(r)18(eal)-370(data)-370(the)-370(sorting)-370(can)-370(be)-370(performed)-370(in)-370(the)-370(up/down)]TJ 12.453 -11.956 Td [(dir)18(ection,)-250(on)-250(the)-250(natural)-250(or)-250(absolute)-250(values;)]TJ + [-525(psb_cest)]TJ 0 -11.955 Td [(Format)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ + [-525(FOO)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(For)-329(complex)-330(data)-329(the)-330(sorting)-329(can)-330(be)-329(done)-329(in)-330(a)-329(lexicographic)-330(or)18(der)-329(\050i.e.:)]TJ 12.453 -11.955 Td [(sort)-263(on)-263(the)-263(r)18(eal)-263(part)-263(with)-263(ties)-263(br)18(oken)-263(accor)18(ding)-263(to)-263(the)-263(imaginary)-263(part\051)-263(or)]TJ 0 -11.955 Td [(on)-250(the)-250(absolute)-250(values;)]TJ + [-525(is)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.925 Td [(3.)]TJ +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(unknown)]TJ 0 g 0 G - [-500(The)-325(r)18(outines)-325(r)18(eturn)-325(the)-325(items)-324(in)-325(the)-325(chosen)-325(or)18(dering;)-362(the)-325(output)-325(dif)18(fer)18(-)]TJ 12.453 -11.956 Td [(ence)-244(is)-245(the)-244(handling)-244(of)-244(ties)-244(\050i.e.)-309(items)-244(with)-244(an)-244(equal)-245(v)1(alue\051)-245(in)-244(the)-244(original)]TJ 0 -11.955 Td [(input.)-316(W)55(ith)-252(the)-252(mer)18(ge-sort)-252(algorithm)-252(ties)-252(ar)18(e)-252(pr)18(eserved)-252(in)-252(the)-252(same)-252(r)18(ela-)]TJ 0 -11.955 Td [(tive)-278(or)18(der)-278(as)-278(they)-278(had)-278(in)-278(the)-278(original)-278(sequence,)-285(while)-278(this)-278(is)-278(not)-278(guaran-)]TJ 0 -11.955 Td [(teed)-250(for)-250(quicksort)-250(or)-250(heapsort;)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.956 Td [(==========================================================)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ + 0 -11.955 Td [(Aborting...)]TJ 0 g 0 G - [-500(If)]TJ/F52 9.9626 Tf 22.66 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F85 10.3811 Tf 20.72 0 Td [(=)]TJ/F52 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ +/F62 9.9626 Tf 0 -29.397 Td [(Listing)-364(6:)-537(A)-364(sample)-364(PSBLAS-3.0)-363(err)18(or)-364(message.)-651(Pr)18(ocess)-364(0)-364(detected)-363(an)-364(err)18(or)]TJ 0 -11.955 Td [(condition)-250(inside)-250(the)-250(psb)]TJ ET q -1 0 0 1 232.862 542.941 cm +1 0 0 1 206.215 153.925 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 235.975 542.742 Td [(s)-25(o)-25(r)-35(t)]TJ +/F62 9.9626 Tf 209.204 153.726 Td [(cest)-250(subr)18(outine)]TJ +0 g 0 G + 55.075 -63.288 Td [(137)]TJ +0 g 0 G ET -q -1 0 0 1 253.036 542.941 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1880 0 obj +<< +/Length 3570 +>> +stream +0 g 0 G +0 g 0 G BT -/F52 9.9626 Tf 256.149 542.742 Td [(o)-35(v)-25(w)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(8.1)-1000(psb)]TJ ET q -1 0 0 1 274.067 542.941 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F52 9.9626 Tf 277.11 542.742 Td [(i)-32(d)-42(x)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(error)-250(code)-250(onto)-250(the)-250(error)-250(stack)]TJ/F62 9.9626 Tf -49.379 -24.942 Td [(c)-175(a)-175(l)-174(l)-874(p)-98(s)-99(b)]TJ ET q -1 0 0 1 291.402 542.941 cm +1 0 0 1 200.841 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 296.503 542.742 Td [(then)-212(the)-212(entries)-212(in)]TJ/F52 9.9626 Tf 80.283 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-290(:)]TJ/F52 9.9626 Tf 13.381 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf 6.262 0 Td [(wher)18(e)]TJ/F52 9.9626 Tf 29.644 0 Td [(n)]TJ/F54 9.9626 Tf 7.776 0 Td [(is)-212(the)-212(size)]TJ -276.762 -11.956 Td [(of)]TJ/F52 9.9626 Tf 12.255 0 Td [(x)]TJ/F54 9.9626 Tf 8.411 0 Td [(ar)18(e)-322(initialized)-321(to)]TJ/F52 9.9626 Tf 76.228 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 8.364 0 Td [(\040)]TJ/F52 9.9626 Tf 14.651 0 Td [(i)]TJ/F54 9.9626 Tf 2.963 0 Td [(;)-358(thus,)-339(upon)-322(r)18(eturn)-321(fr)18(om)-322(the)-322(subr)18(outine,)]TJ -138.753 -11.955 Td [(for)-270(each)-271(index)]TJ/F52 9.9626 Tf 65.501 0 Td [(i)]TJ/F54 9.9626 Tf 5.657 0 Td [(we)-270(have)-271(in)]TJ/F52 9.9626 Tf 51.095 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.587 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.843 0 Td [(the)-270(position)-271(that)-270(the)-270(item)]TJ/F52 9.9626 Tf 114.324 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.844 0 Td [(occupied)]TJ -278.766 -11.955 Td [(in)-250(the)-250(original)-250(data)-250(sequence;)]TJ -0 g 0 G - -12.453 -19.925 Td [(5.)]TJ -0 g 0 G - [-500(If)]TJ/F52 9.9626 Tf 24.08 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F85 10.3811 Tf 22.648 0 Td [(=)]TJ/F52 9.9626 Tf 13.563 0 Td [(p)-25(s)-25(b)]TJ +/F62 9.9626 Tf 204.812 681.187 Td [(e)-99(r)-98(r)-99(p)-98(u)-99(s)-99(h)-232(\050)-266(e)-132(r)-132(r)]TJ ET q -1 0 0 1 238.138 487.15 cm +1 0 0 1 270.843 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 241.252 486.951 Td [(s)-25(o)-25(r)-35(t)]TJ +/F62 9.9626 Tf 275.151 681.187 Td [(c)-440(,)-825(r)]TJ ET q -1 0 0 1 258.312 487.15 cm +1 0 0 1 299.7 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 261.426 486.951 Td [(k)-30(e)-25(e)-80(p)]TJ +/F62 9.9626 Tf 303.188 681.187 Td [(n)-50(a)-50(m)-50(e)-276(,)-929(i)]TJ ET q -1 0 0 1 280.648 487.15 cm +1 0 0 1 348.561 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 283.692 486.951 Td [(i)-32(d)-42(x)]TJ +/F62 9.9626 Tf 353.087 681.187 Td [(e)-154(r)-155(r)-483(,)-920(a)]TJ ET q -1 0 0 1 297.983 487.15 cm +1 0 0 1 392.305 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 304.504 486.951 Td [(the)-355(r)18(ou)1(tine)-355(will)-354(assume)-355(that)-355(th)1(e)-355(entries)-355(i)1(n)]TJ/F52 9.9626 Tf -128.838 -11.955 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)]TJ/F85 10.3811 Tf 2.74 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(have)-250(alr)18(eady)-250(been)-250(initialized)-250(by)-250(the)-250(user;)]TJ -0 g 0 G - -34.75 -19.926 Td [(6.)]TJ -0 g 0 G - [-500(The)-270(thr)18(ee)-269(sorting)-270(algorithms)-269(have)-270(a)-269(similar)]TJ/F52 9.9626 Tf 205.79 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 9.9626 Tf 7.324 0 Td [(log)]TJ/F52 9.9626 Tf 15.663 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf 6.835 0 Td [(expected)-270(r)8(unning)]TJ -241.221 -11.955 Td [(time;)-278(in)-268(the)-268(average)-269(case)-268(quicksort)-268(will)-269(be)-268(the)-268(fastest)-269(and)-268(mer)18(ge-sort)-268(the)]TJ 0 -11.955 Td [(slowest.)-310(However)-250(note)-250(that:)]TJ +/F62 9.9626 Tf 396.74 681.187 Td [(e)-145(r)-145(r)-279(\051)]TJ 0 g 0 G - 5.321 -19.925 Td [(\050a\051)]TJ -0 g 0 G - [-500(The)-336(worst)-336(case)-336(r)8(unning)-336(time)-336(for)-337(quicksort)-336(is)]TJ/F52 9.9626 Tf 220.017 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 7.5716 Tf 5.664 3.616 Td [(2)]TJ/F85 10.3811 Tf 4.408 -3.616 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-379(the)-336(algorithm)]TJ -229.916 -11.955 Td [(implemented)-293(her)18(e)-293(follows)-293(the)-292(well-known)-293(median-of-thr)18(ee)-293(heuris-)]TJ 0 -11.956 Td [(tics,)-250(but)-250(the)-250(worst)-250(case)-250(may)-250(still)-250(apply;)]TJ 0 g 0 G - -17.125 -15.94 Td [(\050b\051)]TJ 0 g 0 G - [-500(The)-190(worst)-190(case)-190(r)8(unning)-190(time)-190(for)-190(mer)18(ge-sort)-190(and)-190(heap-sort)-190(is)]TJ/F52 9.9626 Tf 277.76 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 9.9626 Tf 7.324 0 Td [(log)]TJ/F52 9.9626 Tf 15.663 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf -301.684 -11.955 Td [(as)-250(the)-250(average)-250(case;)]TJ +/F59 9.9626 Tf -246.035 -27.895 Td [(T)90(ype:)]TJ 0 g 0 G - -16.04 -15.94 Td [(\050c\051)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(The)-244(mer)18(ge-sort)-244(algorithm)-243(is)-244(implemented)-244(to)-244(take)-244(advantage)-243(of)-244(sub-)]TJ 16.04 -11.955 Td [(sequences)-314(that)-313(may)-314(be)-314(alr)18(eady)-314(in)-313(the)-314(desir)18(ed)-314(or)18(dering)-314(prior)-313(to)-314(the)]TJ 0 -11.956 Td [(subr)18(outine)-390(call;)-459(this)-390(situation)-389(is)-390(r)18(elatively)-390(common)-389(when)-390(dealing)]TJ 0 -11.955 Td [(with)-335(gr)18(oups)-335(of)-335(indices)-335(of)-335(sparse)-336(matrix)-335(entries,)-356(thus)-335(mer)18(ge-sort)-335(is)]TJ 0 -11.955 Td [(the)-319(pr)18(eferr)18(ed)-318(choice)-319(when)-319(a)-318(sorting)-319(is)-319(needed)-318(by)-319(other)-319(r)18(outi)1(nes)-319(in)]TJ 0 -11.955 Td [(the)-250(library)111(.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 117.559 -193.275 Td [(110)]TJ 0 g 0 G + 0 -19.925 Td [(err)]TJ ET - -endstream -endobj -1628 0 obj -<< -/Length 171 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 164.035 613.641 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 14.3462 Tf 99.895 705.784 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +/F59 9.9626 Tf 167.023 613.442 Td [(c)]TJ 0 g 0 G -/F54 9.9626 Tf 164.384 -615.346 Td [(111)]TJ +/F62 9.9626 Tf 9.405 0 Td [(the)-250(err)18(or)-250(code)]TJ -0.817 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G -ET - -endstream -endobj -1632 0 obj -<< -/Length 5510 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.1)-1000(psb)]TJ +/F59 9.9626 Tf -24.906 -19.925 Td [(r)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 155.178 545.895 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(environment)]TJ -0 g 0 G +/F59 9.9626 Tf 158.167 545.696 Td [(name)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_init\050ctxt,)-525(np,)-525(basectxt,)-525(ids\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-214(subr)18(outine)-215(initializes)-214(the)-215(PSBLAS)-214(parallel)-215(envir)18(onment,)-221(de\002ning)-215(a)-214(vir)18(-)]TJ -14.944 -11.955 Td [(tual)-250(parallel)-250(machine.)]TJ +/F62 9.9626 Tf 29.888 0 Td [(the)-250(soutine)-250(wher)18(e)-250(the)-250(err)18(or)-250(has)-250(been)-250(caught.)]TJ -12.444 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(np)]TJ -0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(all)-250(available)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(basectxt)]TJ -0 g 0 G -/F54 9.9626 Tf 41.494 0 Td [(the)-321(initial)-321(communication)-321(context.)-524(The)-321(new)-321(context)-321(will)-321(be)-321(de\002ned)]TJ -16.587 -11.955 Td [(fr)18(om)-250(the)-250(pr)18(ocesses)-250(participating)-250(in)-250(the)-250(initial)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(MPI)]TJ +/F59 9.9626 Tf -24.906 -31.881 Td [(i)]TJ ET q -1 0 0 1 387.574 466.194 cm +1 0 0 1 154.62 466.194 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 390.563 465.994 Td [(COMM)]TJ +/F59 9.9626 Tf 157.609 465.994 Td [(err)]TJ +0 g 0 G +/F62 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ 0.289 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -31.881 Td [(a)]TJ ET q -1 0 0 1 424.904 466.194 cm +1 0 0 1 156.284 398.448 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 427.893 465.994 Td [(WORLD.)]TJ -0 g 0 G -/F51 9.9626 Tf -277.188 -19.925 Td [(ids)]TJ -0 g 0 G -/F54 9.9626 Tf 18.809 0 Td [(Identities)-306(of)-307(the)-306(pr)18(ocesses)-307(to)-306(use)-306(for)-307(the)-306(new)-306(context;)-335(the)-306(ar)18(gument)-307(is)-306(ig-)]TJ 6.097 -11.955 Td [(nor)18(ed)-388(when)]TJ/F59 9.9626 Tf 58.258 0 Td [(np)]TJ/F54 9.9626 Tf 14.324 0 Td [(is)-388(not)-388(speci\002ed.)-723(This)-388(allows)-388(the)-387(pr)18(ocesses)-388(in)-388(the)-388(new)]TJ -72.582 -11.955 Td [(envir)18(onment)-250(to)-250(be)-250(in)-250(an)-250(or)18(der)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(original)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)-560(Default:)-310(use)-250(the)-250(indices)]TJ/F85 10.3811 Tf 240.57 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.966 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ +/F59 9.9626 Tf 159.273 398.249 Td [(err)]TJ 0 g 0 G -/F51 9.9626 Tf -322.483 -21.918 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ -1.375 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ 0 g 0 G -0 g 0 G - 0 -19.925 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-285(communication)-284(context)-285(identifying)-285(the)-284(virtual)-285(parallel)-285(machine,)-293(type)]TJ/F59 9.9626 Tf 3.885 -11.955 Td [(psb_ctxt_type)]TJ/F54 9.9626 Tf 67.995 0 Td [(.)-327(Note)-256(that)-256(this)-256(is)-256(always)-256(a)-255(duplicate)-256(of)]TJ/F59 9.9626 Tf 174.426 0 Td [(basectxt)]TJ/F54 9.9626 Tf 41.842 0 Td [(,)-257(so)-256(that)]TJ -284.263 -11.955 Td [(library)-296(communications)-297(ar)18(e)-296(completely)-297(separated)-296(fr)18(om)-297(other)-296(communi-)]TJ 0 -11.955 Td [(cation)-250(operations.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS)-250(call.)]TJ -0 g 0 G - 0 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(It)-194(is)-195(an)-194(err)18(or)-194(to)-194(specify)-195(a)-194(value)-194(for)]TJ/F52 9.9626 Tf 158.156 0 Td [(n)-80(p)]TJ/F54 9.9626 Tf 13.378 0 Td [(gr)18(eater)-194(than)-195(the)-194(number)-194(of)-194(pr)18(ocesses)]TJ -159.081 -11.955 Td [(available)-250(in)-250(the)-250(underlying)-250(base)-250(parallel)-250(envir)18(onment.)]TJ -0 g 0 G - 139.477 -84.647 Td [(112)]TJ + 139.477 -271.945 Td [(138)]TJ 0 g 0 G ET endstream endobj -1638 0 obj +1886 0 obj << -/Length 4457 +/Length 1332 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.2)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(8.2)-1000(psb)]TJ ET q 1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(info)-211(\227)-211(Return)-211(information)-210(about)-211(PSBLAS)-211(parallel)-211(en-)]TJ -24.221 -13.948 Td [(vironment)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -26.9 -18.964 Td [(call)-525(psb_info\050ctxt,)-525(iam,)-525(np\051)]TJ/F54 9.9626 Tf 14.944 -21.917 Td [(This)-397(subr)18(outine)-396(r)18(eturns)-397(information)-397(about)-396(the)-397(PSBLAS)-397(parallel)-396(envir)18(on-)]TJ -14.944 -11.956 Td [(ment,)-250(de\002ning)-250(a)-250(virtual)-250(parallel)-250(machine.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ -0 g 0 G +/F59 11.9552 Tf 151.016 706.129 Td [(error)-306(\227)-306(Prints)-306(the)-306(error)-307(s)1(tack)-307(content)-306(and)-306(aborts)-306(exe-)]TJ -24.221 -13.948 Td [(cution)]TJ/F62 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-900(p)-126(s)-125(b)]TJ +ET +q +1 0 0 1 151.092 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 155.328 667.24 Td [(e)-125(r)-125(r)-126(o)-125(r)-259(\050)-279(i)-146(c)-146(o)-147(n)-146(t)-146(x)-146(t)-280(\051)]TJ 0 g 0 G - 0 -19.925 Td [(iam)]TJ 0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Identi\002er)-250(of)-250(curr)18(ent)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 2.77 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ/F83 10.3811 Tf 134.19 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 11.017 0 Td [(i)-47(a)-25(m)]TJ/F83 10.3811 Tf 18.677 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1)]TJ 0 g 0 G -/F51 9.9626 Tf -239.579 -19.925 Td [(np)]TJ +/F59 9.9626 Tf -55.433 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(For)-396(pr)18(ocesses)-396(in)-395(the)-396(virtual)-396(parallel)-396(machine)-396(the)-396(ident)1(i\002er)-396(will)-396(satisfy)]TJ 12.453 -11.955 Td [(0)]TJ/F83 10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 11.016 0 Td [(i)-47(a)-25(m)]TJ/F83 10.3811 Tf 18.678 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1;)]TJ 0 g 0 G - -84.741 -19.925 Td [(2.)]TJ + 0 -19.925 Td [(icontxt)]TJ 0 g 0 G - [-500(If)-349(the)-349(user)-350(has)-349(r)18(equested)-349(on)]TJ/F59 9.9626 Tf 142.217 0 Td [(psb_init)]TJ/F54 9.9626 Tf 45.321 0 Td [(a)-349(number)-349(of)-350(pr)18(ocesses)-349(less)-349(than)]TJ -175.085 -11.956 Td [(the)-321(total)-322(available)-321(in)-321(the)-322(parallel)-321(execution)-322(envir)18(onment,)-339(the)-321(r)18(emaining)]TJ 0 -11.955 Td [(pr)18(ocesses)-229(will)-228(have)-229(on)-229(r)18(eturn)]TJ/F52 9.9626 Tf 130.21 0 Td [(i)-47(a)-25(m)]TJ/F85 10.3811 Tf 18.678 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1;)-236(the)-229(on)1(ly)-229(call)-229(involving)]TJ/F59 9.9626 Tf 110.162 0 Td [(ctxt)]TJ/F54 9.9626 Tf 23.2 0 Td [(that)]TJ -301.53 -11.955 Td [(any)-250(such)-250(pr)18(ocess)-250(may)-250(execute)-250(is)-250(to)]TJ/F59 9.9626 Tf 155.296 0 Td [(psb_exit)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ +/F62 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G - -57.662 -174.311 Td [(113)]TJ + 139.477 -461.235 Td [(139)]TJ 0 g 0 G ET endstream endobj -1644 0 obj +1893 0 obj << -/Length 4180 +/Length 1526 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.3)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(8.3)-1000(psb)]TJ ET q 1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS)-250(parallel)-250(environment)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_exit\050ctxt\051)]TJ 0 -11.955 Td [(call)-525(psb_exit\050ctxt,close\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(parallel)-250(virtual)-250(machine.)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(set)]TJ +ET +q +1 0 0 1 217.809 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 221.396 706.129 Td [(errverbosity)-190(\227)-190(Sets)-190(the)-190(verbosity)-190(of)-190(error)-190(messages)]TJ/F62 9.9626 Tf -68.95 -24.942 Td [(c)-175(a)-175(l)-174(l)-921(p)-147(s)-146(b)]TJ +ET +q +1 0 0 1 202.736 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 207.181 681.187 Td [(s)-146(e)-146(t)]TJ +ET +q +1 0 0 1 224.391 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 228.836 681.187 Td [(e)-146(r)-146(r)-146(v)-147(e)-146(r)-146(b)-146(o)-146(s)-146(i)-146(t)-147(y)-279(\050)-151(v)-151(\051)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(close)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(Whether)-369(to)-368(close)-369(all)-369(data)-368(str)8(uctur)18(es)-369(r)18(elated)-369(to)-368(the)-369(virtual)-369(parall)1(el)-369(ma-)]TJ -2.759 -11.955 Td [(chine,)-250(besides)-250(those)-250(associated)-250(with)-250(ctxt.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(variable,)-250(default)-250(value:)-310(tr)8(ue.)]TJ/F51 11.9552 Tf -24.907 -19.925 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -78.131 -27.895 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(This)-376(r)18(outine)-376(may)-377(be)-376(called)-376(even)-376(if)-377(a)-376(pr)18(evious)-376(call)-376(to)]TJ/F59 9.9626 Tf 255.069 0 Td [(psb_info)]TJ/F54 9.9626 Tf 45.591 0 Td [(has)-376(r)18(e-)]TJ -288.206 -11.955 Td [(turned)-251(with)]TJ/F52 9.9626 Tf 55.156 0 Td [(i)-47(a)-25(m)]TJ/F85 10.3811 Tf 18.696 0 Td [(=)]TJ/F83 10.3811 Tf 11.104 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1;)-251(indeed,)-252(it)-251(it)-251(i)1(s)-251(the)-251(only)-251(r)18(outine)-251(that)-251(may)-251(be)-251(called)]TJ -93.15 -11.955 Td [(with)-250(ar)18(gument)]TJ/F59 9.9626 Tf 68.133 0 Td [(ctxt)]TJ/F54 9.9626 Tf 23.412 0 Td [(in)-250(this)-250(situation.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - -103.999 -19.926 Td [(2.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(A)-269(call)-269(to)-268(this)-269(r)18(outine)-269(with)]TJ/F59 9.9626 Tf 128.502 0 Td [(close=.true.)]TJ/F54 9.9626 Tf 65.442 0 Td [(implies)-269(a)-268(call)-269(to)]TJ/F59 9.9626 Tf 72.059 0 Td [(MPI_Finalize)]TJ/F54 9.9626 Tf 62.764 0 Td [(,)]TJ -316.313 -11.955 Td [(after)-250(which)-250(no)-250(parallel)-250(r)18(outine)-250(may)-250(be)-250(called.)]TJ 0 g 0 G - -12.454 -19.925 Td [(3.)]TJ + 0 -19.925 Td [(v)]TJ 0 g 0 G - [-500(If)-289(the)-288(user)-288(whishes)-289(to)-288(use)-289(multiple)-288(communication)-289(contexts)-288(in)-289(the)-288(same)]TJ 12.454 -11.955 Td [(pr)18(ogram,)-401(or)-371(to)-371(enter)-371(and)-371(exit)-371(multiple)-371(times)-370(into)-371(the)-371(parallel)-371(envir)18(on-)]TJ 0 -11.956 Td [(ment,)-425(this)-389(r)18(outine)-390(may)-390(be)-390(called)-389(to)-390(selectively)-390(close)-390(the)-389(contexts)-390(with)]TJ/F59 9.9626 Tf 0 -11.955 Td [(close=.false.)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-202(while)-190(on)-190(the)-190(last)-190(call)-190(it)-190(should)-190(be)-190(called)-190(with)]TJ/F59 9.9626 Tf 196.919 0 Td [(close=.true.)]TJ/F54 9.9626 Tf -264.913 -11.955 Td [(to)-250(shutdown)-250(in)-250(a)-250(clean)-250(way)-250(the)-250(entir)18(e)-250(parallel)-250(envir)18(onment.)]TJ +/F62 9.9626 Tf 10.52 0 Td [(the)-250(verbosity)-250(level)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G - 139.477 -212.169 Td [(114)]TJ + 139.477 -475.183 Td [(140)]TJ 0 g 0 G ET endstream endobj -1651 0 obj +1899 0 obj << -/Length 2476 +/Length 2016 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.4)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(8.4)-1000(psb)]TJ ET q 1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(get)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(set)]TJ ET q -1 0 0 1 168.338 706.328 cm +1 0 0 1 166.999 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 171.925 706.129 Td [(mpi)]TJ +/F59 11.9552 Tf 170.586 706.129 Td [(erraction)-223(\227)-223(Set)-222(the)-223(type)-223(of)-223(action)-223(to)-222(be)-223(taken)-223(upon)]TJ -43.792 -13.948 Td [(error)-250(condition)]TJ/F62 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-926(p)-151(s)-151(b)]TJ ET q -1 0 0 1 194.556 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 152.113 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 156.605 667.24 Td [(s)-151(e)-151(t)]TJ +ET +q +1 0 0 1 173.955 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 178.447 667.24 Td [(e)-151(r)-151(r)-150(a)-151(c)-151(t)-151(i)-151(o)-151(n)-284(\050)-296(e)-163(r)-162(r)]TJ +ET +q +1 0 0 1 257.102 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 198.143 706.129 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ +/F62 9.9626 Tf 261.712 667.24 Td [(a)-163(c)-162(t)-296(\051)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -98.248 -18.964 Td [(icomm)-525(=)-525(psb_get_mpi_comm\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-417(func)1(tion)-417(r)18(eturns)-416(the)-417(MPI)-416(communicator)-417(associated)-416(with)-417(a)-416(PSBLAS)]TJ -14.944 -11.955 Td [(context)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -161.817 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ + 0 -19.925 Td [(err)]TJ +ET +q +1 0 0 1 113.225 599.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 116.214 599.494 Td [(act)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 17.703 0 Td [(the)-250(type)-250(of)-250(action.)]TJ -9.115 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)-310(Possible)-250(values:)]TJ/F67 9.9626 Tf 179.116 0 Td [(psb_act_ret)]TJ/F62 9.9626 Tf 57.534 0 Td [(,)]TJ/F67 9.9626 Tf 4.981 0 Td [(psb_act_abort)]TJ/F62 9.9626 Tf 67.995 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ + -170.149 -461.235 Td [(141)]TJ 0 g 0 G +ET + +endstream +endobj +1905 0 obj +<< +/Length 507 +>> +stream 0 g 0 G - 0 -19.926 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-372(MPI)-371(communicator)-372(associated)-371(with)-372(the)-372(PSBLAS)-371(virtual)]TJ -47.87 -11.955 Td [(parallel)-250(machine.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F54 9.9626 Tf 34.165 0 Td [(The)-230(subr)18(outine)-230(version)]TJ/F59 9.9626 Tf 103.913 0 Td [(psb_get_mpicomm)]TJ/F54 9.9626 Tf 80.748 0 Td [(is)-230(still)-230(available)-230(but)-230(is)-230(depr)18(e-)]TJ -218.826 -11.955 Td [(cated.)]TJ +BT +/F59 14.3462 Tf 150.705 705.784 Td [(9)-1000(Utilities)]TJ/F62 9.9626 Tf 0 -22.702 Td [(W)92(e)-323(have)-322(some)-323(utilities)-322(available)-323(for)-322(input)-323(and)-323(output)-322(of)-323(sparse)-322(matrices;)-359(the)]TJ 0 -11.955 Td [(interfaces)-250(to)-250(these)-250(r)18(outines)-250(ar)18(e)-250(available)-250(in)-250(the)-250(module)]TJ/F67 9.9626 Tf 242.009 0 Td [(psb_util_mod)]TJ/F62 9.9626 Tf 62.764 0 Td [(.)]TJ 0 g 0 G - 164.384 -319.766 Td [(115)]TJ + -140.39 -580.689 Td [(142)]TJ 0 g 0 G ET endstream endobj -1656 0 obj +1910 0 obj << -/Length 3337 +/Length 4553 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.5)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(get)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(9.1)-1206(hb)]TJ ET q -1 0 0 1 219.148 706.328 cm +1 0 0 1 144.589 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 222.735 706.129 Td [(mpi)]TJ +/F59 11.9552 Tf 148.175 706.129 Td [(read)-206(\227)-207(Read)-206(a)-207(sparse)-206(matrix)-207(fr)1(om)-207(a)-206(\002le)-207(in)-206(the)-207(Harwell\226)]TJ -21.381 -13.948 Td [(Boeing)-250(format)]TJ/F62 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-865(h)-90(b)]TJ ET q -1 0 0 1 245.365 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 144.379 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 248.952 706.129 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ +/F62 9.9626 Tf 148.265 667.24 Td [(r)-90(e)-90(a)-90(d)-223(\050)-167(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-871(f)-97(i)-96(l)-96(e)-96(n)-96(a)-97(m)-96(e)-367(,)-791(b)-206(,)-919(m)-143(t)-144(i)-143(t)-143(l)-144(e)-277(\051)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -98.247 -18.964 Td [(rank)-525(=)-525(psb_get_mpi_rank\050ctxt,)-525(id\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(function)-250(r)18(eturns)-250(the)-250(MPI)-250(rank)-250(of)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 257.337 0 Td [(i)-32(d)]TJ 0 g 0 G -/F51 9.9626 Tf -272.281 -19.925 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -48.37 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ + 0 -19.925 Td [(\002lename)]TJ +0 g 0 G +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.534 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F67 9.9626 Tf 65.185 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0 g 0 G +/F62 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 347.544 442.084 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(id)]TJ +/F59 9.9626 Tf -268.571 -19.925 Td [(b)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(Identi\002er)-250(of)-250(a)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(0)]TJ/F83 10.3811 Tf 141.938 0 Td [(\024)]TJ/F52 9.9626 Tf 11.017 0 Td [(i)-32(d)]TJ/F83 10.3811 Tf 11.086 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.503 0 Td [(\000)]TJ/F54 9.9626 Tf 10.132 0 Td [(1)]TJ +/F62 9.9626 Tf 11.069 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(2)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.956 Td [(attribute;)-361(will)-324(be)-324(al)1(located)-324(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side,)-250(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ 0 g 0 G -/F51 9.9626 Tf -223.669 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(mtitle)]TJ 0 g 0 G +/F62 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(A)-337(charachter)-338(variable)-337(of)-338(l)1(ength)-338(72)-337(holding)-338(a)-337(copy)-338(of)-337(the)-337(matrix)-338(title)-337(as)]TJ 0 -11.956 Td [(speci\002ed)-250(by)-250(the)-250(Harwell-Boeing)-250(format)-250(and)-250(contained)-250(in)-250(the)-250(input)-250(\002le.)]TJ 0 g 0 G - 0 -19.925 Td [(Funciton)-250(value)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(iret)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(MPI)-250(rank)-250(associated)-250(with)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 224.291 0 Td [(i)-32(d)]TJ/F54 9.9626 Tf 8.195 0 Td [(.)]TJ -280.356 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F54 9.9626 Tf 35.734 0 Td [(The)-388(subr)18(outine)-387(version)]TJ/F59 9.9626 Tf 108.62 0 Td [(psb_get_rank)]TJ/F54 9.9626 Tf 66.626 0 Td [(is)-388(still)-387(available)-388(but)-387(is)-388(depr)18(e-)]TJ -210.98 -11.955 Td [(cated.)]TJ +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 164.384 -275.93 Td [(116)]TJ + 139.477 -184.274 Td [(143)]TJ 0 g 0 G ET endstream endobj -1660 0 obj +1917 0 obj << -/Length 1155 +/Length 4948 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.6)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(9.2)-1000(hb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 192.93 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(wtime)-250(\227)-250(W)74(all)-250(clock)-250(timing)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(time)-525(=)-525(psb_wtime\050\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-298(function)-298(r)18(eturns)-299(a)-298(wall)-298(clock)-298(timer)74(.)-455(The)-298(r)18(esolution)-298(of)-299(the)-298(timer)-298(is)-298(de-)]TJ -14.944 -11.955 Td [(pendent)-250(on)-250(the)-250(underlying)-250(parallel)-250(envir)18(onment)-250(implementation.)]TJ +/F59 11.9552 Tf 196.517 706.129 Td [(write)-298(\227)-297(W)74(rite)-298(a)-298(sparse)-298(matrix)-297(to)-298(a)-298(\002le)-298(in)-297(the)-298(Harwell\226)]TJ -18.913 -13.948 Td [(Boeing)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-884(h)-109(b)]TJ +ET +q +1 0 0 1 195.753 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 199.827 667.24 Td [(w)-109(r)-109(i)-109(t)-109(e)-242(\050)-167(a)-241(,)-927(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-129(t)-435(,)-871(f)-96(i)-96(l)-96(e)-97(n)-96(a)-96(m)-96(e)-368(,)-816(k)-42(e)-42(y)-259(,)-855(r)-79(h)-80(s)-335(,)-918(m)-144(t)-143(i)-144(t)-143(l)-143(e)-277(\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Exit)]TJ +/F59 9.9626 Tf -49.122 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -19.926 Td [(Function)-250(value)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(the)-250(elapsed)-250(time)-250(in)-250(seconds.)]TJ -47.87 -11.955 Td [(Returned)-250(as:)-310(a)]TJ/F59 9.9626 Tf 66.022 0 Td [(real\050psb_dpk_\051)]TJ/F54 9.9626 Tf 75.715 0 Td [(variable.)]TJ 0 g 0 G - -2.26 -491.123 Td [(117)]TJ + 0 -19.925 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.137 0 Td [(psb)]TJ ET - -endstream -endobj -1664 0 obj -<< -/Length 1388 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 360.068 575.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.7)-1000(psb)]TJ +/F67 9.9626 Tf 363.206 575.584 Td [(Tspmat)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 395.216 575.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(barrier)-240(\227)-240(Sinchronization)-239(point)-240(parallel)-240(environment)]TJ +/F67 9.9626 Tf 398.354 575.584 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_barrier\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-358(subr)18(outine)-357(acts)-358(as)-358(an)-358(explicit)-357(synchr)18(onization)-358(point)-358(for)-357(the)-358(PSBLAS)]TJ -14.944 -11.955 Td [(parallel)-250(virtual)-250(machine.)]TJ +/F59 9.9626 Tf -268.57 -19.926 Td [(b)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(1)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.955 Td [(attribute;)-361(will)-324(be)-324(allocated)-323(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -24.906 -19.926 Td [(\002lename)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.368 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.534 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.765 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-234(\050i)1(.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.955 Td [(is)-250(used.)-310(Default:)]TJ/F67 9.9626 Tf 74.799 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -104.935 -19.926 Td [(iunit)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ +/F62 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.758 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F59 9.9626 Tf -317.894 -19.925 Td [(key)]TJ 0 g 0 G - 139.477 -455.258 Td [(118)]TJ +/F62 9.9626 Tf 21.589 0 Td [(Matrix)-250(key)111(.)]TJ 3.317 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(A)-291(charachter)-291(variable)-291(of)-291(length)-291(8)-291(holding)-291(the)-290(matrix)-291(key)-291(as)-291(speci\002ed)-291(by)]TJ 0 -11.955 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(mtitle)]TJ +0 g 0 G +/F62 9.9626 Tf 32.089 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(A)-239(charachter)-239(variable)-239(of)-239(length)-240(72)-239(holding)-239(the)-239(matrix)-239(title)-239(as)-239(speci\002ed)-239(by)]TJ 0 -11.956 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -21.917 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(iret)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.433 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.477 -128.483 Td [(144)]TJ 0 g 0 G ET endstream endobj -1668 0 obj +1924 0 obj << -/Length 1283 +/Length 3542 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.8)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(9.3)-1000(mm)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 148.768 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ +/F59 11.9552 Tf 152.354 706.129 Td [(mat)]TJ +ET +q +1 0 0 1 173.658 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 177.245 706.129 Td [(read)-202(\227)-203(Read)-202(a)-203(sparse)-202(matrix)-203(from)-202(a)-203(\002le)-202(in)-203(the)-202(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-810(m)-35(m)]TJ +ET +q +1 0 0 1 149.022 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 152.359 667.24 Td [(m)-35(a)-35(t)]TJ +ET +q +1 0 0 1 171.029 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.367 667.24 Td [(r)-35(e)-35(a)-35(d)-169(\050)-166(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-107(a)-106(m)-107(e)-241(\051)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_abort\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(aborts)-250(computation)-250(on)-250(the)-250(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -74.472 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ + 0 -19.925 Td [(\002lename)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.534 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F67 9.9626 Tf 65.185 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G - 139.477 -467.213 Td [(119)]TJ +/F59 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0 g 0 G +/F62 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 347.544 442.084 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.571 -19.925 Td [(iret)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.477 -307.811 Td [(145)]TJ 0 g 0 G ET endstream endobj -1567 0 obj +1822 0 obj << /Type /ObjStm /N 100 -/First 970 -/Length 9451 ->> -stream -392 0 1566 58 1562 117 1570 224 1568 363 1572 510 396 568 1573 625 1569 683 1577 790 -1575 929 1579 1074 400 1133 1580 1191 1581 1250 1576 1309 1584 1403 1582 1542 1586 1687 405 1745 -1587 1802 1588 1860 1583 1918 1591 2012 1589 2151 1593 2296 409 2355 1590 2413 1595 2534 1597 2652 -1598 2710 1599 2768 1600 2826 1594 2884 1605 3004 1601 3161 1602 3306 1603 3453 1607 3600 413 3659 -1604 3717 1609 3824 1611 3942 417 4000 1608 4057 1614 4164 1616 4282 1617 4341 1618 4400 1619 4459 -1620 4518 1621 4577 1622 4636 1623 4695 1624 4754 1625 4813 1613 4872 1627 4992 1629 5110 421 5168 -1626 5225 1631 5306 1633 5424 425 5483 1634 5541 1635 5600 1630 5659 1637 5792 1639 5910 429 5968 -1640 6025 1641 6082 1636 6138 1643 6271 1645 6389 433 6448 1646 6506 1647 6565 1648 6624 1642 6683 -1650 6816 1652 6934 437 6992 1649 7049 1655 7143 1657 7261 441 7320 1654 7378 1659 7498 1661 7616 -445 7674 1658 7731 1663 7825 1665 7943 449 8002 1662 8060 1667 8154 1669 8272 453 8330 1666 8387 -% 392 0 obj +/First 972 +/Length 9404 +>> +stream +1819 0 1821 118 473 177 1818 235 1824 382 1826 500 1827 558 1828 616 1829 674 1823 732 +1831 869 1833 987 477 1046 1830 1104 1835 1251 1837 1369 1838 1427 1839 1485 1840 1543 1834 1601 +1842 1738 1844 1856 481 1915 1841 1973 1846 2120 1848 2238 1849 2296 1850 2354 1852 2411 1853 2469 +1854 2527 1845 2585 1857 2764 1859 2882 485 2941 1860 2999 1856 3058 1862 3205 1864 3323 489 3381 +1865 3438 1861 3496 1869 3643 1866 3791 1867 3939 1871 4087 493 4146 1868 4204 1875 4298 1877 4416 +1872 4474 1873 4532 1874 4590 1879 4686 1881 4804 497 4863 1882 4921 1883 4980 1878 5039 1885 5120 +1887 5238 501 5296 1888 5353 1889 5410 1884 5468 1892 5549 1894 5667 505 5726 1895 5784 1896 5843 +1891 5902 1898 5983 1900 6101 509 6159 1901 6216 1902 6273 1897 6331 1904 6425 1906 6543 513 6602 +1903 6660 1909 6754 1907 6893 1911 7038 517 7096 1912 7153 1913 7211 1908 7269 1916 7363 1914 7502 +1918 7647 521 7706 1919 7764 1920 7823 1915 7882 1923 7976 1921 8115 1925 8260 525 8318 1926 8375 +% 1819 0 obj << -/D [1563 0 R /XYZ 150.705 716.092 null] +/Type /Page +/Contents 1820 0 R +/Resources 1818 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1817 0 R >> -% 1566 0 obj +% 1821 0 obj << -/D [1563 0 R /XYZ 150.705 382.093 null] +/D [1819 0 R /XYZ 149.705 753.953 null] >> -% 1562 0 obj +% 473 0 obj +<< +/D [1819 0 R /XYZ 150.705 716.092 null] +>> +% 1818 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1570 0 obj +% 1824 0 obj << /Type /Page -/Contents 1571 0 R -/Resources 1569 0 R +/Contents 1825 0 R +/Resources 1823 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1568 0 R ] +/Parent 1817 0 R >> -% 1568 0 obj +% 1826 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 494.069 359.001 506.129] -/A << /S /GoTo /D (descdata) >> +/D [1824 0 R /XYZ 98.895 753.953 null] >> -% 1572 0 obj +% 1827 0 obj << -/D [1570 0 R /XYZ 98.895 753.953 null] +/D [1824 0 R /XYZ 99.895 528.579 null] >> -% 396 0 obj +% 1828 0 obj << -/D [1570 0 R /XYZ 99.895 716.092 null] +/D [1824 0 R /XYZ 99.895 494.104 null] >> -% 1573 0 obj +% 1829 0 obj << -/D [1570 0 R /XYZ 99.895 258.556 null] +/D [1824 0 R /XYZ 99.895 403.265 null] >> -% 1569 0 obj +% 1823 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1577 0 obj +% 1831 0 obj << /Type /Page -/Contents 1578 0 R -/Resources 1576 0 R +/Contents 1832 0 R +/Resources 1830 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1575 0 R ] +/Parent 1817 0 R >> -% 1575 0 obj +% 1833 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 409.811 585.83] -/A << /S /GoTo /D (descdata) >> +/D [1831 0 R /XYZ 149.705 753.953 null] >> -% 1579 0 obj +% 477 0 obj << -/D [1577 0 R /XYZ 149.705 753.953 null] +/D [1831 0 R /XYZ 150.705 716.092 null] >> -% 400 0 obj +% 1830 0 obj << -/D [1577 0 R /XYZ 150.705 716.092 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1580 0 obj +% 1835 0 obj << -/D [1577 0 R /XYZ 150.705 358.183 null] +/Type /Page +/Contents 1836 0 R +/Resources 1834 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1817 0 R >> -% 1581 0 obj +% 1837 0 obj << -/D [1577 0 R /XYZ 150.705 314.403 null] +/D [1835 0 R /XYZ 98.895 753.953 null] >> -% 1576 0 obj +% 1838 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1835 0 R /XYZ 99.895 528.579 null] >> -% 1584 0 obj +% 1839 0 obj << -/Type /Page -/Contents 1585 0 R -/Resources 1583 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1582 0 R ] +/D [1835 0 R /XYZ 99.895 494.104 null] >> -% 1582 0 obj +% 1840 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 573.77 359.001 585.83] -/A << /S /GoTo /D (descdata) >> +/D [1835 0 R /XYZ 99.895 403.265 null] >> -% 1586 0 obj +% 1834 0 obj << -/D [1584 0 R /XYZ 98.895 753.953 null] +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> +/ProcSet [ /PDF /Text ] >> -% 405 0 obj +% 1842 0 obj << -/D [1584 0 R /XYZ 99.895 716.092 null] +/Type /Page +/Contents 1843 0 R +/Resources 1841 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1817 0 R >> -% 1587 0 obj +% 1844 0 obj << -/D [1584 0 R /XYZ 99.895 370.138 null] +/D [1842 0 R /XYZ 149.705 753.953 null] >> -% 1588 0 obj +% 481 0 obj << -/D [1584 0 R /XYZ 99.895 338.313 null] +/D [1842 0 R /XYZ 150.705 716.092 null] >> -% 1583 0 obj +% 1841 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1591 0 obj +% 1846 0 obj << /Type /Page -/Contents 1592 0 R -/Resources 1590 0 R +/Contents 1847 0 R +/Resources 1845 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1589 0 R ] +/Parent 1855 0 R >> -% 1589 0 obj +% 1848 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 491.971 420.271 504.031] -/A << /S /GoTo /D (spdata) >> +/D [1846 0 R /XYZ 98.895 753.953 null] >> -% 1593 0 obj +% 1849 0 obj << -/D [1591 0 R /XYZ 149.705 753.953 null] +/D [1846 0 R /XYZ 99.895 564.444 null] >> -% 409 0 obj +% 1850 0 obj << -/D [1591 0 R /XYZ 150.705 716.092 null] +/D [1846 0 R /XYZ 99.895 529.97 null] >> -% 1590 0 obj +% 1852 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F52 585 0 R >> +/D [1846 0 R /XYZ 99.895 441.815 null] +>> +% 1853 0 obj +<< +/D [1846 0 R /XYZ 99.895 409.935 null] +>> +% 1854 0 obj +<< +/D [1846 0 R /XYZ 99.895 319.095 null] +>> +% 1845 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R /F17 1851 0 R /F104 1254 0 R /F102 1016 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1595 0 obj +% 1857 0 obj << /Type /Page -/Contents 1596 0 R -/Resources 1594 0 R +/Contents 1858 0 R +/Resources 1856 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R ->> -% 1597 0 obj -<< -/D [1595 0 R /XYZ 98.895 753.953 null] +/Parent 1855 0 R >> -% 1598 0 obj +% 1859 0 obj << -/D [1595 0 R /XYZ 99.895 496.698 null] +/D [1857 0 R /XYZ 149.705 753.953 null] >> -% 1599 0 obj +% 485 0 obj << -/D [1595 0 R /XYZ 99.895 438.313 null] +/D [1857 0 R /XYZ 150.705 716.092 null] >> -% 1600 0 obj +% 1860 0 obj << -/D [1595 0 R /XYZ 99.895 418.388 null] +/D [1857 0 R /XYZ 150.705 222.691 null] >> -% 1594 0 obj +% 1856 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F69 1460 0 R /F93 915 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1605 0 obj +% 1862 0 obj << /Type /Page -/Contents 1606 0 R -/Resources 1604 0 R +/Contents 1863 0 R +/Resources 1861 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1601 0 R 1602 0 R 1603 0 R ] +/Parent 1855 0 R >> -% 1601 0 obj +% 1864 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 527.942 420.271 540.002] -/A << /S /GoTo /D (spdata) >> +/D [1862 0 R /XYZ 98.895 753.953 null] >> -% 1602 0 obj +% 489 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 460.196 409.811 472.256] -/A << /S /GoTo /D (descdata) >> +/D [1862 0 R /XYZ 99.895 716.092 null] >> -% 1603 0 obj +% 1865 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [371.126 404.405 438.184 416.465] -/A << /S /GoTo /D (precdata) >> +/D [1862 0 R /XYZ 99.895 222.691 null] >> -% 1607 0 obj +% 1861 0 obj << -/D [1605 0 R /XYZ 149.705 753.953 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R /F91 914 0 R >> +/ProcSet [ /PDF /Text ] >> -% 413 0 obj +% 1869 0 obj << -/D [1605 0 R /XYZ 150.705 716.092 null] +/Type /Page +/Contents 1870 0 R +/Resources 1868 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1855 0 R +/Annots [ 1866 0 R 1867 0 R ] >> -% 1604 0 obj +% 1866 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [196.011 499.949 202.985 512.009] +/A << /S /GoTo /D (listing.5) >> >> -% 1609 0 obj +% 1867 0 obj << -/Type /Page -/Contents 1610 0 R -/Resources 1608 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [196.625 344.532 203.599 356.591] +/A << /S /GoTo /D (listing.6) >> >> -% 1611 0 obj +% 1871 0 obj << -/D [1609 0 R /XYZ 98.895 753.953 null] +/D [1869 0 R /XYZ 149.705 753.953 null] >> -% 417 0 obj +% 493 0 obj << -/D [1609 0 R /XYZ 99.895 716.092 null] +/D [1869 0 R /XYZ 150.705 716.092 null] >> -% 1608 0 obj +% 1868 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1614 0 obj +% 1875 0 obj << /Type /Page -/Contents 1615 0 R -/Resources 1613 0 R +/Contents 1876 0 R +/Resources 1874 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R ->> -% 1616 0 obj -<< -/D [1614 0 R /XYZ 149.705 753.953 null] +/Parent 1855 0 R >> -% 1617 0 obj +% 1877 0 obj << -/D [1614 0 R /XYZ 150.705 701.929 null] +/D [1875 0 R /XYZ 98.895 753.953 null] >> -% 1618 0 obj +% 1872 0 obj << -/D [1614 0 R /XYZ 150.705 668.729 null] +/D [1875 0 R /XYZ 99.895 411.235 null] >> -% 1619 0 obj +% 1873 0 obj << -/D [1614 0 R /XYZ 150.705 624.894 null] +/D [1875 0 R /XYZ 99.895 182.902 null] >> -% 1620 0 obj +% 1874 0 obj << -/D [1614 0 R /XYZ 150.705 555.872 null] +/Font << /F67 913 0 R /F120 1782 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1621 0 obj +% 1879 0 obj << -/D [1614 0 R /XYZ 150.705 500.082 null] +/Type /Page +/Contents 1880 0 R +/Resources 1878 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1855 0 R >> -% 1622 0 obj +% 1881 0 obj << -/D [1614 0 R /XYZ 150.705 468.201 null] +/D [1879 0 R /XYZ 149.705 753.953 null] >> -% 1623 0 obj +% 497 0 obj << -/D [1614 0 R /XYZ 150.705 425.023 null] +/D [1879 0 R /XYZ 150.705 716.092 null] >> -% 1624 0 obj +% 1882 0 obj << -/D [1614 0 R /XYZ 150.705 382.522 null] +/D [1879 0 R /XYZ 150.705 690.058 null] >> -% 1625 0 obj +% 1883 0 obj << -/D [1614 0 R /XYZ 150.705 354.627 null] +/D [1879 0 R /XYZ 150.705 693.143 null] >> -% 1613 0 obj +% 1878 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1627 0 obj +% 1885 0 obj << /Type /Page -/Contents 1628 0 R -/Resources 1626 0 R +/Contents 1886 0 R +/Resources 1884 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Parent 1890 0 R >> -% 1629 0 obj +% 1887 0 obj << -/D [1627 0 R /XYZ 98.895 753.953 null] +/D [1885 0 R /XYZ 98.895 753.953 null] >> -% 421 0 obj +% 501 0 obj << -/D [1627 0 R /XYZ 99.895 716.092 null] +/D [1885 0 R /XYZ 99.895 716.092 null] >> -% 1626 0 obj +% 1888 0 obj +<< +/D [1885 0 R /XYZ 99.895 678.98 null] +>> +% 1889 0 obj +<< +/D [1885 0 R /XYZ 99.895 679.195 null] +>> +% 1884 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1631 0 obj +% 1892 0 obj << /Type /Page -/Contents 1632 0 R -/Resources 1630 0 R +/Contents 1893 0 R +/Resources 1891 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Parent 1890 0 R >> -% 1633 0 obj +% 1894 0 obj << -/D [1631 0 R /XYZ 149.705 753.953 null] +/D [1892 0 R /XYZ 149.705 753.953 null] >> -% 425 0 obj +% 505 0 obj << -/D [1631 0 R /XYZ 150.705 716.092 null] +/D [1892 0 R /XYZ 150.705 716.092 null] >> -% 1634 0 obj +% 1895 0 obj << -/D [1631 0 R /XYZ 150.705 222.691 null] +/D [1892 0 R /XYZ 150.705 689.963 null] >> -% 1635 0 obj +% 1896 0 obj << -/D [1631 0 R /XYZ 150.705 200.171 null] +/D [1892 0 R /XYZ 150.705 693.143 null] >> -% 1630 0 obj +% 1891 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1637 0 obj +% 1898 0 obj << /Type /Page -/Contents 1638 0 R -/Resources 1636 0 R +/Contents 1899 0 R +/Resources 1897 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Parent 1890 0 R >> -% 1639 0 obj +% 1900 0 obj << -/D [1637 0 R /XYZ 98.895 753.953 null] +/D [1898 0 R /XYZ 98.895 753.953 null] >> -% 429 0 obj +% 509 0 obj << -/D [1637 0 R /XYZ 99.895 716.092 null] +/D [1898 0 R /XYZ 99.895 716.092 null] >> -% 1640 0 obj +% 1901 0 obj << -/D [1637 0 R /XYZ 99.895 348.22 null] +/D [1898 0 R /XYZ 99.895 678.98 null] >> -% 1641 0 obj +% 1902 0 obj << -/D [1637 0 R /XYZ 99.895 313.8 null] +/D [1898 0 R /XYZ 99.895 679.195 null] >> -% 1636 0 obj +% 1897 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1643 0 obj +% 1904 0 obj << /Type /Page -/Contents 1644 0 R -/Resources 1642 0 R +/Contents 1905 0 R +/Resources 1903 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R ->> -% 1645 0 obj -<< -/D [1643 0 R /XYZ 149.705 753.953 null] ->> -% 433 0 obj -<< -/D [1643 0 R /XYZ 150.705 716.092 null] ->> -% 1646 0 obj -<< -/D [1643 0 R /XYZ 150.705 441.869 null] +/Parent 1890 0 R >> -% 1647 0 obj +% 1906 0 obj << -/D [1643 0 R /XYZ 150.705 395.439 null] +/D [1904 0 R /XYZ 149.705 753.953 null] >> -% 1648 0 obj +% 513 0 obj << -/D [1643 0 R /XYZ 150.705 363.559 null] +/D [1904 0 R /XYZ 150.705 716.092 null] >> -% 1642 0 obj +% 1903 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1650 0 obj +% 1909 0 obj << /Type /Page -/Contents 1651 0 R -/Resources 1649 0 R +/Contents 1910 0 R +/Resources 1908 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R ->> -% 1652 0 obj -<< -/D [1650 0 R /XYZ 98.895 753.953 null] +/Parent 1890 0 R +/Annots [ 1907 0 R ] >> -% 437 0 obj +% 1907 0 obj << -/D [1650 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 438.278 369.462 450.338] +/A << /S /GoTo /D (spdata) >> >> -% 1649 0 obj +% 1911 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1909 0 R /XYZ 98.895 753.953 null] >> -% 1655 0 obj +% 517 0 obj << -/Type /Page -/Contents 1656 0 R -/Resources 1654 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/D [1909 0 R /XYZ 99.895 716.092 null] >> -% 1657 0 obj +% 1912 0 obj << -/D [1655 0 R /XYZ 149.705 753.953 null] +/D [1909 0 R /XYZ 99.895 676.015 null] >> -% 441 0 obj +% 1913 0 obj << -/D [1655 0 R /XYZ 150.705 716.092 null] +/D [1909 0 R /XYZ 99.895 679.195 null] >> -% 1654 0 obj +% 1908 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1659 0 obj +% 1916 0 obj << /Type /Page -/Contents 1660 0 R -/Resources 1658 0 R +/Contents 1917 0 R +/Resources 1915 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/Parent 1890 0 R +/Annots [ 1914 0 R ] >> -% 1661 0 obj -<< -/D [1659 0 R /XYZ 98.895 753.953 null] ->> -% 445 0 obj +% 1914 0 obj << -/D [1659 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 571.778 420.271 583.837] +/A << /S /GoTo /D (spdata) >> >> -% 1658 0 obj +% 1918 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1916 0 R /XYZ 149.705 753.953 null] >> -% 1663 0 obj +% 521 0 obj << -/Type /Page -/Contents 1664 0 R -/Resources 1662 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/D [1916 0 R /XYZ 150.705 716.092 null] >> -% 1665 0 obj +% 1919 0 obj << -/D [1663 0 R /XYZ 149.705 753.953 null] +/D [1916 0 R /XYZ 150.705 676.015 null] >> -% 449 0 obj +% 1920 0 obj << -/D [1663 0 R /XYZ 150.705 716.092 null] +/D [1916 0 R /XYZ 150.705 679.195 null] >> -% 1662 0 obj +% 1915 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1667 0 obj +% 1923 0 obj << /Type /Page -/Contents 1668 0 R -/Resources 1666 0 R +/Contents 1924 0 R +/Resources 1922 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/Parent 1928 0 R +/Annots [ 1921 0 R ] >> -% 1669 0 obj +% 1921 0 obj << -/D [1667 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 438.278 369.462 450.338] +/A << /S /GoTo /D (spdata) >> >> -% 453 0 obj +% 1925 0 obj << -/D [1667 0 R /XYZ 99.895 716.092 null] +/D [1923 0 R /XYZ 98.895 753.953 null] >> -% 1666 0 obj +% 525 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1923 0 R /XYZ 99.895 716.092 null] +>> +% 1926 0 obj +<< +/D [1923 0 R /XYZ 99.895 678.98 null] >> endstream endobj -1672 0 obj +1933 0 obj << -/Length 5526 +/Length 4155 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.9)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(9.4)-1000(mm)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 199.577 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(bcast)-250(\227)-250(Broadcast)-250(data)]TJ -0 g 0 G +/F59 11.9552 Tf 203.164 706.129 Td [(array)]TJ +ET +q +1 0 0 1 231.784 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 235.371 706.129 Td [(read)-222(\227)-223(Read)-222(a)-223(dense)-222(array)-223(from)-222(a)-223(\002le)-222(in)-223(the)-222(Ma-)]TJ -57.767 -13.948 Td [(trixMarket)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-845(m)-71(m)]TJ +ET +q +1 0 0 1 200.884 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 204.572 667.24 Td [(a)-70(r)-70(r)-71(a)-70(y)]TJ +ET +q +1 0 0 1 232.04 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 235.728 667.24 Td [(r)-70(e)-70(a)-71(d)-203(\050)-150(b)-206(,)-926(i)-152(r)-151(e)-152(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-106(a)-107(m)-107(e)-241(\051)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -20.269 Td [(call)-525(psb_bcast\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-221(subr)18(outine)-222(implements)-221(a)-221(br)18(oadcast)-222(operation)-221(based)-221(on)-222(the)-221(underlying)]TJ -14.944 -11.955 Td [(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -85.023 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -22.619 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + 0 -19.925 Td [(\002lename)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.533 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F67 9.9626 Tf 65.184 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(the)-250(r)18(oot)-250(pr)18(ocess,)-250(the)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-344(a)-344(rank)-344(1)-344(or)-344(2)-344(array)111(,)-367(or)-344(a)-344(character)-344(or)-344(logical)-344(variable,)-367(which)-344(may)-344(be)]TJ 0 -11.955 Td [(a)-377(scalar)-377(or)-377(rank)-377(1)-377(array)111(.)-1067(T)90(ype,)-409(kind,)-409(rank)-377(and)-376(size)-377(must)-377(agr)18(ee)-377(on)-377(all)]TJ 0 -11.955 Td [(pr)18(ocesses.)]TJ +/F59 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +/F62 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Root)-250(pr)18(ocess)-250(holding)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.503 0 Td [(\000)]TJ/F54 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(0)]TJ +/F59 9.9626 Tf -317.894 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -246.025 -22.618 Td [(mode)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ + 0 -19.926 Td [(b)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +/F62 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-398(array)-398(of)-397(type)-398(r)18(eal)-398(or)-398(complex,)-434(rank)-398(1)-398(or)-398(2)-398(and)-398(h)1(a)-1(v)1(ing)-398(the)-398(ALLO-)]TJ 0 -11.955 Td [(CA)74(T)74(ABLE)-257(attribute,)-258(or)-257(an)-257(object)-257(of)-257(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 177.91 0 Td [(psb)]TJ +ET +q +1 0 0 1 369.841 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 372.979 430.129 Td [(T)]TJ +ET +q +1 0 0 1 378.837 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 381.975 430.129 Td [(vect)]TJ +ET +q +1 0 0 1 403.524 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 406.663 430.129 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(,)-259(of)-257(type)-256(r)18(eal)-257(or)]TJ -251.973 -11.955 Td [(complex.)]TJ 0 -11.955 Td [(W)55(ill)-275(be)-276(allocated)-275(and)-276(\002ll)1(ed)-276(in)-275(if)-276(the)-275(input)-275(\002le)-276(contains)-275(a)-275(right)-276(hand)-275(side,)]TJ 0 -11.956 Td [(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.906 -31.88 Td [(iret)]TJ 0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(120)]TJ + 139.476 -248.035 Td [(146)]TJ 0 g 0 G ET endstream endobj -1677 0 obj +1940 0 obj << -/Length 4820 +/Length 7231 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(all)-250(pr)18(ocesses)-250(other)-250(than)-250(r)18(oot,)-250(the)-250(br)18(oadcasted)-250(data.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-315(a)-314(rank)-315(1)-315(or)-314(2)-315(array)111(,)-331(or)-314(a)-315(character)-315(or)-314(logical)-315(scalar)74(.)-819(T)90(ype,)-330(kind,)-331(rank)]TJ 0 -11.955 Td [(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ -0 g 0 G - -12.453 -19.925 Td [(2.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F59 11.9552 Tf 99.895 706.129 Td [(9.5)-1000(mm)]TJ ET q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f +1 0 0 1 148.768 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 434.448 Td [(call)]TJ -0 g 0 G - [-525(psb_bcast\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F59 11.9552 Tf 152.354 706.129 Td [(mat)]TJ ET q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f +1 0 0 1 173.658 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 356.739 Td [(call)]TJ -0 g 0 G - [-525(psb_bcast\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(bcast_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F59 11.9552 Tf 177.245 706.129 Td [(write)-275(\227)-275(W)74(rite)-275(a)-275(sparse)-275(matrix)-275(to)-275(a)-275(\002le)-275(in)-275(the)-275(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.48 Td [(c)-175(a)-175(l)-174(l)-828(m)-52(m)]TJ +ET +q +1 0 0 1 149.539 667.901 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 153.049 667.701 Td [(m)-52(a)-53(t)]TJ +ET +q +1 0 0 1 172.236 667.901 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 175.746 667.701 Td [(w)-52(r)-53(i)-52(t)-52(e)-186(\050)-167(a)-242(,)-900(m)-126(t)-125(i)-126(t)-125(l)-126(e)-426(,)-926(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-130(t)-434(,)-882(f)-107(i)-107(l)-106(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ 0 g 0 G - [-525(psb_bcast\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(bcast_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F59 9.9626 Tf -75.851 -26.279 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(121)]TJ +/F59 9.9626 Tf -29.828 -19.464 Td [(On)-250(Entry)]TJ 0 g 0 G -ET - -endstream -endobj -1686 0 obj -<< -/Length 5829 ->> -stream 0 g 0 G + 0 -19.464 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 578.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.10)-1000(psb)]TJ +/F67 9.9626 Tf 312.397 578.584 Td [(Tspmat)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 344.406 578.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_sum\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-353(subr)18(outine)-353(implements)-354(a)-353(sum)-353(r)18(eduction)-353(operation)-353(based)-354(on)-353(the)-353(un-)]TJ -14.944 -11.955 Td [(derlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +/F67 9.9626 Tf 347.544 578.584 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 0 -20.408 Td [(ctxt)]TJ +/F59 9.9626 Tf -268.571 -19.464 Td [(mtitle)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(A)-231(charachter)-230(variable)-231(holding)-231(a)-230(descriptive)-231(title)-231(for)-230(the)-231(matrix)-231(to)-230(be)-231(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +/F59 9.9626 Tf -24.907 -19.464 Td [(\002lename)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(sum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.534 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-233(\050i.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.956 Td [(is)-250(used.)-310(Default:)]TJ/F67 9.9626 Tf 74.799 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ +/F59 9.9626 Tf -104.936 -19.463 Td [(iunit)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(to)-250(hold)-250(the)-250(\002nal)-250(sum,)-250(or)]TJ/F83 10.3811 Tf 143.744 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-250(to)-250(make)-250(it)-250(available)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -150.284 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F62 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -20.409 Td [(mode)]TJ +/F59 9.9626 Tf -317.895 -20.764 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ + 0 -19.463 Td [(iret)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -20.763 Td [(Notes)]TJ/F62 9.9626 Tf 14.944 -11.956 Td [(If)-283(this)-282(function)-283(is)-283(called)-282(on)-283(a)-282(matrix)-283(a)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ 0 g 0 G + [-283(on)-282(a)-283(distributed)-283(communicator)-282(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-225(matrix)-225(when)-225(appr)18(opriate,)-230(e.g.)-302(for)-225(debugging)-225(purposes,)-230(one)-225(could)]TJ/F60 9.9626 Tf 318.257 0 Td [(gather)]TJ/F62 9.9626 Tf -318.257 -11.955 Td [(the)-339(whole)-338(matrix)-339(on)-338(a)-339(single)-338(rank)-339(and)-338(then)-339(write)-338(it.)-576(Consider)-339(the)-338(following)]TJ 0 -11.955 Td [(example)-250(for)-250(a)]TJ/F60 9.9626 Tf 62.495 0 Td [(double)]TJ/F62 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(matrix)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 99.895 178.717 cm +0 0 343.711 82.69 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - 0 -20.408 Td [(dat)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 102.884 250.747 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(sum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ + [(\050psb_ldspmat_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 79.264 -29.887 Td [(122)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -ET - -endstream -endobj -1690 0 obj -<< -/Length 4455 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(aglobal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(call)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ + [-525(psb_gather\050aglobal,a,desc_a,info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ + [-525(\050iam)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(==)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ + [-525(psb_root_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 124.802 441.123 cm -0 0 318.804 27.895 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 137.205 458.358 Td [(call)]TJ + 37.659 -10.959 Td [(call)]TJ 0 g 0 G - [-525(psb_sum\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(mm_mat_write\050aglobal,mtitle,info,filename\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -37.659 -10.959 Td [(end)-525(if)]TJ 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ + 0 -10.959 Td [(call)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(psb_spfree\050aglobal,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ + [-525(info\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +/F62 9.9626 Tf -2.989 -23.747 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F67 9.9626 Tf 129.513 0 Td [(C)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 124.802 330.537 cm -0 0 318.804 60.772 re f +1 0 0 1 99.895 137.797 cm +0 0 343.711 16.936 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 380.649 Td [(call)]TJ -0 g 0 G - [-525(psb_sum\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.958 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(sum_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F102 8.9664 Tf 102.884 144.073 Td [(psb_i_t)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_sum\050ctxt,dat,mode)]TJ + [-525(psb_c_)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [(<)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ + [(s,d,c,z)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [(>)]TJ 0 g 0 G - [(sum_request\051)]TJ + [(global_mat_write\050ah,cdh\051;)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +/F62 9.9626 Tf -2.989 -23.747 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -246.376 Td [(123)]TJ + 164.384 -29.888 Td [(147)]TJ 0 g 0 G ET endstream endobj -1697 0 obj +1947 0 obj << -/Length 5548 +/Length 7073 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.11)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(9.6)-1000(mm)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 199.577 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_max\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-354(subr)18(outine)-354(implements)-354(a)-354(maximum)-354(valuer)18(eduction)-354(operation)-354(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.619 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ -0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-255(to)-255(hold)-255(the)-255(\002nal)-255(maximum,)-257(or)]TJ/F83 10.3811 Tf 170.502 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-255(to)-255(make)-255(it)-255(available)-255(on)-255(all)-255(pr)18(o-)]TJ -177.042 -11.955 Td [(cesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F59 11.9552 Tf 203.164 706.129 Td [(array)]TJ +ET +q +1 0 0 1 231.784 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 235.371 706.129 Td [(write)-374(\227)-375(W)74(rite)-374(a)-375(dense)-374(array)-374(from)-375(a)-374(\002le)-375(in)-374(the)]TJ -57.767 -13.948 Td [(MatrixMarket)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.509 Td [(c)-175(a)-175(l)-174(l)-858(m)-83(m)]TJ +ET +q +1 0 0 1 201.262 667.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 205.076 667.672 Td [(a)-83(r)-83(r)-83(a)-83(y)]TJ +ET +q +1 0 0 1 233.175 667.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 236.989 667.672 Td [(w)-83(r)-83(i)-83(t)-82(e)-217(\050)-149(b)-206(,)-941(v)-165(t)-165(i)-165(t)-166(l)-165(e)-505(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-130(u)-129(n)-130(i)-129(t)-435(,)-881(f)-107(i)-107(l)-107(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +/F59 9.9626 Tf -86.284 -26.38 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -29.828 -19.493 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(124)]TJ + 0 -19.493 Td [(b)]TJ 0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-190(array)-190(of)-190(type)-190(r)18(eal)-190(or)-190(complex,)-202(rank)-190(1)-190(or)-190(2,)-202(or)-190(an)-190(object)-190(of)-190(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 277.745 0 Td [(psb)]TJ ET - -endstream -endobj -1701 0 obj -<< -/Length 4718 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 469.676 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ -0 g 0 G - -12.453 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 472.814 578.396 Td [(T)]TJ ET q -1 0 0 1 124.802 429.167 cm -0 0 318.804 27.895 re f +1 0 0 1 478.672 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 446.403 Td [(call)]TJ -0 g 0 G - [-525(psb_max\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.956 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 481.81 578.396 Td [(vect)]TJ ET q -1 0 0 1 124.802 318.582 cm -0 0 318.804 60.772 re f +1 0 0 1 503.359 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 368.694 Td [(call)]TJ +/F67 9.9626 Tf 506.497 578.396 Td [(type)]TJ 0 g 0 G - [-525(psb_max\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 20.922 0 Td [(,)]TJ -351.808 -11.955 Td [(of)-250(type)-250(r)18(eal)-250(or)-250(complex;)-250(its)-250(contents)-250(will)-250(be)-250(written)-250(to)-250(disk.)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -24.906 -31.448 Td [(\002lename)]TJ 0 g 0 G - [(max_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written.)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.958 Td [(call)]TJ +/F59 9.9626 Tf -44.274 -31.448 Td [(vtitle)]TJ 0 g 0 G - [-525(psb_max\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 28.772 0 Td [(Matrix)-250(title.)]TJ -3.865 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(A)-244(charachter)-243(variable)-244(holding)-244(a)-243(descriptive)-244(title)-244(for)-243(the)-244(vector)-244(to)-243(be)-244(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)-310(T)90(ype:)]TJ/F59 9.9626 Tf 70.763 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -108.95 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.533 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.956 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F67 9.9626 Tf 65.184 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -95.322 -19.492 Td [(iunit)]TJ 0 g 0 G - [(max_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -317.894 -20.836 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -234.421 Td [(125)]TJ 0 g 0 G -ET - -endstream -endobj -1708 0 obj -<< -/Length 5813 ->> -stream + 0 -19.492 Td [(iret)]TJ 0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -20.836 Td [(Notes)]TJ/F62 9.9626 Tf 14.944 -11.955 Td [(If)-290(this)-289(function)-290(is)-290(call)1(ed)-290(on)-290(a)-289(vector)-290(v)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.12)-1000(psb)]TJ +0 g 0 G + [-290(on)-289(a)-290(distributed)-290(communicator)-289(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-243(vect)1(or)-243(when)-243(appr)18(opriate,)-244(e.g.)-307(for)-243(debugging)-242(purposes,)-244(one)-243(could)]TJ/F60 9.9626 Tf 318.257 0 Td [(gather)]TJ/F62 9.9626 Tf -318.257 -11.955 Td [(the)-349(whole)-349(vector)-349(on)-349(a)-349(single)-349(rank)-349(and)-349(then)-349(writ)1(e)-349(it.)-607(Consider)-349(the)-349(following)]TJ 0 -11.956 Td [(example)-250(for)-250(a)]TJ/F60 9.9626 Tf 62.495 0 Td [(double)]TJ/F62 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(vector)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 150.705 149.348 cm +0 0 343.711 82.69 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG BT -/F51 11.9552 Tf 207.803 706.129 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ +/F102 8.9664 Tf 153.694 221.378 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_min\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-328(subr)18(outine)-327(implements)-328(a)-328(minimum)-327(value)-328(r)18(eduction)-328(o)1(peration)-328(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ + [-525(vglobal\050:\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(call)]TJ 0 g 0 G + [-525(psb_gather\050vglobal,v,desc,info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ 0 g 0 G - 0 -20.408 Td [(ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + [-525(\050iam)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(==)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ + [-525(psb_root_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ + [-525(mm_array_write\050vglobal,vtitle,info,filename\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(end)-525(if)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)-525(deallocate)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ + [(\050vglobal,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(stat)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 0 -20.408 Td [(dat)]TJ + [(info\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ +/F62 9.9626 Tf -2.989 -23.777 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F67 9.9626 Tf 129.513 0 Td [(C)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ 0 g 0 G - 79.264 -29.887 Td [(126)]TJ + 29.64 -41.41 Td [(148)]TJ 0 g 0 G ET endstream endobj -1712 0 obj +1953 0 obj << -/Length 4437 +/Length 655 >> stream 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ -0 g 0 G - -12.453 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET q -1 0 0 1 124.802 441.123 cm -0 0 318.804 27.895 re f +1 0 0 1 99.895 695.17 cm +0 0 343.711 16.936 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 458.358 Td [(call)]TJ +/F102 8.9664 Tf 102.884 701.446 Td [(psb_i_t)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_min\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ + [-525(psb_c_)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [(<)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ + [(s,d,c,z)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(>)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ + [(global_vec_write\050vh,cdh\051;)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +/F62 9.9626 Tf -2.989 -24.209 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ 0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ + 164.384 -586.799 Td [(149)]TJ 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET -q -1 0 0 1 124.802 330.537 cm -0 0 318.804 60.772 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 137.205 380.649 Td [(call)]TJ -0 g 0 G - [-525(psb_min\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.958 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + +endstream +endobj +1957 0 obj +<< +/Length 1109 +>> +stream 0 g 0 G - [(min_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +BT +/F59 14.3462 Tf 150.705 705.784 Td [(10)-1000(Preconditioner)-250(routines)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-228(base)-227(PSBLAS)-228(library)-227(contains)-228(the)-227(implementation)-228(of)-227(two)-228(simple)-227(pr)18(econdi-)]TJ 0 -11.955 Td [(tioning)-250(techniques:)]TJ 0 g 0 G - [-525(psb_min\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + 13.888 -19.925 Td [(\225)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-500(Diagonal)-250(Scaling)]TJ 0 g 0 G - [(min_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + 0 -19.926 Td [(\225)]TJ 0 g 0 G + [-500(Block)-250(Jacobi)-250(with)-250(ILU\0500\051)-250(factorization)]TJ -13.888 -19.925 Td [(The)-356(supporting)-356(data)-356(type)-356(and)-356(subr)18(outine)-356(interfaces)-356(ar)18(e)-356(de\002ned)-356(in)-356(the)-356(mod-)]TJ 0 -11.955 Td [(ule)]TJ/F67 9.9626 Tf 16.301 0 Td [(psb_prec_mod)]TJ/F62 9.9626 Tf 62.764 0 Td [(.)-350(The)-263(old)-263(interfaces)]TJ/F67 9.9626 Tf 87.314 0 Td [(psb_precinit)]TJ/F62 9.9626 Tf 65.386 0 Td [(and)]TJ/F67 9.9626 Tf 19.489 0 Td [(psb_precbld)]TJ/F62 9.9626 Tf 60.156 0 Td [(ar)18(e)-263(still)]TJ -311.41 -11.955 Td [(supported)-250(for)-250(backwar)18(d)-250(compatibility)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -246.376 Td [(127)]TJ + 164.383 -497.003 Td [(150)]TJ 0 g 0 G ET endstream endobj -1720 0 obj +1963 0 obj << -/Length 5616 +/Length 5016 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.13)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(10.1)-1000(init)-250(\227)-250(Initialize)-250(a)-250(preconditioner)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amx\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-342(subr)18(outine)-342(implements)-342(a)-342(maximum)-341(absolute)-342(value)-342(r)18(eduction)-342(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +/F67 9.9626 Tf 0 -18.964 Td [(call)-525(prec%init\050icontxt,ptype,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -22.619 Td [(ctxt)]TJ + 0 -19.925 Td [(icontxt)]TJ +0 g 0 G +/F62 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(ptype)]TJ +0 g 0 G +/F62 9.9626 Tf 30.994 0 Td [(the)-250(type)-250(of)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 151.121 0 Td [(global)]TJ/F62 9.9626 Tf -157.208 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(string,)-250(see)-250(usage)-250(notes.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(On)-250(Exit)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(prec)]TJ +0 g 0 G +/F62 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ +ET +q +1 0 0 1 337.631 446.268 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 340.77 446.069 Td [(prec)]TJ +ET +q +1 0 0 1 362.319 446.268 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 365.457 446.069 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F59 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ/F62 9.9626 Tf 34.311 0 Td [(Legal)-245(inputs)-244(to)-245(this)-245(subr)18(outine)-245(ar)18(e)-244(interpr)18(eted)-245(depending)-245(on)-244(the)]TJ/F60 9.9626 Tf 285.595 0 Td [(p)-25(t)-25(y)-80(p)-25(e)]TJ/F62 9.9626 Tf -319.906 -11.956 Td [(string)-250(as)-250(follows)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 72.358 3.617 Td [(4)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +/F62 9.9626 Tf 4.284 -3.617 Td [(:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +/F59 9.9626 Tf -76.642 -19.925 Td [(NONE)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F62 9.9626 Tf 35.965 0 Td [(No)-250(pr)18(econditioning,)-250(i.e.)-310(the)-250(pr)18(econditioner)-250(is)-250(just)-250(a)-250(copy)-250(operator)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +/F59 9.9626 Tf -35.965 -19.925 Td [(DIAG)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F62 9.9626 Tf 33.205 0 Td [(Diagonal)-371(scaling;)-432(each)-371(entry)-372(of)-371(the)-371(input)-371(vector)-372(is)-371(multiplied)-371(by)-371(the)]TJ -8.298 -11.955 Td [(r)18(ecipr)18(ocal)-266(of)-267(the)-266(sum)-267(of)-266(the)-266(absolute)-267(values)-266(of)-267(the)-266(coef)18(\002cients)-266(in)-267(the)-266(cor)18(-)]TJ 0 -11.955 Td [(r)18(esponding)-250(r)18(ow)-250(of)-250(matrix)]TJ/F60 9.9626 Tf 116.148 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +/F59 9.9626 Tf -148.373 -19.926 Td [(BJAC)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +/F62 9.9626 Tf 30.446 0 Td [(Pr)18(econdition)-211(by)-212(a)-211(factorization)-212(of)-211(the)-212(block-diagonal)-211(of)-212(matrix)]TJ/F60 9.9626 Tf 273.867 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(,)-219(wher)18(e)]TJ -286.723 -11.955 Td [(block)-347(boundaries)-348(ar)18(e)-347(determined)-347(by)-348(the)-347(data)-347(allocation)-348(boundaries)-347(for)]TJ 0 -11.955 Td [(each)-223(pr)18(ocess;)-232(r)18(equir)18(es)-222(no)-223(communication.)-301(Only)-223(the)-222(incomplete)-223(factoriza-)]TJ 0 -11.955 Td [(tion)]TJ/F60 9.9626 Tf 20.498 0 Td [(I)-96(L)-9(U)]TJ/F93 10.3811 Tf 18.202 0 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(0)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F62 9.9626 Tf 6.64 0 Td [(is)-250(curr)18(ently)-250(implemented.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +ET +q +1 0 0 1 99.895 130.181 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +Q +BT +/F62 5.9776 Tf 110.755 123.219 Td [(4)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(The)-250(string)-250(is)-250(case-insensitive)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(128)]TJ +/F62 9.9626 Tf 150.037 -29.888 Td [(151)]TJ 0 g 0 G ET endstream endobj -1724 0 obj +1973 0 obj << -/Length 4760 +/Length 7572 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(10.2)-1000(build)-250(\227)-250(Builds)-250(a)-250(preconditioner)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +/F67 9.9626 Tf 0 -20.364 Td [(call)-525(prec%build\050a,)-525(desc_a,)-525(info[,amold,vmold,imold]\051)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F59 9.9626 Tf 0 -24.086 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F59 9.9626 Tf -29.828 -22.815 Td [(On)-250(Entry)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + 0 -22.816 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(system)-250(sparse)-250(matrix.)-310(Scope:)]TJ/F59 9.9626 Tf 146.229 0 Td [(local)]TJ/F62 9.9626 Tf -131.285 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(sparse)-250(matrix)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 190.872 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f +1 0 0 1 382.802 580.382 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 385.94 580.183 Td [(Tspmat)]TJ +ET +q +1 0 0 1 417.95 580.382 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 434.448 Td [(call)]TJ +/F67 9.9626 Tf 421.088 580.183 Td [(type)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ +/F59 9.9626 Tf -291.305 -22.815 Td [(prec)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(alr)18(eady)-190(initialized)-190(pr)18(econdtioner)-190(data)-190(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 277.288 0 Td [(psb)]TJ +ET +q +1 0 0 1 469.217 509.746 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 472.355 509.547 Td [(prec)]TJ +ET +q +1 0 0 1 493.904 509.746 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 497.043 509.547 Td [(type)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ +/F59 9.9626 Tf -346.338 -34.771 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 474.975 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 474.776 Td [(a)]TJ 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 208.625 0 Td [(local)]TJ/F62 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(descriptor)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 247.649 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f +1 0 0 1 439.579 439.11 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +BT +/F67 9.9626 Tf 442.717 438.911 Td [(desc)]TJ +ET +q +1 0 0 1 464.266 439.11 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 467.404 438.911 Td [(type)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -337.62 -22.816 Td [(amold)]TJ +0 g 0 G +/F62 9.9626 Tf 33.763 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -8.857 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F67 9.9626 Tf 201.393 0 Td [(psb)]TJ +ET +q +1 0 0 1 393.323 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 396.461 368.275 Td [(T)]TJ +ET +q +1 0 0 1 402.319 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 405.457 368.275 Td [(base)]TJ +ET +q +1 0 0 1 427.006 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 430.144 368.275 Td [(sparse)]TJ +ET +q +1 0 0 1 462.154 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F94 8.9664 Tf 127.791 356.739 Td [(call)]TJ +/F67 9.9626 Tf 465.292 368.275 Td [(mat)]TJ/F62 9.9626 Tf 15.691 0 Td [(.)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -330.278 -22.816 Td [(vmold)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.536 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 34.321 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -9.415 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F67 9.9626 Tf 201.393 0 Td [(psb)]TJ +ET +q +1 0 0 1 393.323 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 396.461 297.638 Td [(T)]TJ +ET +q +1 0 0 1 402.319 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 405.457 297.638 Td [(base)]TJ +ET +q +1 0 0 1 427.006 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 430.144 297.638 Td [(vect)]TJ +ET +q +1 0 0 1 451.693 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 454.832 297.638 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(amx_request\051)]TJ -14.122 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F59 9.9626 Tf -325.048 -22.815 Td [(imold)]TJ +0 g 0 G +/F62 9.9626 Tf 32.099 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(integer)-250(vector)-250(storage.)]TJ -7.193 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(object)-190(of)-190(a)-190(class)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F67 9.9626 Tf 235.804 0 Td [(psb)]TJ +ET +q +1 0 0 1 427.733 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 430.872 227.002 Td [(T)]TJ +ET +q +1 0 0 1 436.73 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.868 227.002 Td [(base)]TJ +ET +q +1 0 0 1 461.417 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 464.555 227.002 Td [(vect)]TJ +ET +q +1 0 0 1 486.104 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 489.242 227.002 Td [(type)]TJ/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F59 9.9626 Tf -359.459 -24.085 Td [(On)-250(Return)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + 0 -22.816 Td [(prec)]TJ 0 g 0 G - [(amx_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econdtioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 193.612 0 Td [(psb)]TJ +ET +q +1 0 0 1 385.542 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 388.68 132.281 Td [(prec)]TJ +ET +q +1 0 0 1 410.229 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 413.367 132.281 Td [(type)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(129)]TJ +/F62 9.9626 Tf -98.279 -41.843 Td [(152)]TJ 0 g 0 G ET endstream endobj -1731 0 obj +1977 0 obj << -/Length 5619 +/Length 1021 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.14)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amn\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-360(subr)18(outine)-360(impl)1(ements)-360(a)-360(minimum)-360(absolute)-360(value)-359(r)18(eduction)-360(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.619 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ -0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ -0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ -0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -24.907 -21.918 Td [(The)]TJ/F67 9.9626 Tf 20.388 0 Td [(amold)]TJ/F62 9.9626 Tf 26.152 0 Td [(,)]TJ/F67 9.9626 Tf 6.506 0 Td [(vmold)]TJ/F62 9.9626 Tf 29.862 0 Td [(and)]TJ/F67 9.9626 Tf 20.577 0 Td [(imold)]TJ/F62 9.9626 Tf 29.862 0 Td [(ar)18(guments)-372(may)-373(be)-372(employed)-373(to)-372(interface)-372(with)]TJ -133.347 -11.955 Td [(special)-250(devices,)-250(such)-250(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(130)]TJ + 164.384 -533.997 Td [(153)]TJ 0 g 0 G ET endstream endobj -1735 0 obj +1985 0 obj << -/Length 4753 +/Length 5673 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(10.3)-1000(apply)-250(\227)-250(Preconditioner)-250(application)-250(routine)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +/F67 9.9626 Tf 0 -18.964 Td [(call)-525(prec%apply\050x,y,desc_a,info,trans,work\051)]TJ 0 -11.955 Td [(call)-525(prec%apply\050x,desc_a,info,trans\051)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + 0 -19.925 Td [(prec)]TJ +0 g 0 G +/F62 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 117.837 0 Td [(local)]TJ/F62 9.9626 Tf -117.279 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f +1 0 0 1 388.441 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 434.448 Td [(call)]TJ -0 g 0 G - [-525(psb_amn\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 391.579 577.576 Td [(prec)]TJ ET q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f +1 0 0 1 413.128 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 356.739 Td [(call)]TJ -0 g 0 G - [-525(psb_amn\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(amn_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ -0 g 0 G - [-525(psb_amn\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(amn_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 416.266 577.576 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(131)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(x)]TJ 0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(sour)18(ce)-250(vector)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 111.142 0 Td [(local)]TJ/F62 9.9626 Tf -96.198 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.688 0 Td [(psb)]TJ ET - -endstream -endobj -1742 0 obj -<< -/Length 5776 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 410.618 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.15)-1000(psb)]TJ +/F67 9.9626 Tf 413.756 521.785 Td [(T)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 419.614 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(reduction)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_nrm2\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-297(subr)18(outine)-296(implements)-297(a)-297(2-norm)-296(value)-297(r)18(eduction)-297(operation)-296(based)-297(on)]TJ -14.944 -11.955 Td [(the)-250(underlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +/F67 9.9626 Tf 422.752 521.785 Td [(vect)]TJ +ET +q +1 0 0 1 444.301 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 447.439 521.785 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 0 -20.408 Td [(ctxt)]TJ +/F59 9.9626 Tf -317.656 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 501.86 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 208.625 0 Td [(local)]TJ/F62 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 200.207 0 Td [(psb)]TJ +ET +q +1 0 0 1 392.137 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 395.275 465.994 Td [(desc)]TJ +ET +q +1 0 0 1 416.824 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 419.963 465.994 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-370(as:)-551(a)-371(r)18(eal)-370(variable,)-401(which)-370(may)-371(be)-370(a)-371(scalar)74(,)-400(or)-371(a)-370(rank)-371(1)-370(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +/F59 9.9626 Tf -290.179 -19.925 Td [(trans)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ +/F62 9.9626 Tf 27.666 0 Td [(Scope:)]TJ -2.76 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F59 9.9626 Tf -24.906 -19.926 Td [(work)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ +/F62 9.9626 Tf 28.782 0 Td [(an)-250(optional)-250(work)-250(space)-250(Scope:)]TJ/F59 9.9626 Tf 136.476 0 Td [(local)]TJ/F62 9.9626 Tf -140.352 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(double)-250(pr)18(ecision)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F59 9.9626 Tf -24.906 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ + 0 -19.925 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 10.52 0 Td [(the)-250(destination)-250(vector)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 131.914 0 Td [(local)]TJ/F62 9.9626 Tf -117.528 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.688 0 Td [(psb)]TJ +ET +q +1 0 0 1 410.618 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 413.756 276.704 Td [(T)]TJ +ET +q +1 0 0 1 419.614 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.752 276.704 Td [(vect)]TJ +ET +q +1 0 0 1 444.301 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 447.439 276.704 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 0 -20.408 Td [(dat)]TJ +/F59 9.9626 Tf -317.656 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(2-norm)-250(r)18(eduction.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 79.264 -29.887 Td [(132)]TJ + 139.477 -118.52 Td [(154)]TJ 0 g 0 G ET endstream endobj -1746 0 obj +1990 0 obj << -/Length 5783 +/Length 3387 >> stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(variable,)-250(which)-250(may)-250(be)-250(a)-250(scalar)74(,)-250(or)-250(a)-250(rank)-250(1)-250(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(10.4)-1000(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(current)-250(preconditioner)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F67 9.9626 Tf 0 -18.964 Td [(call)-525(prec%descr\050info\051)]TJ 0 -11.955 Td [(call)-525(prec%descr\050info,iout,)-525(root\051)]TJ 0 g 0 G - [-500(This)-345(r)18(eduction)-345(is)-346(appr)18(opriate)-345(to)-345(compute)-345(the)-345(r)18(esults)-346(of)-345(multiple)-345(\050local\051)]TJ 12.453 -11.955 Td [(NRM2)-250(operations)-250(at)-250(the)-250(same)-250(time.)]TJ -0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Denoting)-249(by)]TJ/F52 9.9626 Tf 69.789 0 Td [(d)-40(a)-25(t)]TJ/F52 7.5716 Tf 13.536 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.23 1.96 Td [(the)-249(value)-249(of)-248(the)-249(variable)]TJ/F52 9.9626 Tf 108.808 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 15.973 0 Td [(on)-249(pr)18(ocess)]TJ/F52 9.9626 Tf 49.078 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(,)-249(the)-249(output)]TJ/F52 9.9626 Tf 53.71 0 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf -306.635 -11.955 Td [(is)-250(equivalent)-250(to)-250(the)-250(computation)-250(of)]TJ/F52 9.9626 Tf 124.796 -25.468 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 15.061 0 Td [(=)]TJ/F1 9.9626 Tf 11.086 10.922 Td [(r)]TJ -ET -q -1 0 0 1 285.832 490.532 cm -[]0 d 0 J 0.389 w 0 0 m 30.512 0 l S -Q -BT -/F96 13.9477 Tf 285.957 477.344 Td [(\345)]TJ/F52 7.5716 Tf 4.245 -8.764 Td [(i)]TJ/F52 9.9626 Tf 8.364 10.836 Td [(d)-40(a)-25(t)]TJ/F54 7.5716 Tf 13.495 3.473 Td [(2)]TJ/F52 7.5716 Tf 0.041 -7.027 Td [(i)]TJ/F54 9.9626 Tf 4.243 3.554 Td [(,)]TJ -191.543 -30.806 Td [(with)-250(car)18(e)-250(taken)-250(to)-250(avoid)-250(unnecessary)-250(over\003ow)92(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - -12.453 -19.926 Td [(3.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 117.836 0 Td [(local)]TJ/F62 9.9626 Tf -117.278 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 333.043 cm -0 0 318.804 27.895 re f +1 0 0 1 337.631 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 350.279 Td [(call)]TJ -0 g 0 G - [-525(psb_nrm2\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(5.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 340.77 577.576 Td [(prec)]TJ ET q -1 0 0 1 124.802 222.458 cm -0 0 318.804 60.772 re f +1 0 0 1 362.319 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 127.791 272.57 Td [(call)]TJ +/F67 9.9626 Tf 365.457 577.576 Td [(type)]TJ 0 g 0 G - [-525(psb_nrm2\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.536 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(iout)]TJ 0 g 0 G - [(nrm2_request\051)]TJ -14.122 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F62 9.9626 Tf 23.243 0 Td [(output)-250(unit.)-310(Scope:)]TJ/F59 9.9626 Tf 87.391 0 Td [(local)]TJ/F62 9.9626 Tf -85.727 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(number)74(.)-310(Default:)-310(default)-250(output)-250(unit.)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(root)]TJ 0 g 0 G - [-525(psb_nrm2\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(fr)18(om)-250(which)-250(to)-250(print)-250(Scope:)]TJ/F59 9.9626 Tf 155.834 0 Td [(local)]TJ/F62 9.9626 Tf -154.18 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-387(as:)-585(an)-387(integer)-387(number)-388(between)-387(0)-387(and)]TJ/F60 9.9626 Tf 220.442 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 14.01 0 Td [(\000)]TJ/F62 9.9626 Tf 10.638 0 Td [(1,)-422(in)-387(which)-387(case)]TJ -245.09 -11.955 Td [(the)-314(speci\002e)1(d)-314(pr)18(ocess)-314(will)-313(print)-314(the)-313(description,)-330(or)]TJ/F91 10.3811 Tf 225.38 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1,)-329(in)-314(which)-314(case)-313(all)]TJ -233.574 -11.955 Td [(pr)18(ocesses)-250(will)-250(print.)-310(Default:)-310(0.)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - [(nrm2_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G + 0 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -138.296 Td [(133)]TJ + 139.477 -263.975 Td [(155)]TJ 0 g 0 G ET endstream -endobj -1757 0 obj -<< -/Length 5352 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.16)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(snd)-250(\227)-250(Send)-250(data)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_snd\050ctxt,)-525(dat,)-525(dst,)-525(m\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(sends)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ +endobj +1994 0 obj +<< +/Length 973 +>> +stream 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ +BT +/F59 11.9552 Tf 150.705 706.129 Td [(10.5)-1000(clone)-250(\227)-250(clone)-250(current)-250(preconditioner)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G +/F67 9.9626 Tf 0 -18.964 Td [(call)-1050(prec%clone\050precout,info\051)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(dat)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(data)-250(to)-250(be)-250(sent.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.956 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F52 9.9626 Tf 197.687 0 Td [(m)]TJ/F54 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(dst)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 18.809 0 Td [(Destination)-250(pr)18(ocess.)]TJ 6.098 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(d)-25(s)-25(t)]TJ/F61 10.3811 Tf 15.689 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ +/F62 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -241.89 -31.88 Td [(m)]TJ +/F59 9.9626 Tf -77.917 -33.873 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(m)]TJ/F61 10.3811 Tf 10.767 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.955 Td [(When)]TJ/F52 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F52 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ + 0 -19.926 Td [(precout)]TJ 0 g 0 G -/F51 11.9552 Tf 0 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 39.292 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -39.292 -19.925 Td [(info)]TJ 0 g 0 G - [-500(This)-292(subr)18(outine)-292(impl)1(ies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.454 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(destination)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 158.309 0 Td [(d)-25(s)-25(t)]TJ/F54 9.9626 Tf 12.797 0 Td [(.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ 0 g 0 G - -31.629 -104.573 Td [(134)]TJ + 140.583 -449.28 Td [(156)]TJ 0 g 0 G ET endstream endobj -1762 0 obj +2000 0 obj << -/Length 5356 +/Length 2703 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.17)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(rcv)-250(\227)-250(Receive)-250(data)]TJ -0 g 0 G +/F59 11.9552 Tf 99.895 706.129 Td [(10.6)-1000(free)-250(\227)-250(Free)-250(a)-250(preconditioner)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_rcv\050ctxt,)-525(dat,)-525(src,)-525(m\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(r)18(eceives)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf 0 -18.964 Td [(call)-525(prec%free\050info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(src)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 17.704 0 Td [(Sour)18(ce)-250(pr)18(ocess.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-15(r)-17(c)]TJ/F61 10.3811 Tf 15.141 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ +/F62 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ +ET +q +1 0 0 1 337.631 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 340.77 577.576 Td [(prec)]TJ +ET +q +1 0 0 1 362.319 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 365.457 577.576 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -241.342 -31.881 Td [(m)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(m)]TJ/F61 10.3811 Tf 10.767 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.956 Td [(When)]TJ/F52 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F52 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(On)-250(Exit)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.926 Td [(prec)]TJ 0 g 0 G - 0 -19.925 Td [(dat)]TJ +/F62 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -30.874 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ +ET +q +1 0 0 1 337.631 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 340.77 501.86 Td [(prec)]TJ +ET +q +1 0 0 1 362.319 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 365.457 501.86 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(The)-250(data)-250(to)-250(be)-250(r)18(eceived.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.955 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F52 9.9626 Tf 197.687 0 Td [(m)]TJ/F54 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(info)]TJ 0 g 0 G - [-500(This)-292(subr)18(outine)-291(implies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.453 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(sour)18(ce)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 137.538 0 Td [(s)-15(r)-17(c)]TJ/F54 9.9626 Tf 12.249 0 Td [(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ/F62 9.9626 Tf 34.363 0 Td [(Releases)-250(all)-250(internal)-250(storage.)]TJ 0 g 0 G - -10.31 -104.573 Td [(135)]TJ + 130.021 -333.713 Td [(157)]TJ 0 g 0 G ET endstream endobj -1769 0 obj +2005 0 obj << -/Length 6241 +/Length 548 >> stream 0 g 0 G 0 g 0 G BT -/F51 14.3462 Tf 150.705 705.784 Td [(8)-1000(Error)-250(handling)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-382(PSBLAS)-382(library)-381(err)18(or)-382(handling)-382(policy)-382(has)-382(been)-382(comple)1(tely)-382(r)18(ewritten)-382(in)]TJ 0 -11.955 Td [(version)-359(2.0.)-638(The)-359(idea)-359(behind)-360(the)-359(design)-359(of)-359(this)-359(new)-360(err)18(or)-359(handling)-359(strategy)]TJ 0 -11.955 Td [(is)-303(to)-303(keep)-303(err)18(or)-303(messages)-303(on)-303(a)-303(stack)-303(allowing)-303(the)-303(user)-303(to)-303(trace)-303(back)-303(up)-303(to)-303(the)]TJ 0 -11.955 Td [(point)-317(wher)18(e)-318(the)-317(\002rst)-317(err)18(or)-318(message)-317(has)-318(been)-317(generated.)-512(Every)-317(r)18(outine)-318(in)-317(the)]TJ 0 -11.956 Td [(PSBLAS-2.0)-336(library)-336(has,)-358(as)-336(last)-337(non-optional)-336(ar)18(gument,)-358(an)-336(integer)]TJ/F59 9.9626 Tf 298.678 0 Td [(info)]TJ/F54 9.9626 Tf 24.271 0 Td [(vari-)]TJ -322.949 -11.955 Td [(able;)-364(whenever)74(,)-344(inside)-326(the)-326(r)18(outine,)-345(an)-326(err)18(or)-325(is)-326(detected,)-345(this)-326(variable)-326(is)-325(set)-326(to)]TJ 0 -11.955 Td [(a)-384(value)-384(corr)18(esponding)-384(to)-384(a)-384(speci\002c)-384(err)18(or)-384(code.)-711(Then)-384(this)-384(err)18(or)-384(code)-384(is)-384(also)]TJ 0 -11.955 Td [(pushed)-274(on)-273(the)-274(err)18(or)-274(stack)-274(and)-273(then)-274(either)-274(contr)18(ol)-274(is)-273(r)18(eturned)-274(to)-274(the)-273(caller)-274(r)18(ou-)]TJ 0 -11.955 Td [(tine)-342(or)-342(the)-342(execution)-343(is)-342(aborted,)-365(depending)-342(on)-342(the)-342(users)-342(choice.)-587(At)-342(the)-342(time)]TJ 0 -11.955 Td [(when)-243(the)-242(execution)-243(is)-242(aborted,)-244(an)-243(err)18(or)-242(message)-243(is)-243(p)1(rinted)-243(on)-243(standar)18(d)-242(output)]TJ 0 -11.956 Td [(with)-257(a)-256(level)-257(of)-256(verbosity)-257(than)-256(can)-257(be)-256(chosen)-257(by)-257(the)-256(user)74(.)-330(If)-256(the)-257(execution)-257(is)-256(not)]TJ 0 -11.955 Td [(aborted,)-259(then,)-259(the)-257(caller)-257(r)18(outine)-258(checks)-257(the)-257(value)-257(r)18(eturned)-257(in)-258(the)]TJ/F59 9.9626 Tf 284.621 0 Td [(info)]TJ/F54 9.9626 Tf 23.484 0 Td [(variable)]TJ -308.105 -11.955 Td [(and,)-290(if)-282(not)-282(zer)18(o,)-291(an)-282(err)18(or)-282(condition)-282(is)-282(raised.)-407(This)-282(pr)18(ocess)-282(continues)-282(on)-282(all)-282(the)]TJ 0 -11.955 Td [(levels)-203(of)-203(nested)-203(calls)-203(until)-203(the)-203(level)-203(wher)18(e)-202(the)-203(user)-203(decides)-203(to)-203(abort)-203(the)-203(pr)18(ogram)]TJ 0 -11.955 Td [(execution.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-286(5)]TJ -0 g 0 G - [-285(shows)-286(the)-286(layou)1(t)-286(of)-286(a)-285(generic)]TJ/F59 9.9626 Tf 172.064 0 Td [(psb_foo)]TJ/F54 9.9626 Tf 39.458 0 Td [(r)18(outine)-286(with)-285(r)18(espect)-286(to)-286(the)]TJ -226.466 -11.956 Td [(PSBLAS-2.0)-258(err)18(or)-259(handling)-258(policy)111(.)-335(It)-258(is)-258(possible)-259(to)-258(see)-258(how)92(,)-261(whenever)-258(an)-258(err)18(or)]TJ 0 -11.955 Td [(condition)-298(is)-298(detected,)-311(the)]TJ/F59 9.9626 Tf 114.879 0 Td [(info)]TJ/F54 9.9626 Tf 23.893 0 Td [(variable)-298(is)-299(set)-298(to)-298(the)-298(corr)18(esponding)-299(err)18(or)-298(code)]TJ -138.772 -11.955 Td [(which)-309(is,)-324(then,)-324(pushed)-310(on)-309(top)-309(of)-310(the)-309(stack)-309(by)-309(means)-310(of)-309(the)]TJ/F59 9.9626 Tf 265.277 0 Td [(psb_errpush)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)-488(An)]TJ -322.811 -11.955 Td [(err)18(or)-325(condition)-326(may)-325(be)-326(dir)18(ectl)1(y)-326(detected)-325(inside)-326(a)-325(r)18(outine)-325(or)-326(indir)18(ectly)-325(check-)]TJ 0 -11.955 Td [(ing)-331(the)-331(err)18(or)-331(code)-331(r)18(eturned)-331(r)18(eturned)-331(by)-331(a)-331(called)-331(r)18(outine.)-553(Whenever)-331(an)-331(err)18(or)]TJ 0 -11.956 Td [(is)-253(encounter)18(ed,)-255(after)-253(it)-254(has)-253(been)-254(pushed)-253(on)-254(st)1(ack,)-255(the)-253(pr)18(ogram)-254(execution)-253(skips)]TJ 0 -11.955 Td [(to)-264(a)-265(point)-264(wher)18(e)-264(the)-265(err)18(or)-264(condition)-264(is)-264(handled;)-272(the)-264(err)18(or)-265(condition)-264(is)-264(handled)]TJ 0 -11.955 Td [(either)-336(by)-336(r)18(eturning)-336(contr)18(ol)-336(to)-336(the)-336(caller)-335(r)17(o)1(utine)-336(or)-336(by)-336(calling)-336(the)]TJ/F59 9.9626 Tf 291.408 0 Td [(psb\134_error)]TJ/F54 9.9626 Tf -291.408 -11.955 Td [(r)18(outine)-273(which)-274(prints)-273(the)-274(content)-273(of)-273(the)-274(err)18(or)-273(stack)-274(and)-273(aborts)-273(the)-274(pr)18(ogram)-273(ex-)]TJ 0 -11.955 Td [(ecution,)-373(accor)18(ding)-348(to)-348(the)-348(choice)-348(made)-348(by)-348(the)-348(user)-348(with)]TJ/F59 9.9626 Tf 252.305 0 Td [(psb_set_erraction)]TJ/F54 9.9626 Tf 88.915 0 Td [(.)]TJ -341.22 -11.955 Td [(The)-297(default)-296(is)-297(to)-296(print)-297(the)-297(err)18(or)-296(and)-297(terminate)-296(the)-297(pr)18(ogram,)-308(but)-297(the)-297(user)-296(may)]TJ 0 -11.956 Td [(choose)-250(to)-250(handle)-250(the)-250(err)18(or)-250(explicitly)111(.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-347(6)]TJ -0 g 0 G - [-348(r)18(eports)-347(a)-347(sample)-347(err)18(or)-348(message)-347(generated)-347(by)-348(the)-347(PSBLAS-2.0)-347(li-)]TJ -14.944 -11.955 Td [(brary)111(.)-539(This)-327(err)18(or)-326(has)-327(been)-326(generated)-327(by)-326(the)-326(fact)-327(that)-326(the)-327(user)-326(has)-327(chosen)-326(the)]TJ 0 -11.955 Td [(invalid)-379(\223FOO\224)-380(stor)1(a)-1(g)1(e)-380(format)-379(to)-379(r)18(epr)18(esent)-380(the)-379(sparse)-379(matrix.)-698(Fr)18(om)-380(this)-379(er)18(-)]TJ 0 -11.955 Td [(r)18(or)-394(message)-393(it)-394(is)-393(possible)-394(to)-394(se)1(e)-394(that)-394(the)-393(err)18(or)-394(has)-393(been)-394(detected)-394(inside)-393(the)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_cest)]TJ/F54 9.9626 Tf 45.361 0 Td [(subr)18(outine)-353(called)-353(by)]TJ/F59 9.9626 Tf 95.326 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 50.591 0 Td [(...)-619(by)-354(pr)18(ocess)-353(0)-353(\050i.e.)-619(the)-353(r)18(oot)-354(pr)18(o-)]TJ -191.278 -11.956 Td [(cess\051.)]TJ +/F59 14.3462 Tf 150.705 705.784 Td [(11)-1000(Iterative)-250(Methods)]TJ/F62 9.9626 Tf 0 -22.702 Td [(In)-402(this)-403(chapter)-402(we)-403(pr)18(ovide)-402(r)18(outines)-403(for)-402(pr)18(econditioners)-402(and)-403(iterative)-402(meth-)]TJ 0 -11.955 Td [(ods.)-472(The)-304(interfaces)-304(for)-304(Krylov)-304(subspace)-303(methods)-304(ar)18(e)-304(available)-304(in)-304(the)-304(module)]TJ/F67 9.9626 Tf 0 -11.955 Td [(psb_krylov_mod)]TJ/F62 9.9626 Tf 73.225 0 Td [(.)]TJ 0 g 0 G - 164.384 -198.123 Td [(136)]TJ + 91.158 -568.734 Td [(158)]TJ 0 g 0 G ET endstream endobj -1775 0 obj +2012 0 obj << -/Length 6642 +/Length 8246 >> stream 0 g 0 G 0 g 0 G -0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +BT +/F59 11.9552 Tf 99.895 706.129 Td [(11.1)-1000(psb)]TJ +ET q -1 0 0 1 99.895 417.212 cm -0 0 343.711 292.902 re f +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 102.884 698.757 Td [(subroutine)]TJ -0 g 0 G - [-525(psb_foo\050some)-525(args,)-525(info\051)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 15.691 -11.956 Td [(!...)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -11.955 Td [(if)]TJ -0 g 0 G - [(\050error)-525(detected\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -0 g 0 G - 15.691 -11.955 Td [(info)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(errcode1)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_errpush\050)]TJ -0.25 0.44 0.63 rg 0.25 0.44 0.63 RG - [(\015psb_foo\015)]TJ -0 g 0 G - [(,)-525(errcode1\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(goto)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(9999)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ -0 g 0 G -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 0 -11.956 Td [(!...)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_bar\050some)-525(args,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [(\050info)-525(.ne.)-525(zero\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -0 g 0 G - 15.691 -11.955 Td [(info)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(errcode2)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_errpush\050)]TJ -0.25 0.44 0.63 rg 0.25 0.44 0.63 RG - [(\015psb_foo\015)]TJ -0 g 0 G - [(,)-525(errcode2\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(goto)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(9999)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ -0 g 0 G -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 0 -11.955 Td [(!...)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf -15.691 -11.955 Td [(9999)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(continue)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 15.691 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050err_act)-525(.eq.)-525(act_abort\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 15.691 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_error\050icontxt\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(return)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-266(subr)18(outine)-266(is)-267(a)-266(driver)-266(that)-267(p)1(r)18(ovides)-267(a)-266(general)-266(interface)-266(for)-267(all)-266(the)-266(Krylov-)]TJ 0 -11.955 Td [(Subspace)-250(family)-250(methods)-250(implemented)-250(in)-250(PSBLAS)-250(version)-250(2.)]TJ 14.944 -11.955 Td [(The)-250(stopping)-250(criterion)-250(can)-250(take)-250(the)-250(following)-250(values:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(else)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(return)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ +/F59 9.9626 Tf -14.944 -18.774 Td [(1)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -23.91 Td [(end)-525(subroutine)]TJ -0 g 0 G - [-525(psb_foo)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 9.963 0 Td [(normwise)-222(backwar)18(d)-221(err)18(or)-222(in)-221(the)-222(in\002nity)-222(norm;)-231(the)-221(iteration)-222(is)-222(stopped)-221(when)]TJ/F60 9.9626 Tf 109.036 -26.864 Td [(e)-15(r)-25(r)]TJ/F93 10.3811 Tf 15.14 0 Td [(=)]TJ/F91 10.3811 Tf 40.62 6.745 Td [(k)]TJ/F60 9.9626 Tf 5.34 0 Td [(r)]TJ/F60 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 246.191 620.108 cm +[]0 d 0 J 0.398 w 0 0 m 74.372 0 l S +Q +BT +/F93 10.3811 Tf 246.316 610.783 Td [(\050)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F60 9.9626 Tf 5.938 0 Td [(A)]TJ/F91 10.3811 Tf 7.442 0 Td [(k)-24(k)]TJ/F60 9.9626 Tf 11.048 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.876 1.96 Td [(k)]TJ/F93 10.3811 Tf 7.376 0 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(k)]TJ/F60 9.9626 Tf 5.44 0 Td [(b)]TJ/F91 10.3811 Tf 4.861 0 Td [(k)]TJ/F93 10.3811 Tf 5.44 0 Td [(\051)]TJ/F69 10.3811 Tf 8.236 6.834 Td [(<)]TJ/F60 9.9626 Tf 11.087 0 Td [(e)-80(p)-25(s)]TJ 0 g 0 G +/F59 9.9626 Tf -235.842 -29.908 Td [(2)]TJ 0 g 0 G -/F54 9.9626 Tf -2.989 -41.729 Td [(Listing)-289(5:)-387(The)-289(layout)-289(of)-289(a)-289(generic)]TJ/F59 9.9626 Tf 149.96 0 Td [(psb)]TJ +/F62 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F60 9.9626 Tf 136.209 -26.865 Td [(e)-15(r)-25(r)]TJ/F93 10.3811 Tf 15.141 0 Td [(=)]TJ/F91 10.3811 Tf 13.446 6.745 Td [(k)]TJ/F60 9.9626 Tf 5.34 0 Td [(r)]TJ/F60 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ ET q -1 0 0 1 266.174 382.258 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 273.365 563.335 cm +[]0 d 0 J 0.398 w 0 0 m 20.025 0 l S Q BT -/F59 9.9626 Tf 269.312 382.059 Td [(foo)]TJ/F54 9.9626 Tf 18.568 0 Td [(r)18(outine)-289(with)-289(r)18(espect)-288(to)-289(PSBLAS-2.0)]TJ -187.985 -11.955 Td [(err)18(or)-250(handling)-250(policy)111(.)]TJ +/F91 10.3811 Tf 273.49 554.01 Td [(k)]TJ/F60 9.9626 Tf 5.439 0 Td [(b)]TJ/F91 10.3811 Tf 4.862 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F69 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F60 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG -/F59 9.9626 Tf 0 -19.609 Td [(==========================================================)]TJ +/F59 9.9626 Tf -208.668 -29.848 Td [(3)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +/F62 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F60 9.9626 Tf 134.486 -26.865 Td [(e)-15(r)-25(r)]TJ/F93 10.3811 Tf 15.14 0 Td [(=)]TJ/F91 10.3811 Tf 15.17 6.745 Td [(k)]TJ/F60 9.9626 Tf 5.34 0 Td [(r)]TJ/F60 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 271.641 506.622 cm +[]0 d 0 J 0.398 w 0 0 m 23.472 0 l S +Q +BT +/F91 10.3811 Tf 271.766 497.297 Td [(k)]TJ/F60 9.9626 Tf 5.34 0 Td [(r)]TJ/F62 7.5716 Tf 4 -1.744 Td [(0)]TJ/F91 10.3811 Tf 4.408 1.744 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F69 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F60 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ/F62 9.9626 Tf -210.391 -29.848 Td [(The)-248(behaviour)-248(is)-248(contr)18(olled)-249(by)-248(the)-248(istop)-248(ar)18(gument)-248(\050see)-248(later\051.)-310(In)-248(the)-248(above)-248(for)18(-)]TJ 0 -11.956 Td [(mulae,)]TJ/F60 9.9626 Tf 32.81 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F62 9.9626 Tf 5.303 1.96 Td [(is)-256(the)-256(tentative)-257(soluti)1(on)-257(and)]TJ/F60 9.9626 Tf 125.144 0 Td [(r)]TJ/F60 7.5716 Tf 4.042 -1.96 Td [(i)]TJ/F93 10.3811 Tf 5.757 1.96 Td [(=)]TJ/F60 9.9626 Tf 11.2 0 Td [(b)]TJ/F91 10.3811 Tf 6.822 0 Td [(\000)]TJ/F60 9.9626 Tf 10.777 0 Td [(A)-42(x)]TJ/F60 7.5716 Tf 12.759 -1.96 Td [(i)]TJ/F62 9.9626 Tf 5.303 1.96 Td [(the)-256(corr)18(esponding)-256(r)18(esidual)]TJ -225.064 -11.955 Td [(at)-250(the)]TJ/F60 9.9626 Tf 27.083 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(-th)-250(iteration.)]TJ -28.305 -17.357 Td [(c)-175(a)-175(l)-174(l)-880(p)-105(s)-105(b)]TJ +ET +q +1 0 0 1 150.28 433.215 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 154.313 433.015 Td [(k)-105(r)-105(y)-104(l)-105(o)-105(v)-238(\050)-156(m)-21(e)-22(t)-21(h)-22(o)-22(d)-218(,)-208(a)-242(,)-255(p)-80(r)-81(e)-80(c)-335(,)-191(b)-206(,)-203(x)-231(,)-234(e)-60(p)-59(s)-293(,)-273(d)-98(e)-97(s)-98(c)]TJ +ET +q +1 0 0 1 352.02 433.215 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 355.983 433.015 Td [(a)-370(,)-283(i)-108(n)-108(f)-108(o)-274(,)-57(&)]TJ -227.086 -11.955 Td [(&)-580(i)-69(t)-69(m)-70(a)-69(x)-313(,)-327(i)-151(t)-152(e)-151(r)-478(,)-281(e)-107(r)-106(r)-387(,)-321(i)-145(t)-146(r)-146(a)-145(c)-146(e)-466(,)-336(i)-161(r)-160(s)-161(t)-496(,)-291(i)-116(s)-116(t)-116(o)-116(p)-407(,)-219(c)-43(o)-43(n)-44(d)-177(\051)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ +/F59 9.9626 Tf -29.002 -25.88 Td [(T)90(ype:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-525(subroutine:)-525(df_sample)]TJ 0 -11.955 Td [(Error)-525(from)-525(call)-525(to)-525(subroutine)-525(mat)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(dist)]TJ +/F59 9.9626 Tf -29.828 -19.349 Td [(On)-250(Entry)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.955 Td [(==========================================================)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ + 0 -19.349 Td [(method)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ +/F62 9.9626 Tf 39.851 0 Td [(a)-193(string)-194(that)-193(de\002nes)-194(the)-194(it)1(erative)-194(method)-193(to)-194(be)-194(use)1(d.)-292(Supported)-193(values)]TJ -14.944 -11.956 Td [(ar)18(e:)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ +/F59 9.9626 Tf 0 -19.349 Td [(CG:)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ +/F62 9.9626 Tf 22.964 0 Td [(the)-250(Conjugate)-250(Gradient)-250(method;)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +/F59 9.9626 Tf -22.964 -15.364 Td [(CGS:)]TJ 0 g 0 G - [-525(subroutine:)-525(mat_distv)]TJ 0 -11.955 Td [(Error)-525(from)-525(call)-525(to)-525(subroutine)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(psb_spasb)]TJ +/F62 9.9626 Tf 29.051 0 Td [(the)-250(Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.956 Td [(==========================================================)]TJ +/F59 9.9626 Tf -29.051 -15.365 Td [(GCR:)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +/F62 9.9626 Tf 30.157 0 Td [(the)-250(Generalized)-250(Conjugate)-250(Residual)-250(method;)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ +/F59 9.9626 Tf -30.157 -15.364 Td [(FCG:)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ +/F62 9.9626 Tf 28.503 0 Td [(the)-250(Flexible)-250(Conjugate)-250(Gradient)-250(method)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 176.854 3.616 Td [(5)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ +/F62 9.9626 Tf 4.284 -3.616 Td [(;)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +/F59 9.9626 Tf -209.641 -15.364 Td [(BICG:)]TJ 0 g 0 G - [-525(subroutine:)-525(psb_spasb)]TJ 0 -11.955 Td [(Error)-525(from)-525(call)-525(to)-525(subroutine)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(psb_cest)]TJ +/F62 9.9626 Tf 33.484 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(method;)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.955 Td [(==========================================================)]TJ +/F59 9.9626 Tf -33.484 -15.365 Td [(BICGST)92(AB:)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +/F62 9.9626 Tf 59.696 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ +/F59 9.9626 Tf -59.696 -15.364 Td [(BICGST)92(ABL:)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(136)]TJ +/F62 9.9626 Tf 65.783 0 Td [(the)-218(Bi-Conjugate)-217(Gradient)-218(Stabilized)-218(method)-217(with)-218(r)18(estart-)]TJ -43.865 -11.955 Td [(ing;)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ +/F59 9.9626 Tf -21.918 -15.365 Td [(RGMRES:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +/F62 9.9626 Tf 52.294 0 Td [(the)-250(Generalized)-250(Minimal)-250(Residual)-250(method)-250(with)-250(r)18(estarting.)]TJ 0 g 0 G - [-525(subroutine:)-525(psb_cest)]TJ 0 -11.955 Td [(Format)-525(FOO)-525(is)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(unknown)]TJ +/F59 9.9626 Tf -77.201 -19.349 Td [(a)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.956 Td [(==========================================================)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 178.414 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -170.787 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 138.701 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 312.397 138.501 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 138.701 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 347.544 138.501 Td [(type)]TJ 0 g 0 G - 0 -11.955 Td [(Aborting...)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 0 -29.397 Td [(Listing)-364(6:)-537(A)-364(sample)-364(PSBLAS-3.0)-363(err)18(or)-364(message.)-651(Pr)18(ocess)-364(0)-364(detected)-363(an)-364(err)18(or)]TJ 0 -11.955 Td [(condition)-250(inside)-250(the)-250(psb)]TJ ET q -1 0 0 1 206.215 153.925 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 99.895 130.091 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S Q BT -/F54 9.9626 Tf 209.204 153.726 Td [(cest)-250(subr)18(outine)]TJ +/F62 5.9776 Tf 110.755 123.219 Td [(5)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(Note:)-310(the)-250(implementation)-250(is)-250(for)]TJ/F60 7.9701 Tf 113.297 0 Td [(F)-31(C)-45(G)]TJ/F93 8.3049 Tf 16.387 0 Td [(\050)]TJ/F62 7.9701 Tf 3.319 0 Td [(1)]TJ/F93 8.3049 Tf 4.085 0 Td [(\051)]TJ/F62 7.9701 Tf 3.32 0 Td [(.)]TJ 0 g 0 G - 55.075 -63.288 Td [(137)]TJ +0 g 0 G +/F62 9.9626 Tf 9.629 -29.888 Td [(159)]TJ 0 g 0 G ET endstream endobj -1779 0 obj +2023 0 obj << -/Length 3570 +/Length 7054 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(8.1)-1000(psb)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(prec)]TJ +0 g 0 G +/F62 9.9626 Tf 24.348 0 Td [(The)-250(data)-250(str)8(uctur)18(e)-250(containing)-250(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 360.068 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(error)-250(code)-250(onto)-250(the)-250(error)-250(stack)]TJ/F54 9.9626 Tf -49.379 -24.942 Td [(c)-175(a)-175(l)-174(l)-874(p)-98(s)-99(b)]TJ +/F67 9.9626 Tf 363.206 658.308 Td [(prec)]TJ ET q -1 0 0 1 200.841 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 384.755 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 204.812 681.187 Td [(e)-99(r)-98(r)-99(p)-98(u)-99(s)-99(h)-232(\050)-266(e)-132(r)-132(r)]TJ +/F67 9.9626 Tf 387.893 658.308 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -22.202 Td [(b)]TJ +0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(The)-250(RHS)-250(vector)74(.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.688 0 Td [(psb)]TJ ET q -1 0 0 1 270.843 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 410.618 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 275.151 681.187 Td [(c)-440(,)-825(r)]TJ +/F67 9.9626 Tf 413.756 588.285 Td [(T)]TJ ET q -1 0 0 1 299.7 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.614 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 303.188 681.187 Td [(n)-50(a)-50(m)-50(e)-276(,)-929(i)]TJ +/F67 9.9626 Tf 422.752 588.285 Td [(vect)]TJ ET q -1 0 0 1 348.561 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 444.301 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 353.087 681.187 Td [(e)-154(r)-155(r)-483(,)-920(a)]TJ +/F67 9.9626 Tf 447.439 588.285 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -317.656 -22.202 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(initial)-250(guess.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.687 0 Td [(psb)]TJ ET q -1 0 0 1 392.305 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 410.618 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 396.74 681.187 Td [(e)-145(r)-145(r)-279(\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -246.035 -27.895 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(err)]TJ +/F67 9.9626 Tf 413.756 518.262 Td [(T)]TJ ET q -1 0 0 1 164.035 613.641 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.614 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 167.023 613.442 Td [(c)]TJ -0 g 0 G -/F54 9.9626 Tf 9.405 0 Td [(the)-250(err)18(or)-250(code)]TJ -0.817 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(r)]TJ +/F67 9.9626 Tf 422.752 518.262 Td [(vect)]TJ ET q -1 0 0 1 155.178 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 444.301 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 158.167 545.696 Td [(name)]TJ +/F67 9.9626 Tf 447.439 518.262 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.888 0 Td [(the)-250(soutine)-250(wher)18(e)-250(the)-250(err)18(or)-250(has)-250(been)-250(caught.)]TJ -12.444 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ +/F59 9.9626 Tf -317.656 -22.203 Td [(eps)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(i)]TJ +/F62 9.9626 Tf 20.473 0 Td [(The)-250(stopping)-250(tolerance.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -22.203 Td [(desc)]TJ ET q -1 0 0 1 154.62 466.194 cm +1 0 0 1 171.218 426.236 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 157.609 465.994 Td [(err)]TJ -0 g 0 G -/F54 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ 0.289 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)]TJ +/F59 9.9626 Tf 174.207 426.036 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(a)]TJ +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 156.284 398.448 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 378.415 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 378.216 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 378.415 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 159.273 398.249 Td [(err)]TJ +/F67 9.9626 Tf 387.893 378.216 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ -1.375 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 139.477 -271.945 Td [(138)]TJ +/F59 9.9626 Tf -258.11 -22.203 Td [(itmax)]TJ +0 g 0 G +/F62 9.9626 Tf 30.436 0 Td [(The)-250(maximum)-250(number)-250(of)-250(iterations)-250(to)-250(perform.)]TJ -5.529 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.569 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F93 10.3811 Tf 27.744 0 Td [(=)]TJ/F62 9.9626 Tf 10.961 0 Td [(1000.)]TJ -77.274 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F60 9.9626 Tf 142.349 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F91 10.3811 Tf 27.743 0 Td [(\025)]TJ/F62 9.9626 Tf 10.962 0 Td [(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -205.961 -22.202 Td [(itrace)]TJ +0 g 0 G +/F62 9.9626 Tf 29.878 0 Td [(If)]TJ/F69 10.3811 Tf 11.007 0 Td [(>)]TJ/F62 9.9626 Tf 14.142 0 Td [(0)-422(print)-423(out)-422(an)-422(informational)-423(message)-422(about)-422(conver)18(gence)-423(every)]TJ/F60 9.9626 Tf -30.066 -11.955 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F62 9.9626 Tf 26.396 0 Td [(iterations.)-310(If)]TJ/F93 10.3811 Tf 56.313 0 Td [(=)]TJ/F62 9.9626 Tf 10.961 0 Td [(0)-250(print)-250(a)-250(message)-250(in)-250(case)-250(of)-250(conver)18(gence)-250(failur)18(e.)]TJ -93.724 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.57 0 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F93 10.3811 Tf 26.796 0 Td [(=)]TJ/F91 10.3811 Tf 11.086 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -109.553 -34.158 Td [(irst)]TJ +0 g 0 G +/F62 9.9626 Tf 19.915 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(r)18(estart)-250(parameter)74(.)]TJ 4.992 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)]TJ/F60 9.9626 Tf 34.613 0 Td [(i)-22(r)-35(s)-25(t)]TJ/F69 10.3811 Tf 17.671 0 Td [(>)]TJ/F62 9.9626 Tf 10.961 0 Td [(0.)-298(This)-214(is)-213(employed)-214(for)-213(the)-214(BiCGST)74(ABL)-214(or)-213(RGMRES)-214(meth-)]TJ -63.245 -11.955 Td [(ods,)-250(otherwise)-250(it)-250(is)-250(ignor)18(ed.)]TJ +0 g 0 G + 139.477 -29.888 Td [(160)]TJ 0 g 0 G ET endstream endobj -1785 0 obj +2028 0 obj << -/Length 1332 +/Length 4489 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(8.2)-1000(psb)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(istop)]TJ +0 g 0 G +/F62 9.9626 Tf 27.666 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(stopping)-250(criterion.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)-351(1:)-351(use)-271(the)-270(normwise)-271(backwar)18(d)-270(err)18(or)74(,)-276(2:)-351(use)-271(the)-270(scaled)-271(2-norm)-270(of)]TJ 0 -11.955 Td [(the)-250(r)18(esidual,)-250(3:)-310(use)-250(the)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm.)-310(Default:)-310(2.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(computed)-250(solution.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.688 0 Td [(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 359.808 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(error)-306(\227)-306(Prints)-306(the)-306(error)-307(s)1(tack)-307(content)-306(and)-306(aborts)-306(exe-)]TJ -24.221 -13.948 Td [(cution)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-900(p)-126(s)-125(b)]TJ +/F67 9.9626 Tf 362.947 558.682 Td [(T)]TJ ET q -1 0 0 1 151.092 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 368.804 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 371.943 558.682 Td [(vect)]TJ +ET +q +1 0 0 1 393.492 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 396.63 558.682 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -317.656 -19.925 Td [(iter)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(The)-250(number)-250(of)-250(iterations)-250(performed.)]TJ 4.434 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(err)]TJ +0 g 0 G +/F62 9.9626 Tf 17.714 0 Td [(The)-250(conver)18(gence)-250(estimate)-250(on)-250(exit.)]TJ 7.193 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(cond)]TJ +0 g 0 G +/F62 9.9626 Tf 27.119 0 Td [(An)-210(esti)1(mate)-210(of)-210(the)-209(condition)-210(number)-209(of)-210(matrix)]TJ/F60 9.9626 Tf 204.999 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(;)-223(only)-210(available)-209(with)-210(the)]TJ/F60 9.9626 Tf -214.444 -11.956 Td [(C)-45(G)]TJ/F62 9.9626 Tf 17.001 0 Td [(method)-250(on)-250(r)18(eal)-250(data.)]TJ -17.086 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-249(as:)-310(a)-249(r)18(eal)-249(number)74(.)-310(A)-249(corr)18(ect)-250(r)18(esult)-249(will)-249(be)-249(gr)18(eater)-250(than)-249(or)-249(equal)]TJ 0 -11.955 Td [(to)-403(one;)-480(if)-403(speci\002ed)-403(for)-403(non-r)18(eal)-403(data,)-441(or)-403(an)-403(err)18(or)-403(occurr)18(ed,)-441(zer)18(o)-403(is)-403(r)18(e-)]TJ 0 -11.956 Td [(turned.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.477 -161.394 Td [(161)]TJ +0 g 0 G +ET + +endstream +endobj +2038 0 obj +<< +/Length 7598 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 14.3462 Tf 150.705 706.042 Td [(12)-1000(Extensions)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-216(EXT)74(,)-217(CUDA)-216(and)-217(RSB)-216(subdir)18(ectories)-216(contains)-217(a)-216(set)-217(of)-216(extensions)-216(to)-217(the)-216(base)]TJ 0 -11.955 Td [(library)111(.)-678(The)-373(extensions)-373(pr)18(ovide)-372(additional)-373(storage)-373(formats)-373(beyond)-372(the)-373(ones)]TJ 0 -11.955 Td [(alr)18(eady)-250(contained)-250(in)-250(the)-250(base)-250(library)111(,)-250(as)-250(well)-250(as)-250(interfaces)-250(to:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -19.102 Td [(SPGPU)]TJ +0 g 0 G +/F62 9.9626 Tf 39.292 0 Td [(a)-296(CUDA)-297(library)-296(originally)-296(published)-297(as)]TJ +0 1 0 0 k 0 1 0 0 K +/F67 9.9626 Tf 178.891 0 Td [(https://code.google.com/)]TJ -193.277 -11.955 Td [(p/spgpu/)]TJ +0 g 0 G +/F62 9.9626 Tf 46.128 0 Td [(and)-430(now)-430(included)-430(in)-431(t)1(he)]TJ/F67 9.9626 Tf 119.328 0 Td [(cuda)]TJ/F62 9.9626 Tf 25.207 0 Td [(subdir)74(,)-475(for)-430(computations)-430(on)]TJ -190.663 -11.955 Td [(NVIDIA)-250(GPUs;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.514 Td [(LIBRSB)]TJ +0 g 0 G +0 1 0 0 k 0 1 0 0 K +/F67 9.9626 Tf 41.514 0 Td [(http://sourceforge.net/projects/librsb/)]TJ +0 g 0 G +/F62 9.9626 Tf 203.983 0 Td [(,)-398(for)-368(computations)-368(on)]TJ -220.591 -11.955 Td [(multicor)18(e)-250(parallel)-250(machines.)]TJ -24.906 -19.102 Td [(The)-318(infrastr)8(uctur)18(e)-317(laid)-318(out)-317(in)-318(the)-318(base)-317(library)-318(to)-318(allow)-317(for)-318(these)-318(extensions)-317(is)]TJ 0 -11.956 Td [(detailed)-299(in)-299(the)-299(r)18(efer)18(ences)-299([)]TJ +1 0 0 rg 1 0 0 RG + [(20)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-299(21)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-299(10)]TJ +0 g 0 G + [(];)-324(the)-299(CUDA-speci\002c)-299(data)-299(formats)-299(ar)18(e)-299(de-)]TJ 0 -11.955 Td [(scribed)-250(in)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(22)]TJ +0 g 0 G + [(].)]TJ/F59 11.9552 Tf 0 -28.94 Td [(12.1)-1000(Using)-250(the)-250(extensions)]TJ/F62 9.9626 Tf 0 -18.964 Td [(A)-279(sample)-279(application)-279(using)-279(the)-279(PSBLAS)-279(extensions)-279(will)-279(contain)-279(the)-279(following)]TJ 0 -11.955 Td [(steps:)]TJ +0 g 0 G + 13.888 -19.102 Td [(\225)]TJ +0 g 0 G +/F67 9.9626 Tf 11.018 0 Td [(USE)]TJ/F62 9.9626 Tf 18.182 0 Td [(the)-250(appr)18(opriat)-250(modules)-250(\050)]TJ/F67 9.9626 Tf 110.036 0 Td [(psb_ext_mod)]TJ/F62 9.9626 Tf 57.534 0 Td [(,)]TJ/F67 9.9626 Tf 4.981 0 Td [(psb_cuda_mod)]TJ/F62 9.9626 Tf 62.764 0 Td [(\051;)]TJ +0 g 0 G + -264.515 -19.514 Td [(\225)]TJ +0 g 0 G + [-500(Declar)18(e)-190(a)]TJ/F60 9.9626 Tf 53.1 0 Td [(mold)]TJ/F62 9.9626 Tf 21.818 0 Td [(variable)-190(of)-190(the)-190(necessary)-190(type)-190(\050e.g.)]TJ/F67 9.9626 Tf 151.361 0 Td [(psb_d_ell_sparse_mat)]TJ/F62 9.9626 Tf 104.607 0 Td [(,)]TJ/F67 9.9626 Tf -319.867 -11.955 Td [(psb_d_hlg_sparse_mat)]TJ/F62 9.9626 Tf 104.606 0 Td [(,)]TJ/F67 9.9626 Tf 4.982 0 Td [(psb_d_vect_cuda)]TJ/F62 9.9626 Tf 78.455 0 Td [(\051;)]TJ +0 g 0 G + -199.062 -19.514 Td [(\225)]TJ +0 g 0 G + [-500(Pass)-289(the)-290(mold)-289(variable)-290(to)-289(the)-290(base)-289(library)-289(interface)-290(wher)18(e)-289(needed)-290(to)-289(en-)]TJ 11.019 -11.955 Td [(sur)18(e)-250(the)-250(appr)18(opriate)-250(dynamic)-250(type.)]TJ -24.907 -19.102 Td [(Suppose)-366(you)-367(want)-366(to)-367(use)-367(the)-366(CUDA-enabled)-366(ELLP)92(ACK)-367(data)-366(str)8(uctur)18(e;)-425(you)]TJ 0 -11.955 Td [(would)-371(use)-370(a)-371(piece)-370(of)-371(code)-370(like)-371(this)-370(\050and)-371(don't)-370(for)18(get,)-401(you)-371(need)-370(CUDA-side)]TJ 0 -11.955 Td [(vectors)-250(along)-250(with)-250(the)-250(matrices\051:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 150.705 120.326 cm +0 0 343.711 225.156 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F54 9.9626 Tf 155.328 667.24 Td [(e)-125(r)-125(r)-126(o)-125(r)-259(\050)-279(i)-146(c)-146(o)-147(n)-146(t)-146(x)-146(t)-280(\051)]TJ +/F102 8.9664 Tf 153.694 334.821 Td [(program)]TJ +0 g 0 G + [-525(my_cuda_test)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(use)]TJ +0 g 0 G + [-525(psb_base_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(use)]TJ +0 g 0 G + [-525(psb_util_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(use)]TJ +0 g 0 G + [-525(psb_ext_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(use)]TJ +0 g 0 G + [-525(psb_cuda_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_dspmat_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(agpu)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_d_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(xg,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(bg)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -21.918 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(xtmp\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_d_vect_cuda\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-3675(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(vmold)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_d_elg_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(aelg)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_ctxt_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -55.433 -27.896 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-6825(::)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(iam,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(icontxt)]TJ + [-525(np)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -32.876 Td [(call)]TJ 0 g 0 G - 139.477 -461.235 Td [(139)]TJ + [-525(psb_init\050ctxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_info\050ctxt,iam,np\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_cuda_init\050ctxt,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(iam\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 151.98 -58.082 Td [(162)]TJ 0 g 0 G ET endstream endobj -1674 0 obj +1929 0 obj << /Type /ObjStm /N 100 -/First 975 -/Length 9667 ->> -stream -1671 0 1673 118 457 177 1670 235 1676 382 1678 500 1679 558 1680 616 1681 674 1675 731 -1685 866 1687 984 461 1043 1684 1101 1689 1248 1691 1366 1692 1424 1693 1482 1694 1540 1688 1597 -1696 1732 1698 1850 465 1909 1695 1967 1700 2114 1702 2232 1703 2290 1704 2348 1705 2406 1699 2464 -1707 2599 1709 2717 469 2776 1706 2834 1711 2981 1713 3099 1714 3157 1715 3215 1716 3273 1710 3330 -1719 3465 1721 3583 473 3642 1718 3700 1723 3847 1725 3965 1726 4023 1727 4081 1728 4139 1722 4196 -1730 4331 1732 4449 477 4508 1729 4566 1734 4713 1736 4831 1737 4889 1738 4947 1739 5005 1733 5062 -1741 5197 1743 5315 481 5374 1740 5432 1745 5579 1747 5697 1748 5755 1749 5813 1751 5870 1752 5928 -1753 5986 1744 6043 1756 6218 1758 6336 485 6395 1759 6453 1755 6512 1761 6659 1763 6777 489 6835 -1764 6892 1760 6950 1768 7097 1765 7245 1766 7393 1770 7541 493 7600 1767 7658 1774 7752 1776 7870 -1771 7928 1772 7986 1773 8044 1778 8140 1780 8258 497 8317 1781 8375 1782 8434 1777 8493 1784 8574 -% 1671 0 obj -<< -/Type /Page -/Contents 1672 0 R -/Resources 1670 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R ->> -% 1673 0 obj -<< -/D [1671 0 R /XYZ 149.705 753.953 null] +/First 976 +/Length 11413 >> -% 457 0 obj +stream +1927 0 1922 58 1932 152 1930 291 1934 434 529 493 1935 551 1936 609 1931 668 1939 762 +1937 901 1941 1046 533 1104 1942 1161 1943 1219 1938 1277 1946 1399 1944 1538 1948 1680 537 1739 +1949 1797 1950 1856 1945 1915 1952 2037 1954 2155 1951 2213 1956 2296 1958 2414 541 2473 1955 2531 +1962 2625 1959 2773 1960 2920 1964 3068 545 3126 1965 3183 1961 3241 1972 3361 1967 3527 1968 3672 +1969 3818 1970 3965 1974 4112 549 4171 1971 4229 1976 4323 1978 4441 1975 4499 1984 4593 1979 4759 +1980 4904 1981 5047 1982 5193 1986 5337 553 5396 1983 5454 1989 5548 1987 5687 1991 5832 557 5890 +1988 5947 1993 6067 1995 6185 561 6244 1992 6302 1999 6396 1996 6544 1997 6689 2001 6836 565 6894 +1998 6951 2004 7045 2006 7163 569 7222 2003 7280 2011 7374 2007 7522 2008 7671 2013 7816 573 7874 +2014 7931 2015 7989 2016 8047 2017 8105 2010 8163 2022 8310 2009 8476 2018 8623 2019 8767 2020 8911 +2024 9056 2021 9115 2027 9262 2025 9401 2029 9545 2026 9603 2037 9710 2030 9903 2040 10079 2031 10254 +% 1927 0 obj << -/D [1671 0 R /XYZ 150.705 716.092 null] +/D [1923 0 R /XYZ 99.895 679.195 null] >> -% 1670 0 obj +% 1922 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1676 0 obj +% 1932 0 obj << /Type /Page -/Contents 1677 0 R -/Resources 1675 0 R +/Contents 1933 0 R +/Resources 1931 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 1928 0 R +/Annots [ 1930 0 R ] +>> +% 1930 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [352.526 426.323 428.58 438.383] +/A << /S /GoTo /D (vdata) >> >> -% 1678 0 obj +% 1934 0 obj << -/D [1676 0 R /XYZ 98.895 753.953 null] +/D [1932 0 R /XYZ 149.705 753.953 null] >> -% 1679 0 obj +% 529 0 obj << -/D [1676 0 R /XYZ 99.895 528.579 null] +/D [1932 0 R /XYZ 150.705 716.092 null] >> -% 1680 0 obj +% 1935 0 obj << -/D [1676 0 R /XYZ 99.895 494.104 null] +/D [1932 0 R /XYZ 150.705 678.98 null] >> -% 1681 0 obj +% 1936 0 obj << -/D [1676 0 R /XYZ 99.895 407.25 null] +/D [1932 0 R /XYZ 150.705 679.195 null] >> -% 1675 0 obj +% 1931 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1685 0 obj +% 1939 0 obj << /Type /Page -/Contents 1686 0 R -/Resources 1684 0 R +/Contents 1940 0 R +/Resources 1938 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 1928 0 R +/Annots [ 1937 0 R ] >> -% 1687 0 obj +% 1937 0 obj << -/D [1685 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 574.778 369.462 586.838] +/A << /S /GoTo /D (spdata) >> >> -% 461 0 obj +% 1941 0 obj << -/D [1685 0 R /XYZ 150.705 716.092 null] +/D [1939 0 R /XYZ 98.895 753.953 null] >> -% 1684 0 obj +% 533 0 obj +<< +/D [1939 0 R /XYZ 99.895 716.092 null] +>> +% 1942 0 obj +<< +/D [1939 0 R /XYZ 99.895 679.441 null] +>> +% 1943 0 obj +<< +/D [1939 0 R /XYZ 99.895 679.657 null] +>> +% 1938 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F102 1016 0 R >> /ProcSet [ /PDF /Text ] >> -% 1689 0 obj +% 1946 0 obj << /Type /Page -/Contents 1690 0 R -/Resources 1688 0 R +/Contents 1947 0 R +/Resources 1945 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 1928 0 R +/Annots [ 1944 0 R ] >> -% 1691 0 obj +% 1944 0 obj << -/D [1689 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [452.361 574.59 528.415 586.65] +/A << /S /GoTo /D (vdata) >> >> -% 1692 0 obj +% 1948 0 obj << -/D [1689 0 R /XYZ 99.895 552.489 null] +/D [1946 0 R /XYZ 149.705 753.953 null] >> -% 1693 0 obj +% 537 0 obj << -/D [1689 0 R /XYZ 99.895 518.014 null] +/D [1946 0 R /XYZ 150.705 716.092 null] >> -% 1694 0 obj +% 1949 0 obj << -/D [1689 0 R /XYZ 99.895 431.16 null] +/D [1946 0 R /XYZ 150.705 679.413 null] >> -% 1688 0 obj +% 1950 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/D [1946 0 R /XYZ 150.705 679.628 null] +>> +% 1945 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F102 1016 0 R >> /ProcSet [ /PDF /Text ] >> -% 1696 0 obj +% 1952 0 obj << /Type /Page -/Contents 1697 0 R -/Resources 1695 0 R +/Contents 1953 0 R +/Resources 1951 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R ->> -% 1698 0 obj -<< -/D [1696 0 R /XYZ 149.705 753.953 null] +/Parent 1928 0 R >> -% 465 0 obj +% 1954 0 obj << -/D [1696 0 R /XYZ 150.705 716.092 null] +/D [1952 0 R /XYZ 98.895 753.953 null] >> -% 1695 0 obj +% 1951 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F102 1016 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1700 0 obj +% 1956 0 obj << /Type /Page -/Contents 1701 0 R -/Resources 1699 0 R +/Contents 1957 0 R +/Resources 1955 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 1928 0 R >> -% 1702 0 obj +% 1958 0 obj << -/D [1700 0 R /XYZ 98.895 753.953 null] +/D [1956 0 R /XYZ 149.705 753.953 null] >> -% 1703 0 obj +% 541 0 obj << -/D [1700 0 R /XYZ 99.895 540.534 null] +/D [1956 0 R /XYZ 150.705 716.092 null] >> -% 1704 0 obj +% 1955 0 obj << -/D [1700 0 R /XYZ 99.895 506.059 null] +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1705 0 obj +% 1962 0 obj << -/D [1700 0 R /XYZ 99.895 419.205 null] +/Type /Page +/Contents 1963 0 R +/Resources 1961 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1966 0 R +/Annots [ 1959 0 R 1960 0 R ] >> -% 1699 0 obj +% 1959 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 442.264 387.374 454.323] +/A << /S /GoTo /D (precdata) >> >> -% 1707 0 obj +% 1960 0 obj << -/Type /Page -/Contents 1708 0 R -/Resources 1706 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [171.257 352.6 177.533 366.303] +/A << /S /GoTo /D (Hfootnote.4) >> >> -% 1709 0 obj +% 1964 0 obj << -/D [1707 0 R /XYZ 149.705 753.953 null] +/D [1962 0 R /XYZ 98.895 753.953 null] >> -% 469 0 obj +% 545 0 obj +<< +/D [1962 0 R /XYZ 99.895 716.092 null] +>> +% 1965 0 obj << -/D [1707 0 R /XYZ 150.705 716.092 null] +/D [1962 0 R /XYZ 114.242 129.79 null] >> -% 1706 0 obj +% 1961 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1711 0 obj +% 1972 0 obj << /Type /Page -/Contents 1712 0 R -/Resources 1710 0 R +/Contents 1973 0 R +/Resources 1971 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Parent 1966 0 R +/Annots [ 1967 0 R 1968 0 R 1969 0 R 1970 0 R ] >> -% 1713 0 obj +% 1967 0 obj << -/D [1711 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [365.487 576.377 443.006 588.437] +/A << /S /GoTo /D (spdata) >> >> -% 1714 0 obj +% 1968 0 obj << -/D [1711 0 R /XYZ 99.895 552.489 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [451.902 505.741 518.96 517.801] +/A << /S /GoTo /D (precdata) >> >> -% 1715 0 obj +% 1969 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [422.264 435.105 489.322 447.165] +/A << /S /GoTo /D (descdata) >> +>> +% 1970 0 obj << -/D [1711 0 R /XYZ 99.895 518.014 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.227 128.475 435.285 140.535] +/A << /S /GoTo /D (precdata) >> >> -% 1716 0 obj +% 1974 0 obj << -/D [1711 0 R /XYZ 99.895 431.16 null] +/D [1972 0 R /XYZ 149.705 753.953 null] >> -% 1710 0 obj +% 549 0 obj +<< +/D [1972 0 R /XYZ 150.705 716.092 null] +>> +% 1971 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1719 0 obj +% 1976 0 obj << /Type /Page -/Contents 1720 0 R -/Resources 1718 0 R +/Contents 1977 0 R +/Resources 1975 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R ->> -% 1721 0 obj -<< -/D [1719 0 R /XYZ 149.705 753.953 null] +/Parent 1966 0 R >> -% 473 0 obj +% 1978 0 obj << -/D [1719 0 R /XYZ 150.705 716.092 null] +/D [1976 0 R /XYZ 98.895 753.953 null] >> -% 1718 0 obj +% 1975 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1723 0 obj +% 1984 0 obj << /Type /Page -/Contents 1724 0 R -/Resources 1722 0 R +/Contents 1985 0 R +/Resources 1983 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R ->> -% 1725 0 obj -<< -/D [1723 0 R /XYZ 98.895 753.953 null] ->> -% 1726 0 obj -<< -/D [1723 0 R /XYZ 99.895 528.579 null] +/Parent 1966 0 R +/Annots [ 1979 0 R 1980 0 R 1981 0 R 1982 0 R ] >> -% 1727 0 obj +% 1979 0 obj << -/D [1723 0 R /XYZ 99.895 494.104 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 573.77 438.184 585.83] +/A << /S /GoTo /D (precdata) >> >> -% 1728 0 obj +% 1980 0 obj << -/D [1723 0 R /XYZ 99.895 407.25 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 517.98 469.357 530.039] +/A << /S /GoTo /D (vdata) >> >> -% 1722 0 obj +% 1981 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [374.822 462.189 441.88 474.248] +/A << /S /GoTo /D (descdata) >> >> -% 1730 0 obj +% 1982 0 obj << -/Type /Page -/Contents 1731 0 R -/Resources 1729 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 272.899 469.357 284.958] +/A << /S /GoTo /D (vdata) >> >> -% 1732 0 obj +% 1986 0 obj << -/D [1730 0 R /XYZ 149.705 753.953 null] +/D [1984 0 R /XYZ 149.705 753.953 null] >> -% 477 0 obj +% 553 0 obj << -/D [1730 0 R /XYZ 150.705 716.092 null] +/D [1984 0 R /XYZ 150.705 716.092 null] >> -% 1729 0 obj +% 1983 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1734 0 obj +% 1989 0 obj << /Type /Page -/Contents 1735 0 R -/Resources 1733 0 R +/Contents 1990 0 R +/Resources 1988 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R ->> -% 1736 0 obj -<< -/D [1734 0 R /XYZ 98.895 753.953 null] +/Parent 1966 0 R +/Annots [ 1987 0 R ] >> -% 1737 0 obj +% 1987 0 obj << -/D [1734 0 R /XYZ 99.895 528.579 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 573.77 387.374 585.83] +/A << /S /GoTo /D (precdata) >> >> -% 1738 0 obj +% 1991 0 obj << -/D [1734 0 R /XYZ 99.895 494.104 null] +/D [1989 0 R /XYZ 98.895 753.953 null] >> -% 1739 0 obj +% 557 0 obj << -/D [1734 0 R /XYZ 99.895 407.25 null] +/D [1989 0 R /XYZ 99.895 716.092 null] >> -% 1733 0 obj +% 1988 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1741 0 obj +% 1993 0 obj << /Type /Page -/Contents 1742 0 R -/Resources 1740 0 R +/Contents 1994 0 R +/Resources 1992 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Parent 1966 0 R >> -% 1743 0 obj +% 1995 0 obj << -/D [1741 0 R /XYZ 149.705 753.953 null] +/D [1993 0 R /XYZ 149.705 753.953 null] >> -% 481 0 obj +% 561 0 obj << -/D [1741 0 R /XYZ 150.705 716.092 null] +/D [1993 0 R /XYZ 150.705 716.092 null] >> -% 1740 0 obj +% 1992 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1745 0 obj +% 1999 0 obj << /Type /Page -/Contents 1746 0 R -/Resources 1744 0 R +/Contents 2000 0 R +/Resources 1998 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Parent 2002 0 R +/Annots [ 1996 0 R 1997 0 R ] >> -% 1747 0 obj +% 1996 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 573.77 387.374 585.83] +/A << /S /GoTo /D (precdata) >> +>> +% 1997 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 498.054 387.374 510.114] +/A << /S /GoTo /D (precdata) >> +>> +% 2001 0 obj << -/D [1745 0 R /XYZ 98.895 753.953 null] +/D [1999 0 R /XYZ 98.895 753.953 null] >> -% 1748 0 obj +% 565 0 obj << -/D [1745 0 R /XYZ 99.895 564.444 null] +/D [1999 0 R /XYZ 99.895 716.092 null] >> -% 1749 0 obj +% 1998 0 obj << -/D [1745 0 R /XYZ 99.895 529.97 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1751 0 obj +% 2004 0 obj << -/D [1745 0 R /XYZ 99.895 441.815 null] +/Type /Page +/Contents 2005 0 R +/Resources 2003 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2002 0 R >> -% 1752 0 obj +% 2006 0 obj << -/D [1745 0 R /XYZ 99.895 409.935 null] +/D [2004 0 R /XYZ 149.705 753.953 null] >> -% 1753 0 obj +% 569 0 obj << -/D [1745 0 R /XYZ 99.895 323.08 null] +/D [2004 0 R /XYZ 150.705 716.092 null] >> -% 1744 0 obj +% 2003 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R /F1 1750 0 R /F96 1154 0 R /F94 915 0 R /F112 1682 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1756 0 obj +% 2011 0 obj << /Type /Page -/Contents 1757 0 R -/Resources 1755 0 R +/Contents 2012 0 R +/Resources 2010 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Parent 2002 0 R +/Annots [ 2007 0 R 2008 0 R ] >> -% 1758 0 obj +% 2007 0 obj << -/D [1756 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [329.163 275.278 335.44 288.868] +/A << /S /GoTo /D (Hfootnote.5) >> >> -% 485 0 obj +% 2008 0 obj << -/D [1756 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 134.696 369.462 146.755] +/A << /S /GoTo /D (spdata) >> >> -% 1759 0 obj +% 2013 0 obj << -/D [1756 0 R /XYZ 150.705 222.691 null] +/D [2011 0 R /XYZ 98.895 753.953 null] >> -% 1755 0 obj +% 573 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F61 1360 0 R /F85 814 0 R /F83 813 0 R >> -/ProcSet [ /PDF /Text ] +/D [2011 0 R /XYZ 99.895 716.092 null] >> -% 1761 0 obj +% 2014 0 obj << -/Type /Page -/Contents 1762 0 R -/Resources 1760 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/D [2011 0 R /XYZ 99.895 444.811 null] >> -% 1763 0 obj +% 2015 0 obj << -/D [1761 0 R /XYZ 98.895 753.953 null] +/D [2011 0 R /XYZ 99.895 444.971 null] >> -% 489 0 obj +% 2016 0 obj << -/D [1761 0 R /XYZ 99.895 716.092 null] +/D [2011 0 R /XYZ 99.895 433.015 null] >> -% 1764 0 obj +% 2017 0 obj << -/D [1761 0 R /XYZ 99.895 222.691 null] +/D [2011 0 R /XYZ 114.242 129.79 null] >> -% 1760 0 obj +% 2010 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F69 1460 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1768 0 obj +% 2022 0 obj << /Type /Page -/Contents 1769 0 R -/Resources 1767 0 R +/Contents 2023 0 R +/Resources 2021 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R -/Annots [ 1765 0 R 1766 0 R ] +/Parent 2002 0 R +/Annots [ 2009 0 R 2018 0 R 2019 0 R 2020 0 R ] >> -% 1765 0 obj +% 2009 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [196.011 499.949 202.985 512.009] -/A << /S /GoTo /D (listing.5) >> +/Rect [342.753 654.503 409.811 666.562] +/A << /S /GoTo /D (precdata) >> >> -% 1766 0 obj +% 2018 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [196.625 344.532 203.599 356.591] -/A << /S /GoTo /D (listing.6) >> +/Rect [393.303 584.479 469.357 596.539] +/A << /S /GoTo /D (vdata) >> >> -% 1770 0 obj +% 2019 0 obj << -/D [1768 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 514.456 469.357 526.516] +/A << /S /GoTo /D (vdata) >> >> -% 493 0 obj +% 2020 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 374.41 409.811 386.47] +/A << /S /GoTo /D (descdata) >> +>> +% 2024 0 obj << -/D [1768 0 R /XYZ 150.705 716.092 null] +/D [2022 0 R /XYZ 149.705 753.953 null] >> -% 1767 0 obj +% 2021 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F69 1460 0 R >> /ProcSet [ /PDF /Text ] >> -% 1774 0 obj +% 2027 0 obj << /Type /Page -/Contents 1775 0 R -/Resources 1773 0 R +/Contents 2028 0 R +/Resources 2026 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Parent 2002 0 R +/Annots [ 2025 0 R ] >> -% 1776 0 obj -<< -/D [1774 0 R /XYZ 98.895 753.953 null] ->> -% 1771 0 obj +% 2025 0 obj << -/D [1774 0 R /XYZ 99.895 411.235 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.493 554.876 418.548 566.936] +/A << /S /GoTo /D (vdata) >> >> -% 1772 0 obj +% 2029 0 obj << -/D [1774 0 R /XYZ 99.895 182.902 null] +/D [2027 0 R /XYZ 98.895 753.953 null] >> -% 1773 0 obj +% 2026 0 obj << -/Font << /F59 812 0 R /F112 1682 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1778 0 obj +% 2037 0 obj << /Type /Page -/Contents 1779 0 R -/Resources 1777 0 R +/Contents 2038 0 R +/Resources 2036 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R ->> -% 1780 0 obj -<< -/D [1778 0 R /XYZ 149.705 753.953 null] ->> -% 497 0 obj -<< -/D [1778 0 R /XYZ 150.705 716.092 null] ->> -% 1781 0 obj -<< -/D [1778 0 R /XYZ 150.705 690.058 null] +/Parent 2002 0 R +/Annots [ 2030 0 R 2040 0 R 2031 0 R 2032 0 R 2033 0 R 2034 0 R 2035 0 R ] >> -% 1782 0 obj +% 2030 0 obj << -/D [1778 0 R /XYZ 150.705 693.143 null] +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [367.891 636.522 495.412 648.582] +/Subtype/Link/A<> >> -% 1777 0 obj +% 2040 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [174.615 624.567 218.45 636.627] +/Subtype/Link/A<> >> -% 1784 0 obj +% 2031 0 obj << -/Type /Page -/Contents 1785 0 R -/Resources 1783 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [191.223 593.098 397.198 605.158] +/Subtype/Link/A<> >> endstream endobj -1793 0 obj +2053 0 obj << -/Length 1526 +/Length 8697 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(8.3)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(set)]TJ -ET -q -1 0 0 1 217.809 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 221.396 706.129 Td [(errverbosity)-190(\227)-190(Sets)-190(the)-190(verbosity)-190(of)-190(error)-190(messages)]TJ/F54 9.9626 Tf -68.95 -24.942 Td [(c)-175(a)-175(l)-174(l)-921(p)-147(s)-146(b)]TJ -ET -q -1 0 0 1 202.736 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 207.181 681.187 Td [(s)-146(e)-146(t)]TJ -ET -q -1 0 0 1 224.391 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 228.836 681.187 Td [(e)-146(r)-146(r)-146(v)-147(e)-146(r)-146(b)-146(o)-146(s)-146(i)-146(t)-147(y)-279(\050)-151(v)-151(\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +q +1 0 0 1 99.895 421.197 cm +0 0 343.711 290.909 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +BT +/F120 8.9664 Tf 112.299 701.446 Td [(!)-525(My)-525(own)-525(home-grown)-525(matrix)-525(generator)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf 0 -10.958 Td [(call)]TJ +0 g 0 G + [-525(gen_matrix\050ctxt,)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(idim)]TJ +0 g 0 G + [(,desc_a,a,x,info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(/=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(goto)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(call)]TJ +0 g 0 G + [-525(a%cscnv\050agpu,info,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(aelg\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(/=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(goto)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 0 -10.959 Td [(xtmp)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x%get_vect\050\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(xg%bld\050xtmp,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(vmold\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(bg%bld\050size\050xtmp\051,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(vmold\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 0 -21.918 Td [(!)-525(Do)-525(sparse)-525(MV)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf 0 -10.958 Td [(call)]TJ +0 g 0 G + [-525(psb_spmm\050done,agpu,xg,dzero,bg,desc_a,info\051)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + -9.415 -32.877 Td [(9999)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(continue)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(==)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 14.122 -10.959 Td [(write)]TJ +0 g 0 G + [(\050)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(,)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [-525(\01542\015)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(else)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 14.122 -10.959 Td [(write)]TJ +0 g 0 G + [(\050)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(,)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [-525(\015Something)-525(went)-525(wrong)-525(\015)]TJ +0 g 0 G + [(,info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(end)-525(if)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -32.876 Td [(call)]TJ 0 g 0 G + [-525(psb_cuda_exit\050\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -78.131 -27.895 Td [(T)90(ype:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(psb_exit\050ctxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(stop)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(program)]TJ 0 g 0 G - 0 -19.925 Td [(v)]TJ + [-525(my_cuda_test)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(verbosity)-250(level)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ +/F62 9.9626 Tf -2.989 -24.267 Td [(A)-222(full)-223(example)-222(of)-223(this)-222(strategy)-222(can)-223(be)-222(seen)-223(in)-222(the)]TJ/F67 9.9626 Tf 212.576 0 Td [(test/ext/kernel)]TJ/F62 9.9626 Tf 80.671 0 Td [(and)]TJ/F67 9.9626 Tf 19.082 0 Td [(test/-)]TJ -312.329 -11.955 Td [(cuda/kernel)]TJ/F62 9.9626 Tf 61.29 0 Td [(subdir)18(ectories,)-409(wher)18(e)-377(we)-377(pr)18(ov)1(ide)-377(sample)-377(pr)18(ograms)-377(to)-377(test)-377(the)]TJ -61.29 -11.956 Td [(speed)-276(of)-275(the)-275(sparse)-276(matrix-vector)-275(pr)18(oduct)-276(with)-275(the)-276(various)-275(data)-276(str)8(uctur)18(es)-275(in-)]TJ 0 -11.955 Td [(cluded)-250(in)-250(the)-250(library)111(.)]TJ/F59 11.9552 Tf 0 -29.243 Td [(12.2)-1000(Extensions')-250(Data)-250(Structures)]TJ/F62 9.9626 Tf 0 -19.001 Td [(Access)-232(to)-233(the)-232(facilities)-232(pr)18(ovided)-233(by)-232(the)-232(EXT)-233(library)-232(is)-233(mainl)1(y)-233(achieved)-232(thr)18(ough)]TJ 0 -11.955 Td [(the)-384(data)-385(types)-384(that)-384(ar)18(e)-385(pr)18(ovi)1(ded)-385(within.)-713(The)-384(data)-384(classes)-385(ar)18(e)-384(derived)-384(fr)18(om)]TJ 0 -11.955 Td [(the)-247(base)-247(classes)-248(in)-247(PSBLAS,)-247(thr)18(ough)-247(the)-247(Fortran)-247(2003)-248(mechanism)-247(of)]TJ/F60 9.9626 Tf 299.187 0 Td [(type)-247(exten-)]TJ -299.187 -11.955 Td [(sion)]TJ/F62 9.9626 Tf 19.098 0 Td [([)]TJ +1 0 0 rg 1 0 0 RG + [(17)]TJ 0 g 0 G - 139.477 -475.183 Td [(140)]TJ + [(].)]TJ -4.154 -11.975 Td [(The)-255(data)-255(classes)-255(ar)18(e)-254(divided)-255(between)-255(the)-255(general)-255(purpose)-254(CPU)-255(extensions,)]TJ -14.944 -11.955 Td [(the)-232(GPU)-232(interfaces)-232(and)-232(the)-232(RSB)-232(interfaces.)-304(In)-232(the)-232(description)-232(we)-232(will)-232(make)-232(use)]TJ 0 -11.955 Td [(of)-250(the)-250(notation)-250(intr)18(oduced)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(21)]TJ +0 g 0 G + [(.)]TJ/F59 11.9552 Tf 0 -29.243 Td [(12.3)-1000(CPU-class)-250(extensions)]TJ/F59 9.9626 Tf 0 -19.001 Td [(ELLP)74(ACK)]TJ/F62 9.9626 Tf 0 -19.001 Td [(The)-190(ELLP)92(ACK/ITP)92(ACK)-190(format)-190(\050shown)-190(in)-190(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-190(6)]TJ +0 g 0 G + [(\051)-190(comprises)-190(two)-190(2-dimensional)]TJ 0 -11.956 Td [(arrays)]TJ/F67 9.9626 Tf 30.302 0 Td [(AS)]TJ/F62 9.9626 Tf 13.165 0 Td [(and)]TJ/F67 9.9626 Tf 19.571 0 Td [(JA)]TJ/F62 9.9626 Tf 13.166 0 Td [(with)]TJ/F67 9.9626 Tf 22.958 0 Td [(M)]TJ/F62 9.9626 Tf 7.935 0 Td [(r)18(ows)-272(and)]TJ/F67 9.9626 Tf 44.005 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 34.087 0 Td [(columns,)-277(wher)18(e)]TJ/F67 9.9626 Tf 72.949 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 34.087 0 Td [(is)-272(th)1(e)-272(maxi-)]TJ -292.225 -11.955 Td [(mum)-211(number)-211(of)-212(nonzer)18(os)-211(in)-211(any)-211(r)18(ow)-211([)]TJ/F59 9.9626 Tf 167.954 0 Td [(?)]TJ/F62 9.9626 Tf 4.424 0 Td [(].)-297(Each)-211(r)18(ow)-211(of)-212(the)-211(arrays)]TJ/F67 9.9626 Tf 108.255 0 Td [(AS)]TJ/F62 9.9626 Tf 12.564 0 Td [(and)]TJ/F67 9.9626 Tf 18.971 0 Td [(JA)]TJ/F62 9.9626 Tf 12.565 0 Td [(con-)]TJ -324.733 -11.955 Td [(tains)-218(the)-217(coef)18(\002cients)-218(and)-217(column)-218(indices;)-228(r)18(ows)-218(shorter)-217(than)]TJ/F67 9.9626 Tf 260.482 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 33.55 0 Td [(ar)18(e)-218(padded)]TJ -294.032 -11.955 Td [(with)-315(zer)18(o)-315(coef)18(\002cients)-315(and)-315(appr)18(opriate)-315(column)-315(indices,)-331(e.g.)-505(the)-315(last)-315(valid)-315(one)]TJ 0 -11.955 Td [(found)-250(in)-250(the)-250(same)-250(r)18(ow)92(.)]TJ +0 g 0 G + 164.384 -29.888 Td [(163)]TJ 0 g 0 G ET endstream endobj -1799 0 obj +2063 0 obj << -/Length 2016 +/Length 4574 >> stream 0 g 0 G 0 g 0 G +0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(8.4)-1000(psb)]TJ +/F62 9.9626 Tf 189.471 698.871 Td [(T)92(able)-250(21:)-310(Notation)-250(for)-250(parameters)-250(describing)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +0 g 0 G ET q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 222.652 684.904 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(set)]TJ +/F62 7.9701 Tf 228.629 678.079 Td [(Name)-3364(Description)]TJ ET q -1 0 0 1 166.999 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 222.652 675.041 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q BT -/F51 11.9552 Tf 170.586 706.129 Td [(erraction)-223(\227)-223(Set)-222(the)-223(type)-223(of)-223(action)-223(to)-222(be)-223(taken)-223(upon)]TJ -43.792 -13.948 Td [(error)-250(condition)]TJ/F54 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-926(p)-151(s)-151(b)]TJ +/F62 7.9701 Tf 228.629 668.216 Td [(M)-5111(Number)-250(of)-250(r)18(ows)-250(in)-250(matrix)]TJ 0 -9.464 Td [(N)-5226(Number)-250(of)-250(columns)-250(in)-250(matrix)]TJ 0 -9.465 Td [(NZ)-4559(Number)-250(of)-250(nonzer)18(os)-250(in)-250(matrix)]TJ 0 -9.464 Td [(A)111(VGNZR)-1739(A)92(verage)-250(number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)]TJ 0 -9.465 Td [(MAXNZR)-1500(Maximum)-250(number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)]TJ 0 -9.464 Td [(NDIAG)-2574(Numer)18(o)-250(of)-250(nonzer)18(o)-250(diagonals)]TJ 0 -9.465 Td [(AS)-4754(Coef)18(\002cients)-250(array)]TJ 0 -9.464 Td [(IA)-4942(Row)-250(indices)-250(array)]TJ 0 -9.465 Td [(JA)-4946(Column)-250(indices)-250(array)]TJ 0 -9.464 Td [(IRP)-4448(Row)-250(start)-250(pointers)-250(array)]TJ 0 -9.465 Td [(JCP)-4411(Column)-250(start)-250(pointers)-250(array)]TJ 0 -9.464 Td [(NZR)-3891(Number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)-250(array)]TJ 0 -9.465 Td [(OFFSET)-2410(Of)18(fset)-250(for)-250(diagonals)]TJ ET q -1 0 0 1 152.113 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 222.652 551.604 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q -BT -/F54 9.9626 Tf 156.605 667.24 Td [(s)-151(e)-151(t)]TJ -ET +0 g 0 G +0 g 0 G +1 0 0 1 247.614 396.819 cm q -1 0 0 1 173.955 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 178.447 667.24 Td [(e)-151(r)-151(r)-150(a)-151(c)-151(t)-151(i)-151(o)-151(n)-284(\050)-296(e)-163(r)-162(r)]TJ -ET +.33653 0 0 .33653 0 0 cm q -1 0 0 1 257.102 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 0 0 cm +/Im6 Do +Q Q -BT -/F54 9.9626 Tf 261.712 667.24 Td [(a)-163(c)-162(t)-296(\051)]TJ 0 g 0 G +1 0 0 1 -247.614 -396.819 cm +BT +/F62 9.9626 Tf 245.769 374.901 Td [(Figur)18(e)-250(5:)-310(Example)-250(of)-250(sparse)-250(matrix)]TJ 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -161.817 -27.896 Td [(T)90(ype:)]TJ + -80.12 -32.171 Td [(The)-289(matrix-vector)-289(pr)18(oduct)]TJ/F60 9.9626 Tf 120.156 0 Td [(y)]TJ/F93 10.3811 Tf 8.719 0 Td [(=)]TJ/F60 9.9626 Tf 12.305 0 Td [(A)-42(x)]TJ/F62 9.9626 Tf 15.697 0 Td [(can)-289(be)-289(computed)-289(with)-289(the)-289(code)-290(shown)]TJ -171.821 -11.956 Td [(in)-365(Alg.)]TJ +0 0 1 rg 0 0 1 RG + [-365(1)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(;)-423(it)-365(costs)-365(one)-365(memory)-366(write)-365(per)-365(outer)-365(iteration,)-394(plus)-365(thr)18(ee)-365(memory)]TJ 0 -11.955 Td [(r)18(eads)-250(and)-250(two)-250(\003oating-point)-250(operations)-250(per)-250(inner)-250(iteration.)]TJ 14.944 -11.955 Td [(Unless)-251(all)-252(r)18(ows)-251(have)-251(exactly)-251(the)-252(same)-251(number)-251(of)-252(nonzer)18(os,)-251(some)-252(of)-251(the)-251(co-)]TJ -14.944 -11.955 Td [(ef)18(\002cients)-225(in)-226(the)]TJ/F67 9.9626 Tf 68.551 0 Td [(AS)]TJ/F62 9.9626 Tf 12.705 0 Td [(array)-225(will)-226(be)-225(zer)18(os;)-233(ther)18(efor)18(e)-226(this)-225(data)-225(str)8(uctur)18(e)-225(will)-226(have)-225(an)]TJ -81.256 -11.955 Td [(over)18(head)-261(both)-261(in)-260(terms)-261(of)-261(memory)-261(space)-261(and)-261(r)18(edundant)-260(operations)-261(\050multipli-)]TJ 0 -11.956 Td [(cations)-250(by)-250(zer)18(o\051.)-310(The)-250(over)18(head)-250(can)-250(be)-250(acceptable)-250(if:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + 12.453 -19.399 Td [(1.)]TJ 0 g 0 G + [-500(The)-289(maximum)-289(number)-289(of)-289(nonzer)18(os)-289(per)-290(r)18(ow)-289(is)-289(not)-289(much)-289(lar)18(ger)-289(than)-289(the)]TJ 12.453 -11.956 Td [(average;)]TJ 0 g 0 G - 0 -19.925 Td [(err)]TJ -ET -q -1 0 0 1 113.225 599.693 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 116.214 599.494 Td [(act)]TJ + -12.453 -19.662 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 17.703 0 Td [(the)-250(type)-250(of)-250(action.)]TJ -9.115 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)-310(Possible)-250(values:)]TJ/F59 9.9626 Tf 179.116 0 Td [(psb_act_ret)]TJ/F54 9.9626 Tf 57.534 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(psb_act_abort)]TJ/F54 9.9626 Tf 67.995 0 Td [(.)]TJ + [-500(The)-273(r)18(egularity)-274(of)-273(the)-274(data)-273(str)8(uctur)18(e)-274(allows)-273(for)-274(faster)-273(code,)-279(e.g.)-381(by)-273(allow-)]TJ 12.453 -11.955 Td [(ing)-247(vectorization,)-247(ther)18(eby)-247(of)18(fsetting)-247(the)-247(additional)-246(storage)-247(r)18(equir)18(ements.)]TJ -24.906 -19.4 Td [(In)-372(the)-372(extr)18(eme)-372(case)-372(wher)18(e)-371(the)-372(input)-372(matrix)-372(has)-372(one)-372(full)-372(r)18(ow)92(,)-402(the)-372(ELLP)92(ACK)]TJ 0 -11.955 Td [(str)8(uctur)18(e)-273(would)-273(r)18(equir)18(e)-273(mor)18(e)-273(memory)-273(than)-273(the)-273(normal)-273(2D)-273(array)-273(storage.)-379(The)]TJ 0 -11.956 Td [(ELLP)92(ACK)-305(storage)-305(format)-305(was)-305(very)-305(popular)-305(in)-305(the)-305(vector)-305(computing)-305(days;)-332(in)]TJ 0 -11.955 Td [(modern)-304(CPUs)-305(it)-304(is)-305(not)-304(quite)-305(as)-304(popular)74(,)-318(but)-305(it)-304(is)-305(the)-304(basis)-305(for)-304(many)-305(GPU)-304(for)18(-)]TJ 0 -11.955 Td [(mats.)]TJ 14.944 -11.955 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F67 9.9626 Tf 110.952 0 Td [(psb_T_ell_sparse_mat)]TJ/F62 9.9626 Tf 104.607 0 Td [(:)]TJ 0 g 0 G - -170.149 -461.235 Td [(141)]TJ + -66.12 -38.412 Td [(164)]TJ 0 g 0 G ET endstream endobj -1805 0 obj +2046 0 obj << -/Length 507 +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/mat.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2068 0 R +/BBox [0 0 438 395] +/Resources << +/ProcSet [ /PDF /ImageC ] +/ExtGState << +/R7 2069 0 R +>>/XObject << +/R8 2070 0 R +>>>> +/Length 3551 +/Filter /FlateDecode >> stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(9)-1000(Utilities)]TJ/F54 9.9626 Tf 0 -22.702 Td [(W)92(e)-323(have)-322(some)-323(utilities)-322(available)-323(for)-322(input)-323(and)-323(output)-322(of)-323(sparse)-322(matrices;)-359(the)]TJ 0 -11.955 Td [(interfaces)-250(to)-250(these)-250(r)18(outines)-250(ar)18(e)-250(available)-250(in)-250(the)-250(module)]TJ/F59 9.9626 Tf 242.009 0 Td [(psb_util_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ -0 g 0 G - -140.39 -580.689 Td [(142)]TJ -0 g 0 G -ET - +xœí\K®,· ÷*j®èÿ™ðØö ¸pìÁívÙ~HŠ‡¬R 'žƒ¤ø·É–ÈsDEIÕþég<ÿ§?Þ¯?|ÛþöúéŽ~Ž0ê{tùãÕz:spMËálÍ @TÔŠžÏÑë^c=kq*¼ ‚jô²°“øxýùÃñ÷ HH¡Ò¨œa†0úä1 +uÖ~|ûõSÉÿøúîÇ8K!-yKdïýŠ9žü©3·œÑÎœµh‘G%Çõ¨sž-f²’â{œÇÌ4j:Ê9ú˜ñˆyfêéòlõœ<ФZÈy>`!‡0Ï1ÔCÎ5œ‰ÛAn“Q@˜}æä°LÌ0~|¾"yŠ“\ô˜Îž;ó=r-áé˜Ô±„väT‡ƒ†³žmÎt¤YÉÛ<|ÊôMª4òÉDbÏ.ƒ¦¤~†|ÒˆM ‘᲋'M•XÎLãB0)"•8SC¡žfƤÑÌl¼" $óçéú =ª÷ñy¿öñ»oÙ"0.@Œö^bLãë9ÐãŠ@•)Rg9öIäèyH.äíJþ>8e ÿmôyäp`tM¾Ž¿h4@{üþ9·9ÐÉÕ>Gn³h\çÕšhŸ/LÄ}¢îy%Ó,£,ÚtéS¤(I˜M_?ÿh=‹öDŽÖ®"ògóùº|\=: +q””_R ÇW4¥Z¯b›gHy­ãøã_y‚ý¦Ö+_¿¾jý+¥Nš¼G!äü´ÿü=%™o~}}1ÿOFÿKɈ—. Jó¼­0n +‚VÒ§ZɼxÍKØJ®í$ã¶LË]÷¨©èA[ +Ä Ý-ª j€ˆ¨) d‹-š“3bFOÆ]±zPá!õ<,î>’Í +-Ë›£O×JXü°j™‰™›a• \«NÔÀæsI•Š žZ<™eU„¢uHª Ír~6d S«RHA¦½ 4cUŸ/ë£wÒ%h›SE²™y¿vWd¸.T K/TÌ Ç +v]¡Žµ?pm.}¯•(_œ¼5´¹fÅ9MÿbqM“ãS/q6†ƒà£j̦Fh÷Ê[‰ÿ$¢ÍÎûµû¢Ç!ðjàšxìD{žTIŠî¶¹\@:-‘T!çØí¹y bšFU£¸Ç,V!†®^a@Ÿ4Ð_Þ¦$2>Ó§Ù:xšé7oר-^brW–_Äû{X•ÕòÎëíŠ$Ä°¤9$Àjâ/býò6ñ n_¼_[×ÏF·å{ÇË\ø’Våê´í¼Žƒ)V¯Ô¦"°šZ‘ÂÌ"Õ¹ÎТV¹¿0 &ïitVÀ¦|ø´ +k¡ aàNB’Ô#yÉB'•„”ŸÂÔÂÞ5eÕå™Ï&ø”f®ºRº@æ’Š¾c²T¸ÏáLž¨ì¡‰ÊA-ä8Vi¯> SEa-€6µçr»F.õ*'‡ÎŽæÁ‹Ø:9%e\ +.Óh¹#ßÔZ55ÌóRoÍ9Vyµ“þðfG»C´Z +-–Ð{•Rf¥¼ë×À¦wôò€=Œ‘fÃÆ÷NЀS‰|ÔdœrËz +ˆGZîÏ/¬ùvª{Í-À…î +ö•úׯ½7üšÆINAJÜX™FQÑÚze•ôšßY¥Ôo¬¿ºà¬ š[(—u_°Í>XÁ¿~ tè½á—ÇqZ‘ +rµçõ“iP·f¾È*^×ò2¦xå˯¹„ä-Rì‚ {åk­|a•/|Xýd(ÐÂp«…ÇŠØS¹IäÂ\ñtvÐ[¨2=„5_¼ð嫼|)g”ù†ºWE/{¡ÐšÝœ™"°€¬Ä6 +´'ÒÒÛY>|¼T¼PhÅøì2YAËê½xšSß[_Š¹ÎFÑÛD”5PhÝc½¥.Ên^e¾¥Uÿh¡ðPìÞá¯ãYŒ8J•b-/göQù•½÷E“ ϦÂ+ù4~ߌ+—©­Rú@‘97àòrA ¹óÛ 00úàùdTþxÕF5°³àh=•Ù-j¥QníWn%È5§ÙÍM²³yVÙ¹©Â«——‹ ·åÕV¨•²ut*_¸©Æ0ªÅ-jcÆQÃïhœ›k4üÐñ[ç†_ó@£?öá +fݸ90jA5Q-ì$8h%¦;eÙ?;5×,d”uåM^§¼,ç*NmRZnò³Ê&L„_(˜uWàæÀ¨´Dµ°“Ð ó‰­}sËÒË©™F©•Ä»qc–»ìÖmo9„öô×ûC„[(€ Ýœ™"@„ÂÚ:?–äŤْ9+§h¸¢éGšnC-¼µèY²Êœ=0U´¬ }õ7Qpö€†&X­ú§²^—„•){‚jó2°s´øH^ˆmÐFŽgmY‰ñÏ0‘¿²Á뙞ޞÅ-¿½£"Óš‹ïRjÚq ;dØW^¬IÞ W3ù:€¨´Š)€W{oð«'1ÒµKöe5´³7=M¡ý\®|îxcWb>iåi&;?Èœ€CZEÖÒLznùa€ ”ã¡©üþo9`1£sÝ~Aò2kÔ0°±ÐÅë‘ÌtßøžØ1M“ÖdŠaŒ57i®%>ÊÎò.(d‚:Ö{ÜÐÌBé£N³`rÇ|&¥5¡`!5šoâsù€Ì¸PX E ;ÝA?”›Þ„ñÛÓ=+[çe{Ždžæ ¼gû4ùµ{o0æªqW-»f%kô×\n,ÛÕâÕ# —`ã´ÞÊR‡xº‡ ;çÂ… òyNS*Oø9eYZ­Eœ\*Yÿ%zB†B6º#ŸÃ'|E` 6 +녜ҺÆËó=4H×jÙ~=ß™µXÀ­ÿ=C¡¹Ý‘ËáÀ“½"°€ …k¼EKâÕ£ß ýrR“÷J¬EJ·“—±O7îäÍ‚îô͇ +´JXØxèóC¹ù[— S¬ë¹Ïæ^zÝ°.¼¯÷ëFÜ$ä 5`2. L£× 0 · æÁî# Z(Dô¿Sð÷žÅjý²¥¬³*'fÖåÃÝÖ;?buÞîR­rœ†þ—* Ýœ—"@„ÂúaËiý(ÿ« o^ÿ/o*o endstream endobj -1810 0 obj +2070 0 obj << -/Length 4553 +/Subtype /Image +/ColorSpace /DeviceGray +/Width 454 +/Height 425 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 454 +>> +/Length 164 +>> +stream +& ÙÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿøÿÀ@ +endstream +endobj +2074 0 obj +<< +/Length 7231 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.1)-1206(hb)]TJ -ET +0 g 0 G +1 0 0 1 154.285 609.491 cm q -1 0 0 1 144.589 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 148.175 706.129 Td [(read)-206(\227)-207(Read)-206(a)-207(sparse)-206(matrix)-207(fr)1(om)-207(a)-206(\002le)-207(in)-206(the)-207(Harwell\226)]TJ -21.381 -13.948 Td [(Boeing)-250(format)]TJ/F54 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-865(h)-90(b)]TJ -ET +.52 0 0 .52 0 0 cm q -1 0 0 1 144.379 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 0 0 cm +/Im7 Do +Q Q -BT -/F54 9.9626 Tf 148.265 667.24 Td [(r)-90(e)-90(a)-90(d)-223(\050)-167(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-871(f)-97(i)-96(l)-96(e)-96(n)-96(a)-97(m)-96(e)-367(,)-791(b)-206(,)-919(m)-143(t)-144(i)-143(t)-143(l)-144(e)-277(\051)]TJ 0 g 0 G +1 0 0 1 -154.285 -609.491 cm +BT +/F62 9.9626 Tf 152.938 587.573 Td [(Figur)18(e)-250(6:)-310(ELLP)92(ACK)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -48.37 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G +/F59 8.9664 Tf -16.48 -31.498 Td [(d)-11(o)]TJ/F62 8.9664 Tf 17.426 0 Td [(i)-243(=)-89(1)-178(,)-98(n)]TJ -5.537 -10.959 Td [(t)-168(=)-32(0)]TJ/F59 8.9664 Tf -1.13 -10.958 Td [(d)-11(o)]TJ/F62 8.9664 Tf 17.682 0 Td [(j)-272(=)-89(1)-177(,)-121(m)-32(a)-32(x)-32(n)-32(z)-32(r)]TJ -5.792 -10.959 Td [(t)-734(=)-734(t)-734(+)-1289(a)-92(s)-226(\050)-236(i)-381(,)-358(j)-342(\051)]TJ 85.313 -2.332 Td [(*)]TJ 5.293 2.332 Td [(x)-176(\050)-288(j)-156(a)-289(\050)-236(i)-381(,)-358(j)-361(\051)-178(\051)]TJ/F59 8.9664 Tf -102.419 -10.959 Td [(e)-19(n)-20(d)-630(d)-11(o)]TJ/F62 8.9664 Tf 0.022 -10.959 Td [(y)-156(\050)-288(i)-288(\051)-730(=)-734(t)]TJ/F59 8.9664 Tf -10.782 -10.959 Td [(e)-19(n)-20(d)-630(d)-12(o)]TJ 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.185 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf 16.498 -17.519 Td [(Algorithm)-250(1:)]TJ/F62 9.9626 Tf 60.055 0 Td [(Matrix-V)111(ector)-250(pr)18(oduct)-250(in)-250(ELL)-250(format)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 344.406 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 99.895 316.473 cm +0 0 343.711 126.526 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 347.544 442.084 Td [(type)]TJ +/F102 8.9664 Tf 112.299 432.339 Td [(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ +0 g 0 G + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(2)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.956 Td [(attribute;)-361(will)-324(be)-324(al)1(located)-324(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side,)-250(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ + [-525(psb_d_ell_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(mtitle)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 9.414 -10.959 Td [(!)]TJ 0 g 0 G -/F54 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(A)-337(charachter)-338(variable)-337(of)-338(l)1(ength)-338(72)-337(holding)-338(a)-337(copy)-338(of)-337(the)-337(matrix)-338(title)-337(as)]TJ 0 -11.956 Td [(speci\002ed)-250(by)-250(the)-250(Harwell-Boeing)-250(format)-250(and)-250(contained)-250(in)-250(the)-250(input)-250(\002le.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(iret)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(ITPACK/ELL)-525(format,)-525(extended.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -184.274 Td [(143)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F102 8.9664 Tf 0 -21.918 Td [(integer)]TJ +0 g 0 G + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(irn\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(ja\050:,:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(idiag\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(val\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -21.918 Td [(contains)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 9.414 -10.959 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.958 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_d_ell_sparse_mat)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +/F59 9.9626 Tf -12.404 -39.81 Td [(Hacked)-250(ELLP)74(ACK)]TJ/F62 9.9626 Tf 0 -19.057 Td [(The)]TJ/F60 9.9626 Tf 20.491 0 Td [(hacked)-383(ELLP)129(ACK)]TJ/F62 9.9626 Tf 76.975 0 Td [(\050)]TJ/F59 9.9626 Tf 3.318 0 Td [(HLL)]TJ/F62 9.9626 Tf 20.473 0 Td [(\051)-383(format)-382(alleviates)-383(the)-383(main)-383(pr)18(oblem)-382(of)-383(the)-383(ELL-)]TJ -121.257 -11.955 Td [(P)92(ACK)-303(format,)-316(that)-303(is,)-316(the)-302(amount)-303(of)-303(memory)-303(r)18(equir)18(ed)-303(by)-303(padding)-302(for)-303(sparse)]TJ 0 -11.955 Td [(matrices)-250(in)-250(which)-250(the)-250(maximum)-250(r)18(ow)-250(length)-250(is)-250(lar)18(ger)-250(than)-250(the)-250(average.)]TJ 14.944 -12.003 Td [(The)-190(number)-190(of)-190(elements)-190(allocated)-190(to)-190(padding)-190(is)]TJ/F93 10.3811 Tf 207.724 0 Td [([)-24(\050)]TJ/F60 9.9626 Tf 7.402 0 Td [(m)]TJ/F91 10.3811 Tf 9.384 0 Td [(\003)]TJ/F60 9.9626 Tf 6.823 0 Td [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F93 10.3811 Tf 34.072 0 Td [(\051)]TJ/F91 10.3811 Tf 5.658 0 Td [(\000)]TJ/F93 10.3811 Tf 9.702 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(m)]TJ/F91 10.3811 Tf 9.384 0 Td [(\003)]TJ/F60 9.9626 Tf 6.972 0 Td [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F93 10.3811 Tf 31.103 0 Td [(\051)-289(=)]TJ/F60 9.9626 Tf -347.317 -11.955 Td [(m)]TJ/F91 10.3811 Tf 9.436 0 Td [(\003)]TJ/F93 10.3811 Tf 6.876 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F91 10.3811 Tf 35.508 0 Td [(\000)]TJ/F60 9.9626 Tf 9.904 0 Td [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F93 10.3811 Tf 31.103 0 Td [(\051)-23(])]TJ/F62 9.9626 Tf 9.227 0 Td [(for)-196(both)]TJ/F67 9.9626 Tf 36.586 0 Td [(AS)]TJ/F62 9.9626 Tf 12.41 0 Td [(and)]TJ/F67 9.9626 Tf 18.816 0 Td [(JA)]TJ/F62 9.9626 Tf 12.41 0 Td [(arrays,)-207(wher)18(e)]TJ/F60 9.9626 Tf 61.626 0 Td [(m)]TJ/F62 9.9626 Tf 9.825 0 Td [(is)-196(equal)-195(to)-196(the)-196(num-)]TJ -258.126 -11.955 Td [(ber)-197(of)-198(r)18(ows)-197(of)-197(the)-198(matrix,)]TJ/F60 9.9626 Tf 110.796 0 Td [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F62 9.9626 Tf 35.914 0 Td [(is)-197(the)-198(maximum)-197(number)-197(of)-198(nonzer)18(o)-197(elements)]TJ -146.71 -11.956 Td [(in)-220(every)-220(r)18(ow)-221(and)]TJ/F60 9.9626 Tf 76.764 0 Td [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F62 9.9626 Tf 33.173 0 Td [(is)-220(the)-220(average)-220(number)-221(of)-220(nonzer)18(os.)-300(Ther)18(efor)18(e)-220(a)-220(single)]TJ -109.937 -11.955 Td [(densely)-250(populated)-250(r)18(ow)-250(can)-250(seriously)-250(af)18(fect)-250(the)-250(total)-250(size)-250(of)-250(the)-250(allocation.)]TJ 14.944 -12.003 Td [(T)92(o)-385(limit)-384(this)-385(ef)18(fect,)-418(in)-384(the)-385(HLL)-384(format)-385(we)-384(br)18(eak)-385(the)-384(original)-385(matrix)-384(into)]TJ -14.944 -11.955 Td [(equally)-283(sized)-284(gr)18(oups)-283(of)-283(r)18(ows)-284(\050cal)1(led)]TJ/F60 9.9626 Tf 163.395 0 Td [(hacks)]TJ/F62 9.9626 Tf 21.758 0 Td [(\051,)-292(and)-283(then)-283(stor)18(e)-283(these)-284(gr)18(oups)-283(as)-283(in-)]TJ -185.153 -11.955 Td [(dependent)-304(matrices)-305(in)-304(ELLP)92(ACK)-304(format.)-473(The)-304(gr)18(oups)-304(can)-305(be)-304(arranged)-304(select-)]TJ 0 -11.955 Td [(ing)-253(r)18(ows)-252(in)-253(an)-253(arbitrarily)-253(manner;)-254(indeed,)-253(if)-253(the)-252(r)18(ows)-253(ar)18(e)-253(sorted)-252(by)-253(decr)18(easing)]TJ 0 -11.955 Td [(number)-256(of)-255(nonzer)18(os)-256(we)-255(obtain)-256(essentially)-255(the)-256(JAgged)-255(Diagonals)-256(format.)-327(If)-255(the)]TJ +0 g 0 G + 164.384 -29.888 Td [(165)]TJ 0 g 0 G ET endstream endobj -1817 0 obj +2048 0 obj << -/Length 4948 +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/ell.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2085 0 R +/BBox [0 0 447 205] +/Resources << +/ProcSet [ /PDF /ImageC /Text ] +/ExtGState << +/R7 2086 0 R +>>/XObject << +/R8 2087 0 R +>>/Font << /R9 2088 0 R/R11 2089 0 R>> +>> +/Length 2281 +/Filter /FlateDecode +>> +stream +xœÝYËŠ%¹ÝçWäÎU‹’õ~,ÛØoº§À˜Á«kOý6=^ø÷!Å )E—™M3Pôâv…â)ŒÌúrZãNËÿä÷ö8~ÿ©œŸÿs|9Zñ&”vúêM +î|-7¬;}¡ßâΖ‹)…Äd²_Dg|¨çí³©]]¶ç`l©§:HÍ4Çúd(f²)‰Ö›ñŽÅd)´Lš"é×bZ^š3ÅÍý-’ßÕ~«Æ¦%€ÖLŽuyf ˆ¤ ¨\Hx‡ÄPiGZb¤_NIˆÈIK–P@``kÃMã8¯Ø–Æ8N¼6tÂ%Ê3L1V“óì‹‹Ñø®-»£5Iý¹@ñøº´Å…h\ñڼɶ!%ˆ³/ª 5QRµÅ…ÔUBªk€*#HÛ‘?H…f[µÞùÙ—cÇ‘ƒˆ³-ª€"ÀÀÖ„ÙNª¸õ¾PÚÞYíC°&²i½»]ï‹3¹»D_(Ô®—QveíŒ3©¶ª¡j¥ÂJVggTenº@in‚Ô;¤Y¢iŠ­‚ø@fs¸ùa¹3Ñg]FgsTAê ¶VÜŽ¿žÿ¢òÚÓ“+nßãpÖNñ>Ä.Pˆ÷®;¥Ÿú~R·‘À@»c,S¼‘õ ócß—3‘õίޓÞKH…NùK®ÄR1 Ò­çÿ}|¤_Ži½,¼K@4y²ÉtҔÁ[Š³©a>¸3†˜-Ý0'‰ÑÄI¬$W²³Poˆ%Rè*NÕ¿Ú§†¼ø¯‘ÎÓŸÈK@£XÐ ÊŠð¦F(¼ª9(ój–ScA-\úpCc&à”¼¤2ˆkƒÙÔ@ç=ØWZDD ND bEN“z‘õÔU™zÙV£®ˆ`T}ÆY@3”íZa_YQÞÔ…W5e^MrjŒ"¨…K´-“wQU0ªFš·œ¥ˆ°šöEX>•5(!N bEZ“z‘øÔ¨…Q¹Å…”Vc¡1âÍ,2ófº‹ÉŠˆ¼‰Á«Èb2/ò\4z¦…K/ñ¸2oˆmeÞ.N®íÊ;õzç‹©îW0´¯4Lè¯eð®úÿI¼û GÌ,Nûÿ{PeZ®D‡>ÝùR2³áƒ…ûÿïÇ÷$]×.‹¹¬‹,-‹­­‹,-‹.¸uµ‹ër¾„ÔÅu¹]¢êâ²ìRÎðŒNUšJ¬bÏ’Ö%‘Æ’³e]ƒ(‹Ñ^E”Åâ/‹"ö "Y¡–EÛ(:ß{œ/ȪUâ®dÕr63¸h¹h…¶k rÑ¢[»i ÒµøÄøäø8ÑÜ i ®Ù®Z‚¬Z9úb ÈE+ò#÷¢%ÈE«Ô-. «V¤Ú\m¹hE¿ÅdÕ¢7Rëyl´Æ6ky`d(µTÎ_>Ó”äÆe”ŸÛãüÃ+]@zšÇóõÇc¼ÎòÀC׸ËÆl’¥¥Çñäž_&û‰ž_6Ó4å™äõ²xnôë_Ž'ÿü»ãeÑõüÔº¡ë:9¶Fæ=Q/xؾ±_ýÜã ¼•†Ý¾Pyá…êN†ŸhÇž(~Jò7õ+n$Ö­ÑLÜxxÑ„8CÛÍyS¨ÚׄÆÒ ]¨¾f;XûÓkL'/ƒ<íùË?O¦G7øîíž}"´Ö„ÄÃ5ž^ B‡Üò|IgÊñ8D7†*ãm‘7Ó!ßGó&?-8ÇkšX˜òðA;€doië©r¦¹®L"ßBR,ìyÜèÁÔôùÈ.~‹²ô-ü &gjg*±ûªŽêR㙼'ƒS¾|÷È$j­¡ìöI]žVè*ÉîFÇ„Ÿ#0ùv Xöoñߎß_JÜ«F>,¿kV@V¥w‚è½FÍüjý’U¡Q‹/Bñ ö‹8Ý°°Q‹ý™¢!‹=îÕ»L‹ûå|L†»O/ÉsbŠøj p4°žbÍ&z½p‰î®ŠLÖf.(Åí$}6Os¸XÈ…_­\@¤ ƒ*HŒj`Ë‚[ö^3[oÙÌM §¡Ëq™¹á<‘ã†ýÕ­ˆ Û·ØŸ‰! öÖ[ö®Òâ~ UμHX`ZD .žy Y*Ç~ˆêTD%»4ÌÏ´‰û·ø—§×ûI‰û„ùcf¥ˆ„… QcÆYHC¦ É*cJRÒ€G ;É¿õ©˜Ï@3~ýZL²kÄÅŽÿ8EÜóú8~xúðìÓ÷ç‡OŸž_([zR6 +Íy-?ýíùï¯ßñGz·5®„ùËéOß}àMžså¾Atû·ÔÇÿ|THF +endstream +endobj +2087 0 obj +<< +/Subtype /Image +/ColorSpace /DeviceGray +/Width 510 +/Height 227 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 510 +>> +/Length 48 +>> +stream +ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ;Y¯ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿà +endstream +endobj +2091 0 obj +<< +/Filter /FlateDecode +/Length 171 +>> +stream +xœ]Mƒ F÷œ‚àOÛĸ±mš¶Àa0,‚¸èí; vÑ„ß›|ˆ~¸ÖD.ÁÁ #×ƪ€‹[ q2–•WâNy‡Yz&ú›ôïGNÔßåŒâYžóM¹9à.^i'dmQt­ÖC«þžªMõž¬)YŸ:2èDØ^26Y?‚iRªt4à°†€6æÞ¹Wêc,þ¾æO§Å¾=+V +endstream +endobj +2093 0 obj +<< +/Filter /FlateDecode +/Length 191 +>> +stream +xœ]= …{NÁ Ñ™ahbc¡ã¨ °d( !…·w!‰…ÅÛáƒ}ûC3\/Wï2m)èdj7 –°& t„ÉyÂ[jœÎ;Õ¨gI3ÜT|"PL»ñ]ÍÐ> +stream +xœÍzy|TU²pÕ9÷ö’îNwÖî$$Ý›ÎÖ Y€Ä–ܬ, $,B‚b:@@0$(&¸¡ Oq‚‹C§#ØŸDŸ:ƒ:Ž3nÏ‘8â¸Ìd†qÐGHuo”ï7¿÷{ïû}¼{RUçTÕÙêÔ©sN ˜ 8ÔÕÎÏÉõË<.xvC +œÇIð + Ã,Ø¥P;a:œ†#ðM@‚ +8.´ƒ*°¢Ãp¬ƒÏ`Ò¡>ÆHj§Ú!Š‚_®†{ƒÇI+ Êág0„«q>äP~ËB7õÜ+¤|ŸJOÁg˜€”ûD@tÿA$¬‚7‚ B3ìÇMø%8Á Û„¡7x\Gá·XM¹Ù°A|_VS­gЊÃÁ³ÁÏá%¡…Zºî¥ûa˜Mäåb?8 ®…9ÐDÒÛàŒÂI\¦Ë‚w?|ÍÜìu®¥q¸a&4Âý°‡¬ñ.œƒoЀ…ø¤ô6þY|ŸÆV ]°‘üê)²Þ~8ÇqNbVf%kY!®#Yì¥þá VcãË|¯˜;VŒÆ?!êi„»áeêãæ’õÀ“y§$tŠy—¶Ð —Á“pÞ¦q|Lvÿþ™”>a·³îà¢ààg4Øa*Ì…ÅÐëáxšVõxþŠß3=iž^7Šçƒ’mS¡ŒÆ^KÚó©ím´J~Pz—fšÅTœƒópöá#Àð¦aN¶–}Å}üMþ‘0YƒÅÔR,$Q¿,‚VZÛÉÚÒ|Àkp +c0³iFïRýoÙ5¬‚Ò3ì4û˜ßÍû„‹â=c#cû>Ø Zò²éd‡.xŽ¬ðŒ¥1dà*ìÀOiä;Øó<œ[¸Ä y)_Àø½|'ÿÿ•°N8(|(Λăڦ±›ÇÞVï"[ hh\i0…üg9yÓM4¾vJë`l^x€üåA臃4ï“p +~ ¿ƒ?Ñ +:iÌ+©÷5äuwã”ÇCø2¾†§ðüVI,™R:›ÌJX9«b+ØÝ”v²3ì]öŸÀ—ònÞCi?Æ?@„ ˜Gi†¸MܯyS›®¡mÖ½uqôR楆KÁXüØõcŒ½<öypapßÙ0‘Fº•Fù8ùà^JÏ‘'ƒ×á-xOë×ÈP$·¡DÞE«V‚Óq&¥Ù8—Òu”ábJMØŒ­”º±ïÀ;ñ.¼VÓc4·½ø,£ôQú-žÅ?àWø5#'fœ¼ÙÅÒX+¢™–³é¬–Í£´‚µQjgëØzZ¡ýlgïò(îâÙ¼‰¯åóŸñWøoøw²„Á#,Vw +§…·…÷…ïE»X)¶Š»ÄW4 šÍušUšÇ4G4_h.j5Ú:m³v“ö7Ú ÎEÑêç4ï£ðã/Gs;ÄháVv–ö…·‹[ñ:²˜†-à«ùü×âr<Ïø!öò•ü¦à3¼Šýƒ·áBv“¹],æËa;ñ û„]`Ÿ 1¸€}‰éÂ¿á ¬—3Ò‰øŽ#Ü)~ÀÞƒb¶‡ÙküN~gðß¡XÜ…gÅ]ìmp#, +ÎÒ®ÞÊ¥J¿b+Ù6¨ +Äïa%ÙýYñV²÷4v/fòß»à3.±¿áy|„¢Æ/q–ÂndEx"î%L‚Q\ íø0Èx‡@<À÷c 3Òjù˜ §Ð1öKîÄßð0hPƈ©,ëØyvQs†"R”ø5lDŽ¹ä;—¿1¸™vÀN–F1­’¢É;˜6x”âý…±•ˆ-¾/n#?Ûó`äÂö&ÓÞøŒR=Üy0D>x/ä²Ç`S°—QÜŸMñ“AWA(ZZilÝt^IJdŠ…Ôë?(þ¿AQ¿ÿ · ƒvÖ0¤ Šd»PI‘ÉKñw¥e°„JOƒš£â;P‹VÁ1¶‹¼ü#¸‘ÎœO©ÿxðÐøÃ!‹Fí È¼–j<96dJ÷À›È`3yíó:aEÞG‚«h†+錪¡3ñ¬ > +å´vó‚w·AcpOðXóƒ(þ®úa2lØBÑ-PŒ=…¯ÒyôŸ¸âö øâ‘ mð¥ŸÑˆ¦‰' WxbgIp{ð·CöH& 5Ó)zÖÀŸÉn3ø0äÍaÁ*ÞN'ÔY˜Ü´c´WSä}öjEŠ==$î•e¹dÚµžkŠ‹¦N™\XŸ7)7gbv–;3#=-Õ•"%;ö¤Ä ñq6kltTd„Ån2Âô:­F8CȪ”ª¼_ª×'¤J3fd+e©‰M?bx}bU]­ãsxU5ÇÕš2i.ÿ¿4å¦|E-x²³•’Ã÷Ë +ÉÀÅsë)…ÔàðªùÙj~‡š7QÞé¤ +ŽJ[k…Ç^G¥¯j}ko¥·‚š0„•Kå-aÙY0f ¬r>«Ô>€Öi¨f˜µ²x€ÎDƒòÅK•¾8©B»*›–ùêæÖWV$8 ÙY>,_*5û@*ó™Ýª +”«Ýø4å>­Úc¥2ØæÈîÝ°@³×m\&-kº¡ÞÇ›”>"ÜÔo…Ϻñœí‡"5Y^¿õÇÒÞ[i[éPŠ½½[¾þ¹õ?–:ÜÐ@mø˜«ÊÛ[Eo'VÏwP_ìî†zÞM:”y(s +Í®EªT8ÞUŸ^*“Z{Wyiaâ{}0oƒÓ/Ž@|¥£wA½äô•$H M¢¡wÞ†Á8Ùwµ$;kÀ2ë@¸y·Û—™©8ˆ¶œV”Æ8M-fg­0ŸÔnq!óAÙ¶©¡8‡Œït*Ë»- C3|=sëCe4'øAÎq7ø˜W‘ _–Ä\§Hz.K®T÷JäÇσò²ˆñéR¯ü™-±Q•­Å>Œý/Ä-!yõ|©zîâzGe¯wÜ¶Õ ®*…äS¯ÈÆsÁ}‚‹,5S"×›·¸^aП誒*WzgÐV£1ú¢ÊëykåXW›"ÿ½áJËJ¡Þ¨´%¸4ªÿ/ huäÀ*U>‹wF7„9ÿÍJày¥–J~¨6>'_±ûêò5W•¯ž±—Ó€…TV½`qooØU²* +V½½U’£ª×ÛÛö4K‹Ô{œ×óúÞöJïå凶%øª¶7Ð$Z±8[¹#‚Ö9V ‹,ð}çØC0Bpž@€6¾º úŽ`&l'È!¨U8ü ?HãÜKõÍ„sÚúXÀŸ#þM +æø*:í|;=Lcˆnã©ô§Dã‰>M|z>ð=TVèîñòDùOÆùS9–ècãôQâ'}D}ðÚùÃãåõ¼K­×9Nûy‡?Én)M"¹ƒ —€Sn'åv’évR #]ìV«= Í#º&DÉ\›ýNI]£ÍƒÖ¸¼~2éf2ýf²Üf²ÜfH´é²Î¦N6ßD:›Hgél"«äòꯃ [œìÞAvWø>ÂÃgTþ]„wô+%~ Ù1ƒFu_åO·““­,’óJNÐ=©Ùåƒq‰y}?”ôaŠ# §fE·E•¶ ê +·e0>1DIë¦Òp¾n#`M8… € ‚@àKý)9ö!>Öè@·w³nÞ-t‹BnFžäyP§rÉHž RÈ°7zpŠWß®ïÑs‹Þ¡ÏÕËú:½ØFO¢>Îí<‡—ðZÞÈÅ@pد-Î'"O×çï0ô|†aÃèÓ kÎhF4ç5¢C“«‘5u¯¦]Ó£Ù¡é×èwhvh™×Ðnè1p‹ÁaÈ5Ȇ:ƒh×béݼYÙ´„-í;²q#ñüF‚FZF2ÅÄÂ@% ÁÊ©d&=3陉k&®™¸@X‘Ôx ÚÇ¥š+’ËuýóŠ„ ¤áÄ 'ÛŽ>¯äfQÉD%•L¤u†]¤Z;ê¸Ê! ¯!|Y–;.÷hTùyUç²LV겋rSÚpú2°?wd ì))Í““ EFF6J®ÆôƽB›ÔæjKoÛ+ÔJµ®ÚôÚ½B‰Tâ*I/Ù+äH9®œôœ½‚]²»ìéö½B_Í‘š“5§k„Æš¶šî>…–nÐïÎÍSi²K¡GýqñyS̥ײ#4F» Îp0¶ä”´ˆìˆÊ=LÜÃÄ= µ"Õ:¬„Âöq™Âß­Ê”œ"gWÉ9Mþ¿8¿¶´†Ân#ÁnNm"ù!U;”;¢ò}„GT~í¸~¿ÊW´ì—ë)Ap±îÓ6\ %í"œæ‹à,µNØNÐNp„@à‹)-â‹ØaJ‡Ø!ž%›&ÅØ!6–N–È¥ÔÂŒä &< âÇT|ŸŠKTœ"‡Ï2};ËôÒ,Ó=³Li”aét™p§Š²¡Ôô|©©¶Ô”Qj¢Ö¬à‹Q±FÁøGÏQq–í4}ç4ýÍiú«Óô”Ó´ÖiºÖ©Ô›@{ØÄ¢UlP0=RÐ@%»7€° žTaÝ <îŽbÎÝ÷'(tÓÝ÷74`µox)T7;|ßΧy„Ñ%U”Êl»¾ÄV9-¢¨ªâ_ ï8vÿðÙÜ?þl‰¾Gªç×ûžKlðå)™`bCµoºò,<ÎÖ²¶ÊŠã¬]! õÇq#[[9OáãÆŠ†+jÌÚI < +QÔ!YQƒdTÕjT5rÓäÊŠääÒ+8SQ"÷yEUZj+…º ¶êBj, RÔ¶RX’¢FþjÌüãÆŒ€fµ1³ÔÆ&(J.©d¹•).RpMQÅK®ÐpÀ¥öãµÄtÒC:äã:LG:îÿŸ_KÙÿ@›>Z¶Tyœ{¥Ê¯oÛúV›¯§ÙáXöÑø«=ÕÛ¼´U¡M-¾¤– +ß2©Â1дô_ˆ—*â&©b–V.¨X*·Tø›ä¦J©©¢ap_wyõU}Ýw¥¯òîÑX·ÒX¹Ò×¾ê!®VÄû”¾ª•¾ª•¾öÉûÔ¾ªç•au]ý€Êèu§ÒAf£ýàMp6”ÅZÚ§©›ã§íö„!èØ2ÐËØ(•ùLŠ(»4»TÑîTDáÊÏ/ã"Ûí×8†ðÀ¸ÈBì© Ü`«\Yq寣££S®.7áÎ.›Êë¤Mëœ_í«R‹Ÿ§Ò'{+PYŽ®ñ¯¼^¶œôœö°6O·§Ï³ÛsÄ#vu5;òdòéd֘ܖÜÜ—¼;ùH²FÜPLöìNþK2ï"oÂNú*+Ô>»ˆÒŸRììêP> :Bݹ»Üåõ¥É°”n½H7ôlˆ"ò æˆð„ß!ø”àoÜIø!‚gÏæÙôt¯Pzlp+AÇÆós ó¦ˆ6-Ñù‹C´rNˆzJólDý%ùa¥fº€# ~ƒàC‚¯þI ò<ž§6ÞòÚ†èp# ¨Ð© w'º)ƒŠ¹;;ÜnP@qpZRuãÕ~ØÑd +Z"¤¤r;”j] +½¢Ç”‡³¨Þ™A ³ž`/ÑeUËNúAì¥ç9„i•ÌQ„8F|Qá{pÃjð?O]jÄVz&>*GË6¯­ß6bÀ&ÛØz¸Xxi®¤›ŠûéiÊÕ¼ŽòUþ½®WB,q¿–é 63=CQ¯32CøwRŸ)G†‡›åˆÂ\s·y‡¹ß,˜ã¬C,Ïшi°KÜžÙ–ÑseÄ%žˆÈ"Œ(‚oF/â7n÷¤\X‚k—D¹ò#¢cc­1ÎÂi¬0¢ -5UJÖžÇYÎ(Ï cÌ;56LëŠw• ?ßóýÖuS“˜ËÅ'mdíÌt$Ù•9fÑÒ“°J¾Ck3Ym®-°É„âdNŠÍÐz´3µÏj5²ãza±îzëbÛMºÎˆÎÈ' O…?qÈp(ü”xÊú ÛÖl#Žï„ï¬1tâÄ„˜¸Ø8k¢M«·l†Ä‚¸éq÷YûZ[cÖø8cœÆÄ㘨±Ycc¢µQ‚)€­²^/GKzô¨ð|ÙhãûâpwÜ‘87ÄóÉp÷"3&ð~Ùšß×F5FµEuG QÔÊQ2M*²£ÇÁ½Ž~sÄÀïÈwL(ËѬu³>v’fgÙ_˜ŽÅÙ‡ðºH…L>ûœgtŽeÉÚo—̾°dÔ2J¦½´d­§äÒZ÷¹ˆHk‘º ‘EEÌîÙj7¿þê¤\\»n ­‡â­näÎB€ÂZVš<9?O™šF˴μɓ§ðƒG° »n^¶;Õwú‰½¿Ëµï»iؼzQU<Šcß»° {v˾®µÇ_ÿÍŽ+ž>:v~ªeR¶ê“óƒ_ð…´^yXs‚#~c‘^yõzŒE¥úÊ°*Cu²pZS3äoÁé‚‘‚¿‡i¡KõÝÒƉϥOšxjâYé¬ë?'~•ü¥Ë8S—Àíƒéé°sƒgr17À ŽrÑ‹±Ü}4Qvç$ÒõsÐbÊH?­ zö)=”ÉÆl‡jcZ©AŸÜAüìžl¶#»?›eÿh£¶›f`ŸÉarö °à´䨓Q,*.“ð‹+  ZtÉÚ +:G{JFÝ£ëJF—ŒFåŒNÊ-ß Ož˜“”f4ÉNÉ™ât9è +OM s4cŽÝŒIfÊ9 iͦŸ¨ÉmF»)±™¢‘Å3‡2·Ð§î¡u°ÖíŽRJuÂXu¹œÉ©…*+Ö›Ÿ7¹¶–²¹$eŸ)k«m-¸ë™EeC›{Úûã}Ksœqñ·Z]™Ë•âíîGæ8jwÏØâ}¢U˜ußëjïÜ5éØm¾-*Ò³tb‰Æ°kumõÔÄôÒ¤°ïª]ѽOù•Ã&Zß^Zß8H…|Ü(5 êóíù™imù“{ =Æžøž„;\=©½ùÏÚöÆïw Ÿ!õDÚka¯Þ3Åj! 5&¯O‹5Yã]&Wx5nÇ;Mw‡? á×@1Òýg¦7âõi7䯂U¸’­H]•ÖšnJ[Ÿµ)¿Oè{´=º;"îˆì‹î‹}LxD·3â‘È'b÷¥N;œŽé¾4|eü2üË´/ó2´&}Z1áÔ<±BÆø4AE«ƒÞ~˜­(Sb©ž®yz”UÈ¥¼—Ë(” ™\è-ì/) +¥IÀqd¢Y˵ÊÖVn+Â?ã——=dô¨â#£ç.Ðö-U¶%Z‹TÇÈsç$%GÄ +º—S”šÁ®MlƬèÌf˜I^,[$é¹c³›!'‚¨·0Õ)Ÿ ú[‡©! wÐhc×ÐHÉi +Ï5yrh{ÇDÇZ£4 +÷¼oÏ’·žýé/VôÕ|8ðòê…pÒ­òúåË{ +'Mž_wÿšÕw¤Ngïê_x×Iÿºš]7Ý;gùÚ¾774u,xwõæÚ•·¬¯-hÍû¼j¯wËÍ(Z¥üCŽqœ|"Œ^ÆïÉé±&2R¥I6sÙŒ™FŒÑ"Ó ×‹ŒF“ 1š(–N#µºh­V§ã‚VcÔÝ„¦ø$žÜ-›DÔèuNŒFáÝé9_Ëeƒ^o減áŒðï² KÔ jF/R#fnÖÈZÔÆ…ÿ(r®õ¨ûÖCa“²°(çlIQŽÅCiÔri'¢(Bœ['º…Í–W•¬Ùl¦slÝ\²vÆHR„³ó‰ ?~lï¥WX×Í{ÇRðÂc?Áå=üŽ‹ÛÙžK¡_ÓçÏñd+¤¡Qο-íñ½äÒ„Vaƒ¸Y·Q‹ñVÓ†¨[ÛtwF…éu}ì˜fs¦ÙDžä@+ÑãÛ†òóiu4Šn²>ÇÕ梒— ‘ÂâóV+˜lCä‘ñh~"-‘ŽHÀ92䌞 .gx3ú3F2„ "Ïv’šv2Œ…Å¥_×FCíÒ9:R,äµË–Œ®³\ ÷-R=X9UTÎLHÑES-® ©RªÝäl†Ds|3¦è(ç0$5cB¡d½+ä»—ïU!×]b-œ<9r +ù*¹ê”ñ Æ´ § y­ÑVß1òvÆSÝ}o-¿íõý·<øñë{^bù‘ef7ÜÓPÚ8ñö .Ö…)GZ~÷‚Û³½¿ÿý؆-«Øñ;æ4}rkÿ®wnY˜¥®Ã,ŠW/Ð:X †üM:u׋bŒBL¦øíãH}<¤Ê©LNõ¦ö§Ž¤ +© +;¼Ú ú Ÿîvq.ÅXWNáÐ<;´¿U£Ô`Š”’œÂ4 9¹»Ö5!!1!)k¢RÍ.Cª-ÎÇ4N!‚¶¼†ŒN¹X£b6¤Ýž lùHKL3Ä…Å4ÿ°ã3UÈÌÜU2VD4S÷ù‹j¥)“#ÔÈO_Ãfmï\ì}rÓ÷¾ÓüÊ–5¯V­Ü™417¥(£¸¢pFÛõÖÎ+ÝýÚØ‘?{ø³—ÿ>öÅÀÃMëaÑOtä:¯?ö¤z†×ïîkÀ‡ƒŒJ˜&ª¿HP&N‡"ÏÔ—lòšúMoà)ö>¾ÏFL†x@‚I6q& +B’ã9‹æœ Ü$ÊÓ Åߣ†ˆæ÷Êm6€ë7 !Î(±/€³Ïe#Aê„~A^dã¸É-tÞžSí~A¹ï¸-£îÏVq¢{køæË·›ud¯(g Jè$ShÓ~ÅÞó´ãÃcÛÖæ.ÈOkRÿù’ðZÂD¯!ôï]øÞÃÖó!Lø•Ø*~xåßÀ€®\ý'ñøî8½tÂÿCÝÿ•Ÿ*ûœ/ + ç*XùIŠ°Vý9Š+O Pþož¡™#D*ÿ­ŠrˆX°rMKÇœ–[æµ­iº¹nþìêX¸c¼ÞçÓ]]<çƒW1ÆÿUSS„EœóaOsaÔ„Öõ¿ø”úìí? øŽ 5š=ßèâB>ýiâ+ +=zí |ßyi»¥XÊ¿–Ž÷÷ +“žÑ +endstream +endobj +2095 0 obj +<< +/Filter /FlateDecode +/Length1 3512 +/Length 2264 +>> +stream +xœÍW{l[Wÿν~Û±ïÃ×vìÚ¹×7±Û\;îüHâÒG–¦éÒ”.khewi×4MÓŽf ]ºiÝØXe ¬PU“@BC“àƹN¶!Ĩ!e â! ¡b­T›–„ï\»i7!þã^ùœóûÎwÎ÷ø}÷žk Ðg‡±ûÇsy°.ßulöOÍNÎ5±W ¯MÍŸV½²ïü/››™ ,澉s'¸é™“OkêÛÏàïåãÓ“GyÏW.¡³(ì=ŽçŸà¯"î<>{úLS¿m›u'šlÙ°f'ÏÌ‘o WPÿ_ˆÕG&g§[þ½MlîÑÇN·Öfós§¦çj“Ñ=¶útÍËÜ%ÈHà»6ód{`Þˆfàx ʆ$ÄàØ 2A…mp/l€O 8GAÚá0<9‚N8ypÀN8_€sPƒ ð9˜ƒÇá‡OcfŸ‚çàyð‡B†‚xêÝŒÅÎ6mÍçÌ^gƒ†s&8õ0xŒ«nƒ–rfÁÖ ´?GóK‹½"P»WÀGÛXŒZ¨M2íhµ£‚™%†™7<é¬ËXt 0 Ò‚D9lP^0ûpí=ÖZ³ŒROV”h¸LÝâø;»;q˜–ꮀX.—iP¤Î2˜áNQ2yw¹¼ñ’)A?q:œzq«½¯·TLéI‡D©CO{å|s”*{‰¨‰vMÔˆ¶ûd2Ù•ˆ¼Ë°…¸®mÙ®M©°´ñàîÚ %¤kù:ùËÊRç^Ý7Ü9šËÕ¤”öæûvæ‡S÷FQ +?5¾ü-ŽÇC•ûü?ãKÝË +VsϤËÜ7 +qØusKIaQrBS| ŸÏ×CœBõ2ñ»¼;U3Œ´¸´àÄÁÌ´‚bÝ_ψtKÔÇ,öðHL!*äû +a½‡4É 'H“5„N=]HB¾—<9xèààÌp÷OÔ¡„Ú#S›{6 Ý?U™ßŸŸñyJ„»Žlïz17RìÛ_ÞýP2ù›Ê'ŠÖß—)Ílýø‰¤ì¿úàþm-~.àé¬B7|êa–a1bñc¶)È~•`éºÖ`±ÃšY´0 [‚Ù2r4¶dJþÕòu)Æ2&…ÜF=&±a,Œ@GžJr†i7Ì öV®€•Kí" `j„¦¦½L5‘ºY¥S»UÔ N ::æ!¬è¼“¥DÑX-ˆÅTº$³¢°êœèÆžñ<å¹+JWü•ýꦕwûãñþï=ñ©ê¡„Â]òë»J/hd¹=ÈɤÈý¸WµÉ²MßJÌR‡C^ù!·y|x{@Áºð`N0'ìË¢îeoŸ4u7K•BaQi0‰èXÀ„våh`ÉtøuG€íð`ü¼nÃLaÄŽ€UÌ`*¾VUÅOd-¤-$ÝÃ¥Kš‚Äwë N—¸…t¿q±sûÊ•!í³ùb2J";Å‹_ò'ÒÜ%™Ï¾s¹ÜÁÉ2¯ÝûÚ[#Û‚D–íírûŸÞ û%?V9;L#©uÉi1êGE§Å¨“E´gŒ2#òÚq aD}ÈW!ßäFÃèñÖ+çcJÄå÷¥Ô-¤cKGå£?~2ªFÉ/û:}+çͬWƒ‰æÙHa…œ%g9wŠû9b®uz±YPQü9à®7…µ3ùÖf6Îêìwßûÿâ²ÁE«µ±HnVWW±UY‹ØÿEîz}ô«åvªnûÀÝì­UJ9KÛ 0|jzz|úÔ‰cÖ~ä¥ÿ!¿®Ûpsõ?ùÇU Éý +œˆÿ0ÞGüU<äY|žï–æÓ×Ê7Lú7 +lþ'÷uþoL|E÷¾r«Ço¸ß9®ñߺӣú¾Á( +endstream +endobj +2102 0 obj +<< +/Length 7891 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.2)-1000(hb)]TJ +/F62 9.9626 Tf 150.705 706.129 Td [(r)18(ows)-336(ar)18(e)-337(not)-336(in)-336(the)-337(original)-336(or)18(der)74(,)-358(then)-336(an)-337(additional)-336(vector)]TJ/F60 9.9626 Tf 272.611 0 Td [(rIdx)]TJ/F62 9.9626 Tf 20.507 0 Td [(is)-336(r)18(equir)18(ed,)]TJ -293.118 -11.955 Td [(storing)-250(the)-250(actual)-250(r)18(ow)-250(index)-250(for)-250(each)-250(r)18(ow)-250(in)-250(the)-250(data)-250(str)8(uctur)18(e.)]TJ 14.944 -12.021 Td [(The)-391(multiple)-390(ELLP)92(ACK-like)-391(buf)18(fers)-390(ar)18(e)-391(stacked)-390(together)-391(inside)-391(a)-390(single,)]TJ -14.944 -11.955 Td [(one)-395(dimensional)-394(array;)-467(an)-395(additional)-395(vector)]TJ/F60 9.9626 Tf 204.327 0 Td [(hackOffsets)]TJ/F62 9.9626 Tf 50.049 0 Td [(is)-395(pr)18(ovided)-394(to)-395(keep)]TJ -254.376 -11.956 Td [(track)-287(of)-288(the)-288(indiv)1(idual)-288(submatrices.)-423(All)-287(hacks)-288(have)-287(the)-288(same)-287(number)-288(of)-287(r)18(ows)]TJ/F60 9.9626 Tf 0 -11.955 Td [(hackSize)]TJ/F62 9.9626 Tf 34.49 0 Td [(;)-237(hence,)-235(the)]TJ/F60 9.9626 Tf 51.365 0 Td [(hackOffsets)]TJ/F62 9.9626 Tf 48.416 0 Td [(vector)-231(is)-231(an)-231(array)-230(of)]TJ/F93 10.3811 Tf 89.104 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(m)]TJ/F62 9.9626 Tf 8 0 Td [(/)]TJ/F60 9.9626 Tf 6.336 0 Td [(h)-40(a)-25(c)-25(k)-30(S)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 36.682 0 Td [(\051)-192(+)]TJ/F62 9.9626 Tf 15.99 0 Td [(1)-231(elements,)]TJ -294.657 -11.955 Td [(each)-338(one)-338(pointing)-338(to)-338(the)-338(\002rst)-338(index)-338(of)-337(a)-338(submatrix)-338(inside)-338(the)-338(stacked)]TJ/F60 9.9626 Tf 314.252 0 Td [(cM)]TJ/F62 9.9626 Tf 13.459 0 Td [(/)]TJ/F60 9.9626 Tf 6.037 0 Td [(rP)]TJ/F62 9.9626 Tf -333.748 -11.955 Td [(buf)18(fers,)-449(plus)-409(an)-409(additional)-409(element)-408(pointing)-409(past)-409(the)-409(end)-409(of)-409(the)-409(last)-409(block,)]TJ 0 -11.955 Td [(wher)18(e)-261(the)-261(next)-261(one)-261(would)-261(begin.)-344(W)92(e)-261(thus)-261(have)-261(the)-261(pr)18(operty)-261(that)-261(the)-261(elements)]TJ 0 -11.955 Td [(of)-353(the)]TJ/F60 9.9626 Tf 29.729 0 Td [(k)]TJ/F62 9.9626 Tf 4.598 0 Td [(-th)]TJ/F60 9.9626 Tf 15.878 0 Td [(hack)]TJ/F62 9.9626 Tf 21.448 0 Td [(ar)18(e)-353(stor)18(ed)-353(between)]TJ/F67 9.9626 Tf 88.761 0 Td [(hackOffsets[k])]TJ/F62 9.9626 Tf 76.739 0 Td [(and)]TJ/F67 9.9626 Tf 20.382 0 Td [(hackOffsets[k+1])]TJ/F62 9.9626 Tf 83.685 0 Td [(,)]TJ -341.22 -11.956 Td [(similarly)-250(to)-250(what)-250(happens)-250(in)-250(the)-250(CSR)-250(format.)]TJ +0 g 0 G ET +1 0 0 1 197.579 464.41 cm q -1 0 0 1 192.93 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +.50096 0 0 .50096 0 0 cm +q +1 0 0 1 0 0 cm +/Im8 Do Q +Q +0 g 0 G +1 0 0 1 -197.579 -464.41 cm BT -/F51 11.9552 Tf 196.517 706.129 Td [(write)-298(\227)-297(W)74(rite)-298(a)-298(sparse)-298(matrix)-297(to)-298(a)-298(\002le)-298(in)-297(the)-298(Harwell\226)]TJ -18.913 -13.948 Td [(Boeing)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-884(h)-109(b)]TJ +/F62 9.9626 Tf 185.456 442.492 Td [(Figur)18(e)-250(7:)-310(Hacked)-250(ELLP)92(ACK)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G + -19.807 -24.042 Td [(W)55(ith)-207(this)-206(data)-207(str)8(uctur)18(e)-206(a)-207(very)-206(long)-207(r)18(ow)-207(only)-206(af)18(fects)-207(one)-206(hack,)-216(and)-206(ther)18(efor)18(e)]TJ -14.944 -11.955 Td [(the)-250(additional)-250(memory)-250(is)-250(limited)-250(to)-250(the)-250(hack)-250(in)-250(which)-250(the)-250(r)18(ow)-250(appears.)]TJ 14.944 -12.021 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F67 9.9626 Tf 110.952 0 Td [(psb_T_hll_sparse_mat)]TJ/F62 9.9626 Tf 104.607 0 Td [(:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 195.753 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 150.705 244.903 cm +0 0 343.711 137.484 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F54 9.9626 Tf 199.827 667.24 Td [(w)-109(r)-109(i)-109(t)-109(e)-242(\050)-167(a)-241(,)-927(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-129(t)-435(,)-871(f)-96(i)-96(l)-96(e)-97(n)-96(a)-96(m)-96(e)-368(,)-816(k)-42(e)-42(y)-259(,)-855(r)-79(h)-80(s)-335(,)-918(m)-144(t)-143(i)-144(t)-143(l)-143(e)-277(\051)]TJ +/F102 8.9664 Tf 163.108 371.728 Td [(type)]TJ 0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ 0 g 0 G -/F51 9.9626 Tf -49.122 -27.896 Td [(T)90(ype:)]TJ + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(psb_d_hll_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(a)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 9.415 -10.959 Td [(!)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 575.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 575.584 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 575.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 575.584 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(HLL)-525(format.)-525(\050Hacked)-525(ELL\051)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -19.926 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(1)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.955 Td [(attribute;)-361(will)-324(be)-324(allocated)-323(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(\002lename)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.368 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.765 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-234(\050i)1(.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.955 Td [(is)-250(used.)-310(Default:)]TJ/F59 9.9626 Tf 74.799 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F102 8.9664 Tf 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -104.935 -19.926 Td [(iunit)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.758 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -317.894 -19.925 Td [(key)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Matrix)-250(key)111(.)]TJ 3.317 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(A)-291(charachter)-291(variable)-291(of)-291(length)-291(8)-291(holding)-291(the)-290(matrix)-291(key)-291(as)-291(speci\002ed)-291(by)]TJ 0 -11.955 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ + [-525(hksz)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(mtitle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.089 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(A)-239(charachter)-239(variable)-239(of)-239(length)-240(72)-239(holding)-239(the)-239(matrix)-239(title)-239(as)-239(speci\002ed)-239(by)]TJ 0 -11.956 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -21.917 Td [(On)-250(Return)]TJ + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - 0 -19.926 Td [(iret)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.433 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G - 139.477 -128.483 Td [(144)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1824 0 obj -<< -/Length 3542 ->> -stream + [-525(irn\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(ja\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.3)-1000(mm)]TJ -ET -q -1 0 0 1 148.768 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 152.354 706.129 Td [(mat)]TJ -ET -q -1 0 0 1 173.658 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.245 706.129 Td [(read)-202(\227)-203(Read)-202(a)-203(sparse)-202(matrix)-203(from)-202(a)-203(\002le)-202(in)-203(the)-202(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-810(m)-35(m)]TJ -ET -q -1 0 0 1 149.022 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 152.359 667.24 Td [(m)-35(a)-35(t)]TJ -ET -q -1 0 0 1 171.029 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.367 667.24 Td [(r)-35(e)-35(a)-35(d)-169(\050)-166(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-107(a)-106(m)-107(e)-241(\051)]TJ + [-525(idiag\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 18.829 -10.959 Td [(hkoffs\050:\051)]TJ/F69 5.1905 Tf -15.277 0 Td [(,)]TJ/F91 5.1905 Tf 0.61 0 Td [(!)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -74.472 -27.896 Td [(T)90(ype:)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F102 8.9664 Tf -4.162 -10.959 Td [(real)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.185 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ + [-525(val\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -21.918 Td [(contains)]TJ 0 g 0 G -/F51 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 4.708 -10.958 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -4.708 -10.959 Td [(end)-525(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ -ET -q -1 0 0 1 344.406 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 347.544 442.084 Td [(type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 9.9626 Tf -12.403 -39.966 Td [(Diagonal)-250(storage)]TJ/F62 9.9626 Tf 0 -19.092 Td [(The)-399(DIAgonal)-399(\050DIA\051)-399(format)-399(\050shown)-399(in)-400(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-399(8)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(iret)]TJ + [(\051)-399(has)-399(a)-399(2-dimensional)-399(array)]TJ/F67 9.9626 Tf 0 -11.955 Td [(AS)]TJ/F62 9.9626 Tf 13.716 0 Td [(containing)-327(in)-327(each)-327(column)-326(the)-327(coef)18(\002cients)-327(along)-327(a)-327(diagonal)-327(of)-327(the)-326(matrix,)]TJ -13.716 -11.955 Td [(and)-302(an)-302(integer)-302(array)]TJ/F67 9.9626 Tf 94.018 0 Td [(OFFSET)]TJ/F62 9.9626 Tf 34.39 0 Td [(that)-302(determines)-302(wher)18(e)-302(each)-302(diagonal)-302(starts.)-466(The)]TJ -128.408 -11.955 Td [(diagonals)-250(in)]TJ/F67 9.9626 Tf 56.527 0 Td [(AS)]TJ/F62 9.9626 Tf 12.952 0 Td [(ar)18(e)-250(padded)-250(with)-250(zer)18(os)-250(as)-250(necessary)111(.)]TJ -54.535 -12.021 Td [(The)-194(code)-193(to)-194(compute)-193(the)-194(matrix-vector)-194(pr)18(oduct)]TJ/F60 9.9626 Tf 206.92 0 Td [(y)]TJ/F93 10.3811 Tf 7.997 0 Td [(=)]TJ/F60 9.9626 Tf 11.584 0 Td [(A)-42(x)]TJ/F62 9.9626 Tf 14.746 0 Td [(is)-194(shown)-193(in)-194(Alg.)]TJ +0 0 1 rg 0 0 1 RG + [-193(2)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [(;)-213(it)]TJ -256.191 -11.955 Td [(costs)-205(one)-205(memory)-206(r)18(ead)-205(per)-205(outer)-205(iteration,)-214(plus)-205(thr)18(ee)-206(memory)-205(r)18(eads,)-214(one)-205(mem-)]TJ 0 -11.955 Td [(ory)-322(write)-321(and)-322(two)-321(\003oating-point)-322(operations)-322(per)-321(inner)-322(iteration.)-525(The)-321(accesses)]TJ 0 g 0 G - 139.477 -307.811 Td [(145)]TJ + 164.383 -29.888 Td [(166)]TJ 0 g 0 G ET endstream endobj -1832 0 obj +2096 0 obj << -/Length 4155 +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (../figures/hll.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2106 0 R +/BBox [0 0 494 214] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 2107 0 R +>>/Font << /R8 2108 0 R/R10 2109 0 R>> +>> +/Length 2880 +/Filter /FlateDecode +>> +stream +xœÝZËŽ%· Ý×WÔ²{Ñe½ËN`;pAz0 #«›x‚ o€qùýIQÂŒ‘Ezfq›,Š<<,Q*U}:ÝåO‡ÿù÷v?¾y©çÇŸw†«ô™zÁGwþú·óÇóŸpéã‘rM0*”zùÎûTÄ+—x‚®–AvWª$»+Âõ\¯Óyƒ0¶VÐä+9“K‚ˆ 'r ²¿\VGÀàTŒü›ñýÕÒ#õ+ƒ"Á¥jPŠ\5kn éT5^ݧ£–+å8P µ]Àש(›»Bó3–)ó‘é´L¨æÊÄ`6Å [A.Ÿ¿H9|Š—ó¦>>ö+… õñ±\1Θ>Æ«öhê㣿€q­Ü—sIóyÖgZ f¦‡Á1Ø(û%˜ƬOå*mÖÇÃO¥ +sŒ [?-Ïúø\.Ÿò̃åYc1˜PÌ•‰Ál* +f_@.Å0õ ¨ñÕÔ÷}Ög/Ï^¤6öY¤~È#+–MuÔ‚yQR½8[m!ÏNÁÌÌ:–9wˆ”Yÿˆ´…lj‘X_´6‰7µyÖÆXÓà ÊÆ๣(x²È¥X›Pøè4Ÿ +ø¸MºÍL¸A=µ•žHoÐwõ¡ØW¨35è3 ƒe£ÎU<Ê¡^QEª·î*Öo—ëKŸ¯œ ŸF«ˆ"$hÄÐT¨x'ŸÃCŘÐv°ïª¢BÍ Z2Jh\5'MƒEÊ{ä©̃:D™ƒH…À<+ĵ¶:8›ÜM³:Ø5ZÏRl*-% êJtÙTûR-A«ƒ} W NKÄYc@¼Lƒ9bp;1 î'D–9‰YŸFWy,šöüY÷H¤{„QBÛ27™ˆ³:Ó€yPƒ(a©„y¸Vb™;¤Zªƒ™§Y$†–Í™KÕV¹õuV¹UÓbÑTg ^Ô3gB0·Š¹WˆÜ•{/Ðx¿?wý67m¢ð™v¥oLJ£æÍFÆÆ׊0G;nBÀH41á¨@VÁ;º:­D³X¥€û9kŚŠ+—‚%¢h¬UIJ±µ¬ÐY¤sÙç~h|Éؤ}jÆJ5Ö +±.F¬°65€÷bDc¬b4·ÌÐk_Ö&Ç͆Ö&ø͆ƒØÚc&­4¸™l?§b1Êq3bÅbÜfÄ +‚TœNéÞ§ÆÏx±G!\¬Dc¬nÿ­‘(¬MÎB¸‰ÆÆË™I¨\–³äšŠÅ(§Íˆ‹Qð›+è.€MI…éŒáÇõæ¤ ;ì_Tü’Uõ¨ŠÆJ4Ö*¶ÝJ4»@,~›Œ¢Y&PÛdµJãôÈX‰f±Êe›Œ¢Y¬Zƒ-KµV¬Y¦?“LÃ~®òRÇ-_[È«IžXYòÄj’'V–Õ”7YàZ( í@¦ç•o‡o}XŠEËãW<ˆÌ1p„h +í£ÔCs…j(1X†‚‚5÷ð°çq;þŽÜGðS`­«™XïS­.â“[tÖœ³¶äqÑQöáú‚§YµÀÅ|©‘9ŽšøAŠ=t˜À-!2Œ`jÁ(ÕÖ-ï67ª\){v¢l%¬Ùᾧ›]ökv©®Ù‰<³cbcŠc˜ì…Z0Jõ°åA•{·¹Qå ý 6§BÜ»jRçÓ¯ÉoÊòCkYdˆÜaS‹Ú»z`YbàÓ2k`+™ÈçðPð!Gp ‘a£P Á-¶<¨rï6·1çðp5ÛìX£Ø +lþ0[ÁŽ¡Úì2,ë}$XÇK¦á€e“œhÚp È9‚É1¨… [cƽÏÌxç'›kùëø%ÔkrwUðþQ¸‘ý¥x™;ÐÏrü¼"Zü$e¿r?öÑo‡°¡»báOvÅŸ7s_üoýŽ¹×ËUñ™´v¼ËïcyÁ—e•¾8U.°UpØ=pT±$žÔâb(`FÁ&ûœ<çJ÷€C\"…ÑÝ<žÊœÕÐÍð-™È¶Û—<¯Ð1;Â#|¹&D(šÂçýnÚû± Äæö529¸´²,ämI3é©î¢áãxðiWÐÓŠKÇÞ3Ï3TÃg o‡ ‘ÃÁ‡[LFBnæ¡Æ}j8ÔnùÆw1Ö •Eõ« +,ÃØRúi¨¶ ã”9Ó—„¯¦/«,}Ù·8κt ˜š1Æ7 !ÌudÊX6?ØÑÖXoSÃ+Ÿ¢K:–•O#óu !÷zŸÀn|YpUÃ$dO[ +% ÉuÁ.Œ¢é>Cx²„o1i»ôƒï×F/Guk#Š¹žË Ÿ@ÁÛÑ™½„ʺÛ …Æ3öÀÈÖ˜È'<¹Äÿüs»Ÿ¿{=¾yÁ£s˜…¯Ð³Çåó _ŽWXTàŽ…?á÷õ~üüð‡çßÿñüÓwß}x|‚ýc¯5<|ûzÎ?<ŸÏ//Ï¿•*þá§sû÷üOؽ{ÿð-‡›Üüé½”‡ŸÿòúÃñíëñgøÿ[‡¬¿~übB †dó‰;ö&X&a· +Ù<øÇ×O?Ó‚•2.šg9_ÿ*×Ps ŸOøYT¨•®U¼v²rƈºŸÂãSóøbzh”8†Ñ½ÂXZÊü°ó`—ú矆@8èwàô)aÿ…¢ªküœËEvëñ»›šK°|dG˜s%´D YšVòÔ@( ázé‹ñCüÿæ׬á¿8Ç7•xVÇ‹ éÑi3kè4½ðw(Ölú¸V*|ik¬Ô¦âÓ*økµøåÀ—žÑÑ÷K•_á•Box:ÀÀ¯¿Æë‘HÏ+x´“êˆÆi[‡;¬<ž¿Á²ÐùÅ+i œ°ùŸø`8¼=ÿ³·‡ +endstream +endobj +2111 0 obj +<< +/Filter /FlateDecode +/Length 214 +>> +stream +xœ]Á‚0 †ï{Š½Á@²ì¢£¾ÀÙ±L<øö¶E> +stream +xœ]Mƒ F÷œbn€?MLãÆn\Ø4m/€0qÑÛP»èƒïM>x?Ü2øÃ[ùÂÚò¸ÚÍK„ gC¬¬@Ê»\„c¼…{B  Þù.äϲÉ7åîH«puB¢4#k‹¢kµî’ú{ªvaÒG²ŽÉúÒE#ž"6¯›¬ŸÁ4)U:€Ü¼G +¹wî•úÂßלuÉ‚¸Ø<~V +endstream +endobj +2114 0 obj +<< +/Filter /FlateDecode +/Length1 5268 +/Length 3426 +>> +stream +xœÍX}l[×u?÷=~“z|||)’ïñ‰’%’¢ý(‘¢"ÉŠ,ËVì9Ž;dª8‘dEqj·ž›zIÐdmƒ[“¸ Ò! Ú¥hÑ­²ûhKܬuÝu¶Åm‡Lè†.²Dk6¬A$íÜGJv²Æþ ½wϹ_çüÎïžs) Їo?Z´Àù(€ãKgζeéÈ‹KçÒƒJèS¨ø þ=}ÿÙ•3áVñØw€[^9ýÈýíñî‡BûX^8ɾø,@d•åPá]â‡ø×Pî}àÌC·ÇG\ø=ýÑ¥…Î~Wð‘:³ððYÒ¯áøÿBYÿÈ™åŽ}Oã#qö£{¨3œõŸ=·|¶±?àŠ¡M÷°r•{òÀRøíÃoÆaîwt‡à~çQZÀÇ  +d wÀAP`tØ ·Â|NÁ"œ Tè†ûà( ô °ÀÜ ûàIø-¸ ¸¿gáãð)8 +¿†È~~> !PÈSPÐÁÃ5zÛù:swŒzrµ‰º£ûd]e(V $¯ÿ å +”Ë8RÛkÖåó§b::\3èT½@]y6Õ0GkŸxµžÀqµõĿצAݹ=_w:êu\Ïïš¿»@=y;C.àîú…ùù\Æ›·{ÕԶʗ—%½Z,P^ÿ$ÛäOqòÙ9S§®¾Û(®5– :kŒ& £žh8Ò‘¶Ä6 ´­¢+óú_;î„òz‘zsó5]ßgÎ.<¨×ô“‹í%ظ.¶3n­7ô}Ù³¡7Lg;“-N§p$úÇtj™ 8GpvšXFB_m 8i­9Ö±Íp†…ó¦¾ÚÙÜÔkŽ& Jêµ:4g6L½1×0Ø„öö*P‘…AF»%ækÈp Á^æƒ÷Ýè ›ªäщÆ ¶ÛNš /Õ×ÆW°'’¿SdjzšxI„%pžlð±{©™‹h½9À1§ù©#µ&2qÏÒt“è_T_¢ÝË=[{©yŠZćôxb>»9 +'=?~ïâºêy“iÞ÷@þ¦<û‘áÊDÙTßoc>p¿D6?GÉçáõ®Ù„7CúÞ¦k…Ë9‚_ß{ŸÞ„ŽìÃNµ#xY§ïz§;½™ŽàFÁó‹ŽdkŽv„ +A«#t¡êi „ +"¨Ê\ÎægÏ£¨ 1·E™í«8ûv4»·EE)¼-†Ùx‘˜×Ç«L#ܸB”iT.z]£á"ѹm1†¢–ßë4’cpAfóe®Ì½ &æ’Tàehȵz»@qå¨U´ËÞ5ªmð®55ä.¦ûst¤h—\k„Ž©µÚ*KÆÑev®w¤f<ÃFÇ“8:.Ú’³ûµµKþ‚/×ò‹p'ô‹võ|dò¢]Á¹»œ¹vµ‚$S­JýÒ%z{±Ù/7}a©Z­ÒˆD½U°µ^I¶yµºs±¢jD ^מtWÊ#Ã}fÆ£FPë13ÃeÅj·úF†ËD2$·!Ä8x:“ÉZa‰÷¹Â®(—Ý]ÈŽõiòÎ{Fõt,šJ‘ìúÏÈ?m"MîkÇf{ Sw+zFî=ÇöY³}1ÿNIÖ>qtý;FTK&£1ûÍ÷þ’\W‘Èa¬I—1ûçàëÐL"²T,µR^È @a9aYV³G­§ÔʶµnßÔ¶@Köuål` ç‹ÔXµc«3Ö©XÂÚo5“ rÖ¢1ÑVIŽË kÍÊúB"öy­¦b’*¡ä¶X8½"œ®ÚY@ý½ EÅ@ÜúG U ª‰jª12I&ˆ¡nµG¤íþWI15F¢c)}tãßÆR?j¿Û:²rµœv+Š;]Þ˜ѱåÊìþÞµ­±·z9§«ÜW!Iƒ¦Ä"¥–ÜÆø0C*ŠHµ )ŠÀ0©¢fŽDèHÍϪ&³q¸H /ƾ䈥¤`K‹YBaÕckTí 9 Â]ÈpŸ¾*1¶ß‹¤FSmŸ(É—ø.5t¶~‡á`UÔ‚¨Å˜–ïB­‡Åb¸\²¢%«RÒÌ6—SJš@Å敧¦Îú ¿ê×*¿{æñÂÁ;†ÎŸ’‡ž<öÌKdïóß3Œ7.õÂ]Ÿ¾5¹,OL_vò1Ãø +r±@Ó`‡J­Lá fòmܶpÏ£Ö9åÙU» X´š=YF¤–rV3Ûä¬Ò€ÅÎ-ؤS|§s(¦„‰–"Œ>Ö¤éU$Þ_B§$qÎl†4ˆg¸´ëÛk3VÿÆx9øÝsÕãúØÆ¿Þ’ÖÇHÏDÚ>¤h÷äR÷®WÞËŒ áx·×%ß7zJýV%íÂ>s’Ø#k¥+¯¾°?îâk,ž6Æ3LCXD“G¹h{üȪL‘òxÜ0–xžB4lÍ6ñŠ¡cà©bÄä$;2<óÑí¤”’“^”á!ÞÌ|;T“9ùÄå—žzú_Þד«$w*¤‚‘ü]ãõBDøÎs_ºúÝç¿ômr|ÿÙ™¼8¸ûøí¥ÅÅññÅÅíxe1^½ð"4ƒ,+‡œ°P¿ÕŒ` ì„Íi´”v¸‹„æϵÒî`OIÚµÖt§™ÖMü¹fÚÍšiÓŸ#4[¤áUÛƒ1MYMO˜ux8&ìaÍpªXÔ#bÝÍQR›Ð ÚÒ;*Âa¤t‹·;„„íªÚ +;…2;…æÉ‘ÆXs´O’”¬r¥DÞÐü;&.coUSz…Søî®ò± ‘ÐÅñ¿öæ„ùÉ^þ†>µ§¯K}Ýoß-R›?'ÿ€1‚Ç¡ÙǸ¬—ì<1R´ýx:„ÈωHÌÙb‘zV1_Úš¼ÖêáCè‰&Ú€AM +köN|k}h½Y¥ ýïÄõ<+8IùRH”Øa;ß‘šžäV„ü2 1w1è#7œÕ!Ò.Cˆ aBV“8Ì'nt¥ôX‘ºW[.îÄ¡.‡ö¶€eUÐdDJXSSp† Ú~tÞç ·»q¬ËíäqÛDz<^V0¡;D¨tüÜ.¯#ï•ï¬Ê17ì‰IÕcåÌäÆO&uîôБo~óhaƒTtvŒ)æKy}‰ã₩ÊbîÆðºY¥7 I!~;üxMilÕö¢=2Ú.{™Á²€½2kzñœÛ=ØëÅÜFUV‹ÜS\±X»¡ÞK[ק[Ô˜Oõé$=‘®Ý‘MžŽëñiòƒvÖMW6ž<±²C¤®×ö‹h³ƒðhjì´ +¥VÌ1ÒîR±î C?z­´ÓÓ "Ëɹ"M¬2¨a5å„ã2¸™·RM[Tñ‡$B¢­Ùy|÷ʘ¤E¼R·DÃè›Èœì®RC¢~v›¤nçâ˜âÔˆ×c:EÊä?Xœv¡pê/1s‡ŽZ”箩Ùä obAúéh29ú­G~µ~"¥rÏ +æm#Ÿ3Èzw„SÈ0÷ý²¾]…Òeã»ÜøÑÙ=av/ &—œÜ~âävvÍ¡Pj©íóJb¦S„·ö/ÏÖ,Ûž°s[ unDÄ©¿êõ«\ÿÇ≠*Ý©¸#Ü¥þÑÜ3½{6®ÍŸ±†3qÛ'=ó”êçžUøÂôW«iNQxãÖ¯ÌíŽ`­uw+ݯ¿¡ ²€7$ö¡d†|ü'÷(~¯ñóüë®s®»-÷+ž^h³ ýë/‚f <Ž¸éÇ}³®ç÷ââAöß\1ä 0}×Í×þñqÁ3ÎÓÅðy§¾¹‰O=QfÝ€›~8üc˜ûÁë¬ +xïquúÚÛãÛÈsæzp&Ìž[^>º|îÔýÎ +äéÿƒ]¾÷‹ïÀ;›ïStþ+ÀÕ Ãý„¹áß—Áä¾yã]Ô¿†íaÔIâñ}Âäqp¿ƒí?A»ù›ìÏì}¾ú¶Mÿðò½áñÿà¾Â¿ÅÔ×Ìà [ï÷.®ÿ­çMþ÷n´ö¿j¬O +endstream +endobj +2115 0 obj +<< +/Filter /FlateDecode +/Length1 11124 +/Length 7803 +>> +stream +xœÍzy|TU²pÕ9÷ö’N§;kw’îÎMw–NHÈ$¶äfeI aÓ ‚IH‚‚`‚ÐðÜF… ¨àÐé6Á'ѧÎà2à8ãö‰#ŽËLfq„ôW÷v@ù~ó{¿÷¾ß÷Ç»‡ª:§ªÎVUgëFèuµ sò@ý2Z¼|]S{¨œ1ç-ßÐeÿÛÜ·ã#í++ÛW­›Ö} @7 v®Z»qeH?‰êžkmiZñú…±*·‡˜S[‰¾ "†Ê©­ëºné»BqkÛ–7Mô—J(f]ÓÍíšµfKõö›ÖµLŒïnBR{[g×DýIŠ¼}}Kû¯WºÏ“~=€É*ÞG£ªÁ$¾ ‚Ÿœ%øb|Nð‚xHãk‚£<šj?;Nxö@*œÃ)ð2ŒÀØ¥P»`&œ‚Ãñ @‚ +x +œhU`Aà:XŸÁ(¤C5|ŒQÔN%´C¿$\ w‘V”ÃÏa×âBÈ¡ü,–…nê¹?8H¾|ŸJÃg˜„Y”ûDBôÀ¿A¬×ƒ B3ÀÍø%8À Û…¡/x\Gà·XM¹¹°Q|_ÖR­'Ñ‚#Á3ÁÏáE¡…Zº î¦ûa„MæåâØÁWÃ&»ÿÀLJŸ°[YOpIð©àg4Ø`:̇¥Ðà&x‚¼ú2¼Å4O ¯Š›ÄsÁûɶ.(£±×’öBj{;yÉJïÒ,#ÑN³˜Žóp®Â~|ø~À4ÌÁ:ØWÜÇßà SE1XL-ÅA2õ+Áh%ÜJÖ¾Ÿæû¼ +'1]˜M3z—êË®b”žd§ØÇüNÞ/\ïÿãø÷Á>ÐR”Í$;tÃ3d…¿`!×`'~J#ßÉžãÜÌ%^ÈKù"ÞÀïæ»ø/ù¯„õÂAáCq¶Ø$Ô6ß8þv°:xÙACãJƒ,(€i?+)šn ñµSZ›a+ôÁ}/÷äyŸ€“ð[øü‰<è 1¯¦Þ×QÔ݉÷QzáKø*žÄOð[%±Jél*+a嬊­bwRÚÅN³wÙ|_Î{x/¥Ýü(ÿ@A‚b¥Yâvñ€æ mºv–¶Y÷æ…±‹™.~<ã ã׎?8þÒøçÁÅÁ4~'dÃdé6å#ƒû(=C‘x^ƒ7á=u¬_#C‘"ÞŠECy­gâlJsq>¥k(-Á¥”š°[)õ`/Þ†·ãx/> ¦‡inûði «x;Pg`~ð@ІaÐ\K;ï °O+ÒÞÓ Éâ>Y–Kf\í¹ª¸hú´©…ùySrs&gg¹33ÒÓ\ÎT)Åa·%'MJLˆ·Zâb¢£"ͦc¸!L¯ÓjD3„¬J©Êk÷¹¼>Á%Íš•­”¥&b4ýˆáõÙ‰Uu¥ŽÏîUÕìWjʤ¹òÿÒ”CšòeM4Û=àÉβWJvß[’=€Kç×SþÞ +©ÁîSósÕüN5o¤¼ÃAì•ÖÖ +»½öJ_Õ†Ö¾Jo57h+—Ê[²³`0Ì@Yå|©}-3PÍ0Keñ ‘åK**}ñR…2wV6­ðÕͯ¯¬Ht8²³|X¾\jöTæ3¹U(W»ñiÊ}Zµûje6°Ý>˜5Ò·#`†f¯;|…´¢éºzojPúˆtS¿>˦³ÖŠÔxTyý¶Ky_¥uµ])öõm³ûæ×ÿXêPpCµácÎ*o_u¼ƒLX½ÐN}±;ê}x'uhWæ¡Ì)4»©Ráx×Ø}z©Ljí[ã%Ç$ôù`ÁF‡?!A>…„J{ߢzÉá+I”š*& Æ@ß‚Cñ²=þJIvÖ 92dÖÁÓD&ÜøãLËe™šSÕ•\õ‚ËvEeDÒl +Ÿ}¹FR/Ñœ¦+¨e:ô-ŸNjô5 Õò­ ¬öé˽}æb⛕ú>Ñi–ì}ßù_ûÓ•œ¦ ŽÆiþ”¬%—ä—ò>·Û—™©ˆ¶œæU$#—$±×(’ÞK’ËÕ½Åñs ¼,b}:×å&s\tek±ãþ qKH^½Pªž¿´Þ^Ùç°mõ¢+J!ùô˲‰†dpŸà$KÍ–(ô,­WôOtVI•«½³h©Ñ}Ñåõ<‘5„r,‘«MQü^w¹e¥P®´%85jü¯huÀ*íU>³wV7„9ÿÍJà9¥–J~¨61'_±ûÊòUW”¯^x§ .V½hi__ز*Ú¬úúª${UŸ·¯)ìm–ìf©ï¯çõ}í•ÞK'úªv4Ð$Z±8[¹#‚Ö1^ KÌð}×øs±ê¬Låèák:}Ú@¤²™Þ1¥t7xËd¥·$;០UVæÉ¢îÉ*õ§g䩤¼矰CtµãŒ?.Q•|ì/+›ÈLÊ efç) ãÃ_ÿ˜Ÿ¡CE­5”>9ï\©‘Èoñ6à¿™8”êÊÛs‚¿Iò×ùI:O•j'ýÆÈ=ø^*+tÏDùQ¢Šü§üG¨Gôá úñ‰>¨>xmü‰òÞ­Öëš ¼ÓŸl3—&“ÜNKÀ)·‹r»Èt»¨„‘.vkÕž‰æ]¢d®-~‡¤úhË%>o€Lº…L¿…,·…,·m¾¤³9¤“Í7“ÎfÒÙL:›É*¹¼“úë$‡a3€“Ý;Éî +ßGx„à´Ê¿ƒðN‚¥Äo";fШîáküé6 +²UCEr^ÉqºÇ"5»r(>)¯ÿ‡’>L D¢Ԥ趨Җ!}¸ÂmJH +QÒº¡4‚/‡[ÄN%( ¨ ørjŽm˜Ïƒu:#l=¬‡÷=¢[Q'xÔé€B2Šgƒ‡2lœæÕ·ë{õܬ·ësõ²¾N/¶Ñ“¨ŸsÏá%¼–7r1ñk‹ó‰È35Åù; ŸaÄpÚ ú4#šÓšQÍ9h×äjdMÆ«i×ôjvj4úšZæ5´z Ül°r ²¡Î Ú´8Pz'oV-a3A;ÁNlÜH|;¿ž ‘¼ÑH¦¸žø@¨d&8MùQ¢"•L¤g"=qMÄ5+’:/Aû„TsYr©Ž¢N‘¤‘4‚¸dÛQÂç”Á*©d¤’‘´N³ 4B3a;AWy£5„/Ér'ä^*?§ê\’ÉJ]vAnJÉ@_dàÎ ”=%¥yr +¡¨¨¨F©ÑÙ˜Þ¸Oh“Úœmémû„Z©ÖY›^»O(‘Jœ%é%û„)Ç™“ž³O°I6§-ݶOè¯9\s¢æTÐXÓVÓSç‘ë†üîÜ<•¦8zÄŸ7ÍTz5;LÓi$¼‡à aAA AÈ«Üg‰û,qŸ…Z‚F‘j=«l1„m2…¿G•)9Eήsšü!q~mi m»{8µ}ˆä‡TíPî°Ê÷Uùµú*_Ѳ\ª§l‚KÕín)-Ã¥PBÐHÐN Â)¾ÎPë„mí‡ ¾”Ò¾„=Ké;ijdã”XÄÅÑÉ©3—šY8Å‚ŸRñÃ*¾GÅ%*N•#æ¿c|qŽñ®9Æ4Ê°t:ƒŒ¸KÅÙPj|®ÔX[jÌ(5Rkp€‘ŪX£`ü£Šç©8KŽq¿sÿæ0þÕa|Üaìp¯v(õ&Ñ6²LTÏQ±K6ØŒ¯ÙŒKlÆi6c©w#õe*NVq¢‚ñëçL&Ðǯ¡‚ZB¿'Ã`  ú=¥DÆýž™D.ú=»‰üÓïù‰íüÕ£ ¿õ§žµ•Æâyœ-(å¿Mпâlz.Ùèñ<›n6Ütý™ß³UÑ’êÿ”ÊO@ŠNÑßKA…îÁÙ*ÿñ‰zù³š©×GýY©×ŸB–ÚëCþ¬³Äý‰?ë"÷û³Öé÷;•®ñ{2m¥‘¸ +R™¢»œLIÍD³¨åµDg†*Wú³”ZJ,÷KSˆ¤)£|%¨S»³ù%u’I ©MLIt"8U&uðFHQ©Î/m¥V4Ï9ÏÚþî9®L¾A“·íÓh~‹©ø{œí?h{û˜b.¿íTVGm¿’ŽÛ^M àb¿m$+ #Á‰¬Ã#¶A2²tµÎZe{VR¥û$’’«÷x²mJKm8©ì·mÍzA¬£/&qCÖ [ç ­Ê@ËêL³KëmEÄžÀÙCmSRÊPr©ƒGm™Ô£KR‡rÍ´aVZì–³´]Úfíbí|íUÚ|m¶Ö®MÒNÒÆè¢tf]„.\¦Óé4:AÇt ‹ Ge·ro‹Ñ˜ÕŸø jÞÌÌÔŸþ€¡ŽÑÚñEójV½° }QÕP½¨Ì7Í]Ðø¦»«}ººkëïk ’Ý@XTOª°îLTwÇ1çÎ{ºùÎ{°Ú7²ª›í¾oÒ<Âè’*JeVˆÛPb-‰šYTUñ/w»ø¬îÖ$ßƒÕ ë}Ï$5øò”L0©¡Ú7Syc¬­²âkWHCý1ÜÄ:*(|ÜTÑpY RX;©G!ŠÚ¤(j‚CªZªFašRY1˜’Rzg+J>/«J«Bm¥RÔVBH%CªÚV*KVÔ(B™~ÜX8 ImÌjc“¥A§“T²œŠÊà4') :§©âƒ?ˆ%gh8 àTûqbƒÚâ:é!Š‚ ¦#÷ÿϯ¥ì ŒCM­X®<νRe ×·}C«Õ×Ûl·®øhâÕîò6/oUhS‹ï#©¥Â·Bª°6-ÿâ劸Iª„å•‹ê—Ë-þ&¹©RjªhÚßS^}E_÷\ç_4Ö£4V®ôµ¿ú_ˆ«ñ~¥¯j¥¯j¥¯ýò~µ¯êeX]W?¨ƒ²zÝ©tˆÂh=x eqæöêâ¸Êa½5qX:¶ ô2—Ê|FE”]š]ªˆhu*¢åç— ‘õÖ«‰ÃøÔ„ÈLìH© Ü`­\]qù_ggg—ÝÝnÂ]ÝV•×E‹Ö±°ÚW¥<=>O¥OöV4 âŽî‰¯¼^6Ÿðœò°6O§ß³ÇsØ#vw7;êDÊ©Ö˜Ò–Ò“ÒŸ²'åpŠF\WTöìIùK +ï¦hÂ.ú*+Ô>»‰Ò?¥ØÕÝ©|@t„ºsw»ËëKS`9Ýz‘nèÙM ä,$á?¿Cð)Á߸ðOž$R8<›gÓÓ½Bé±Á­l:Vž7”[˜7=@´ieˆ.\¢•óBÔSšg%ê/É+5Ña˜ðë|EðO‘çñ<µñîPÔ6tB§iø@….uº»ÐMTÌÝÕévƒJ€“HÕWÆ=`g7)È!DHIåv*ÕºzY)gQ½3ƒæ2<Î^¤Ëª–ðƒ(Ø‹ÏqÓ*™#ñ:x‚ä 8f€oÀëÁê6ë¹è™g>ï™{Ñ%”7_ 4%×éˆt¢­.ØùÈY„ïÁ.Œ¨›ÿ9êR#¶Ò3ñ!9F¶z­ÖQ«VÙÊ6À]À"J£q5ÝTô8@OS®æu”—¨ò?èu½âˆøµLg°‰éŠz]8ã0Œ'õÙrTD„IŽ,Ì5õ˜všL‚)Þ2ÌRñ,˜»Ìí™k;kVF\≌*ÂÈ"øfì~ãvOÉ…eر,Ú™g‰uÎ`…‘i.—”¢=‡sÑžëÆ™wz\˜Ö™à,~±÷ûmë§'3§“%MÙÄ>Ú•iO¶)s¬ žåûÄ0À³Çèý9:8CTOGÊÄëPä™ú2^ã€ñu<ÉÞÇ÷Ù¨Ñh@0ÊFÎDA ËŽœÀY çLàFQžY(þ5D4¿W,ÀGŽÐ.³/è=ñ¹Ng­ u€ +/°?@8&ÓmFñÔ²±³Êä瞧i¹ÍcîÏ6q²{[Ä–W¦äbÇúe°žB)Ú‹:R´mÚ¯Ø{ãžv|`|{Gî¢ü$±ÆõÏ…W'{ ê6 ‹üxü˜ŒUòmZ«¡ÈbtuU&¯ Sr\\†Ö£­}Z«‘í× +Ku×Z–ZoÐuEvE=fx<â‘ÈC†C'Å“–_Z?°|`µ'|g‰¥;—/&ÆÆÇÅ[’¬Z½Å`5$ÄÏŒ¿ÇÒo×Zã³$ćÇkŒ<ž‰«%.6F-Ø*ëõrLxI¯õž/‡›Å„þxÜ8žÅó| +Ž{‡…'ð^Ùšß×F7F·E÷D ÑÔÊÑ2M*ì²½×νö;³ÇÇïh}Q–cYëaýì;Åΰ¿0‹· ã}ªyUËžõŒÍ3/ëø–l¼lÌ<¦Øùâ²OÉÅ÷ÙÈ(K‘jQEEÌáÙf·¼qÉòË”E näŽB€Â +6Vš:5?O™šFË´Ž¼©S§ñƒF± í»o\±ÇåŒ?õè¾ßåÎÙÿÝ l^»¤*ÅñïX†?½uwDZ×~³sÕª'ŽŒŸ›nž’ Ê*ßü‚÷‘¿âÁù¸In@ÔçÛò3ÓÚò7¥ôzÃ{zosöºúòŸ¶îK8à +.áy×ñ´WÃ^5¼gŒÓBjŒ,AŸg´$8ΈjÜ·ïŒx"®‚b¤;ÎNoÄkÓ®Ë_kp5[åZ“Öš nNÛµ9¿_è{µ½ºÛ"o‹êé{XxP·+òÁ¨Gãö»žM{6? Õ}iø*üˈ/Ó¾ÌËÐõiÅP„ÓóÄ +„'¤ *2[ìtÏökÄl…D“JõtÕÑ£¬B.å͸R6C¡\ÈäBoá@áh¡P(½@Ž3 MrX®E¶ì´pK|Á0þ¿¼ä±ócŠÇΞ'÷•Œ)nCKÑ”Üòrž;'9%2NÐÅ:¢Ô 6mR3fÅd6Ãä¨ÜfLì͘¬#äŽËn†œHB ÞDÔýv+}´³@‡²Î\®Â‚w5Ú8%~5RJšÂsNrlLœ%Z£üCO‘-†árþ-iˆï¥|&´ +Å-ºMú›Âo6nŒ¾É¾]w{t˜^ןÁ®Ò‰iVGšUäÉN´â0=¦­(?—V§E-yMÖç8ÛœtâA²²ü"D$w?g±€Ñ:L«+MÏC”9ÊÅ£Ø"GA†œÑ›Áå oÆ@Æh†Ã´J¤&‡cañéWøk,ä°‹giû4Ó +$->OK±H]ʪ:-31Uî2;'¹$—Íèh†$SB3¦ê(g7$7cb$¡½3´/Ý“BËp™¥pêÔ¨i´îÈ1Ó&œÅ´ •Zª§ÖÞ6úvÆã=ýo®¼åµ7Ýÿñk{_dùQeç6ÜÕPÚ8ùÖINÖ©‡[~÷¼ûÓ}¿ÿýøÆ­kرÛæ5}róÀîwnZœ¥úa­³çÉfH…a“NÝÁD1V!FcB€ö¤(}¸d“]^×€kÔ%¸"vD#´AôÃÝÕâÃzÈXêy37´W©F©ÁT)5%•ird­sRbRbr"×D»LNƒËo‰g‡IÛ—†ŒA¹¸pÅlHž¨l_QæØfˆ‹mþa÷ÊT!3sktATÈX‘1Lݳ¦™U+M›©F4²†ÍÙѵÔûØæGï~§ùå­ë^©,ê˜Ú•<97µ(£¸¢pVÛýÖ.(Ýóêøá?}à³—þ>þÅàMëaÑvæ:®^8þ˜j3;ZÇÈfa`„÷äô8#mà•FÙÄef†c¬–&ˆ\/jP7A7 +šp#Ý&ÉQZ]ŒV«ÓqA« ×͈ÆãøÝn ¸G6Š¨Ñë4(„‡ ÇéÍÍé~¹R6èõ&Ž{øaÎxÿ.[±D½˜ÐK·ÈQ7idZñ?:õ;> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.4)-1000(mm)]TJ +/F62 9.9626 Tf 99.895 706.129 Td [(to)]TJ/F67 9.9626 Tf 12.291 0 Td [(AS)]TJ/F62 9.9626 Tf 14.062 0 Td [(and)]TJ/F67 9.9626 Tf 20.47 0 Td [(x)]TJ/F62 9.9626 Tf 8.834 0 Td [(ar)18(e)-362(in)-361(strict)-362(sequential)-361(or)18(der)74(,)-390(ther)18(efor)18(e)-362(no)-361(indir)18(ect)-362(addr)18(essing)-362(is)]TJ -55.657 -11.955 Td [(r)18(equir)18(ed.)]TJ +0 g 0 G ET +1 0 0 1 146.769 574.688 cm q -1 0 0 1 199.577 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 203.164 706.129 Td [(array)]TJ -ET +.49594 0 0 .49594 0 0 cm q -1 0 0 1 231.784 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 0 0 cm +/Im9 Do Q -BT -/F51 11.9552 Tf 235.371 706.129 Td [(read)-222(\227)-223(Read)-222(a)-223(dense)-222(array)-223(from)-222(a)-223(\002le)-222(in)-223(the)-222(Ma-)]TJ -57.767 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-845(m)-71(m)]TJ -ET -q -1 0 0 1 200.884 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G +1 0 0 1 -146.769 -574.688 cm BT -/F54 9.9626 Tf 204.572 667.24 Td [(a)-70(r)-70(r)-71(a)-70(y)]TJ +/F62 9.9626 Tf 166.233 552.771 Td [(Figur)18(e)-250(8:)-310(DIA)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 232.04 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 114.839 401.402 cm +0 0 313.823 115.567 re f Q -BT -/F54 9.9626 Tf 235.728 667.24 Td [(r)-70(e)-70(a)-71(d)-203(\050)-150(b)-206(,)-926(i)-152(r)-151(e)-152(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-106(a)-107(m)-107(e)-241(\051)]TJ -0 g 0 G -0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -85.023 -27.896 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 136.657 506.308 Td [(do)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(j)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ + [(,ndiag)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.533 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.184 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.958 Td [(if)]TJ 0 g 0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ + [-525(\050offset\050j\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -317.894 -21.918 Td [(On)-250(Return)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(>)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(b)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-398(array)-398(of)-397(type)-398(r)18(eal)-398(or)-398(complex,)-434(rank)-398(1)-398(or)-398(2)-398(and)-398(h)1(a)-1(v)1(ing)-398(the)-398(ALLO-)]TJ 0 -11.955 Td [(CA)74(T)74(ABLE)-257(attribute,)-258(or)-257(an)-257(object)-257(of)-257(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 177.91 0 Td [(psb)]TJ -ET -q -1 0 0 1 369.841 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 372.979 430.129 Td [(T)]TJ -ET -q -1 0 0 1 378.837 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 381.975 430.129 Td [(vect)]TJ -ET -q -1 0 0 1 403.524 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 406.663 430.129 Td [(type)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-259(of)-257(type)-256(r)18(eal)-257(or)]TJ -251.973 -11.955 Td [(complex.)]TJ 0 -11.955 Td [(W)55(ill)-275(be)-276(allocated)-275(and)-276(\002ll)1(ed)-276(in)-275(if)-276(the)-275(input)-275(\002le)-276(contains)-275(a)-275(right)-276(hand)-275(side,)]TJ 0 -11.956 Td [(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.88 Td [(iret)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + 9.414 -10.959 Td [(ir1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.476 -248.035 Td [(146)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -ET - -endstream -endobj -1839 0 obj -<< -/Length 6815 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(1)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.5)-1000(mm)]TJ -ET -q -1 0 0 1 148.768 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 152.354 706.129 Td [(mat)]TJ -ET -q -1 0 0 1 173.658 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.245 706.129 Td [(write)-275(\227)-275(W)74(rite)-275(a)-275(sparse)-275(matrix)-275(to)-275(a)-275(\002le)-275(in)-275(the)-275(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.48 Td [(c)-175(a)-175(l)-174(l)-828(m)-52(m)]TJ -ET -q -1 0 0 1 149.539 667.901 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 153.049 667.701 Td [(m)-52(a)-53(t)]TJ -ET -q -1 0 0 1 172.236 667.901 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 175.746 667.701 Td [(w)-52(r)-53(i)-52(t)-52(e)-186(\050)-167(a)-242(,)-900(m)-126(t)-125(i)-126(t)-125(l)-126(e)-426(,)-926(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-130(t)-434(,)-882(f)-107(i)-107(l)-106(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ + [(;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -75.851 -26.279 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(m)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.464 Td [(On)-250(Entry)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(-)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.464 Td [(a)]TJ + [-525(offset\050j\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 578.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 578.584 Td [(Tspmat)]TJ -ET -q -1 0 0 1 344.406 578.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 347.544 578.584 Td [(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(else)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G -/F51 9.9626 Tf -268.571 -19.464 Td [(mtitle)]TJ + 9.414 -10.959 Td [(ir1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(A)-231(charachter)-230(variable)-231(holding)-231(a)-230(descriptive)-231(title)-231(for)-230(the)-231(matrix)-231(to)-230(be)-231(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.464 Td [(\002lename)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-233(\050i.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.956 Td [(is)-250(used.)-310(Default:)]TJ/F59 9.9626 Tf 74.799 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(1)]TJ 0 g 0 G -/F51 9.9626 Tf -104.936 -19.463 Td [(iunit)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(-)]TJ 0 g 0 G -/F51 9.9626 Tf -317.895 -20.764 Td [(On)-250(Return)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(offset\050j\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.463 Td [(iret)]TJ + [-525(ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -20.763 Td [(Notes)]TJ/F54 9.9626 Tf 14.944 -11.956 Td [(If)-283(this)-282(function)-283(is)-283(called)-282(on)-283(a)-282(matrix)-283(a)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-283(on)-282(a)-283(distributed)-283(communicator)-282(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-225(matrix)-225(when)-225(appr)18(opriate,)-230(e.g.)-302(for)-225(debugging)-225(purposes,)-230(one)-225(could)]TJ/F52 9.9626 Tf 318.257 0 Td [(gather)]TJ/F54 9.9626 Tf -318.257 -11.955 Td [(the)-339(whole)-338(matrix)-339(on)-338(a)-339(single)-338(rank)-339(and)-338(then)-339(write)-338(it.)-576(Consider)-339(the)-338(following)]TJ 0 -11.955 Td [(example)-250(for)-250(a)]TJ/F52 9.9626 Tf 62.495 0 Td [(double)]TJ/F54 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(matrix)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 99.895 178.717 cm -0 0 343.711 82.69 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(m;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 102.884 250.747 Td [(type)]TJ + -9.414 -10.959 Td [(end)-525(if)]TJ 0 g 0 G - [(\050psb_ldspmat_type\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ + 0 -10.959 Td [(do)]TJ 0 g 0 G - [-525(aglobal)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(call)]TJ + [-525(i)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-525(psb_gather\050aglobal,a,desc_a,info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(if)]TJ + [(ir1,ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(\050iam)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(==)]TJ + 9.414 -10.959 Td [(y\050i\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_root_\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 37.659 -10.959 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(mm_mat_write\050aglobal,mtitle,info,filename\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -37.659 -10.959 Td [(end)-525(if)]TJ + [-525(y\050i\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(+)]TJ 0 g 0 G - [-525(psb_spfree\050aglobal,)-525(desc_a,)-525(info\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -23.747 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F59 9.9626 Tf 129.513 0 Td [(C)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 99.895 137.797 cm -0 0 343.711 16.936 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(alpha)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ 0 g 0 G -BT -/F94 8.9664 Tf 102.884 144.073 Td [(psb_i_t)-525(psb_c_)]TJ + [(as\050i,j\051)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(<)]TJ + [(*)]TJ 0 g 0 G - [(s,d,c,z)]TJ + [(x\050i)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(>)]TJ + [(+)]TJ +0 g 0 G + [(offset\050j\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(do)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(do)]TJ 0 g 0 G - [(global_mat_write\050ah,cdh\051;)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -23.747 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ 0 g 0 G - 164.384 -29.888 Td [(147)]TJ 0 g 0 G -ET - -endstream -endobj -1846 0 obj -<< -/Length 6720 ->> -stream 0 g 0 G +/F59 9.9626 Tf 16.096 -32.463 Td [(Algorithm)-250(2:)]TJ/F62 9.9626 Tf 60.055 0 Td [(Matrix-V)111(ector)-250(pr)18(oduct)-250(in)-250(DIA)-250(format)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.6)-1000(mm)]TJ -ET -q -1 0 0 1 199.577 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 203.164 706.129 Td [(array)]TJ -ET -q -1 0 0 1 231.784 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 235.371 706.129 Td [(write)-374(\227)-375(W)74(rite)-374(a)-375(dense)-374(array)-374(from)-375(a)-374(\002le)-375(in)-374(the)]TJ -57.767 -13.948 Td [(MatrixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.509 Td [(c)-175(a)-175(l)-174(l)-858(m)-83(m)]TJ -ET -q -1 0 0 1 201.262 667.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 205.076 667.672 Td [(a)-83(r)-83(r)-83(a)-83(y)]TJ +0 g 0 G +0 g 0 G + -97.969 -26.976 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F67 9.9626 Tf 110.953 0 Td [(psb_T_dia_sparse_mat)]TJ/F62 9.9626 Tf 104.607 0 Td [(:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 233.175 667.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 99.895 220.639 cm +0 0 343.711 115.567 re f Q -BT -/F54 9.9626 Tf 236.989 667.672 Td [(w)-83(r)-83(i)-83(t)-82(e)-217(\050)-149(b)-206(,)-941(v)-165(t)-165(i)-165(t)-166(l)-165(e)-505(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-130(u)-129(n)-130(i)-129(t)-435(,)-881(f)-107(i)-107(l)-107(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 112.299 325.546 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -86.284 -26.38 Td [(T)90(ype:)]TJ + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.493 Td [(On)-250(Entry)]TJ + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G - 0 -19.493 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-190(array)-190(of)-190(type)-190(r)18(eal)-190(or)-190(complex,)-202(rank)-190(1)-190(or)-190(2,)-202(or)-190(an)-190(object)-190(of)-190(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 277.745 0 Td [(psb)]TJ -ET -q -1 0 0 1 469.676 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 472.814 578.396 Td [(T)]TJ -ET -q -1 0 0 1 478.672 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 481.81 578.396 Td [(vect)]TJ -ET -q -1 0 0 1 503.359 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 506.497 578.396 Td [(type)]TJ + [-525(psb_d_dia_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(,)]TJ -351.808 -11.955 Td [(of)-250(type)-250(r)18(eal)-250(or)-250(complex;)-250(its)-250(contents)-250(will)-250(be)-250(written)-250(to)-250(disk.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 9.414 -10.959 Td [(!)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.448 Td [(\002lename)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(DIA)-525(format,)-525(extended.)]TJ 0 g 0 G -/F51 9.9626 Tf -44.274 -31.448 Td [(vtitle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 28.772 0 Td [(Matrix)-250(title.)]TJ -3.865 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(A)-244(charachter)-243(variable)-244(holding)-244(a)-243(descriptive)-244(title)-244(for)-243(the)-244(vector)-244(to)-243(be)-244(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)-310(T)90(ype:)]TJ/F51 9.9626 Tf 70.763 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -108.95 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.533 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.956 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.184 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ 0 g 0 G -/F51 9.9626 Tf -95.322 -19.492 Td [(iunit)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F102 8.9664 Tf 0 -21.917 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -317.894 -20.836 Td [(On)-250(Return)]TJ + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - 0 -19.492 Td [(iret)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -20.836 Td [(Notes)]TJ/F54 9.9626 Tf 14.944 -11.955 Td [(If)-290(this)-289(function)-290(is)-290(call)1(ed)-290(on)-290(a)-289(vector)-290(v)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-290(on)-289(a)-290(distributed)-290(communicator)-289(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-243(vect)1(or)-243(when)-243(appr)18(opriate,)-244(e.g.)-307(for)-243(debugging)-242(purposes,)-244(one)-243(could)]TJ/F52 9.9626 Tf 318.257 0 Td [(gather)]TJ/F54 9.9626 Tf -318.257 -11.955 Td [(the)-349(whole)-349(vector)-349(on)-349(a)-349(single)-349(rank)-349(and)-349(then)-349(writ)1(e)-349(it.)-607(Consider)-349(the)-349(following)]TJ 0 -11.956 Td [(example)-250(for)-250(a)]TJ/F52 9.9626 Tf 62.495 0 Td [(double)]TJ/F54 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(vector)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 150.705 149.348 cm -0 0 343.711 82.69 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(offset\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -BT -/F94 8.9664 Tf 153.694 221.378 Td [(real)]TJ + 0 -10.959 Td [(integer)]TJ 0 g 0 G - [(\050psb_dpk_\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG [-525(::)]TJ 0 g 0 G - [-525(vglobal\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gather\050vglobal,v,desc,info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(if)]TJ + [-525(nzeros)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(\050iam)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(==)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(real)]TJ 0 g 0 G - [-525(psb_root_\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)]TJ + [-525(allocatable)]TJ 0 g 0 G - [-525(mm_array_write\050vglobal,vtitle,info,filename\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.958 Td [(end)-525(if)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)-525(deallocate)]TJ + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\050vglobal,)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(stat)]TJ + [-525(data)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [(\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -21.918 Td [(end)-525(type)]TJ 0 g 0 G - [(info\051)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -23.777 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F59 9.9626 Tf 129.513 0 Td [(C)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ +/F59 9.9626 Tf -12.404 -39.731 Td [(Hacked)-250(DIA)]TJ/F62 9.9626 Tf 0 -19.039 Td [(Storage)-362(by)-362(DIAgonals)-362(is)-362(an)-362(attractive)-362(option)-361(for)-362(matrices)-362(whose)-362(coef)18(\002cients)]TJ 0 -11.955 Td [(ar)18(e)-247(located)-246(on)-247(a)-246(small)-247(set)-247(of)-246(diagonals,)-248(since)-246(they)-247(do)-246(away)-247(with)-247(storing)-246(explic-)]TJ 0 -11.955 Td [(itly)-361(the)-361(indices)-361(and)-361(ther)18(efor)18(e)-362(r)18(educe)-361(signi\002cantly)-361(memory)-361(traf)18(\002c.)-643(However)74(,)]TJ 0 -11.955 Td [(having)-229(a)-229(few)-228(coef)18(\002cients)-229(outside)-229(of)-229(the)-228(main)-229(set)-229(of)-229(diagonals)-228(may)-229(signi\002cantly)]TJ 0 -11.955 Td [(incr)18(ease)-201(the)-201(amount)-202(of)-201(needed)-201(padding;)-217(mor)18(eover)74(,)-211(while)-201(the)-202(DIA)-201(code)-201(is)-201(easily)]TJ 0 g 0 G - 29.64 -41.41 Td [(148)]TJ + 164.384 -29.888 Td [(167)]TJ 0 g 0 G ET endstream endobj -1852 0 obj +2118 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/dia.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2126 0 R +/BBox [0 0 499 211] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 2127 0 R +>>/Font << /R8 2128 0 R/R10 2129 0 R>> +>> +/Length 2502 +/Filter /FlateDecode +>> +stream +xœÝZ¹neÇÍïWÜ xÕûJ€$À‰á†a(¢íQ@9Ðﻪ»NuÝÖÈv Áཪ®åœîêõñóé.:þ“Ï—·ã«õüô¯#¸óçÃ]¹÷âúùýá[¾Jð§ï1\ù|[ŠVI~=>Ùo&ªP“7U¨‰÷‰U&Ë$í&é&µl&P,“Îòχ!¹Ak€Ò°’Ö\m+K¶µ;ÛÊ’mõ!Øæ!ÞÚs¹µ³xkoíÖÎ"·>ÜY¯FÝ\ÎÔ»;ƒ§Ì?ýýüóùOjútÔ\®”úYc»Zån€¦P7¤’ÏêÓÕœ? u`mù,%^±7•_ŽâýÕÛÒõ+º2r¼ªéî +5žˆÐ"}æ¨9 ¿@â–;—㇟ê]p™Š÷rÔë­v.e—{®ç‡ïß1ÿ¹<\m—³ì 6ï"9.v*³#ÍoU´NÉM×@^IU#°$PsôÅ ¹¡Q´â¼£Ÿcú¾ÙQJ®_5&Ã)æv…¶8E_®â&'|WNP1œ!#ü┊¿|¬à”]¸jœð}q‚FÑŠóŽÞŽÒ{a4F)¶F“ÔrrŠ©^QQûÐóÕ +RºR7€yÃ_ä•W5@&ùLa¨MÚÌðß8Œ±z—¼Æˆ•Ö¯vc&EV}I yîåj–Z¦…"îÙÅ«j"jКDPè3…¡&Ô¨`c1Æì2£6ÇÞ§F%1x)hfñЙ'%Ž»äH‹@l£¼fQ¼Þæ/´«Î5Ð,²•çä×ÂnÙߌ&ö«-nr§îè7©ŒÀé‘oéÞT!~¯˜XJâñì_œ-†W |⑳4ã,±Â¨ˆ(8½,¸K3]pžAÄ=çD²Ç‘³W3¡R¤2&WÇH3‹FsH„=ç}ö54®Èµ_ùRùd û•/tâ¦3dsl~×ý +Š±yõ…(Áe·bEãc6+ߟfhù¾6+hÔé¼CÇ$ŠtÊ¥‹@íéªãŠÔÇäõ•æw$>Kï#<;8ê7îRYÕ%„h^W™J*Ì+šÎÕ¦Š!_‘9Ð)hl¥Œù4äpnØÖ¼º‡ÝqùKœÊ56ÎÓ,@ …/¾)ߎÝç•©3w§¼Í+Ï+(ÅÕRœ +­æm¥Ð°Kã©Ž½®8½†Ëŵ2@þ2Úß'.#æ»õÞòvìÞ¯ºöB£ý7ÓoÝùb+â7ŒÊˆ÷çÚÅ☦xQ¿¸4ÞBL›ƨ_¥ó+J§•Êw“˹4ŒÛM3ß\¦ÄÝ +kE7úÍ +k%¯+0Y¯+hŸ¯+Ú®¯+hŸ¯+ڎוÿå!8:äÐW_i›œj +mQ´YOë"¿áê:ÚÆ-õãGä—C5-t^_õÍÊBýNÛÜžU†x‹óvì¹(2àX¥&TÚÓX5®*$1ÜlK‰Zx±ù­Sbè·N ­Ý:EdÓ)©¦­SD£1S ·N‘¬<›þ_܆G¼ÌðH.3<@#Ý° +$†;€m)ˆ8¹‚’…ÂKÌ(½ÆqW@È/Újˆ rAAãý,$t!]+nHE~Y] ‰wc°ß%¯/.Þ|ÖâSB£Ž æ0_¶#­q¦]EmOÍú«¨íµßü!¢®tÖ_EmOÞú«ˆöÀ'+bç[w£ºâ­}>ºk;^ÝïÛ&>DÍßã-?DmÏu*Z›ím‰·vÉväÿÓÜ~út|æ̓ÿäãåíüæùøêC£Íð|¦þl<.|·+éÊ%žÏoÇ_žÂãSæêËOþñ©ÐZÀßÝãSãóJϤ­|ËèáŒ}udáêãÏ8¾}&|ÿõ—:®§ËSõíxcXš8{ƒ¾7»Y¨$§+•_ø÷ˆÆ¢eAÓ„b­*ÃC4´ µt®µëŽæ˜2{,€î<ÆûIëeqS ã·‰d·Yp†[åG\c1—ÐAeå !#Çâ°JD¸óÜz–(Êmi&²žh³ yO‚K¹õØæeÔ‡ã=T7Õ2 È5‡rS°JD¸ó˜Ü®&g_妚‰Œ¯„nÕ §ðÍŽÁ ÕzY8õu&—SË~â\•‰æ`“;jfRG˜n˜Í@C«|*yu†¯ÑGÕZe*¿9‰³‘%åÒLP+Â=ƒÑÊ-Šm:ïèÇØ$gÇ%­~å)³Yâ€bËætqÆ"ÐRi梊(6(¤Õ]ú_è¤5¼ö8#”2õÕSEGÛx:IvãÑ¡ºÌÏ.¡P•r‹LÛwI“„ZLA܇àó4äïÄ…îbË‹ÈôhâN™Í‘@†ø`2 ƒ2QQ  ¿ºp²™¡1Ì¢iæÏfÜ!O2* +u´3º’Ñäh8¸ì“LË‹I3=Êï`Ù H­À©Lht‹7tíµý ¢ð 8à œˆ¾˜ ?,šUE=8xÚ‘k9#_ÎSçû^¹ ¡¡‹©¿J;#ô8úMytFžçù©ñü0•è| h r.òídæ€5:*™j!¯qãÿKq­Øq‹\Oq­L|Ó"›qS 7 òÊ PèÈ Êsë|Ž¥ó¡¤&;ÝGh-„‚O¶•ÿÛ%Ïb€ì;:H5i”ðnö92@&‡J—Óê—ÝdúøG@^)D£ $‚‚ÜXXf­£"ÀMÅ1JAåT45Ír’üÅÕ† 3µÜ@d“€ +rca™ñ8ÏzD—‹bH…°d'¥¢šÃ\LÀ/ó¹¬1ÙŒ,0F c¨)T#  7Ì,¥j7ß%Î¥šäÛæ›RÙ6ßq4›/ÉÙ¸g]åû\¦—×\ÃW\¬ò+3,êmó½Lr´›ïJŽ·Í7ñÅÓÐÈþ¶ù&þ?9³ù.y’QQШ» Ñ•Œ&Gs¼m¾7ì“LÏ‹I7=šûmóM¹m›/]”o›/ÉvóUQxT»ù._àlÛæ»òâ›QUÔÌá×.Ü$Rg{çæñÊØcé‚Lçúqçþú1º‡ç×><>¾bûJ:ªŠÞËÃ_ƽ:dº’·p>yºovª†ç¿‘ã¿ûîã·ÏÓ‘¯ì-u„縑óß¿Ë&& +endstream +endobj +2131 0 obj +<< +/Filter /FlateDecode +/Length 177 +>> +stream +xœ]1à EwNÁ ‰Ô,KºdhUµ½1!Co_I‡ßâë›Óur6Qöˆ^½ QcŽ°ù=* 3,Ö‘¦¥ÚªtP©j•°ñ&Ãû€â˜Êw¹{6—ÒiªGy [ +¢t s1#8ý÷ÕUÃlŽÉV‹"α’¡Cc×fÄb/Šû²íôåÅ9ሪ=Fp©œQbæxÖÁïÒàCvQùðY¢ +endstream +endobj +2133 0 obj +<< +/Filter /FlateDecode +/Length 197 +>> +stream +xœ]Mƒ …÷œ‚ˆFmL»qѦi{ÄÁ° ꢷï0j]¼ Ìß#k»kçÝʳGœÍ Vn",ó ðFçY^ðÁ™õ ŠfÒeíM‡÷'Ç°;ßõÙ3¿ÐM¾×˜y€%hQû˜BIk?ü=Õ{AoÌ3“„ÀÈd™+’+Äš°Jh ›ËªP²*â ±Q$ĆFŸCÒÉι=7[ŒàWòLž’çá÷-a©Š£ØsÇbã +endstream +endobj +2134 0 obj +<< +/Filter /FlateDecode +/Length1 9528 +/Length 6467 +>> +stream +xœÍY{xTÕµ_{ïsf&™Lfòž$™ÉÉ„À$&„wÉä1á‘á!$TÌ$!¼HB«L¨E4 p->¨ +Q/`ËÉDh@¯D¯¶­«µêµ+VÚ[ÚÔWkÌýí“€r¿~÷»ý¾ûÇ=;k­½×Z{¯µ×^gŸ½'ĈÈF$¨zÞÂüB2ž¬6 Åëê[†Û€U7nls6ç½-h¼Od~yEËÊuSÚæYº‰ÔÖ•ko[1¬?ýÔÎUMõË_½t¡‚È+™“W‘06f/Ql¢4²j]ÛwFìý (~mscýp;s(v]ýwZL«‡QŠ¶ûÖúuM#úÕÒ©–æÖ¶‘ö€”·lhjùÅ +ßçЯ!²;ÕûàE¹£ÄnJ'Š|88?4;rI½…´¡5‘‘€ÞÏŒ€töAÚGY4ÈÆÓKÔO³é•P5í¦tšŽP,ÝÆ^#A)§§ÈË\Ä©‚R˜J{è]º‰6ÐÇ4@9TI°xŒ¤J¦i‘ßWÒÝ‘ãЊ¦2ú`kÙBÊG}&Ïe>XÞé§ʉ¼y­Çèc–é¡™¨ý–âh uÐ?S<­¡W#—ä´©²Mìwä¡mW&*]‘[èz:J¿d•¨Í¡ÛÔw¢ŽÒZôz’¥°þÈÙÈ'ô‚¨ #}—î†Çaêç׉2µ›Ü”M7Ð\ª‡ôŸè]–ÀÆ‹@dL¤4²܃ô)÷ñŸ3üðÑ,ª£{éqDãm:G_0+›Äc‡PÞ`Tßo•ÔN·#¯CôÒa:ÎƳñ<…§ Z)4–n„l'í‡ý^:Ã*Y-ëg/ŠýjÁPq$1’ù$¡qT÷Ñ‹°ñ9+€,ˆLѦd(mjáå-˜árz”ÎÐðãÄý ú+‡ò!¿ƒwD–DžŠ| _,䢩4Ÿ–R3m¤oÓXÕ—èeú3»È£ yZyE½]ŒÜØfS)|Ÿí…{;V)L}(oc–qÌYLesÙ¶’íd²>ö.{—›¸‡¯ç¿ºxM¼¯LVÕHFJ¦ ØÕh ­Â +Ühßù>E¯Ð)–IJYfô6úɯçå(OòÓü±UìT.©w ýçÐÅH™‘e3‡vzQøK†cÙÖÊ>‚ç»ø³"V8„&&‰±HÔŠ»ÅnñïâçÊåòž:K­W™ë‡nz#Rùž|É¿ÆP.M¤)ÈŸȦ[à_ ÊÚD[¨‹îC¾ÜOÝtó>I§è—ôkúV€˜>¯†õuȺ­ì>”=ì0{‘½ÂN±Ù—²ðL”>™ó2^ÁWò­(»ùþ6?/F‰FÑ!:QöŠcâ]…E‰¨…(3ÕíêAÓkæóLsƒåg—.\w¹öòC4”6ô­¡‡^ú$²8rü÷R]O·ÁË=ÈÁý(O#ÑOègô+Ã×Og*2ÞÉ4dC.V­˜Í`³Pæ°ù(7¢,aKQêY[…ÒÁ:ÙwÙì{ì^ö€QÆÜö³²c(?f'P~Éβ߲߳O9’˜ d³—áù|fZÆgðy|ÊJÞŒÒÂ7ðX¡ƒ¼—ço‹áy¢^¬{ÄÄKâ-ñ•Â•\%_ñ+‹••ÊÊiå åå¢êRƒê*u¯ú’)Ý4Ñt£iéaÓÓyÓ%³É\mn0o2¿eŽX¼Ø­~Šy¥o>ù¦Ó¬UMT¾ÃÏâ½pŠu»3ñEb­¸OüB]Á…›½ÇºÄjqKäIQÁÿ*šÙb~’e +—Z$VЊ°CüCþ9ÿDIb‹øïXŽòÏìǼY”q“4¢¾©$)wªç‰ø¯¨ˆofýüq§¸3ò¯T¤îegÕ½ü r+<Îâ­ÞÆB§ŸóÕ|;Õ(Õ‹´qÿ¡úÄ{:¿›o){éc¡ñÏØ {»Æël¶’ÅoæÓØ!츗Y]`ë©…=@öû5ë#ÆžYÁjéÜƦà3öºð°·D4ÕJY6ObÕ|ß(ž7“Ã.ñ º V€Ü¹ò Ñ­xvó1ØÓ‚ØMÞd…䤇°ß>ô¼Ü±ÕwÔíȳÇE.- ZÆ_£"¼£ÔÐ]TH'ƒwS˜6E:Ùrìûs°rêck(ŸY±[¦À·|/’y&öÂ:Xý+öÿW±ëW²?Ò·™oV?å(R²C bg +aÿÝŽ²œ–¡õ(Ýo:ª¾IóX +‘âÚ‹,ŸnÆ7ç#ØO#?ü[J+¹ðÚy=z<:4“(wÑkŒÓfø<ïyµ2;5˜áj|£ªðM¶t~ ê÷–kµný‚QŸcÔwuê:¸ƒÎUån…ÜA½b㪮`¨ÃõX£Ë´²¦è¼\ꉶ¢jEMOÑZzXÊtfTxJ°¨‡“ŧô4­<¨§jåÒ]xƒõËõêù5Áòt§6/WgeZƒNZ©n÷*Tf˜ÑMeºÙ0ã^-gCÛÝ=¹ý];úÔòÅ,×–×ßT£‹úZi#λåzÊíçœ_71x|YͶoJÓEWйÚ-›]]ÛÜz÷üšoJ=×Öb {+B]0¼!¬\è†-¾µ¶Fg[aÐ-ç!ç4<»&-(9¡5n=J+ÕVu­ aaÒºtZp›'œ–8 ´ »kQæÑ‹ÓµÚúòQ=‰Ôµà¶ÞÔ€;õZI^n#n8¬=±ö‘JŒí›•¦«2£f¨ËZå‚«qeÒ#mÒAw7ºáI†9M•¨i*u5N…žZ†^úr¬Çj=ª,Ôå(ß!ûëª×¡¹»¾ ¬¿vá×rêG8&¯ã ’U™%W ò+uÝçÓÇ“ b.ÊÂÇéF{R^îÆ>®k-7ÂGÕˆm}mQ>‚ïñÈåÝÞ 4ôÎù5Ãm75¤‡)ï«ÕyHJú¯H’n”’Î+’«ÝCòøY’7‹$Ý’}õÏîHN®*ÒYòÿ n–W.Ô*ç/­q»B#±­\tMkX>õªl¤Æ†¸®x©YRoÁÒÉÀŸê­Ð‚«C3ñªÁG=¡¬F¤óÚáOÆPÈß›®Ž,51r,Åk2òyŸÙ‚68Ì]¡;B3‡qm´Çó¿ìÔ”½ òu·‘9éE¾kÛ×_Ӿƽ˜.‡•l^¹hiWWô5² +lV]]š»¢+ÔUßélÐܭ븨5]-ÁЕå؞®Wì¨Å$V±¢Âà˜èS|}šIÅ÷Î{L ίÛòã‘~ñao0Xèõ]gÐpÎØBCNUø¯âC~QgÃÉé†äƒpiéHeòÔáJ︼³%Ñâú€‹ÄY|TŒ^½9×–ØÀ`â²ãï¢nñkÒœâ½Þ¬ìÂ}'ÅÏ UœÂ÷Tv;¶ÅbÀŸŠã”îÂ9ôèˆähol\!•´Š{1½~à3€À @¡fq:;G +Ù]€|À<ɇÄ!ø¹ýíÀù€fÀN€B‹ÄÓàß"±xJ¬Á‡Ñ%vàbšº]|ß ÿšúø¸>ˆÇÑ–tßHûP)ÿÁÚÉ ЇÀO}иðºÄ#í¢Ýè×6B»Ek8Ãå(É€Ü (Ôv£¶¡Û3ìÖ–z@ A× S„ksØ£k´¹7%µ°!ÝŒÐoFä6#r›IhÓMÃ:ybt6Agt6!*¢öZ±`ì¸qoEÜ%_îœ1øßÞè–-ñmÄq,¼ºG¬ ç¸d+{§ +‹ŸÃ9–aؽ©£ w~ÝŠŠ–‰;BíR·É6õFÅHnSoÚèa +­[JbE#ý€S"p`"  ˆÆpV¾ë„˜Kë,ˆuuðÑ¡t¨JA9‹?) +©ÚBHÉx‘G~(ŒuÕùÙ”PTKTg”pD¹£ +¢QÕQj3®D;…p‰|Q,æ‰:¡öEúÃæ¢ ¦¢ »¬ÝVÝÚo=cUuS¿éŒiÀ4hRݦSÀTm +™ZL¦]¦nSÔ.Ó.3Y[¬Vá°º­Ö€µÚªºÌ¬»d«h/-°ÐØPã:ðÝâf@V£¡¸|&´€3¨€ªhÙ¡g‡ž\;¸vp XJª!@ˈÔtUr¥Ô”ÀHcÁEl€e 0-Z6´lÐ:Ã/ÁC°P o€¬¾"+‘‡&C>hè\‘d_~)P?¦,ÓDzî±l×Xð—2âããë´:o]NÝ~¥Ykö6ç4ïWæió¼óræíWŠµboqNñ~%_Ë÷æçäïW\šËëÊqíWvV©:YuºJ©«j®ê¨S°t½a_A¡A3½’ §¦N±—ÜÀ`:uÀûg‚ìÀ.@> Ð Pùƒû ¸Ï€û ÍÔTôzFn1À®™äï3d²&åü¹À䇋&Ì+©Â¶[Øû0ä‡ íáÚƒ¯üy#úÝ_j¹WúÉMp©±Ý-Åk¸”Šu€€J§Å: ÀèÀ.@ à@KQ–ˆ%ü”Ãü°È ØÆ'¹(9Yþgq”8x rÁÆž2ðþÇÀÅÎ +Äζ}9ÛöÂlÛ]³mcPá9øÙØn{ÖÛ³%¶y%¶±%6Œ–B²ñ$›$fÿià¹Î $zl_ylŸylöØóØÖ{l7xd¿Qx‡m<ÑÀV‰qI•x¶³V—í'.Û—mŠËVbc{¬S©3 œ.1ûôY{¹¢žcŸR9FbaÿXW'ƒ°HØ_2öϹöïù[Øÿ}×óì+f|ÚØ—á¬s®’$ö9›¥Èög#ôÏl®K.\žgáâæbÈϼ ÿöo‘úO¢ÿÐ~‚2-Rÿq\%ÝÇfüÇFú=Îm€ÕG¹·Áê(×°úP8÷¸ßçÞr8w-ÈΰW:¸&ìç*‰c+)‹KÝFòréIՈřy-èŒáÎÁp®ìU. ô±²°6dŒôòy¦QµaÎÖŒIŽ&Íbi†Óéä5h,³ÎÛ(Ó –°¶£˜žõžsýÅÿœœ8}Áìá½®žÇü£ù6+|ÈõÆq®°ëtnósý\{ÎõJV[võçöY 8™ÛÇÙQW‚¬C—³c®#¹+]Ïh†t¿)–zŸ?Ïõˆ¶ÔµÇ‹vص%÷yé­ÃŒC\›;ÝUå?äªðö1ˆ~ D»Š´ ®i`Oíc³z¹ÆgõIW +0Æ¡c®q°˜­®Ü8åŸDfÖÈ5·™Ì‹ÍóÍ×›'˜óÌnóhó(s¢%Þâ°ÄZb,Ñ‹ÅdQ,ÜB–ľÈ@À'Ïm‰&‡ñŸ"±bÔ\bnüôGœY8Þ=ATòÊ…¥L¯¤ÊE¥ú_eŸ9²@Ÿê«Ô-Õߪéaì¾Z´t~w£E5HPÉÚš./wlj±ü­÷¦Kºië½µµ¬Rïo¤Ê·þåBÌ#‡TU+uRòÆbgqüô¸iå…F°ïëÇéûæã­?X¹°Fzt­^(+‘ѵ•ú y-<Î×óæ`ùqÞ"ImÍqv;_\ ùìöòÚ«j”É[ F~I¤Z/eJ5Êd½†Z•¡†4Í –÷df+½ÄfI%¤ÏK†ÒÊá±²`cUK5žAYÆXY[VÜŒc´RÝ¢¼’¼)ÂÛ)E±òç—‘óŽë=é'ØS#"ØqZ)ùÈ\]~õ¯µµµMB{»¸­ÝiðÚðÒzVêò²è×ýA=*¯er9ÚGž²š€ã¤ÿ´Ÿ7û;ü;ýûüGüj{{-Øñ'3OgòºÌæÌŽÌ™û2dš¤à¦šcÿ¾Ì?eŠvdkÃ,7l¶ƒâO6ÛÚ[åC0Ð +6çk÷•Õ”dR#N½ 'ô’ +U»6¶B’Ám•ÝÚ%½ªÇåÅY5ÎÌd¦9=œ=Ç_ÀaÕÌO†IUúø Ï +Š6ËÊQF©“zrN‚¥(v »™œ>Ç—þËþ¹ŽÏýs.û©uÇ% ñž8Oœ[?]r‹þK•.’[é76ÿ…¬“×à$¨8àæjçèå“;TÆŒ nìVÍBlëfg˜‰õ±‰G©SY´TZ»¼ÌïðSþàñl™/Á“äYÈÕËyÊCÆ픋éRWá¼Sø®Ùi–âuÃDg(U"{FròX³ß<ËüC³)àþ–²Ôò­”¥Î[,mqmñZ‹ÝwØz8ö”z*åß寮ëp¥|•’„s€’ª¦'¥&§¦Œvš£R¬Në艩3RïIÙé6;S9OIKI5ÙD*WMΔä¤Ds‚bëc«QQĘâÎ(Õ'&bjÚÎT¶/õH*O=!& ž÷ö2“ÑÇî ØÈô›y u Í  JB3˜T¹îN·¹»ÝÜúû +±³±@ ±Ž7󾓟ä§ùYþ'ná©®ì>Óéú,›sÎa®cÙú/—Íù|ÙÇ,Ò…ËËÖû‹/¯÷‹‹O™?¦qG¬›CÝürìˈìú Ëh™L,ò1á™D4ib¶–i2k“'O(”S3™¹ÙS8yòq¨îÒ«gî½·.ß—íM=ýÈþ_Ì>ðÕtÖ°vIES‡.zY){ø‡[´¯?þ“·v­\ùÄÑ¡Á©ŽñyÆz b½LX¯dz(p†œÝΧB΀“o¤»ˆÇ–$°Õ8µF±nÊD¬dÝ‚º†Î%;[MÉàû4€ó˜Gq¦FYb¸ ì/PŸˆµâ&Ø;ì»ìÝvÅžšr‚g±s#òùç8.œsÈì-öÁ˜F_\¸Ä¾ðùÆ ë—%x'Ä%&'§$y&Mç“â&ŽÉF(̃l¶'ÁÓMMŽ6{Ó¼¥ÊO¿¸mÃÔ îõòÑãoçïïçÎpá¤C³#çÅ1Geщp½ÅSbXU“$±ÙÒú˜=•FÙlÈewgd+Ùq’[GÍÔA;©ïiª÷Ëøze‡×u–ôBñ…ñe·ªX––•™ÅMœ ÆMfï¨ôÑéé”m÷Z³©)©ÜäQâÈeJk`‰±¨%Ç –ÅÜ ,ÝïHj Ôh ã*Ñ8ÆÛ’01~ +–?%9.‘›´Ì1ÙS)É +'O™,ƒ2FFÅÄgïh[ztÓ#w¿ÙðÒ–u/§­ŸÜ–q]AÖ´±Eå“fNä{ϳy Jö½2täCÇøøÅ¿ ïy ~Ãa6íü#­ž=jäEUäœØ¯V‘•ž9NJd 7!}ºjœ.QIµ0UŒ‹*¥€-d붽ÊNñwØ;|ÀfM#fed ØW—…@šà‰BpEØÔÀŒIêo˜ Äô¹3õ±=Ǻ­Ìš£žàçqÿ$ƒ³ªPª•nEUžç¿¥˜‘;¾\váœ÷Ïå;äs\ðû·©×ù¶Ån¾òÆl@¼°#1y +ó˜Ÿó_ ù[ØCÛ×,š0Z­ÊþÛ Ê+é×…¬Ã¿]úi ^gù\Þo´ùȯ›‰ò'/! `¢ìÁŠàÆ?GÿÁžÿ…&X‘³œ‰çI,¯ÖÆåbøQ ~¼ü÷8É߇㈭^×Ô:·éÛ š×ÕßZ½pÎ"£'ÛõDÊrms#×0F~6Mc£øÓ´ÎåBi6U ¯éÿðȾü?ôèGNÔÙý_XR‡=ñÑè—$=zC/]l»¼ÃQd‰E3ꊭÿîi +endstream +endobj +2135 0 obj +<< +/Filter /FlateDecode +/Length1 4304 +/Length 2777 +>> +stream +xœÍW}l[Õ?÷=ûÙÏIì÷e?'®÷üòÑÄvÜÚIwiÒ6mÚ® M[l(…$MK¡…¬”°nƒµò6J7TCš˜¶ñ»Ïõ4`@µ¡IÛ*6uÑ61&±M­CiÒ@$Ù¹ÏNÊ×Ôí¿Ùò½çwî¹÷žó;çÞ÷ šàð0¾c"çø6{¦LÎÖ°? @ž>~ÌhP@Åkø{äÀìÁ#Jú»8v€›9xøž5{÷ÝÞ'o™™ÜÏû¾y@z•ý· Â3ͯà/"n»åȱ»köÒ86+ß1=YßOÃ&tdòîYB¥ hÏü1nŸ<2S÷홽ãÎcõùlcöèÌli²e;€+Œ>ÝÈFÈyî,$bøíÀo? ÂptÛa7~÷":·Ãâka¨Ð ¬ƒk  nƒC0ûA† 4ÃÍp¤a´Á>È€üp=l‚‡ákp +Jp¾³p<ðydö>ø<ࣤ n¥Ýãºåx‘‚µ.L…DamÑÑÝ_4.R¢ö„S”$?ÑÆDŠrÉ­; ­¢™¢|òPØ Ãã“SÔ•dSM˼·ðçÈoŠ´+ÌGþQŒX&u' +tôxÑ(q=w²iïõ)*$í89…»§öîPÀe'Á48-3Þ]`í΂5…Þ[#ìˆ5‚Ìï,”±×O”‰A°£Æ4mžY±´W0IQ‹¼`“rŠÕ‹°_øÃgæƒÂ%¦ùÈÇçh|äï0,lÆ +çsbS½ã}à~Ž,>DÉ×a+õŽlB.Ú£,£TÂbÕv¢p¢Eæ÷€ºŒ”Ol >ccÙuK8@46¾¾/,B{qPÌׇ z¯ +8è‰×ð^4°5ê AC¦š4®¨Býu4Ôúû¹„ÍG¨"Ô¬e¨°}UgߺFF¥yJåÀ2 0{‰XWìƒLãÿð +!¦ r¡+ -Ã0B=¹‹TK° ¸ ¾ø<×Ͻ Þ%YÈÁóPî&¨´5êJÐLÚî÷T©ž¶ÁS-ëàKœÝ´/mg]UBÒ43Wé—!€ÖýíD¥ÅAå–8³n‰¢u‹d§HÂîÔ«ç|)o¢"Jp'tJ¶†z^«R^²s8wµ3×Σ֗’ªç©(Ÿ[wŠJÙóù<ÕdêɃ­·ÉŠÍ‹ùüªÕ$ +j~âî\ç@âѶõ 6˜_ÎôÆ[Hx“üèi¬“;«ò©‘7Îç[9UåÍkž}ilFTÕݬ6¿þ†îWüXå +X‹ïr6Öy˜0eŒ+Ë\IÛË_=KTÍPAÂçl‚Z™ +Hpî ’­ã½’`Üåäl7Ë_SÞVÙí¤°Ã‹Ç5A>Vš¦îhãB”Ù\–¼¡‹+מ1×,\ÎÇŒ§òÍMý»Oig»DýÒZëÕµmüREš.õôúŽ¦àëÖ ,sS@nºÀ†²È2È#L¨4ÔÈPC-¨dÂÒÛ¢²àZð4Vb]¢Š Å¡®Óvµcô±.&Æ⌡î4õÏÙ€ E3e?{Ö û‘!ªd íHK> stream 0 g 0 G 0 g 0 G +BT +/F62 9.9626 Tf 150.705 706.129 Td [(vectorized,)-267(it)-264(does)-263(not)-264(necessarily)-263(make)-264(optimal)-264(use)-263(of)-264(the)-264(memory)-263(hierar)18(chy)111(.)]TJ 0 -11.955 Td [(While)-228(pr)18(ocessing)-229(each)-228(diagonal)-229(we)-228(ar)18(e)-229(updating)-228(entries)-228(in)-229(the)-228(output)-229(vector)]TJ/F67 9.9626 Tf 335.99 0 Td [(y)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)]TJ -341.22 -11.955 Td [(which)-290(is)-291(then)-290(accessed)-291(multiple)-290(times;)-311(if)-290(the)-290(vector)]TJ/F67 9.9626 Tf 229.535 0 Td [(y)]TJ/F62 9.9626 Tf 8.124 0 Td [(is)-290(too)-291(lar)18(ge)-290(to)-291(r)18(emain)-290(in)]TJ -237.659 -11.956 Td [(the)-250(cache)-250(memory)111(,)-250(the)-250(associated)-250(cache)-250(miss)-250(penalty)-250(is)-250(paid)-250(multiple)-250(times.)]TJ 14.944 -12.068 Td [(The)]TJ/F60 9.9626 Tf 20.923 0 Td [(hacked)-426(DIA)]TJ/F62 9.9626 Tf 53.49 0 Td [(\050)]TJ/F59 9.9626 Tf 3.317 0 Td [(HDIA)]TJ/F62 9.9626 Tf 28.224 0 Td [(\051)-426(format)-426(was)-426(designed)-426(to)-427(contain)-426(the)-426(amount)-426(of)]TJ -120.898 -11.956 Td [(padding,)-416(by)-384(br)18(eaking)-383(the)-383(original)-383(matrix)-383(into)-383(equally)-384(sized)-383(gr)18(oups)-383(of)-383(r)18(ows)]TJ 0 -11.955 Td [(\050)]TJ/F60 9.9626 Tf 3.317 0 Td [(hacks)]TJ/F62 9.9626 Tf 21.758 0 Td [(\051,)-271(and)-267(then)-266(storing)-267(these)-266(gr)18(oups)-267(as)-267(independent)-266(matrices)-267(in)-267(DIA)-266(format.)]TJ -25.075 -11.955 Td [(This)-256(appr)18(oach)-256(is)-257(similar)-256(to)-256(that)-256(of)-257(HLL,)-256(and)-256(r)18(equir)18(es)-256(using)-256(an)-257(of)18(fset)-256(vector)-256(for)]TJ 0 -11.955 Td [(each)-283(submatrix.)-411(Again,)-292(similarly)-283(to)-284(HLL,)-283(the)-284(various)-283(submatrices)-284(ar)18(e)-283(stacked)]TJ 0 -11.955 Td [(inside)-313(a)-314(linear)-313(array)-314(to)-313(impr)18(ove)-314(memory)-313(management.)-500(The)-314(fact)-313(that)-314(the)-313(ma-)]TJ 0 -11.955 Td [(trix)-337(is)-336(accessed)-337(in)-336(slices)-337(helps)-337(in)-336(r)18(educing)-337(cache)-336(misses,)-359(especially)-336(r)18(egar)18(ding)]TJ 0 -11.956 Td [(accesses)-250(to)-250(the)-250(vector)]TJ/F67 9.9626 Tf 95.959 0 Td [(y)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ -86.245 -12.068 Td [(An)-246(additional)-246(vector)]TJ/F60 9.9626 Tf 93.666 0 Td [(hackOffsets)]TJ/F62 9.9626 Tf 48.566 0 Td [(is)-246(pr)18(ovided)-246(to)-246(complete)-246(the)-246(matrix)-246(format;)]TJ -157.176 -11.956 Td [(given)-321(that)]TJ/F60 9.9626 Tf 48.303 0 Td [(hackSize)]TJ/F62 9.9626 Tf 37.686 0 Td [(is)-321(the)-320(number)-321(of)-321(r)18(ows)-321(of)-320(each)-321(hack,)-339(the)]TJ/F60 9.9626 Tf 180.964 0 Td [(hackOffsets)]TJ/F62 9.9626 Tf 49.311 0 Td [(vector)]TJ -316.264 -11.955 Td [(is)-321(made)-321(by)-322(an)-321(array)-321(of)]TJ/F93 10.3811 Tf 105.045 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(m)]TJ/F62 9.9626 Tf 8 0 Td [(/)]TJ/F60 9.9626 Tf 6.336 0 Td [(h)-40(a)-25(c)-25(k)-30(S)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 36.682 0 Td [(\051)-235(+)]TJ/F62 9.9626 Tf 16.868 0 Td [(1)-321(elements,)-339(pointing)-321(to)-322(the)-321(\002rst)-321(diag-)]TJ -177.205 -11.955 Td [(onal)-289(of)18(fset)-290(of)-289(a)-289(submatrix)-289(inside)-290(the)-289(stacked)]TJ/F60 9.9626 Tf 198.567 0 Td [(offsets)]TJ/F62 9.9626 Tf 27.788 0 Td [(buf)18(fers,)-299(plus)-289(an)-290(additional)]TJ -226.355 -11.955 Td [(element)-350(equal)-350(to)-350(the)-351(number)-350(of)-350(nonzer)18(o)-350(diagonals)-350(in)-350(the)-350(whole)-351(matrix.)-610(W)92(e)]TJ 0 -11.955 Td [(thus)-315(have)-314(the)-315(pr)18(operty)-315(that)-315(the)-314(number)-315(of)-315(diagonals)-315(of)-314(the)]TJ/F60 9.9626 Tf 267.65 0 Td [(k)]TJ/F62 9.9626 Tf 4.598 0 Td [(-th)]TJ/F60 9.9626 Tf 15.499 0 Td [(hack)]TJ/F62 9.9626 Tf 21.068 0 Td [(is)-315(given)]TJ -308.815 -11.955 Td [(by)]TJ/F60 9.9626 Tf 13.539 0 Td [(hackOffsets[k+1])-250(-)-250(hackOffsets[k])]TJ/F62 9.9626 Tf 133.667 0 Td [(.)]TJ +0 g 0 G +ET +1 0 0 1 197.579 381.801 cm +q +.4451 0 0 .4451 0 0 cm +q +1 0 0 1 0 0 cm +/Im10 Do +Q +Q +0 g 0 G +1 0 0 1 -197.579 -381.801 cm +BT +/F62 9.9626 Tf 198.751 359.883 Td [(Figur)18(e)-250(9:)-310(Hacked)-250(DIA)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G + -33.102 -24.137 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F67 9.9626 Tf 110.952 0 Td [(psb_T_hdia_sparse_mat)]TJ/F62 9.9626 Tf 109.837 0 Td [(:)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET q -1 0 0 1 99.895 695.17 cm -0 0 343.711 16.936 re f +1 0 0 1 150.705 120.326 cm +0 0 343.711 203.238 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 102.884 701.446 Td [(psb_i_t)-525(psb_c_)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(<)]TJ +/F102 8.9664 Tf 163.108 312.904 Td [(type)]TJ 0 g 0 G - [(s,d,c,z)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(>)]TJ + [-525(pm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(global_vec_write\050vh,cdh\051;)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 14.122 -10.959 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(data)]TJ 0 g 0 G -/F54 9.9626 Tf -2.989 -24.209 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ + [(\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 164.384 -586.799 Td [(149)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(end)-525(type)]TJ 0 g 0 G -ET - -endstream -endobj -1856 0 obj -<< -/Length 1109 ->> -stream + [-525(pm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(type)]TJ 0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(10)-1000(Preconditioner)-250(routines)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-228(base)-227(PSBLAS)-228(library)-227(contains)-228(the)-227(implementation)-228(of)-227(two)-228(simple)-227(pr)18(econdi-)]TJ 0 -11.955 Td [(tioning)-250(techniques:)]TJ + [-525(po)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 13.888 -19.925 Td [(\225)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 14.122 -10.959 Td [(integer)]TJ 0 g 0 G - [-500(Diagonal)-250(Scaling)]TJ + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(\225)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - [-500(Block)-250(Jacobi)-250(with)-250(ILU\0500\051)-250(factorization)]TJ -13.888 -19.925 Td [(The)-356(supporting)-356(data)-356(type)-356(and)-356(subr)18(outine)-356(interfaces)-356(ar)18(e)-356(de\002ned)-356(in)-356(the)-356(mod-)]TJ 0 -11.955 Td [(ule)]TJ/F59 9.9626 Tf 16.301 0 Td [(psb_prec_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)-350(The)-263(old)-263(interfaces)]TJ/F59 9.9626 Tf 87.314 0 Td [(psb_precinit)]TJ/F54 9.9626 Tf 65.386 0 Td [(and)]TJ/F59 9.9626 Tf 19.489 0 Td [(psb_precbld)]TJ/F54 9.9626 Tf 60.156 0 Td [(ar)18(e)-263(still)]TJ -311.41 -11.955 Td [(supported)-250(for)-250(backwar)18(d)-250(compatibility)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 164.383 -497.003 Td [(150)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ 0 g 0 G -ET - -endstream -endobj -1862 0 obj -<< -/Length 5016 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(off\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.1)-1000(init)-250(\227)-250(Initialize)-250(a)-250(preconditioner)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(end)-525(type)]TJ 0 g 0 G + [-525(po)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%init\050icontxt,ptype,)-525(info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G - 0 -19.925 Td [(icontxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ + [-525(psb_d_hdia_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(ptype)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 9.415 -10.959 Td [(!)]TJ 0 g 0 G -/F54 9.9626 Tf 30.994 0 Td [(the)-250(type)-250(of)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 151.121 0 Td [(global)]TJ/F54 9.9626 Tf -157.208 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(string,)-250(see)-250(usage)-250(notes.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Exit)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(HDIA)-525(format,)-525(extended.)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(prec)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.958 Td [(!)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 446.268 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 446.069 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 446.268 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 446.069 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf 0 -21.918 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(info)]TJ + [(\050pm\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ/F54 9.9626 Tf 34.311 0 Td [(Legal)-245(inputs)-244(to)-245(this)-245(subr)18(outine)-245(ar)18(e)-244(interpr)18(eted)-245(depending)-245(on)-244(the)]TJ/F52 9.9626 Tf 285.595 0 Td [(p)-25(t)-25(y)-80(p)-25(e)]TJ/F54 9.9626 Tf -319.906 -11.956 Td [(string)-250(as)-250(follows)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 72.358 3.617 Td [(4)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G -/F54 9.9626 Tf 4.284 -3.617 Td [(:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -76.642 -19.925 Td [(NONE)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(No)-250(pr)18(econditioning,)-250(i.e.)-310(the)-250(pr)18(econditioner)-250(is)-250(just)-250(a)-250(copy)-250(operator)74(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -35.965 -19.925 Td [(DIAG)]TJ + [-525(hdia\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 33.205 0 Td [(Diagonal)-371(scaling;)-432(each)-371(entry)-372(of)-371(the)-371(input)-371(vector)-372(is)-371(multiplied)-371(by)-371(the)]TJ -8.298 -11.955 Td [(r)18(ecipr)18(ocal)-266(of)-267(the)-266(sum)-267(of)-266(the)-266(absolute)-267(values)-266(of)-267(the)-266(coef)18(\002cients)-266(in)-267(the)-266(cor)18(-)]TJ 0 -11.955 Td [(r)18(esponding)-250(r)18(ow)-250(of)-250(matrix)]TJ/F52 9.9626 Tf 116.148 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(;)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -148.373 -19.926 Td [(BJAC)]TJ + [(\050po\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Pr)18(econdition)-211(by)-212(a)-211(factorization)-212(of)-211(the)-212(block-diagonal)-211(of)-212(matrix)]TJ/F52 9.9626 Tf 273.867 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(,)-219(wher)18(e)]TJ -286.723 -11.955 Td [(block)-347(boundaries)-348(ar)18(e)-347(determined)-347(by)-348(the)-347(data)-347(allocation)-348(boundaries)-347(for)]TJ 0 -11.955 Td [(each)-223(pr)18(ocess;)-232(r)18(equir)18(es)-222(no)-223(communication.)-301(Only)-223(the)-222(incomplete)-223(factoriza-)]TJ 0 -11.955 Td [(tion)]TJ/F52 9.9626 Tf 20.498 0 Td [(I)-96(L)-9(U)]TJ/F85 10.3811 Tf 18.202 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(0)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(is)-250(curr)18(ently)-250(implemented.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G -ET -q -1 0 0 1 99.895 130.181 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S -Q -BT -/F54 5.9776 Tf 110.755 123.219 Td [(4)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(The)-250(string)-250(is)-250(case-insensitive)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 150.037 -29.888 Td [(151)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1872 0 obj -<< -/Length 7572 ->> -stream + [-525(offset\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.2)-1000(build)-250(\227)-250(Builds)-250(a)-250(preconditioner)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -20.364 Td [(call)-525(prec%build\050a,)-525(desc_a,)-525(info[,amold,vmold,imold]\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -24.086 Td [(T)90(ype:)]TJ + [-525(nblocks,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-525(nzeros)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -22.815 Td [(On)-250(Entry)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -22.816 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(system)-250(sparse)-250(matrix.)-310(Scope:)]TJ/F51 9.9626 Tf 146.229 0 Td [(local)]TJ/F54 9.9626 Tf -131.285 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(sparse)-250(matrix)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 190.872 0 Td [(psb)]TJ -ET -q -1 0 0 1 382.802 580.382 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 385.94 580.183 Td [(Tspmat)]TJ -ET -q -1 0 0 1 417.95 580.382 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 421.088 580.183 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-525(hack)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -291.305 -22.815 Td [(prec)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(alr)18(eady)-190(initialized)-190(pr)18(econdtioner)-190(data)-190(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 277.288 0 Td [(psb)]TJ -ET -q -1 0 0 1 469.217 509.746 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 472.355 509.547 Td [(prec)]TJ -ET -q -1 0 0 1 493.904 509.746 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 497.043 509.547 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(64)]TJ 0 g 0 G -/F51 9.9626 Tf -346.338 -34.771 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 474.975 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 474.776 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 208.625 0 Td [(local)]TJ/F54 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(descriptor)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 247.649 0 Td [(psb)]TJ -ET -q -1 0 0 1 439.579 439.11 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 442.717 438.911 Td [(desc)]TJ -ET -q -1 0 0 1 464.266 439.11 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 467.404 438.911 Td [(type)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(\050psb_long_int_k_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -337.62 -22.816 Td [(amold)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ -ET -q -1 0 0 1 393.323 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 396.461 368.275 Td [(T)]TJ -ET -q -1 0 0 1 402.319 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 405.457 368.275 Td [(base)]TJ -ET -q -1 0 0 1 427.006 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 430.144 368.275 Td [(sparse)]TJ -ET -q -1 0 0 1 462.154 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 465.292 368.275 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -330.278 -22.816 Td [(vmold)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(dim)]TJ 0 g 0 G -/F54 9.9626 Tf 34.321 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -9.415 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ -ET -q -1 0 0 1 393.323 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 396.461 297.638 Td [(T)]TJ -ET -q -1 0 0 1 402.319 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 405.457 297.638 Td [(base)]TJ -ET -q -1 0 0 1 427.006 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 430.144 297.638 Td [(vect)]TJ -ET -q -1 0 0 1 451.693 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 454.832 297.638 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -325.048 -22.815 Td [(imold)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(0)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(integer)-250(vector)-250(storage.)]TJ -7.193 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(object)-190(of)-190(a)-190(class)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 235.804 0 Td [(psb)]TJ -ET -q -1 0 0 1 427.733 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 430.872 227.002 Td [(T)]TJ -ET -q -1 0 0 1 436.73 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.868 227.002 Td [(base)]TJ -ET -q -1 0 0 1 461.417 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 464.555 227.002 Td [(vect)]TJ -ET -q -1 0 0 1 486.104 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 489.242 227.002 Td [(type)]TJ/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -359.459 -24.085 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 142.565 -36.164 Td [(168)]TJ 0 g 0 G +ET + +endstream +endobj +2136 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (../figures/hdia.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2143 0 R +/BBox [0 0 556 211] +/Resources << +/ProcSet [ /PDF /ImageC /Text ] +/ExtGState << +/R7 2144 0 R +>>/XObject << +/R8 2145 0 R +>>/Font << /R9 2146 0 R/R11 2147 0 R>> +>> +/Length 3027 +/Filter /FlateDecode +>> +stream +xœÝZK9Ý¿¢–E×øýX˜ 4IKE¬&êeXÌßç\û>\î$HhFBQé{¿ãû´OÙ®úx¸ÓŽþñÿÏ·oÞÔãý¿oÞ?ßÜñýÍ÷~ºpøÖÛéòñ¬ +߆âéöö–ý†Å‚ñîÌ6c‰á¬õð.1X4!“ÂtìyG±fE¥P7”h.¨Üvk.¨öÕ^¢Ø†Í•ýŽbÍŠªg«(ˆO¥Npög£³¢xÈnb}mˆ+(xw‰âJ~±âª;¨¾E·DqŸX±‚êYú¬DÓ¦ûÈsM4©GHµï(Ñ,¨àÆRÍŠJqG‰fEÕ¨ú… žÚ˜à¡œ™ÐaLùgÓø44<Áw”hT +mC©fEå¾£D³¢Ú T{‰ÂÂõsVú™a+šs»hZP9í(ÖŒúá¯4mŽEÐ[?1WÆdè ÛºRÓ†ÍJ'Ó*¯aæRVÅÀ2FƒÌ£`BØ0¬XóˆY+!•ì±¯šÑᬕÈŠ5Tp»-Öê#¨¸œÁ·šœ‹;°²ÝñÓß?ÿ„ )"E:Ù&_4Ó±ì(Ѭ¨v”hõJ‰‡Wy×)îÔñAÜX²9ö#?Ï!û¿™œ±þ›?o¦I'ÖÖ¡Ð +ˆ hHz>iÊ9og¤U˜(”?a†EÀ³u¶ßaÈFg0Pð«ùŒõ]½ùÏ>[pSÐ؇(©É@I]LsqF(ÓwépF­ºOM‹å‘ëLN’»X¸ÖÿQ‚§sé>- é@¤–´D–ÕE“Ý;~·†€bšsMÒ(»ÖÄgCyj¯KC*’K®hGj…I’9-‘­' bF-pé\\ ‚*¯’°$0DÉjþlZ*´tÅ…3DkŠ‹´ôµ'S\Z¿KGxôµþÚlnböeiHèg-=kðtCTÆJó¾®+¤Ÿ)øn+¤ Žæu ÏÖýºBi yš´œÔ…&­ˆY± sÙ|èlç ÆRIX¢äÇ56-²†àéÚxvŒàbA —ù$²õD\µpmÂãeuXXĨƒ¹r0ñiŠÂ¶Olâ‡aa00)CŸ¦HÓœ{º­ϱ<[ÐÖ±ãë;÷ØjlK{ˆ®÷¹oÇïþ²%º eá[hPצ¤9c()©¬œešIijùN}(#†°KKB™QÖ&„ +±ÑžG 7DÔÙgû=fZ:š·‚‹yl$oApYÔOíŇL~‰‚‡F9å5Öh¦Ó‚Ubú0º”(„O%Já[ÉÃY2]£fáÚIÉЮ”b^)yˆÆÁü%JFšqGê5 …žÎÈ4C`ôc^¹Çs.­ ¨ÝDbš¦kAGã’ÐE5þìiàË\+ˆ ÁA£Á,Ä4zŠãИ’S],¹q +àÕÀ–ÅèÚWšÙ‡ñZ1m¹©†CË8?¬¹e—¶Ühc·æÖó57‘-7ÖhhlABg–›Ä ŽQ ìYP×¾ÖÌÆZkaðfÃÞ%÷1#YSñüõtÃIoÃ`·FzT#Ô:ž©""ÒÒw+ ÄÁíj€eqA#Xƒ#͸g =з‰ 1€cP€D-¶,ÆZûJ3k-Í^Zn¢‘Ðrœ³ABÏqLË-‡9Ÿæ|S,›cÕphbABg–›Ä ‰Z lYŒµö•fF]æf^˜ò¨gÚ8ų7ÕÄŠýlT3"ª± Ú¿þV)Z?·¾ãf̺¡1Ö°X]]ƒùLÈ¿†ÙYáKjÏ¢ÖKedjˆ›<Ÿ®ð/o•¢‡‹í—çÛ>úéÆÅPó\+u¿Uóq™¿¨Ùñ¼(q|KàcÉtõ¼h|¤;ÓDŸ8TÜè3¦ÆÃYË«HŒÇŠ”Û+½’¬Q‹‰n¨û±ûìþǨÙ¡k£1*R$¢™®`˜jš“†Òè •f‘ª,vU#žÅûØ}JoÒ|íYÖ„5%sMè%Çxé?k‚SW-"RMXQ»Ö„G°Æ|d®ÉÕ§Ô„ì5kšéŠjÒãéÕwjt·n²¨VUÁne4;Ørô!”6t„A· ¡Y!éeB±:³h ¬yâJà×:ùD#†R[Dï°a§˜Ì–Isµ<H«eÕðd¡ èm8Ï% P§õ§#Þ´Ï·màxG¼ÎZË‘ýn9s…#Àƒjõ§á‚N5‚ÌcG,r©™®Çum”Ò|ûi¬½–»8¾PÃôE!øæ*]Šâ ÛæGR,_#ù¬ÝÍ?fCésöâÄñßâÝ´Ï·}$½”Ç®`Ly®…ÈR+Q—æ#­ÍK³ÙzT&zµ)ŠH_x6£=“¥O±ôù¢Og„ixLEð8•ˆÉ«KŽc³2žÙOO¦áU ¡òjSÃJ[êYâE,l^yÖÑgeƒ”´Ø¢þð™‰o² }DêBtôú§ëÏÎ'ºÖÑ"ý¨†ù‡Ç +Š}–é惄†Ì¶fbôBÍ™þïñùøÍÃí›7ýéxøñ6¿qžßŸ`Ú dœöPÀ‡çÛ{õð›Ïø%¥ƒ^w¬ã_îÎ…)ôzÜãÉŽ-îú ƒʪ#Oú! smèéRßa÷ý»;ÿê¾aÃì\º«¯þúð‡Û=–ŒsØﮆßÝݦ@ïwñÕ}Íî.ã?„NC ¬Ô,PŒ˜{¤ …áÛÔ…öJŸ«÷ôº8¬å¹×ô}¯ ÏHzw÷û׿ýãñ§ï¾{‹ÀU­áîÛ‡}±£ÇÀ„nbURA4Ûƒãǹ@7ú#Ãׯ"íW}¼{{¼~ófZÍÍã‡êðg)wæG?Üþ4º‘ +endstream +endobj +2145 0 obj +<< +/Subtype /Image +/ColorSpace /DeviceGray +/Width 2362 +/Height 946 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 2362 +>> +/Length 150 +>> +stream +ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ 0l?ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿü@ +endstream +endobj +2149 0 obj +<< +/Filter /FlateDecode +/Length 177 +>> +stream +xœ]1à EwNÁ €0´C”%]24ªÚ^€€‰ˆ¡·/8I‡¶ülë›õÃmð.SöHA¿ Së¼I°†-i ÌÎÑPãt>³^T$¬¿«øþD eìΣZ€=Å;b×è``JCR~ÒrÞµÖv¼ùÉ]0Ùc³1ç%“V¡KUP¼ J¼vêêáêð4Dõ–øŒo ÍjÏyø}C¬*Z‚|ïY› +endstream +endobj +2151 0 obj +<< +/Filter /FlateDecode +/Length 213 +>> +stream +xœ]1Â0 E÷œ"7hZ(¢Rä.¦ÊÐ4 +eàöØ.00<«/µ¥oW»Ãþ⬫s™ügb +>¦gñ¨{¼Ç¤êFÑÏ“êG—Uµ;º|{eÔÔ€añ“±ºÔ¼ÔËŒŸ|d籸tGe(LÃ߯nè糡NƪʮkŒ¡JºtÅÚ’nD[Ö-¤[ÖÒž5€@JIlÛ€m׬ôEÚ@ÚIÎo"ŽÌ»WÕþY +¦Y$àÅcÂß ó”yJê Ûál« +endstream +endobj +2152 0 obj +<< +/Filter /FlateDecode +/Length1 10604 +/Length 7344 +>> +stream +xœÍzyxTU²xÕ9÷ÞîNÒésÓÙ:!!„%±%7+K !áÓIXÂf!( LÂ(¢…qpâ‚ð‡NG° >‰>ynê¸>G¢âˆÎd†qw„ô¯ÎM@ù}ó½ïÍ÷ýþøÝCUSUg«S§Î9À =À¡¶¦.7ô/ãB [×4wŒ•Ó3°¶uÃzû—sÞÛJŒ÷ Ï/ëX¾fj×\c€Ü¹|õÆecúITÏôhÛÒæ%/]©È, æ”6bDd†<IåÔ¶5ëoïï+BöÕí­Íãý yäšæ;”ÕÖä?Mȯo^³t\_ÔS;Ú;ו3…¾½cÝҎח¹¨­ÐzK¬|ªl‰|$>$8Kpntvà‚¼ +ÔÑ•aAµŸ'Ü û ÎãDx†`6<%P {`œ‚# +ñe@…r8N´ƒJˆAîƒwáZXŸÀ0d@|€áÔNt@4>#\·Ž“V”Áo`WcäR~&ËFõ¼+01x5ð•„O05Ð3)÷'ƒtè†_B8¬„—„¡àfü àRÔXWÁQx«(76ÊÂjªõÆàPàLàSxFBXJ-ýn¥û`ˆMàerØ! ®†¹ÐLÒŸÁ»¹H”î#îø‚¹Øo¹Æá‚YзÃCd·à,|Á8ÄC”^ÿÊïÐت  6‘_=HÖ;‡á8Nĉ,†Åµb ®!Ù.ØOýÀi¬ÂÂgù~9o´8ˆ +|@ÔÓ÷Á³ÔÇW˜G:ÔOáë¥di½œq+Íp <§á5Çd÷¯á;Ì¢ô!û9ë, + |Bc1‚ ¦Áp'Ù6 Jiì5¤]Gmï UòŸÒ[4Ë0´Ó,¦á\œËqÞ~|ße +s°µìsîå/ó÷¥)²(¢–¢!™úUa´Ñ +üœ¬}'Í÷ ¼/b¦aÍè-ªÿ »Š•Sz„bðm|—tA¾etxôÏ£?zÁ@^6ƒìГþ†Ñ4†L\‰ø1|7{’‡r+Wùd^Âð~+ßÃÿ‹ÿ^Z'’Þ“gÉÍò!Cóèõ£¯ª7“-W:dCL%ÿYFÞ´ŠÆ×Ail†­Ð w¿Ü }pˆæ}^„7áðZ@yõ¾†¼nÞAé><ŒÏâ ø"~ˆßˆÄR(e°)¬˜•±J¶œm£´‡fo±s<‘·ònÞCi/?Æß•@’¤€œOi¦¼C> ¼lÈ0Ì4´_¹0r1ëbÃÅFa4~ôßFï}vôÓÀÂÀF¿r`t;ò>òÁý”'O<¿…Wàm}¬_ C™<>Uò†lZµbœ³(ÍÁy”®¡´SjÆl£Ô=ø ¼ oÆÛñ.=ÝKsÛÿŽÇ(=…ƒ”ÞÄ3ø'ü¿`äÄŒ“7;Y:Ëe…4Ó26ƒÕ°ù”–³vJlÛ@+t€ °ãì-Á<‡7óµü>þþÿÿ^bR¶”+¹¥…Òré&é”ôšôŽôƒl“+ä6y¯üœ’ (×(+•{•#Ê9å‚A1ÔZ › 0ŒNŠV¿£y…Ÿ~¹Ê)ì”#¥ÙÚ±¼CÞŽ×Ŷ€¯æwð×åexžÛñ=ìå+øªÀ#¼’}ÇÛq!;‰)Ü&ñe°xˆ}ȾbŸJQ¸€}†Ò/ñ)Ö΢":‘ߢ¤›äsìm(b[pˆ½Àoâ7þŠä½xFÞË^»4Ì"à íêíìªô{¶‚í€z©@þVÝÿ]¾‘ì=ÝŠYüÒ^ø„«ìK‘¢Äë° 9æ‘ï\úFázÚ{X:Å´ +Š&o`>ÄÂ=ï¿}ZDlùyùÙC<æC4²—¡ˆöÆ'”êáȇAòÁ[!Ý ›=¸„âþŠŸ ü¸r1˜¢e ­›Î‹h–B±°‰zýŽâÿKõ«ð¯pÚig A†$$;¥ +ŠLŠ¿;(-F*=w*Gå7 c$ûè^òò÷á::s>¦þãÁMã[ IÙ4j;EæµTãÑ™ Qº^F[hÌÓiŸ×J3)òÞXI3\AgT5‰/ŠÀ=PFk7?pS`4 +\ Ë¡.pâ¦Àv¹-”]RÅØñy:þwPÜž ïQí²l<‡c2¸Wr’¥f©äzó× ý“•jÅ +ÏLÚj4FoDY=O` c9–Àõ¦È¯½Ü²(Ô‡ˆ¶$§¢ûÿ¿ÁH¬sÐ^éµzfŽá† ‡ãYÉ8/jéäÇjãsò¹®,_uEùŠá…ôr°”ƪ,îí ºBVIÁª··RµWözz›ýžÕnU{óz^ßÛQá¹´üþÀàŽoåΚDåˆ;"£°È +?¬o-Ò맟Aç˜à :}ÚA¦óÎJ¼j‰¥·$;âTTäk~¢® :õedæë_|bþðÙaºˆÚˆqÆ K>ð•–Žg¦LË dåäŸ) âÀßÿ€Ÿ¡CE¯51!ÿ|‰™È:ãmÐÇÿ^o 5-ßIþ +É_â/Òy*ª½è3‡åSƒ¿ãOÑ-ÝF÷У㒣¡aùPÒÉo§é >M0Lpž@‚v~º v!ÀBØFKP#8ü?DãÜOõ-„s Ú vH°€?NüUóƒ|%Œ6¾“¦QDwð_éôQ¢ñD&>=øCTtßxù~¢Bþëqþ}TŽ&zï8½‡ø DïÖ¼6~×xyïÒë­§}¼Ó—l³–$“ÜNGÀ)·‡r{Èt{¨„‘.v«õžú‰æ]3FÉ\[|U_£-1qù}dÒ-dú-d¹-d¹- ‘hó%Íc:9|3él&ͤ³™¬’Ç;©¿NZ0 l%°p²{'Ù]𽄇Nëü› ï&è%~Ù1“Fu_éË°‘“-(Ôò‹OÐ=©ÙeqIù»~,™‚„# §¡»T—.0…îÒø¤1JZ«JBy+üŒ€A$áT‚‚r‰·úRsmƒ|.¬1‚jëfݼ[ê–¥¼r ?Éó¡Öä’á<ܤikrãT©ÃÔcâV“Ý”gÒLµ&¹žD»8·ñ\^Ìkx—ý!Ÿ¡hm†R4iwp_°7x(øt°ìU†”ÓÊ°r^‘íJž¢)µŠGéPz”ÝJŸbÚ­ì60OpGpO0·Ûƒó‚µàÚ`ÙfÀ¾’m¼ElZÂV‚‚ÝÙ¸‰øv~A­F™â:âa ’•à4凉ÊT²ž…ô,ĵ×B\ ,$µ‚Žq©rYr©ŽÐ?/$é$ %n(Ùv˜ðy‘#˜M%3•ÌT2“ÖivFh%l'¨%à:o˜€¼†ð%YÞ¸ÜC èòóºÎ%™&ê² ZsúP&z3±/wg¢æ..É×R…‡‡7©MΦŒ¦ýR»ÚîlÏhß/Õ¨5ΚŒšýR±Zì,Î(Þ/媹ÎÜŒÜý’Mµ9m¶ýÒ®ê#Õ'«OUKMÕíÕÝÕ|*-݀ϕ—¯Ó§ G}qñùS-%W³#4&ÂûÎp°¶ä´ÈìˆÎ}‚¸O÷ ¨!h"©Ö"Ķ˟.9!gWÈ9Mþ°¯hRMI5…Ý&‚}œÚ>LòúöXîˆÎ÷Öù5ãú}:_hÙ.ÕAp±îÓ6\ ÅM2œâ‹à µNØFÐAp„@â‹)-â‹Ø”³Ã<[3OŒ²At4,áaFk‰•…/˜ñ ŽïÕñm:.Öqª:ÛüÍló3³Í·Ì6§S†eÐdÆ=:vhÁ%æ'KÌ5%æÌ3µ0³(+ãŸuèpê4-úàÍ¢S£OÝJ­(O:ÏÚ¾uŸ‡¯ÑâÛkûøišßB*~„³|‡l¯æòÙNeûÑyÌö{õ„í…T?.ôÙ†²ýFœÌö3wŽí~u±í>'•}¶­ÙO‹aÀšñB7dO·U»Ù*~$±æ¦Î´ [‘ºÎVHìi~œ5pÈ61Õ/†’Gm:fË¢ÓT}(×Ld“Á€]Z¶a½¡Å°Ð0Ïp•a’!Ç`7$ ‘Æp£Õj 1FÅ(™Œ‘þÀ°æ÷¶HŪÿÄ' ,éy+˜é?ýC#£½ãàU¬ª®½áUPµ Ô;ÕUå7æ{§¹ª¼ÆÚ«ïG¼£J^v«aA=9¨`mK»ã€˜»íöA7o»½¡«¼C­PÕb÷~SGó¢Kª¬–ÆBô†âØâðéa…•åÿyƱëÇ/ÖõÓ/6É{wU]½÷ñ¤o¾È’ª¼3ijð8[ËÚ+ʳAêã&¶¶b¾àã¦ò†ËjÂ:H Ü‚µHj‚ºZµ®FnšRQÞŸ’2¦ôÎJä>ÏéJËÇÚJ¥.¨­ZAH%CªÞV*KjäcY~ÚX EoÌzc‰B©ßé$•l§PéŸê$…~çT]|èG±êN8õ~œØ ÷ƒø£NƘyÁ¸3’Žëÿå·´ô_PÆæ÷—´ŠÇ¹G­XJàñîØÐëíi±Ûû—¼?þjOó´´¶ Ú¼Ôû¾º´Ü»D-·÷7·þq«7«åýÐZ± ¾¿U[ZîkÖš+Ôæò†ÇºËª®èë¶Ë}•uÿ“ƺEce¢¯Çªþ‰¸Jˆ}U‰¾ªD_ié}UÍ/ŪÚú~#”6ÐëN§,8ˆöƒ'ÁÑPm혮oŽ«±?O”€Ž­`z‡¨¥^3å”ä”íN! +?¿Œ‹b~•#aŽ‹¬ÄSKÁ±+Ê/ÿëìì\/ «ËEx}W¬Î[O›ÖQWå­E·×]áÕ<å (–£kü+«×¬'ݧܬÝÝíÞåÞç>â–»ºˆ~2åT +kJiOéNÙ•²/åHŠ"×ÖÓÜûRþ–»țp=}åzŸ]DéŸ(®ïêPcݹº\eõ%)ÐJ·^¤zD¨“êdøOÂo|Lð%7þÁ#‚Ãsx=ÝËE .tbyþ@Þäüi~¢ÍËÆhÝâ1Z1wŒºKòc‰úŠ'•XèŽ0Hø%‚÷>'øÌóy¾Þxט×6tB§ iø@…õuºÖ£‹2(̽¾ÓåÂÁiHÕ…Wú=`g)hAˆ’ÎíÕº½¬ÇÄÃYÖïÌôŠžÓÏð{†.«vÒ²ägÏ<É!È 2G⌊|’ä 8f‚ Wáuë²~ã¾èžkýÊ=碊)o½@hbž#Ìæ$D¡.ØùÐM†À. Q—öçøSr½ÔSaÐ×l´Ó©é“å(AÌæx?Z´pS<¤iiLKó¤õ¥ §Iia‚ÚDünØ}4î8ç &ÓQë¢4Ιkm\ûÍœ()™˜W¶Q«ÆT55%•) 92ÅàLLHJHNàJDšÅœÇ‡Ö6%¾#C)B¹T´·`‚‘P¸5ªâ‚é1E ,²²¶F„O2)?&:,’)jJzÚTkLô¤ü)S§„¤§¥§©)…ÍÞ¹~±çÍ÷ßúFËs[×<_Q¸vÊúä y©…™Eå“g°½ç°f~ɾFüeôØ]Ÿ<ûíè¹þ»š×ÆÂs÷wæ9®®}@·™–é8Ù,ˆnqokÑf +îfÍÂ5 f…`”&ˆÜ$+(…›A +1KJˆÙìÇD-Ü`Œ4ŒF.”#ØÌh>ÐJã>Í,£b2*ŠQ–BB¤tþp0â2-Ød²pÜÇpÆýø­KW]¤»=–>Ë°…[Í€†¸ÐA¼ãÒ¬u‹htÏùª‘²² +Ÿ(.̵º)X/®s‡†bXxáö .i‹õy‘µX,ó`]#6®]‡Qj˜昌“ˆ ?~lÿÅçX×õûGSñ«;FËzø/.ìd]lÒÿ°çÉ& +Ù$îÑ"µXOl_ìp¬±Z,Û· -‰Àtã5a¤ßŠ¼‘ò*UþŽf²¢‰ø…Fw9 31”MÆÆa¿%õYZxh¨E ›œgé¶ì¦YK–¸˜A–ŠgÇ]Îåžc9kž_즹`X!|=r¿v¹hN¸¶1Â9),2::&Ê1y:›,ÜBxÅyœíˆp_;Ê<Ó¢ƒ Îxg©ô»‡~ؾnZ2s:YÒÄMìý=Yöd›˜cuà,ß/WC0‰pD¡ŠÚ†ôß³·GÝx×莵y &%ÉÕiÿxFz!a‚'xlë°‡ÕÓ£ˆC±fgrOÒ’)Ý2¢þƒ fÅZôànìÃÓ¨  ŽB´`±ËÅFò-È!L}7Š^uL¾ø‹¹g¼mŠ5 ÉGò±ú8†}!…&ñk†;¤°ÄTT\•"2afæ´L­ÀSpª`¸àÛ `‰©[Ý4áñÔ㩃^œpF=ãüï Ÿ§|æ ™eÌôãÎŒ +øÙÙÓy˜ççG¹lÆh?î;š¤¹r ’èY1`5gfœÀ6ˆûX ®µ÷ØÙn{ŸÙý|Ò€7Cü¸›ø9=9lwN_Ë!þÑ&C·üì-H+À¾‚¡V@k4ý)-âd‹ˆ›$ÂÞ¹KaמּçF×~%ÐYŠÅ]#ëŠGG sÇ"á” ¹ÉiAIIq¨ŽT‡Ó!)²34--ˆB\®”Ó‚ÉÊ9‚Ó[0È4AÉkA›9IÄ<«{ü|ÉÚJŸîÓë`--ï=òEE¢R)i“ tV´'ë0MU…ߧo+ê¿ù‘E¥ƒ[z:îýóm­¹Ž¸ø°cœYËîQãm®»çÚköÍÜ깿Mš}Û]+kïÙ;ñØϼ[–§'eåb%xïꚪiI%ÉA×Ý\³¼û±±õm¡õÝHûÃAþ\ö¨„á É+’»ån¥;i§t{’a2›ì¸†_c_äX•¸AÞ˜¸õÆ÷&>šúÔaÕBo;‹5,<"*:Æifœ‹€fwDÚ¹dwÄ'$rC¬$w߀ÝîˆÄïéôÐ(²âGÀ>r8hû âtzXÏ8Úcèë†_Óº©¨©•©äß³²>:D#šÉ®Yû¬Ì—2ˆwágú"žm¤0bm1R_ʳ´±(O§˜¾€UÂÂc +·'¸d +š +b£5B#]Ž´àUR{ø’ä¹#InlÀF48 ’XE1Щ4¶,—…Ö$ùƹ£m hºÛ¢›çunÜÔ>AOÏ­šÓÕ¿wÇš§Q’«?–¾÷Vÿªc=éSëò]VGA÷ÏÞ,Ê10‹8‹êÉæý´§b!F´¬.Ó† Baz×ù™SQ8nᛤMÑÛb$·1C‘¹—§p{“´'ŽÙÓ0-ÍBÛøöXÅÑ?`1Óí5±Z8…®,-‹iYž¬¾¬á,)+n̾$‚k„="/B‹ØÑaˆˆËüñp¡qÎųã7} Pp"ë5Ž¬#sá˜Íh#d':MáI‰É‰L sšÓœ&•\ݚЎPÊ¥¥µ`b¸½RBÁ¥#_x¿îûÊ ÂžÂçÅ‘Vž:e*Q‘—M¬¦(üî›<²*u÷/w¼²|ó+;šŸ¹-ß­ºøJøŒÊI³Ývë–´Er›Ó\óðïnkö>¾óñk0éέ¿X¾½Îóaiî£÷ú‡]x¸Þ¢à÷:+gëx‹”(½®ßÆþ +)~§MO À¿öѵ9(Ì`”þÅšÿÿ~Lѱ$ìs~j @8G`ñèsÖóÂj +YÑF.þÎ X°bÍÒιKo˜ß¾¦ùúÚº9 ô,p÷xýÿÍg¼²xή`ŒÿK)ÄDÀl;Áy‚jö8Ô­ƒ¨[ßÿáí°×þÒï=2ØdqmŒëøá“žôèÕðÃú‹;­EÆP%ï÷ÿ=X +endstream +endobj +2153 0 obj +<< +/Filter /FlateDecode +/Length1 5080 +/Length 3303 +>> +stream +xœÍW}pÇu{¾ûÀá@‚îp)!H€`HŠ¦(J´TY–,pdÙ$EËr¤„uÕöäÃi’Zƒ6¶•hœŽëIâL:I󇻢S[IGi:´V“ŽËq;©;ã~ØhR'ÓvEöí¤d7MÿëanoßÛ·ï÷~ûvBððpø¶£E œGþw,Ž/Ÿ[\íÈÒòüòù‡ô€ü$*~ŠïS÷­ž>i¿mg¸•Óg¹¯cï~ péþ•ÅS¼ÿ‹O(_Eeù~Tx–ùÿ +Êý÷Ÿ{èᎽr‹Ñ³Y^ìÎÄ¢ïÜâëdM¼Šöÿ…²þáÅs+Ýõ½ŠEbõ#}¨Ûÿ1Ö¾úàÊjc±÷€+Žkº›µ+ÜÓHáoe˜„¸ÏÑ‚cø;Òiø0|ª Ap;FA‡Ýp Á‡à ,Á)@…¸"ÌA?œ ¼†»`<¿ á·`>Ÿ„£ðˆìÇá7á³?…<å>\£·ž¯S0wÇ©«MÕÝ'êú+”(#ñ%yýïh0W \þÀ‘Ú^³n(Ÿ?×éÌášAgêêʳ®†iag¦©õ¸a$ôõ€p5Ǻk3³HÞÔ×»“›zíÀÑ„AI½Ö@‡̆©7æ"ëÐéÂ>*²0ȸn‰9À*òûh°¹øÀ½7zº*yt¢ñ8ƒíÖSfÃCõõÉÄKØͯÁ ™™%^aœ’«±òHÍ\ÂÕ›³ üs‘Ÿ9Rk"÷,Ï6‰NðCõeÚ³Ò·5—š§¨E\°(8¤ÇóÙÍq8%¼úîÅkªðÓ¼ç 9šùg˜ö#Ã9”9ˆ±®ÞßÅV÷ dós”|PïášMÈu{žE”ŠHÖ謄ZÅb´\²b%«RÒÌ—SJZ˜ ‹Í—žœYõ>Õ§U¾zîÓ…ƒ·œ?#$`Ñjöe‘úXÈYÍl“²JCÛ·`eN½;Mé0%B´aô±¦ÝH¯"ñü +:%‰³g3¤A„ÑÒ®ï¶ç¬Á¿âeÄïs/TèÕ'6þåi}‚ôM¥íCŠæ>ƒ\êÙõw2#H8ÞíqÉ÷ŽŸQ¿]I»°Íœ&ö˜ÁjéÊËÏíOGBrÅÓÆxÀ€Yh‹hã(mÁ‡¬Ê)Û c‰û)ˆAóÅÛ¶‰ß`¡Š““lËðÌG·“RJNzQFGx3æ;¡šæÈ©Ç/¿ðäSüâ¾¾ÜD%¹ëP!ˆæª¢áï=ó¥+ßöKß%Ç÷¯ÎåÅáÝÇo*--MN.-mÇ+‹ñê‡ç¡`Y9è„…ú¬fe'|m§ÒR:áJDY,š/×J»QÜ%iW»éN3­›ørÍ´›UÓ¦/Gh¶H#부1MYM!Â?ÚDV0 ŠEÏÝ51!u ¢­!½c"FJ°x»ƒHØPÕVØ.”Ù.4GHŽ¼?Æš£ÍIR²Ê•y]ó혺hLl¼YMéNá{Båc¢Á‹“C>í)óÇSýü 1|rÏ@H}ÍœìÜ-R›¿ ÿ€1OCs€qY/Ùy b´hûpw†"s$*1g‹E*¬c¾´5¹Ýáƒè‰&Ú€AM†ÛöNüj¸z³JAú#ÞíÕóìÀIÊkAQb›ì|× *5…ävùddîbÐÇnØ«#¤sð é;'Šs°”bŽ“GgOÞ={z~xÉß+¨±žøòäÈÄÜm˵óÇ­ÓÁ€Hb)-»´§2÷xqa´r¼zðžLæojwŒíØŸ;=ýkg2Jø•ßÝåG ù164}ŒÅ˜œJ+Ð!„ÂXÒ‹JVÙJ¦½ +Ã¥7†,I ùdI +Y2”bÚ¡,2 5Ī© n¸HÃë6 K’V3ÌÎÿ™0²„ÊcB©‘±Zýjô‹v ©¡v¨‘cÔàÈI¶Ç[­Úr/ÊbV‰Æ>@ <ÿ;CTGüKcâC'²}…kÿTÑ·yòx4ôÌLÖ§iþég'Ã¥Ûÿ@qcä;ש28þætfk݆8iøo¨©0x¢}$!ñB ´šÄa>q£{,¥Ç‹Ô½Þr‰pšºÚÛa> +stream 0 g 0 G - 0 -22.816 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econdtioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 193.612 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.542 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 388.68 132.281 Td [(prec)]TJ -ET +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG q -1 0 0 1 410.229 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 99.895 662.293 cm +0 0 343.711 49.813 re f Q -BT -/F59 9.9626 Tf 413.367 132.281 Td [(type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -98.279 -41.843 Td [(152)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 112.299 690.488 Td [(contains)]TJ 0 g 0 G -ET - -endstream -endobj -1876 0 obj -<< -/Length 1021 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 4.707 -10.959 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -4.707 -10.959 Td [(end)-525(type)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -24.907 -21.918 Td [(The)]TJ/F59 9.9626 Tf 20.388 0 Td [(amold)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 6.506 0 Td [(vmold)]TJ/F54 9.9626 Tf 29.862 0 Td [(and)]TJ/F59 9.9626 Tf 20.577 0 Td [(imold)]TJ/F54 9.9626 Tf 29.862 0 Td [(ar)18(guments)-372(may)-373(be)-372(employed)-373(to)-372(interface)-372(with)]TJ -133.347 -11.955 Td [(special)-250(devices,)-250(such)-250(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ 0 g 0 G - 164.384 -533.997 Td [(153)]TJ +/F62 9.9626 Tf 151.98 -578.132 Td [(169)]TJ 0 g 0 G ET endstream endobj -1884 0 obj +2161 0 obj << -/Length 5673 +/Length 7014 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.3)-1000(apply)-250(\227)-250(Preconditioner)-250(application)-250(routine)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(12.4)-1000(CUDA-class)-250(extensions)]TJ/F62 9.9626 Tf 0 -18.964 Td [(For)-285(computing)-285(with)-285(CUDA)-285(we)-285(de\002ne)-285(a)-285(dual)-285(memorization)-285(strategy)-285(in)-285(which)]TJ 0 -11.955 Td [(each)-368(variable)-368(on)-368(the)-369(CPU)-368(\050\223host\224\051)-368(side)-368(has)-368(a)-368(GPU)-368(\050\223device\224\051)-368(side.)-665(When)-368(a)]TJ 0 -11.955 Td [(GPU-type)-355(variable)-355(is)-355(initialized,)-382(the)-355(data)-355(contained)-355(is)-356(\050usually\051)-355(the)-355(same)-355(on)]TJ 0 -11.955 Td [(both)-246(sides.)-309(Each)-246(operator)-245(invoked)-246(on)-246(the)-246(variable)-246(may)-246(change)-246(the)-246(data)-246(so)-246(that)]TJ 0 -11.956 Td [(only)-250(the)-250(host)-250(side)-250(or)-250(the)-250(device)-250(side)-250(ar)18(e)-250(up-to-date.)]TJ 14.944 -11.955 Td [(Keeping)-208(track)-209(of)-209(the)-208(updates)-209(to)-208(data)-209(in)-208(the)-209(variables)-208(is)-209(essential:)-289(we)-209(want)-208(to)]TJ -14.944 -11.955 Td [(perform)-220(most)-221(comp)1(utations)-221(on)-220(the)-220(GPU,)-221(but)-220(we)-220(cannot)-220(af)18(for)18(d)-221(the)-220(time)-220(needed)]TJ 0 -11.955 Td [(to)-327(move)-326(data)-327(between)-326(the)-327(host)-327(memory)-326(and)-327(the)-326(device)-327(memory)-327(because)-326(the)]TJ 0 -11.955 Td [(bandwidth)-351(of)-352(the)-351(inter)18(connection)-352(bus)-351(would)-351(become)-352(the)-351(main)-352(bottleneck)-351(of)]TJ 0 -11.955 Td [(the)-262(computation.)-347(Thus,)-265(each)-263(and)-262(every)-262(computational)-262(r)18(outine)-262(in)-263(the)-262(library)-262(is)]TJ 0 -11.956 Td [(built)-250(accor)18(ding)-250(to)-250(the)-250(following)-250(principles:)]TJ 0 g 0 G + 13.888 -18.472 Td [(\225)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%apply\050x,y,desc_a,info,trans,work\051)]TJ 0 -11.955 Td [(call)-525(prec%apply\050x,desc_a,info,trans\051)]TJ + [-500(If)-254(the)-253(data)-254(type)-253(being)-254(handled)-254(is)-253(GPU-enabled,)-255(make)-253(sur)18(e)-254(that)-254(its)-253(device)]TJ 11.018 -11.955 Td [(copy)-351(is)-352(up)-351(to)-351(date,)-377(perform)-351(any)-351(arithmetic)-351(operation)-351(on)-352(the)-351(GPU,)-351(and)]TJ 0 -11.955 Td [(if)-314(the)-313(data)-314(has)-314(been)-313(alter)18(ed)-314(as)-313(a)-314(r)18(esult,)-330(mark)-313(the)-314(main-memory)-314(copy)-313(as)]TJ 0 -11.955 Td [(outdated.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + -11.018 -19.199 Td [(\225)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-500(The)-266(main-memory)-266(copy)-266(is)-266(never)-267(u)1(pdated)-267(unless)-266(this)-266(is)-266(r)18(equested)-266(by)-266(the)]TJ 11.018 -11.955 Td [(user)-250(either)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf 0 -19.199 Td [(explicitly)]TJ 0 g 0 G +/F62 9.9626 Tf 47.582 0 Td [(by)-250(invoking)-250(a)-250(synchr)18(onization)-250(method;)]TJ 0 g 0 G - 0 -19.925 Td [(prec)]TJ +/F59 9.9626 Tf -47.582 -15.213 Td [(implicitly)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 117.837 0 Td [(local)]TJ/F54 9.9626 Tf -117.279 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 388.441 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 391.579 577.576 Td [(prec)]TJ +/F62 9.9626 Tf 49.793 0 Td [(by)-276(invoking)-276(a)-277(method)-276(that)-276(involves)-276(other)-277(data)-276(items)-276(that)-276(ar)18(e)]TJ -27.875 -11.955 Td [(not)-250(GPU-enabled,)-250(e.g.,)-250(by)-250(assignment)-250(ov)-250(a)-250(vector)-250(to)-250(a)-250(normal)-250(array)111(.)]TJ -46.824 -19.199 Td [(In)-264(this)-264(way)111(,)-268(data)-264(items)-264(ar)18(e)-265(put)-264(on)-264(the)-264(GPU)-264(memory)-265(\223on)-264(demand\224)-264(and)-264(r)18(emain)]TJ 0 -11.955 Td [(ther)18(e)-337(as)-337(long)-337(as)-337(\223normal\224)-337(computations)-337(ar)18(e)-337(carried)-337(out.)-571(As)-337(an)-336(example,)-359(the)]TJ 0 -11.955 Td [(following)-250(call)-250(to)-250(a)-250(matrix-vector)-250(pr)18(oduct)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 413.128 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.705 364.481 cm +0 0 343.711 16.936 re f Q -BT -/F59 9.9626 Tf 416.266 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(x)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(sour)18(ce)-250(vector)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 111.142 0 Td [(local)]TJ/F54 9.9626 Tf -96.198 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 521.785 Td [(T)]TJ -ET -q -1 0 0 1 419.614 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.752 521.785 Td [(vect)]TJ -ET -q -1 0 0 1 444.301 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 447.439 521.785 Td [(type)]TJ +/F102 8.9664 Tf 172.523 370.757 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-525(psb_spmm\050alpha,a,x,beta,y,desc_a,info\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 501.86 Td [(a)]TJ +/F62 9.9626 Tf -21.818 -23.482 Td [(will)-321(transpar)18(ently)-322(and)-321(automatically)-321(be)-322(performed)-321(on)-321(the)-322(GPU)-321(whenever)-321(all)]TJ 0 -11.955 Td [(thr)18(ee)-274(data)-274(inputs)]TJ/F67 9.9626 Tf 78.001 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)]TJ/F67 9.9626 Tf 5.282 0 Td [(x)]TJ/F62 9.9626 Tf 7.961 0 Td [(and)]TJ/F67 9.9626 Tf 19.598 0 Td [(y)]TJ/F62 9.9626 Tf 7.962 0 Td [(ar)18(e)-274(GPU-enabled.)-382(If)-275(a)-274(pr)18(ogram)-274(makes)-274(many)-274(such)]TJ -124.034 -11.956 Td [(calls)-250(sequentially)111(,)-250(then)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 208.625 0 Td [(local)]TJ/F54 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 200.207 0 Td [(psb)]TJ -ET -q -1 0 0 1 392.137 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 395.275 465.994 Td [(desc)]TJ -ET -q -1 0 0 1 416.824 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 419.963 465.994 Td [(type)]TJ + 13.888 -18.472 Td [(\225)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [-500(The)-322(\002rst)-322(kernel)-322(invocation)-322(will)-322(\002nd)-322(the)-322(data)-322(in)-322(main)-322(memory)111(,)-340(and)-322(will)]TJ 11.019 -11.955 Td [(copy)-356(it)-357(to)-356(the)-357(GPU)-356(memory)111(,)-383(thus)-356(incurring)-357(a)-356(signi\002cant)-356(over)18(head;)-410(the)]TJ 0 -11.955 Td [(r)18(esult)-250(is)-250(however)]TJ/F60 9.9626 Tf 78.155 0 Td [(not)]TJ/F62 9.9626 Tf 15.771 0 Td [(copied)-250(back,)-250(and)-250(ther)18(efor)18(e:)]TJ 0 g 0 G -/F51 9.9626 Tf -290.179 -19.925 Td [(trans)]TJ + -104.945 -19.199 Td [(\225)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(Scope:)]TJ -2.76 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)74(.)]TJ + [-500(Subsequent)-378(kernel)-377(invocations)-378(involving)-377(the)-378(same)-377(vector)-378(will)-378(\002nd)-377(the)]TJ 11.018 -11.955 Td [(data)-250(on)-250(the)-250(GPU)-250(side)-250(so)-250(that)-250(they)-250(will)-250(r)8(un)-250(at)-250(full)-250(speed.)]TJ -24.906 -18.472 Td [(For)-285(al)1(l)-285(invocations)-285(af)1(ter)-285(the)-285(\002rst)-284(the)-285(only)-284(data)-285(that)-284(will)-285(have)-284(to)-285(be)-284(transferr)18(ed)]TJ 0 -11.955 Td [(to/fr)18(om)-298(the)-298(main)-298(memory)-297(will)-298(be)-298(the)-298(scalars)]TJ/F67 9.9626 Tf 204.122 0 Td [(alpha)]TJ/F62 9.9626 Tf 29.12 0 Td [(and)]TJ/F67 9.9626 Tf 19.834 0 Td [(beta)]TJ/F62 9.9626 Tf 20.921 0 Td [(,)-310(and)-298(the)-298(r)18(eturn)]TJ -273.997 -11.955 Td [(code)]TJ/F67 9.9626 Tf 23.213 0 Td [(info)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(work)]TJ +/F59 9.9626 Tf -44.134 -20.101 Td [(V)111(ectors:)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(an)-250(optional)-250(work)-250(space)-250(Scope:)]TJ/F51 9.9626 Tf 136.476 0 Td [(local)]TJ/F54 9.9626 Tf -140.352 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(double)-250(pr)18(ecision)-250(array)111(.)]TJ +/F62 9.9626 Tf 40.677 0 Td [(The)-253(data)-253(type)]TJ/F67 9.9626 Tf 63.08 0 Td [(psb_T_vect_gpu)]TJ/F62 9.9626 Tf 75.745 0 Td [(pr)18(ovides)-253(a)-253(GPU-enabled)-253(extension)-253(of)]TJ -154.595 -11.955 Td [(the)-306(inner)-307(type)]TJ/F67 9.9626 Tf 65.726 0 Td [(psb_T_base_vect_type)]TJ/F62 9.9626 Tf 104.607 0 Td [(,)-321(and)-306(must)-307(be)-306(used)-306(together)-307(with)]TJ -170.333 -11.955 Td [(the)-327(other)-326(inner)-327(matrix)-326(type)-327(to)-327(make)-326(full)-327(use)-326(of)-327(the)-327(GPU)-326(computational)]TJ 0 -11.956 Td [(capabilities;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.907 -19.198 Td [(CSR:)]TJ 0 g 0 G +/F62 9.9626 Tf 27.945 0 Td [(The)-210(data)-210(type)]TJ/F67 9.9626 Tf 61.792 0 Td [(psb_T_csrg_sparse_mat)]TJ/F62 9.9626 Tf 111.928 0 Td [(pr)18(ovides)-210(an)-210(interface)-209(to)-210(the)-210(GPU)]TJ -176.758 -11.955 Td [(version)-250(of)-250(CSR)-250(available)-250(in)-250(the)-250(NVIDIA)-250(CuSP)92(ARSE)-250(library;)]TJ 0 g 0 G - 0 -19.925 Td [(y)]TJ + 139.476 -29.888 Td [(170)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(destination)-250(vector)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 131.914 0 Td [(local)]TJ/F54 9.9626 Tf -117.528 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ ET -q -1 0 0 1 410.618 276.904 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +2165 0 obj +<< +/Length 5445 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 413.756 276.704 Td [(T)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(HYB:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.061 0 Td [(The)-201(data)-202(typ)1(e)]TJ/F67 9.9626 Tf 61.537 0 Td [(psb_T_hybg_sparse_mat)]TJ/F62 9.9626 Tf 111.842 0 Td [(pr)18(ovides)-201(an)-201(interface)-202(to)-201(the)-201(HYB)]TJ -177.533 -11.955 Td [(GPU)-353(storage)-354(available)-354(in)-353(the)-354(NVIDIA)-353(CuSP)92(ARSE)-354(library)111(.)-621(The)-353(internal)]TJ 0 -11.955 Td [(str)8(uctur)18(e)-251(is)-252(opaque,)-252(hence)-251(the)-252(host)-251(side)-252(is)-252(just)-251(CSR;)-252(the)-251(HYB)-252(data)-251(format)]TJ 0 -11.956 Td [(is)-250(only)-250(available)-250(up)-250(to)-250(CUDA)-250(version)-250(10.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.26 Td [(ELL:)]TJ +0 g 0 G +/F62 9.9626 Tf 25.734 0 Td [(The)-290(data)-291(type)]TJ/F67 9.9626 Tf 64.203 0 Td [(psb_T_elg_sparse_mat)]TJ/F62 9.9626 Tf 107.501 0 Td [(pr)18(ovides)-290(an)-291(interface)-290(to)-291(the)-290(ELL-)]TJ -172.531 -11.955 Td [(P)92(ACK)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.261 Td [(HLL:)]TJ +0 g 0 G +/F62 9.9626 Tf 27.946 0 Td [(The)-190(data)-190(type)]TJ/F67 9.9626 Tf 61.199 0 Td [(psb_T_hlg_sparse_mat)]TJ/F62 9.9626 Tf 106.5 0 Td [(pr)18(ovides)-190(an)-190(interface)-190(to)-190(the)-190(Hacked)]TJ -170.738 -11.955 Td [(ELLP)92(ACK)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.261 Td [(HDIA:)]TJ +0 g 0 G +/F62 9.9626 Tf 35.696 0 Td [(The)-341(data)-342(type)]TJ/F67 9.9626 Tf 65.722 0 Td [(psb_T_hdiag_sparse_mat)]TJ/F62 9.9626 Tf 118.468 0 Td [(pr)18(ovides)-341(an)-342(int)1(erface)-342(to)-341(the)]TJ -194.979 -11.955 Td [(Hacked)-250(DIAgonals)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ/F59 14.3462 Tf -24.907 -34.763 Td [(13)-1000(CUDA)-250(Environment)-250(Routines)]TJ/F59 11.9552 Tf 0 -24.857 Td [(psb)]TJ ET q -1 0 0 1 419.614 276.904 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 514.195 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 422.752 276.704 Td [(vect)]TJ +/F59 11.9552 Tf 124.116 513.996 Td [(cuda)]TJ ET q -1 0 0 1 444.301 276.904 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 514.195 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 447.439 276.704 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F59 11.9552 Tf 154.315 513.996 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS-CUDA)-250(environment)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -54.42 -19.126 Td [(call)]TJ 0 g 0 G - 139.477 -118.52 Td [(154)]TJ + [-525(psb_cuda_init\050ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1889 0 obj -<< -/Length 3387 ->> -stream + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(device]\051)]TJ/F62 9.9626 Tf 14.944 -22.253 Td [(This)-250(subr)18(outine)-250(initializes)-250(the)-250(PSBLAS-CUDA)-250(envir)18(onment.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.4)-1000(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(current)-250(preconditioner)]TJ +/F59 9.9626 Tf -14.944 -20.177 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%descr\050info\051)]TJ 0 -11.955 Td [(call)-525(prec%descr\050info,iout,)-525(root\051)]TJ +/F59 9.9626 Tf -29.828 -20.261 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -20.26 Td [(device)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 34.311 0 Td [(ID)-250(of)-250(CUDA)-250(device)-250(to)-250(attach)-250(to.)]TJ -9.404 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-254(as:)-317(an)-254(integer)-253(value.)-575(Default:)-317(use)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 193.007 0 Td [(mod)]TJ 0 g 0 G + [(\050iam,ngpu\051)]TJ/F62 9.9626 Tf 70.521 0 Td [(wher)18(e)]TJ/F67 9.9626 Tf 29.935 0 Td [(iam)]TJ/F62 9.9626 Tf 18.218 0 Td [(is)]TJ -311.681 -11.955 Td [(the)-275(call)1(ing)-275(pr)18(ocess)-275(index)-274(and)]TJ/F67 9.9626 Tf 131.323 0 Td [(ngpu)]TJ/F62 9.9626 Tf 23.656 0 Td [(is)-275(the)-274(total)-275(number)-274(of)-275(CUDA)-274(devices)]TJ -154.979 -11.956 Td [(available)-250(on)-250(the)-250(curr)18(ent)-250(node.)]TJ/F59 11.9552 Tf -24.907 -20.176 Td [(Notes)]TJ 0 g 0 G - 0 -19.925 Td [(prec)]TJ +/F62 9.9626 Tf 12.454 -20.177 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 117.836 0 Td [(local)]TJ/F54 9.9626 Tf -117.278 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ + [-500(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS-CUDA)-250(call.)]TJ/F59 11.9552 Tf -12.454 -29.61 Td [(psb)]TJ ET q -1 0 0 1 337.631 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 270.423 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 340.77 577.576 Td [(prec)]TJ +/F59 11.9552 Tf 124.116 270.224 Td [(cuda)]TJ ET q -1 0 0 1 362.319 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 270.423 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 365.457 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(iout)]TJ +/F59 11.9552 Tf 154.315 270.224 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS-CUDA)-250(environment)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -54.42 -19.126 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(output)-250(unit.)-310(Scope:)]TJ/F51 9.9626 Tf 87.391 0 Td [(local)]TJ/F54 9.9626 Tf -85.727 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(number)74(.)-310(Default:)-310(default)-250(output)-250(unit.)]TJ + [-525(psb_cuda_exit\050ctxt\051)]TJ/F62 9.9626 Tf 14.944 -22.254 Td [(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(CUDA)-250(context.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(root)]TJ +/F59 9.9626 Tf -14.944 -20.176 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(fr)18(om)-250(which)-250(to)-250(print)-250(Scope:)]TJ/F51 9.9626 Tf 155.834 0 Td [(local)]TJ/F54 9.9626 Tf -154.18 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-387(as:)-585(an)-387(integer)-387(number)-388(between)-387(0)-387(and)]TJ/F52 9.9626 Tf 220.442 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 14.01 0 Td [(\000)]TJ/F54 9.9626 Tf 10.638 0 Td [(1,)-422(in)-387(which)-387(case)]TJ -245.09 -11.955 Td [(the)-314(speci\002e)1(d)-314(pr)18(ocess)-314(will)-313(print)-314(the)-313(description,)-330(or)]TJ/F83 10.3811 Tf 225.38 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1,)-329(in)-314(which)-314(case)-313(all)]TJ -233.574 -11.955 Td [(pr)18(ocesses)-250(will)-250(print.)-310(Default:)-310(0.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -29.828 -20.261 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(info)]TJ + 0 -20.261 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - 139.477 -263.975 Td [(155)]TJ + 139.477 -29.888 Td [(171)]TJ 0 g 0 G ET endstream endobj -1790 0 obj +2041 0 obj << /Type /ObjStm /N 100 -/First 967 -/Length 10102 +/First 1000 +/Length 12945 >> stream -1786 0 501 58 1787 115 1788 172 1783 230 1792 311 1794 429 505 488 1795 546 1796 605 -1791 664 1798 745 1800 863 509 921 1801 978 1802 1035 1797 1093 1804 1187 1806 1305 513 1364 -1803 1422 1809 1516 1807 1655 1811 1800 517 1858 1812 1915 1813 1973 1808 2031 1816 2125 1814 2264 -1818 2409 521 2468 1819 2526 1820 2585 1815 2644 1823 2738 1821 2877 1825 3022 525 3080 1826 3137 -1827 3194 1822 3252 1831 3346 1829 3485 1833 3628 529 3687 1834 3745 1835 3803 1830 3862 1838 3956 -1836 4095 1840 4240 533 4298 1841 4355 1842 4413 1837 4471 1845 4591 1843 4730 1847 4872 537 4931 -1848 4989 1849 5048 1844 5107 1851 5227 1853 5345 1850 5403 1855 5484 1857 5602 541 5661 1854 5719 -1861 5813 1858 5961 1859 6108 1863 6256 545 6314 1864 6371 1860 6429 1871 6549 1866 6715 1867 6860 -1868 7006 1869 7153 1873 7300 549 7359 1870 7417 1875 7511 1877 7629 1874 7687 1883 7781 1878 7947 -1879 8092 1880 8235 1881 8381 1885 8525 553 8584 1882 8642 1888 8736 1886 8875 1890 9020 557 9078 -% 1786 0 obj -<< -/D [1784 0 R /XYZ 98.895 753.953 null] ->> -% 501 0 obj -<< -/D [1784 0 R /XYZ 99.895 716.092 null] ->> -% 1787 0 obj -<< -/D [1784 0 R /XYZ 99.895 678.98 null] ->> -% 1788 0 obj -<< -/D [1784 0 R /XYZ 99.895 679.195 null] ->> -% 1783 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1792 0 obj -<< -/Type /Page -/Contents 1793 0 R -/Resources 1791 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R ->> -% 1794 0 obj -<< -/D [1792 0 R /XYZ 149.705 753.953 null] ->> -% 505 0 obj -<< -/D [1792 0 R /XYZ 150.705 716.092 null] ->> -% 1795 0 obj -<< -/D [1792 0 R /XYZ 150.705 689.963 null] ->> -% 1796 0 obj -<< -/D [1792 0 R /XYZ 150.705 693.143 null] ->> -% 1791 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1798 0 obj -<< -/Type /Page -/Contents 1799 0 R -/Resources 1797 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R ->> -% 1800 0 obj -<< -/D [1798 0 R /XYZ 98.895 753.953 null] ->> -% 509 0 obj -<< -/D [1798 0 R /XYZ 99.895 716.092 null] ->> -% 1801 0 obj -<< -/D [1798 0 R /XYZ 99.895 678.98 null] ->> -% 1802 0 obj -<< -/D [1798 0 R /XYZ 99.895 679.195 null] ->> -% 1797 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1804 0 obj -<< -/Type /Page -/Contents 1805 0 R -/Resources 1803 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R ->> -% 1806 0 obj -<< -/D [1804 0 R /XYZ 149.705 753.953 null] ->> -% 513 0 obj -<< -/D [1804 0 R /XYZ 150.705 716.092 null] ->> -% 1803 0 obj +2032 0 2033 153 2034 308 2035 460 2039 612 577 671 581 729 2036 787 2052 909 2044 1066 +2045 1215 2047 1362 2054 1509 585 1567 589 1624 2055 1681 2056 1739 2051 1797 2062 1934 2068 2073 +2069 2253 2050 2296 2064 2443 2059 2502 2065 2561 2066 2620 2067 2679 2061 2737 2073 2873 2085 3012 +2086 3192 2088 3235 2089 3440 2090 3729 2092 3950 2049 4163 2075 4309 2060 4367 2076 4426 2077 4485 +2078 4544 2079 4603 2080 4662 2081 4721 2082 4780 2083 4838 2071 4897 2084 4956 2072 5014 2101 5206 +2106 5363 2107 5543 2108 5586 2109 5889 2110 6094 2112 6307 2097 6528 2098 6675 2099 6822 2103 6967 +2104 7026 2105 7085 2100 7144 2121 7350 2126 7489 2127 7669 2128 7712 2129 7913 2130 8208 2132 8429 +2119 8642 2123 8789 2116 8847 2124 8906 2117 8964 2125 9023 2120 9080 2139 9233 2143 9372 2144 9552 +2146 9595 2147 9802 2148 10103 2150 10324 2137 10537 2141 10684 2142 10743 2138 10802 2155 10982 2157 11100 +2154 11158 2160 11241 2162 11359 593 11418 2159 11476 2164 11598 2166 11716 597 11774 601 11830 2167 11887 +% 2032 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [267.568 552.736 279.523 561.692] +/A << /S /GoTo /D (cite.DesPat:11) >> >> -% 1809 0 obj +% 2033 0 obj << -/Type /Page -/Contents 1810 0 R -/Resources 1808 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R -/Annots [ 1807 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [283.001 552.835 294.956 561.841] +/A << /S /GoTo /D (cite.CaFiRo:2014) >> >> -% 1807 0 obj +% 2034 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 438.278 369.462 450.338] -/A << /S /GoTo /D (spdata) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [298.433 552.736 310.389 561.841] +/A << /S /GoTo /D (cite.Sparse03) >> >> -% 1811 0 obj +% 2035 0 obj << -/D [1809 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [198.555 540.88 210.51 549.737] +/A << /S /GoTo /D (cite.OurTechRep) >> >> -% 517 0 obj +% 2039 0 obj << -/D [1809 0 R /XYZ 99.895 716.092 null] +/D [2037 0 R /XYZ 149.705 753.953 null] >> -% 1812 0 obj +% 577 0 obj << -/D [1809 0 R /XYZ 99.895 676.015 null] +/D [2037 0 R /XYZ 150.705 716.092 null] >> -% 1813 0 obj +% 581 0 obj << -/D [1809 0 R /XYZ 99.895 679.195 null] +/D [2037 0 R /XYZ 150.705 525.151 null] >> -% 1808 0 obj +% 2036 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F102 1016 0 R >> /ProcSet [ /PDF /Text ] >> -% 1816 0 obj +% 2052 0 obj << /Type /Page -/Contents 1817 0 R -/Resources 1815 0 R +/Contents 2053 0 R +/Resources 2051 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R -/Annots [ 1814 0 R ] +/Parent 2057 0 R +/Annots [ 2044 0 R 2045 0 R 2047 0 R ] >> -% 1814 0 obj +% 2044 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [121.315 282.176 133.27 291.182] +/A << /S /GoTo /D (cite.MRC:11) >> +>> +% 2045 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 571.778 420.271 583.837] -/A << /S /GoTo /D (spdata) >> +/Rect [253.836 246.191 265.791 255.601] +/A << /S /GoTo /D (table.21) >> >> -% 1818 0 obj +% 2047 0 obj << -/D [1816 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [314.807 176.296 321.781 188.355] +/A << /S /GoTo /D (figure.6) >> >> -% 521 0 obj +% 2054 0 obj << -/D [1816 0 R /XYZ 150.705 716.092 null] +/D [2052 0 R /XYZ 98.895 753.953 null] >> -% 1819 0 obj +% 585 0 obj +<< +/D [2052 0 R /XYZ 99.895 349.256 null] +>> +% 589 0 obj << -/D [1816 0 R /XYZ 150.705 676.015 null] +/D [2052 0 R /XYZ 99.895 231.912 null] >> -% 1820 0 obj +% 2055 0 obj << -/D [1816 0 R /XYZ 150.705 679.195 null] +/D [2052 0 R /XYZ 99.895 211.058 null] >> -% 1815 0 obj +% 2056 0 obj +<< +/D [2052 0 R /XYZ 99.895 120.166 null] +>> +% 2051 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F120 1782 0 R /F102 1016 0 R /F62 667 0 R /F67 913 0 R /F59 665 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1823 0 obj +% 2062 0 obj << /Type /Page -/Contents 1824 0 R -/Resources 1822 0 R +/Contents 2063 0 R +/Resources 2061 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1821 0 R ] +/Parent 2057 0 R +/Annots [ 2050 0 R ] >> -% 1821 0 obj +% 2068 0 obj +<< +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133929+01'00') +/ModDate (D:20140329133929+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) +>> +% 2069 0 obj +<< +/Type /ExtGState +/OPM 1 +>> +% 2050 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 438.278 369.462 450.338] -/A << /S /GoTo /D (spdata) >> +/Rect [184.361 326.969 191.335 339.028] +/A << /S /GoTo /D (algocf.1) >> >> -% 1825 0 obj +% 2064 0 obj << -/D [1823 0 R /XYZ 98.895 753.953 null] +/D [2062 0 R /XYZ 149.705 753.953 null] >> -% 525 0 obj +% 2059 0 obj << -/D [1823 0 R /XYZ 99.895 716.092 null] +/D [2062 0 R /XYZ 150.705 716.092 null] >> -% 1826 0 obj +% 2065 0 obj << -/D [1823 0 R /XYZ 99.895 678.98 null] +/D [2062 0 R /XYZ 397.506 408.774 null] >> -% 1827 0 obj +% 2066 0 obj +<< +/D [2062 0 R /XYZ 150.705 264.598 null] +>> +% 2067 0 obj << -/D [1823 0 R /XYZ 99.895 679.195 null] +/D [2062 0 R /XYZ 150.705 232.98 null] >> -% 1822 0 obj +% 2061 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F93 915 0 R /F67 913 0 R >> +/XObject << /Im6 2046 0 R >> /ProcSet [ /PDF /Text ] >> -% 1831 0 obj +% 2073 0 obj << /Type /Page -/Contents 1832 0 R -/Resources 1830 0 R +/Contents 2074 0 R +/Resources 2072 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1829 0 R ] ->> -% 1829 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [352.526 426.323 428.58 438.383] -/A << /S /GoTo /D (vdata) >> +/Parent 2057 0 R +/Annots [ 2049 0 R ] >> -% 1833 0 obj +% 2085 0 obj << -/D [1831 0 R /XYZ 149.705 753.953 null] +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 529 0 obj +% 2086 0 obj << -/D [1831 0 R /XYZ 150.705 716.092 null] +/Type /ExtGState +/OPM 1 >> -% 1834 0 obj +% 2088 0 obj << -/D [1831 0 R /XYZ 150.705 678.98 null] +/BaseFont /YAZDUX+TimesNewRomanPSMT +/FontDescriptor 2090 0 R +/ToUnicode 2091 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 0 0 500 500 500] +/Subtype /TrueType >> -% 1835 0 obj +% 2089 0 obj << -/D [1831 0 R /XYZ 150.705 679.195 null] +/BaseFont /NDNSMY+FreeSerif +/FontDescriptor 2092 0 R +/ToUnicode 2093 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 0 0 0 0 0 0 0 385 0 0 0 0 0 0 0 667 529 0 0 0 0 0 701] +/Subtype /TrueType >> -% 1830 0 obj +% 2090 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /FontDescriptor +/FontName /YAZDUX+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2094 0 R >> -% 1838 0 obj +% 2092 0 obj << -/Type /Page -/Contents 1839 0 R -/Resources 1837 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1836 0 R ] +/Type /FontDescriptor +/FontName /NDNSMY+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2095 0 R >> -% 1836 0 obj +% 2049 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 574.778 369.462 586.838] -/A << /S /GoTo /D (spdata) >> +/Rect [384.587 583.767 391.56 595.827] +/A << /S /GoTo /D (figure.5) >> >> -% 1840 0 obj +% 2075 0 obj << -/D [1838 0 R /XYZ 98.895 753.953 null] +/D [2073 0 R /XYZ 98.895 753.953 null] >> -% 533 0 obj +% 2060 0 obj << -/D [1838 0 R /XYZ 99.895 716.092 null] +/D [2073 0 R /XYZ 389.217 621.446 null] >> -% 1841 0 obj +% 2076 0 obj << -/D [1838 0 R /XYZ 99.895 679.441 null] +/D [2073 0 R /XYZ 114.839 563.747 null] >> -% 1842 0 obj +% 2077 0 obj << -/D [1838 0 R /XYZ 99.895 679.657 null] +/D [2073 0 R /XYZ 114.839 567.034 null] >> -% 1837 0 obj +% 2078 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F94 915 0 R >> -/ProcSet [ /PDF /Text ] +/D [2073 0 R /XYZ 114.839 556.075 null] >> -% 1845 0 obj +% 2079 0 obj << -/Type /Page -/Contents 1846 0 R -/Resources 1844 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1843 0 R ] +/D [2073 0 R /XYZ 114.839 545.116 null] >> -% 1843 0 obj +% 2080 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [452.361 574.59 528.415 586.65] -/A << /S /GoTo /D (vdata) >> +/D [2073 0 R /XYZ 114.839 534.158 null] >> -% 1847 0 obj +% 2081 0 obj << -/D [1845 0 R /XYZ 149.705 753.953 null] +/D [2073 0 R /XYZ 114.839 523.199 null] >> -% 537 0 obj +% 2082 0 obj << -/D [1845 0 R /XYZ 150.705 716.092 null] +/D [2073 0 R /XYZ 114.839 512.24 null] >> -% 1848 0 obj +% 2083 0 obj << -/D [1845 0 R /XYZ 150.705 679.413 null] +/D [2073 0 R /XYZ 114.839 501.281 null] >> -% 1849 0 obj +% 2071 0 obj << -/D [1845 0 R /XYZ 150.705 679.628 null] +/D [2073 0 R /XYZ 114.839 481.057 null] >> -% 1844 0 obj +% 2084 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F94 915 0 R >> +/D [2073 0 R /XYZ 99.895 294.895 null] +>> +% 2072 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R /F102 1016 0 R /F120 1782 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> +/XObject << /Im7 2048 0 R >> /ProcSet [ /PDF /Text ] >> -% 1851 0 obj +% 2101 0 obj << /Type /Page -/Contents 1852 0 R -/Resources 1850 0 R +/Contents 2102 0 R +/Resources 2100 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R +/Parent 2057 0 R +/Annots [ 2097 0 R 2098 0 R 2099 0 R ] >> -% 1853 0 obj +% 2106 0 obj << -/D [1851 0 R /XYZ 98.895 753.953 null] +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1850 0 obj +% 2107 0 obj << -/Font << /F94 915 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /ExtGState +/OPM 1 >> -% 1855 0 obj +% 2108 0 obj << -/Type /Page -/Contents 1856 0 R -/Resources 1854 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R +/BaseFont /MCSFLP+FreeSerif +/FontDescriptor 2110 0 R +/ToUnicode 2111 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 670 0 610 564 0 714 0 385 709 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType >> -% 1857 0 obj +% 2109 0 obj << -/D [1855 0 R /XYZ 149.705 753.953 null] +/BaseFont /AJRCAD+TimesNewRomanPSMT +/FontDescriptor 2112 0 R +/ToUnicode 2113 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 0 0 500 500 500] +/Subtype /TrueType >> -% 541 0 obj +% 2110 0 obj << -/D [1855 0 R /XYZ 150.705 716.092 null] +/Type /FontDescriptor +/FontName /MCSFLP+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2114 0 R >> -% 1854 0 obj +% 2112 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /FontDescriptor +/FontName /AJRCAD+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2115 0 R >> -% 1861 0 obj +% 2097 0 obj << -/Type /Page -/Contents 1862 0 R -/Resources 1860 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1858 0 R 1859 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [453.687 438.686 460.661 450.746] +/A << /S /GoTo /D (figure.5) >> >> -% 1858 0 obj +% 2098 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 442.264 387.374 454.323] -/A << /S /GoTo /D (precdata) >> +/Rect [362.965 188.317 369.938 200.376] +/A << /S /GoTo /D (figure.8) >> >> -% 1859 0 obj +% 2099 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [171.257 352.6 177.533 366.303] -/A << /S /GoTo /D (Hfootnote.4) >> +/Rect [477.685 140.43 484.659 152.49] +/A << /S /GoTo /D (algocf.2) >> >> -% 1863 0 obj +% 2103 0 obj << -/D [1861 0 R /XYZ 98.895 753.953 null] +/D [2101 0 R /XYZ 149.705 753.953 null] >> -% 545 0 obj +% 2104 0 obj << -/D [1861 0 R /XYZ 99.895 716.092 null] +/D [2101 0 R /XYZ 447.542 476.365 null] >> -% 1864 0 obj +% 2105 0 obj << -/D [1861 0 R /XYZ 114.242 129.79 null] +/D [2101 0 R /XYZ 150.705 223.169 null] >> -% 1860 0 obj +% 2100 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F93 915 0 R /F67 913 0 R /F102 1016 0 R /F120 1782 0 R /F69 1460 0 R /F91 914 0 R /F59 665 0 R >> +/XObject << /Im8 2096 0 R >> /ProcSet [ /PDF /Text ] >> -% 1871 0 obj +% 2121 0 obj << /Type /Page -/Contents 1872 0 R -/Resources 1870 0 R +/Contents 2122 0 R +/Resources 2120 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1866 0 R 1867 0 R 1868 0 R 1869 0 R ] +/Parent 2057 0 R +/Annots [ 2119 0 R ] >> -% 1866 0 obj +% 2126 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [365.487 576.377 443.006 588.437] -/A << /S /GoTo /D (spdata) >> +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133927+01'00') +/ModDate (D:20140329133927+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1867 0 obj +% 2127 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [451.902 505.741 518.96 517.801] -/A << /S /GoTo /D (precdata) >> +/Type /ExtGState +/OPM 1 >> -% 1868 0 obj +% 2128 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [422.264 435.105 489.322 447.165] -/A << /S /GoTo /D (descdata) >> +/BaseFont /BNRUAU+TimesNewRomanPSMT +/FontDescriptor 2130 0 R +/ToUnicode 2131 0 R +/Type /Font +/FirstChar 45 +/LastChar 55 +/Widths [ 333 0 0 500 500 500 0 0 0 0 500] +/Subtype /TrueType >> -% 1869 0 obj +% 2129 0 obj +<< +/BaseFont /ICFUKB+FreeSerif +/FontDescriptor 2132 0 R +/ToUnicode 2133 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 0 0 610 564 0 0 0 0 0 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType +>> +% 2130 0 obj +<< +/Type /FontDescriptor +/FontName /BNRUAU+TimesNewRomanPSMT +/FontBBox [ 21 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2134 0 R +>> +% 2132 0 obj +<< +/Type /FontDescriptor +/FontName /ICFUKB+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2135 0 R +>> +% 2119 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.227 128.475 435.285 140.535] -/A << /S /GoTo /D (precdata) >> +/Rect [371.292 548.965 378.265 561.025] +/A << /S /GoTo /D (figure.5) >> >> -% 1873 0 obj +% 2123 0 obj << -/D [1871 0 R /XYZ 149.705 753.953 null] +/D [2121 0 R /XYZ 98.895 753.953 null] >> -% 549 0 obj +% 2116 0 obj +<< +/D [2121 0 R /XYZ 396.732 586.644 null] +>> +% 2124 0 obj +<< +/D [2121 0 R /XYZ 99.895 537.967 null] +>> +% 2117 0 obj +<< +/D [2121 0 R /XYZ 114.839 395.424 null] +>> +% 2125 0 obj << -/D [1871 0 R /XYZ 150.705 716.092 null] +/D [2121 0 R /XYZ 99.895 199.14 null] >> -% 1870 0 obj +% 2120 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F59 665 0 R /F120 1782 0 R >> +/XObject << /Im9 2118 0 R >> /ProcSet [ /PDF /Text ] >> -% 1875 0 obj +% 2139 0 obj << /Type /Page -/Contents 1876 0 R -/Resources 1874 0 R +/Contents 2140 0 R +/Resources 2138 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R +/Parent 2057 0 R +/Annots [ 2137 0 R ] >> -% 1877 0 obj +% 2143 0 obj << -/D [1875 0 R /XYZ 98.895 753.953 null] +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1874 0 obj +% 2144 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /ExtGState +/OPM 1 >> -% 1883 0 obj +% 2146 0 obj +<< +/BaseFont /PBIKKX+TimesNewRomanPSMT +/FontDescriptor 2148 0 R +/ToUnicode 2149 0 R +/Type /Font +/FirstChar 45 +/LastChar 55 +/Widths [ 333 0 0 500 500 0 500 500 500 500 500] +/Subtype /TrueType +>> +% 2147 0 obj +<< +/BaseFont /ZBHFTP+FreeSerif +/FontDescriptor 2150 0 R +/ToUnicode 2151 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 670 0 610 564 0 714 0 0 709 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType +>> +% 2148 0 obj +<< +/Type /FontDescriptor +/FontName /PBIKKX+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2152 0 R +>> +% 2150 0 obj << -/Type /Page -/Contents 1884 0 R -/Resources 1882 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1878 0 R 1879 0 R 1880 0 R 1881 0 R ] +/Type /FontDescriptor +/FontName /ZBHFTP+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2153 0 R >> -% 1878 0 obj +% 2137 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [371.126 573.77 438.184 585.83] -/A << /S /GoTo /D (precdata) >> +/Rect [440.392 356.078 447.366 368.137] +/A << /S /GoTo /D (figure.5) >> >> -% 1879 0 obj +% 2141 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 517.98 469.357 530.039] -/A << /S /GoTo /D (vdata) >> +/D [2139 0 R /XYZ 149.705 753.953 null] >> -% 1880 0 obj +% 2142 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [374.822 462.189 441.88 474.248] -/A << /S /GoTo /D (descdata) >> +/D [2139 0 R /XYZ 447.542 393.756 null] >> -% 1881 0 obj +% 2138 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 272.899 469.357 284.958] -/A << /S /GoTo /D (vdata) >> +/Font << /F62 667 0 R /F67 913 0 R /F60 666 0 R /F59 665 0 R /F93 915 0 R /F102 1016 0 R /F120 1782 0 R >> +/XObject << /Im10 2136 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1885 0 obj +% 2155 0 obj << -/D [1883 0 R /XYZ 149.705 753.953 null] +/Type /Page +/Contents 2156 0 R +/Resources 2154 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2158 0 R >> -% 553 0 obj +% 2157 0 obj << -/D [1883 0 R /XYZ 150.705 716.092 null] +/D [2155 0 R /XYZ 98.895 753.953 null] >> -% 1882 0 obj +% 2154 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F102 1016 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1888 0 obj +% 2160 0 obj << /Type /Page -/Contents 1889 0 R -/Resources 1887 0 R +/Contents 2161 0 R +/Resources 2159 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1886 0 R ] +/Parent 2158 0 R >> -% 1886 0 obj +% 2162 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 573.77 387.374 585.83] -/A << /S /GoTo /D (precdata) >> +/D [2160 0 R /XYZ 149.705 753.953 null] >> -% 1890 0 obj +% 593 0 obj << -/D [1888 0 R /XYZ 98.895 753.953 null] +/D [2160 0 R /XYZ 150.705 716.092 null] >> -% 557 0 obj +% 2159 0 obj << -/D [1888 0 R /XYZ 99.895 716.092 null] +/Font << /F59 665 0 R /F62 667 0 R /F102 1016 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> - -endstream -endobj -1894 0 obj +% 2164 0 obj << -/Length 973 +/Type /Page +/Contents 2165 0 R +/Resources 2163 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2158 0 R >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.5)-1000(clone)-250(\227)-250(clone)-250(current)-250(preconditioner)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-1050(prec%clone\050precout,info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -77.917 -33.873 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(precout)]TJ -0 g 0 G -/F54 9.9626 Tf 39.292 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ -0 g 0 G -/F51 9.9626 Tf -39.292 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ -0 g 0 G - 140.583 -449.28 Td [(156)]TJ -0 g 0 G -ET - -endstream -endobj -1900 0 obj +% 2166 0 obj << -/Length 2703 +/D [2164 0 R /XYZ 98.895 753.953 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.6)-1000(free)-250(\227)-250(Free)-250(a)-250(preconditioner)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%free\050info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 577.576 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(On)-250(Exit)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 501.86 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 501.86 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ/F54 9.9626 Tf 34.363 0 Td [(Releases)-250(all)-250(internal)-250(storage.)]TJ -0 g 0 G - 130.021 -333.713 Td [(157)]TJ -0 g 0 G -ET - -endstream -endobj -1905 0 obj +% 597 0 obj << -/Length 548 +/D [2164 0 R /XYZ 99.895 554.06 null] +>> +% 601 0 obj +<< +/D [2164 0 R /XYZ 99.895 527.944 null] +>> +% 2167 0 obj +<< +/D [2164 0 R /XYZ 99.895 315.727 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(11)-1000(Iterative)-250(Methods)]TJ/F54 9.9626 Tf 0 -22.702 Td [(In)-402(this)-403(chapter)-402(we)-403(pr)18(ovide)-402(r)18(outines)-403(for)-402(pr)18(econditioners)-402(and)-403(iterative)-402(meth-)]TJ 0 -11.955 Td [(ods.)-472(The)-304(interfaces)-304(for)-304(Krylov)-304(subspace)-303(methods)-304(ar)18(e)-304(available)-304(in)-304(the)-304(module)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_krylov_mod)]TJ/F54 9.9626 Tf 73.225 0 Td [(.)]TJ -0 g 0 G - 91.158 -568.734 Td [(158)]TJ -0 g 0 G -ET endstream endobj -1912 0 obj +2171 0 obj << -/Length 8246 +/Length 5390 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(11.1)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 171.339 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-266(subr)18(outine)-266(is)-267(a)-266(driver)-266(that)-267(p)1(r)18(ovides)-267(a)-266(general)-266(interface)-266(for)-267(all)-266(the)-266(Krylov-)]TJ 0 -11.955 Td [(Subspace)-250(family)-250(methods)-250(implemented)-250(in)-250(PSBLAS)-250(version)-250(2.)]TJ 14.944 -11.955 Td [(The)-250(stopping)-250(criterion)-250(can)-250(take)-250(the)-250(following)-250(values:)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -18.774 Td [(1)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(normwise)-222(backwar)18(d)-221(err)18(or)-222(in)-221(the)-222(in\002nity)-222(norm;)-231(the)-221(iteration)-222(is)-222(stopped)-221(when)]TJ/F52 9.9626 Tf 109.036 -26.864 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.14 0 Td [(=)]TJ/F83 10.3811 Tf 40.62 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ -ET -q -1 0 0 1 246.191 620.108 cm -[]0 d 0 J 0.398 w 0 0 m 74.372 0 l S -Q -BT -/F85 10.3811 Tf 246.316 610.783 Td [(\050)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F52 9.9626 Tf 5.938 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)-24(k)]TJ/F52 9.9626 Tf 11.048 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(k)]TJ/F85 10.3811 Tf 7.376 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.44 0 Td [(b)]TJ/F83 10.3811 Tf 4.861 0 Td [(k)]TJ/F85 10.3811 Tf 5.44 0 Td [(\051)]TJ/F61 10.3811 Tf 8.236 6.834 Td [(<)]TJ/F52 9.9626 Tf 11.087 0 Td [(e)-80(p)-25(s)]TJ -0 g 0 G -/F51 9.9626 Tf -235.842 -29.908 Td [(2)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F52 9.9626 Tf 136.209 -26.865 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.141 0 Td [(=)]TJ/F83 10.3811 Tf 13.446 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ +/F59 11.9552 Tf 174.926 706.129 Td [(cuda)]TJ ET q -1 0 0 1 273.365 563.335 cm -[]0 d 0 J 0.398 w 0 0 m 20.025 0 l S +1 0 0 1 201.538 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F83 10.3811 Tf 273.49 554.01 Td [(k)]TJ/F52 9.9626 Tf 5.439 0 Td [(b)]TJ/F83 10.3811 Tf 4.862 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F61 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F52 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ -0 g 0 G -/F51 9.9626 Tf -208.668 -29.848 Td [(3)]TJ +/F59 11.9552 Tf 205.125 706.129 Td [(DeviceSync)-250(\227)-250(Synchronize)-250(CUDA)-250(device)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -54.42 -19.65 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F52 9.9626 Tf 134.486 -26.865 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.14 0 Td [(=)]TJ/F83 10.3811 Tf 15.17 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ -ET -q -1 0 0 1 271.641 506.622 cm -[]0 d 0 J 0.398 w 0 0 m 23.472 0 l S -Q -BT -/F83 10.3811 Tf 271.766 497.297 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F54 7.5716 Tf 4 -1.744 Td [(0)]TJ/F83 10.3811 Tf 4.408 1.744 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F61 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F52 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ/F54 9.9626 Tf -210.391 -29.848 Td [(The)-248(behaviour)-248(is)-248(contr)18(olled)-249(by)-248(the)-248(istop)-248(ar)18(gument)-248(\050see)-248(later\051.)-310(In)-248(the)-248(above)-248(for)18(-)]TJ 0 -11.956 Td [(mulae,)]TJ/F52 9.9626 Tf 32.81 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.303 1.96 Td [(is)-256(the)-256(tentative)-257(soluti)1(on)-257(and)]TJ/F52 9.9626 Tf 125.144 0 Td [(r)]TJ/F52 7.5716 Tf 4.042 -1.96 Td [(i)]TJ/F85 10.3811 Tf 5.757 1.96 Td [(=)]TJ/F52 9.9626 Tf 11.2 0 Td [(b)]TJ/F83 10.3811 Tf 6.822 0 Td [(\000)]TJ/F52 9.9626 Tf 10.777 0 Td [(A)-42(x)]TJ/F52 7.5716 Tf 12.759 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.303 1.96 Td [(the)-256(corr)18(esponding)-256(r)18(esidual)]TJ -225.064 -11.955 Td [(at)-250(the)]TJ/F52 9.9626 Tf 27.083 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(-th)-250(iteration.)]TJ -28.305 -17.357 Td [(c)-175(a)-175(l)-174(l)-880(p)-105(s)-105(b)]TJ + [-525(psb_cuda_DeviceSync\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(This)-310(subr)18(outine)-310(ensur)18(es)-310(that)-309(all)-310(pr)18(eviosly)-310(invoked)-310(kernels,)-325(i.e.)-489(all)-310(invoca-)]TJ -14.944 -11.955 Td [(tion)-250(of)-250(CUDA-side)-250(code,)-250(have)-250(completed.)]TJ/F59 11.9552 Tf 0 -31.147 Td [(psb)]TJ ET q -1 0 0 1 150.28 433.215 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 171.339 620.243 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 154.313 433.015 Td [(k)-105(r)-105(y)-104(l)-105(o)-105(v)-238(\050)-156(m)-21(e)-22(t)-21(h)-22(o)-22(d)-218(,)-208(a)-242(,)-255(p)-80(r)-81(e)-80(c)-335(,)-191(b)-206(,)-203(x)-231(,)-234(e)-60(p)-59(s)-293(,)-273(d)-98(e)-97(s)-98(c)]TJ +/F59 11.9552 Tf 174.926 620.044 Td [(cuda)]TJ ET q -1 0 0 1 352.02 433.215 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 201.538 620.243 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 355.983 433.015 Td [(a)-370(,)-283(i)-108(n)-108(f)-108(o)-274(,)-57(&)]TJ -227.086 -11.955 Td [(&)-580(i)-69(t)-69(m)-70(a)-69(x)-313(,)-327(i)-151(t)-152(e)-151(r)-478(,)-281(e)-107(r)-106(r)-387(,)-321(i)-145(t)-146(r)-146(a)-145(c)-146(e)-466(,)-336(i)-161(r)-160(s)-161(t)-496(,)-291(i)-116(s)-116(t)-116(o)-116(p)-407(,)-219(c)-43(o)-43(n)-44(d)-177(\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -29.002 -25.88 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.349 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.349 Td [(method)]TJ -0 g 0 G -/F54 9.9626 Tf 39.851 0 Td [(a)-193(string)-194(that)-193(de\002nes)-194(the)-194(it)1(erative)-194(method)-193(to)-194(be)-194(use)1(d.)-292(Supported)-193(values)]TJ -14.944 -11.956 Td [(ar)18(e:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.349 Td [(CG:)]TJ -0 g 0 G -/F54 9.9626 Tf 22.964 0 Td [(the)-250(Conjugate)-250(Gradient)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -22.964 -15.364 Td [(CGS:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.051 0 Td [(the)-250(Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -29.051 -15.365 Td [(GCR:)]TJ -0 g 0 G -/F54 9.9626 Tf 30.157 0 Td [(the)-250(Generalized)-250(Conjugate)-250(Residual)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -30.157 -15.364 Td [(FCG:)]TJ -0 g 0 G -/F54 9.9626 Tf 28.503 0 Td [(the)-250(Flexible)-250(Conjugate)-250(Gradient)-250(method)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 176.854 3.616 Td [(5)]TJ +/F59 11.9552 Tf 205.125 620.044 Td [(getDeviceCount)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(ngpus)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 4.284 -3.616 Td [(;)]TJ -0 g 0 G -/F51 9.9626 Tf -209.641 -15.364 Td [(BICG:)]TJ -0 g 0 G -/F54 9.9626 Tf 33.484 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -33.484 -15.365 Td [(BICGST)92(AB:)]TJ -0 g 0 G -/F54 9.9626 Tf 59.696 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -59.696 -15.364 Td [(BICGST)92(ABL:)]TJ -0 g 0 G -/F54 9.9626 Tf 65.783 0 Td [(the)-218(Bi-Conjugate)-217(Gradient)-218(Stabilized)-218(method)-217(with)-218(r)18(estart-)]TJ -43.865 -11.955 Td [(ing;)]TJ -0 g 0 G -/F51 9.9626 Tf -21.918 -15.365 Td [(RGMRES:)]TJ -0 g 0 G -/F54 9.9626 Tf 52.294 0 Td [(the)-250(Generalized)-250(Minimal)-250(Residual)-250(method)-250(with)-250(r)18(estarting.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -77.201 -19.349 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 178.414 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -170.787 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 138.701 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 138.501 Td [(Tspmat)]TJ + [-1050(psb_cuda_getDeviceCount\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Get)-250(number)-250(of)-250(devices)-250(available)-250(on)-250(curr)18(ent)-250(computing)-250(node.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 344.406 138.701 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 546.113 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 347.544 138.501 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G +/F59 11.9552 Tf 174.926 545.914 Td [(cuda)]TJ ET q -1 0 0 1 99.895 130.091 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +1 0 0 1 201.538 546.113 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 5.9776 Tf 110.755 123.219 Td [(5)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(Note:)-310(the)-250(implementation)-250(is)-250(for)]TJ/F52 7.9701 Tf 113.297 0 Td [(F)-31(C)-45(G)]TJ/F85 8.3049 Tf 16.387 0 Td [(\050)]TJ/F54 7.9701 Tf 3.319 0 Td [(1)]TJ/F85 8.3049 Tf 4.085 0 Td [(\051)]TJ/F54 7.9701 Tf 3.32 0 Td [(.)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 9.629 -29.888 Td [(159)]TJ -0 g 0 G -ET - -endstream -endobj -1923 0 obj -<< -/Length 7054 ->> -stream -0 g 0 G +/F59 11.9552 Tf 205.125 545.914 Td [(getDevice)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(ngpus)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(prec)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(The)-250(data)-250(str)8(uctur)18(e)-250(containing)-250(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ + [-1050(psb_cuda_getDevice\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Get)-250(device)-250(in)-250(use)-250(by)-250(curr)18(ent)-250(pr)18(ocess.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 360.068 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 471.983 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 363.206 658.308 Td [(prec)]TJ +/F59 11.9552 Tf 174.926 471.784 Td [(cuda)]TJ ET q -1 0 0 1 384.755 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 471.983 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 387.893 658.308 Td [(type)]TJ +/F59 11.9552 Tf 205.125 471.784 Td [(setDevice)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -22.202 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(The)-250(RHS)-250(vector)74(.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 588.285 Td [(T)]TJ + [-525(psb_cuda_setDevice\050dev\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Set)-250(device)-250(to)-250(be)-250(used)-250(by)-250(curr)18(ent)-250(pr)18(ocess.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 419.614 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 397.853 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 422.752 588.285 Td [(vect)]TJ +/F59 11.9552 Tf 174.926 397.654 Td [(cuda)]TJ ET q -1 0 0 1 444.301 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 397.853 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 447.439 588.285 Td [(type)]TJ +/F59 11.9552 Tf 205.125 397.654 Td [(DeviceHasUV)129(A)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(hasUva)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -317.656 -22.202 Td [(x)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(initial)-250(guess.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.687 0 Td [(psb)]TJ + [-525(psb_cuda_DeviceHasUVA\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Returns)-295(tr)8(ue)-294(if)-295(device)-295(curr)18(ently)-295(in)-294(use)-295(supports)-295(UV)111(A)-295(\050Uni\002ed)-294(V)55(irtual)-295(Ad-)]TJ -14.944 -11.955 Td [(dr)18(essing\051.)]TJ/F59 11.9552 Tf 0 -31.147 Td [(psb)]TJ ET q -1 0 0 1 410.618 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 311.768 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 413.756 518.262 Td [(T)]TJ +/F59 11.9552 Tf 174.926 311.569 Td [(cuda)]TJ ET q -1 0 0 1 419.614 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 311.768 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 422.752 518.262 Td [(vect)]TJ +/F59 11.9552 Tf 205.125 311.569 Td [(W)74(arpSize)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(nw)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_cuda_WarpSize\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(warp)-250(size.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 444.301 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 237.638 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 447.439 518.262 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -317.656 -22.203 Td [(eps)]TJ -0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(stopping)-250(tolerance.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -22.203 Td [(desc)]TJ +/F59 11.9552 Tf 174.926 237.439 Td [(cuda)]TJ ET q -1 0 0 1 171.218 426.236 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 201.538 237.638 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 174.207 426.036 Td [(a)]TJ +/F59 11.9552 Tf 205.125 237.439 Td [(MultiProcessors)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(nmp)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_cuda_MultiProcessors\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(number)-250(of)-250(multipr)18(ocessors)-250(in)-250(the)-250(CUDA)-250(device.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 360.068 378.415 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 163.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 363.206 378.216 Td [(desc)]TJ +/F59 11.9552 Tf 174.926 163.309 Td [(cuda)]TJ ET q -1 0 0 1 384.755 378.415 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 163.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 387.893 378.216 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -22.203 Td [(itmax)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(The)-250(maximum)-250(number)-250(of)-250(iterations)-250(to)-250(perform.)]TJ -5.529 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.569 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F85 10.3811 Tf 27.744 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(1000.)]TJ -77.274 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F52 9.9626 Tf 142.349 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 27.743 0 Td [(\025)]TJ/F54 9.9626 Tf 10.962 0 Td [(1.)]TJ +/F59 11.9552 Tf 205.125 163.309 Td [(MaxThreadsPerMP)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(nt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -205.961 -22.202 Td [(itrace)]TJ -0 g 0 G -/F54 9.9626 Tf 29.878 0 Td [(If)]TJ/F61 10.3811 Tf 11.007 0 Td [(>)]TJ/F54 9.9626 Tf 14.142 0 Td [(0)-422(print)-423(out)-422(an)-422(informational)-423(message)-422(about)-422(conver)18(gence)-423(every)]TJ/F52 9.9626 Tf -30.066 -11.955 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F54 9.9626 Tf 26.396 0 Td [(iterations.)-310(If)]TJ/F85 10.3811 Tf 56.313 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(0)-250(print)-250(a)-250(message)-250(in)-250(case)-250(of)-250(conver)18(gence)-250(failur)18(e.)]TJ -93.724 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.57 0 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F85 10.3811 Tf 26.796 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -109.553 -34.158 Td [(irst)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 19.915 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(r)18(estart)-250(parameter)74(.)]TJ 4.992 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)]TJ/F52 9.9626 Tf 34.613 0 Td [(i)-22(r)-35(s)-25(t)]TJ/F61 10.3811 Tf 17.671 0 Td [(>)]TJ/F54 9.9626 Tf 10.961 0 Td [(0.)-298(This)-214(is)-213(employed)-214(for)-213(the)-214(BiCGST)74(ABL)-214(or)-213(RGMRES)-214(meth-)]TJ -63.245 -11.955 Td [(ods,)-250(otherwise)-250(it)-250(is)-250(ignor)18(ed.)]TJ + [-525(psb_cuda_MaxThreadsPerMP\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(maximum)-250(number)-250(of)-250(thr)18(eads)-250(per)-250(multipr)18(ocessor)74(.)]TJ 0 g 0 G - 139.477 -29.888 Td [(160)]TJ + 149.439 -29.888 Td [(172)]TJ 0 g 0 G ET endstream endobj -1928 0 obj +2175 0 obj << -/Length 4489 +/Length 2721 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(istop)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(stopping)-250(criterion.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)-351(1:)-351(use)-271(the)-270(normwise)-271(backwar)18(d)-270(err)18(or)74(,)-276(2:)-351(use)-271(the)-270(scaled)-271(2-norm)-270(of)]TJ 0 -11.955 Td [(the)-250(r)18(esidual,)-250(3:)-310(use)-250(the)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm.)-310(Default:)-310(2.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(computed)-250(solution.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(psb)]TJ ET q -1 0 0 1 359.808 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 362.947 558.682 Td [(T)]TJ +/F59 11.9552 Tf 124.116 706.129 Td [(cuda)]TJ ET q -1 0 0 1 368.804 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 371.943 558.682 Td [(vect)]TJ +/F59 11.9552 Tf 154.315 706.129 Td [(MaxRegistersPerBlock)]TJ/F67 9.9626 Tf -54.42 -18.964 Td [(nr)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_cuda_MaxRegistersPerBlock\050\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(Returns)-250(the)-250(maximum)-250(number)-250(of)-250(r)18(egister)-250(per)-250(thr)18(ead)-250(block.)]TJ/F59 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ ET q -1 0 0 1 393.492 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 636.313 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 636.114 Td [(cuda)]TJ +ET +q +1 0 0 1 150.729 636.313 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 396.63 558.682 Td [(type)]TJ +/F59 11.9552 Tf 154.315 636.114 Td [(MemoryClockRate)]TJ/F67 9.9626 Tf -54.42 -18.964 Td [(cl)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(iter)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(number)-250(of)-250(iterations)-250(performed.)]TJ 4.434 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + [-525(psb_cuda_MemoryClockRate\050\051)]TJ/F62 9.9626 Tf 14.944 -21.917 Td [(Returns)-250(the)-250(memory)-250(clock)-250(rate)-250(in)-250(KHz,)-250(as)-250(an)-250(integer)74(.)]TJ/F59 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 566.299 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 566.1 Td [(cuda)]TJ +ET +q +1 0 0 1 150.729 566.299 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 154.315 566.1 Td [(MemoryBusW)37(idth)]TJ/F67 9.9626 Tf -54.42 -18.964 Td [(nb)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(err)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 17.714 0 Td [(The)-250(conver)18(gence)-250(estimate)-250(on)-250(exit.)]TJ 7.193 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(cond)]TJ + [-525(psb_cuda_MemoryBusWidth\050\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(Returns)-250(the)-250(memory)-250(bus)-250(width)-250(in)-250(bits.)]TJ/F59 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 496.284 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 496.085 Td [(cuda)]TJ +ET +q +1 0 0 1 150.729 496.284 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 154.315 496.085 Td [(MemoryPeakBandwidth)]TJ/F67 9.9626 Tf -54.42 -18.964 Td [(bw)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.119 0 Td [(An)-210(esti)1(mate)-210(of)-210(the)-209(condition)-210(number)-209(of)-210(matrix)]TJ/F52 9.9626 Tf 204.999 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(;)-223(only)-210(available)-209(with)-210(the)]TJ/F52 9.9626 Tf -214.444 -11.956 Td [(C)-45(G)]TJ/F54 9.9626 Tf 17.001 0 Td [(method)-250(on)-250(r)18(eal)-250(data.)]TJ -17.086 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-249(as:)-310(a)-249(r)18(eal)-249(number)74(.)-310(A)-249(corr)18(ect)-250(r)18(esult)-249(will)-249(be)-249(gr)18(eater)-250(than)-249(or)-249(equal)]TJ 0 -11.955 Td [(to)-403(one;)-480(if)-403(speci\002ed)-403(for)-403(non-r)18(eal)-403(data,)-441(or)-403(an)-403(err)18(or)-403(occurr)18(ed,)-441(zer)18(o)-403(is)-403(r)18(e-)]TJ 0 -11.956 Td [(turned.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-525(psb_cuda_MemoryPeakBandwidth\050\051)]TJ/F62 9.9626 Tf 0 -21.918 Td [(Returns)-250(the)-250(peak)-250(memory)-250(bandwidth)-250(in)-250(MB/s)-250(\050r)18(eal)-250(double)-250(pr)18(ecision\051.)]TJ 0 g 0 G - 139.477 -161.394 Td [(161)]TJ + 164.384 -364.765 Td [(173)]TJ 0 g 0 G ET endstream endobj -1932 0 obj +2179 0 obj << /Length 81 >> @@ -26278,104 +31329,104 @@ stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 315.088 90.438 Td [(162)]TJ +/F62 9.9626 Tf 315.088 90.438 Td [(174)]TJ 0 g 0 G ET endstream endobj -1936 0 obj +2183 0 obj << -/Length 6590 +/Length 6353 >> stream 0 g 0 G 0 g 0 G BT -/F51 14.3462 Tf 99.895 705.784 Td [(References)]TJ -0 g 0 G -/F54 9.9626 Tf 4.982 -22.702 Td [([1])]TJ +/F59 14.3462 Tf 99.895 705.784 Td [(References)]TJ 0 g 0 G - [-500(D.)-314(Barbieri,)-330(V)129(.)-314(Car)18(dellini,)-330(S.)-314(Filippone)-314(and)-314(D.)-314(Rouson)]TJ/F52 9.9626 Tf 258.302 0 Td [(Design)-314(Patterns)-314(for)]TJ -241.705 -11.955 Td [(Scienti\002c)-224(Computations)-225(on)-224(Sparse)-224(Matrices)]TJ/F54 9.9626 Tf 175.255 0 Td [(,)-229(HPSS)-225(2011,)-229(Algorithms)-225(and)-224(Pr)18(o-)]TJ -175.255 -11.955 Td [(gramming)-396(T)92(ools)-395(for)-396(Next-Generation)-396(High-Performance)-396(Scie)1(nti\002c)-396(Soft-)]TJ 0 -11.955 Td [(war)18(e,)-250(Bor)18(deaux,)-250(Sep.)-250(2011)]TJ +/F62 9.9626 Tf 4.982 -23.579 Td [([1])]TJ 0 g 0 G - -16.597 -19.642 Td [([2])]TJ + [-500(G.)-362(Bella,)-391(S.)-362(Filippone,)-390(A.)-363(De)-362(Maio)-362(and)-363(M.)-362(T)92(esta,)]TJ/F60 9.9626 Tf 239.345 0 Td [(A)-362(Simulation)-362(Model)-363(for)]TJ -222.748 -11.955 Td [(For)18(est)-346(Fir)18(es)]TJ/F62 9.9626 Tf 47.93 0 Td [(,)-370(in)-346(J.)-346(Dongarra,)-370(K.)-346(Madsen,)-370(J.)-346(W)92(asniewski,)-370(editors,)-370(Pr)18(oceed-)]TJ -47.93 -11.955 Td [(ings)-338(of)-337(P)92(ARA)-338(04)-338(W)92(orkshop)-337(on)-338(State)-338(of)-338(the)-337(Art)-338(in)-338(Scienti\002c)-337(Computing,)]TJ 0 -11.955 Td [(pp.)-250(546\226553,)-250(Lectur)18(e)-250(Notes)-250(in)-250(Computer)-250(Science,)-250(Springer)74(,)-250(2005.)]TJ 0 g 0 G - [-500(G.)-362(Bella,)-391(S.)-362(Filippone,)-390(A.)-363(De)-362(Maio)-362(and)-363(M.)-362(T)92(esta,)]TJ/F52 9.9626 Tf 239.345 0 Td [(A)-362(Simulation)-362(Model)-363(for)]TJ -222.748 -11.955 Td [(For)18(est)-346(Fir)18(es)]TJ/F54 9.9626 Tf 47.93 0 Td [(,)-370(in)-346(J.)-346(Dongarra,)-370(K.)-346(Madsen,)-370(J.)-346(W)92(asniewski,)-370(editors,)-370(Pr)18(oceed-)]TJ -47.93 -11.955 Td [(ings)-338(of)-337(P)92(ARA)-338(04)-338(W)92(orkshop)-337(on)-338(State)-338(of)-338(the)-337(Art)-338(in)-338(Scienti\002c)-337(Computing,)]TJ 0 -11.955 Td [(pp.)-250(546\226553,)-250(Lectur)18(e)-250(Notes)-250(in)-250(Computer)-250(Science,)-250(Springer)74(,)-250(2005.)]TJ -0 g 0 G - -16.597 -19.642 Td [([3])]TJ + -16.597 -21.736 Td [([2])]TJ 0 g 0 G [-500(A.)-216(Buttari,)-223(D.)-216(di)-217(Sera\002no,)-223(P)129(.)-216(D'Ambra,)-223(S.)-216(Filippone,)-71(2LEV)74(-D2P4:)-293(a)-216(package)]TJ 16.597 -11.955 Td [(of)-247(high-performance)-247(pr)18(econditioners,)-107(Applicable)-247(Algebra)-247(in)-247(Engineering,)]TJ 0 -11.955 Td [(Communications)-292(and)-293(Computing,)-303(V)111(olume)-292(18,)-303(Number)-292(3,)-303(May)111(,)-303(2007,)-303(pp.)]TJ 0 -11.955 Td [(223-239)]TJ 0 g 0 G - -16.597 -19.642 Td [([4])]TJ + -16.597 -21.736 Td [([3])]TJ 0 g 0 G [-500(P)129(.)-248(D'Ambra,)-248(S.)-248(Filippone,)-249(D.)-248(Di)-248(Sera\002no)-107(On)-248(the)-248(Development)-248(of)-248(PSBLAS-)]TJ 16.597 -11.955 Td [(based)-523(Parallel)-524(T)90(wo-level)-523(Schwarz)-523(Pr)18(econditioners)-1235(Applied)-523(Numeri-)]TJ 0 -11.955 Td [(cal)-376(Mathematics,)-408(Elsevier)-376(Science,)-408(V)111(olume)-376(57,)-408(Issues)-376(11-12,)-408(November)18(-)]TJ 0 -11.955 Td [(December)-250(2007,)-250(Pages)-250(1181-1196.)]TJ 0 g 0 G - -16.597 -19.642 Td [([5])]TJ + -16.597 -21.736 Td [([4])]TJ 0 g 0 G [-500(Dongarra,)-268(J.)-265(J.,)-269(DuCr)18(oz,)-268(J.,)-269(Hammarli)1(ng,)-269(S.)-265(and)-264(Hanson,)-269(R.,)-268(An)-265(Extended)]TJ 16.597 -11.955 Td [(Set)-463(of)-463(Fortran)-463(Basic)-464(Linear)-463(Algebra)-463(Subpr)18(ograms,)-516(ACM)-463(T)90(rans.)-463(Math.)]TJ 0 -11.955 Td [(Softw)92(.)-250(vol.)-250(14,)-250(1\22617,)-250(1988.)]TJ 0 g 0 G - -16.597 -19.642 Td [([6])]TJ + -16.597 -21.736 Td [([5])]TJ 0 g 0 G [-500(Dongarra,)-422(J.,)-421(DuCr)18(oz,)-422(J.,)-422(Hammarling,)-422(S.)-387(and)-387(Duf)18(f,)-422(I.,)-422(A)-387(Set)-387(of)-388(level)-387(3)]TJ 16.597 -11.955 Td [(Basic)-308(Linear)-307(Algebra)-308(Subpr)18(ograms,)-322(ACM)-308(T)90(rans.)-308(Math.)-308(Soft)1(w)91(.)-307(vol.)-308(16,)-322(1\226)]TJ 0 -11.955 Td [(17,)-250(1990.)]TJ 0 g 0 G - -16.597 -19.642 Td [([7])]TJ + -16.597 -21.735 Td [([6])]TJ +0 g 0 G + [-500(J.)-349(J.)-350(Dongarra)-349(and)-350(R.)-350(C.)-349(Whaley)111(,)]TJ/F60 9.9626 Tf 166.061 0 Td [(A)-349(User)-37(')55(s)-350(Guide)-349(to)-350(the)-349(BLACS)-350(v)55(.)-349(1.1)]TJ/F62 9.9626 Tf 152.062 0 Td [(,)-374(La-)]TJ -301.526 -11.956 Td [(pack)-234(W)92(orking)-233(Note)-234(94,)-237(T)92(ech.)-233(Rep.)-234(UT)55(-CS-95-281,)-237(University)-234(of)-233(T)92(ennessee,)]TJ 0 -11.955 Td [(Mar)18(ch)-250(1995)-250(\050updated)-250(May)-250(1997\051.)]TJ 0 g 0 G - [-500(J.)-349(J.)-350(Dongarra)-349(and)-350(R.)-350(C.)-349(Whaley)111(,)]TJ/F52 9.9626 Tf 166.061 0 Td [(A)-349(User)-37(')55(s)-350(Guide)-349(to)-350(the)-349(BLACS)-350(v)55(.)-349(1.1)]TJ/F54 9.9626 Tf 152.062 0 Td [(,)-374(La-)]TJ -301.526 -11.955 Td [(pack)-234(W)92(orking)-233(Note)-234(94,)-237(T)92(ech.)-233(Rep.)-234(UT)55(-CS-95-281,)-237(University)-234(of)-233(T)92(ennessee,)]TJ 0 -11.955 Td [(Mar)18(ch)-250(1995)-250(\050updated)-250(May)-250(1997\051.)]TJ + -16.597 -21.735 Td [([7])]TJ 0 g 0 G - -16.597 -19.642 Td [([8])]TJ + [-500(I.)-259(Duf)18(f,)-262(M.)-260(Marr)18(one,)-261(G.)-260(Radicati)-259(and)-259(C.)-260(V)55(ittoli,)]TJ/F60 9.9626 Tf 223.407 0 Td [(Level)-259(3)-260(Basic)-259(Linear)-260(Algebra)]TJ -206.81 -11.955 Td [(Subpr)18(ograms)-328(f)1(or)-328(Sparse)-327(Matrices:)-465(a)-328(User)-327(Level)-328(Interface)]TJ/F62 9.9626 Tf 233.522 0 Td [(,)-347(ACM)-327(T)90(ransactions)]TJ -233.522 -11.956 Td [(on)-250(Mathematical)-250(Softwar)18(e,)-250(23\0503\051,)-250(pp.)-250(379\226401,)-250(1997.)]TJ 0 g 0 G - [-500(I.)-259(Duf)18(f,)-262(M.)-260(Marr)18(one,)-261(G.)-260(Radicati)-259(and)-259(C.)-260(V)55(ittoli,)]TJ/F52 9.9626 Tf 223.407 0 Td [(Level)-259(3)-260(Basic)-259(Linear)-260(Algebra)]TJ -206.81 -11.955 Td [(Subpr)18(ograms)-328(f)1(or)-328(Sparse)-327(Matrices:)-465(a)-328(User)-327(Level)-328(Interface)]TJ/F54 9.9626 Tf 233.522 0 Td [(,)-347(ACM)-327(T)90(ransactions)]TJ -233.522 -11.955 Td [(on)-250(Mathematical)-250(Softwar)18(e,)-250(23\0503\051,)-250(pp.)-250(379\226401,)-250(1997.)]TJ + -16.597 -21.735 Td [([8])]TJ 0 g 0 G - -16.597 -19.641 Td [([9])]TJ + [-500(I.)-282(Duf)18(f,)-289(M.)-282(Her)18(oux)-281(and)-282(R.)-281(Pozo,)]TJ/F60 9.9626 Tf 160.607 0 Td [(An)-282(Overview)-281(of)-282(the)-281(Sparse)-282(Basic)-282(Linear)-281(Al-)]TJ -144.01 -11.955 Td [(gebra)-287(Subpr)18(ograms:)-383(the)-287(New)-287(Standard)-286(fr)18(om)-287(the)-287(BLAS)-287(T)111(echnical)-286(Forum)]TJ/F62 9.9626 Tf 292.455 0 Td [(,)-296(ACM)]TJ -292.455 -11.955 Td [(T)90(ransactions)-250(on)-250(Mathematical)-250(Softwar)18(e,)-250(28\0502\051,)-250(pp.)-250(239\226267,)-250(2002.)]TJ 0 g 0 G - [-500(I.)-282(Duf)18(f,)-289(M.)-282(Her)18(oux)-281(and)-282(R.)-281(Pozo,)]TJ/F52 9.9626 Tf 160.607 0 Td [(An)-282(Overview)-281(of)-282(the)-281(Sparse)-282(Basic)-282(Linear)-281(Al-)]TJ -144.01 -11.956 Td [(gebra)-287(Subpr)18(ograms:)-383(the)-287(New)-287(Standard)-286(fr)18(om)-287(the)-287(BLAS)-287(T)111(echnical)-286(Forum)]TJ/F54 9.9626 Tf 292.455 0 Td [(,)-296(ACM)]TJ -292.455 -11.955 Td [(T)90(ransactions)-250(on)-250(Mathematical)-250(Softwar)18(e,)-250(28\0502\051,)-250(pp.)-250(239\226267,)-250(2002.)]TJ + -16.597 -21.736 Td [([9])]TJ 0 g 0 G - -21.579 -19.641 Td [([10])]TJ + [-500(S.)-298(Filippone)-298(and)-297(M.)-298(Colajanni,)]TJ/F60 9.9626 Tf 154.96 0 Td [(PSBLAS:)-298(A)-298(Library)-297(for)-298(Parallel)-298(Linear)-298(Alge-)]TJ -138.363 -11.955 Td [(bra)-340(Computation)-340(on)-340(Sparse)-340(Matrices)]TJ/F62 9.9626 Tf 151.715 0 Td [(,)-670(ACM)-340(T)90(ransactions)-340(on)-340(Mathematical)]TJ -151.715 -11.955 Td [(Softwar)18(e,)-250(26\0504\051,)-250(pp.)-250(527\226550,)-250(2000.)]TJ 0 g 0 G - [-500(S.)-298(Filippone)-298(and)-298(M.)-297(Colajanni,)]TJ/F52 9.9626 Tf 159.942 0 Td [(PSBLAS:)-298(A)-298(Library)-297(for)-298(Parallel)-298(Linear)-298(Alge-)]TJ -138.363 -11.955 Td [(bra)-340(Computation)-340(on)-340(Sparse)-340(Matrices)]TJ/F54 9.9626 Tf 151.715 0 Td [(,)-670(ACM)-340(T)90(ransactions)-340(on)-340(Mathematical)]TJ -151.715 -11.956 Td [(Softwar)18(e,)-250(26\0504\051,)-250(pp.)-250(527\226550,)-250(2000.)]TJ + -21.579 -21.736 Td [([10])]TJ 0 g 0 G - -21.579 -19.641 Td [([11])]TJ + [-500(S.)-397(Filippone)-398(and)-397(A.)-397(Buttari,)]TJ/F60 9.9626 Tf 151.683 0 Td [(Object-Oriented)-397(T)111(echniques)-397(for)-398(Sparse)-397(Matrix)]TJ -130.104 -11.955 Td [(Computations)-353(in)-353(Fortran)-354(2003)]TJ/F62 9.9626 Tf 126.789 0 Td [(,)-716(ACM)-353(T)90(ransactions)-353(on)-354(Mathematic)1(al)-354(Soft-)]TJ -126.789 -11.955 Td [(war)18(e,)-250(38\0504\051,)-250(2012.)]TJ 0 g 0 G - [-500(S.)-397(Filippone)-398(and)-397(A.)-397(Buttari,)]TJ/F52 9.9626 Tf 151.683 0 Td [(Object-Oriented)-397(T)111(echniques)-397(for)-398(Sparse)-397(Matrix)]TJ -130.104 -11.955 Td [(Computations)-353(in)-353(Fortran)-354(2003)]TJ/F54 9.9626 Tf 126.789 0 Td [(,)-716(ACM)-353(T)90(ransactions)-353(on)-354(Mathemati)1(cal)-354(Soft-)]TJ -126.789 -11.955 Td [(war)18(e,)-250(38\0504\051,)-250(2012.)]TJ + -21.579 -21.735 Td [([11])]TJ 0 g 0 G - -21.579 -19.642 Td [([12])]TJ + [-500(S.)-339(Filippone,)-360(P)129(.)-339(D'Ambra,)-360(M.)-339(Colajanni,)]TJ/F60 9.9626 Tf 202.675 0 Td [(Using)-339(a)-338(Parallel)-339(Library)-338(of)-339(Sparse)]TJ -181.096 -11.956 Td [(Linear)-350(Algebra)-350(in)-350(a)-349(Fluid)-350(Dynamics)-350(Applications)-350(Code)-350(on)-350(Linux)-349(Clusters)]TJ/F62 9.9626 Tf 307.21 0 Td [(,)-375(in)]TJ -307.21 -11.955 Td [(G.)-262(Joubert,)-264(A.)-262(Murli,)-264(F)92(.)-262(Peters,)-265(M.)-261(V)92(anneschi,)-265(editors,)-265(Parallel)-261(Computing)]TJ 0 -11.955 Td [(-)-250(Advances)-250(&)-250(Curr)18(ent)-250(Issues,)-250(pp.)-250(441\226448,)-250(Imperial)-250(College)-250(Pr)18(ess,)-250(2002.)]TJ 0 g 0 G - [-500(S.)-339(Filippone,)-360(P)129(.)-339(D'Ambra,)-360(M.)-339(Colajanni,)]TJ/F52 9.9626 Tf 202.675 0 Td [(Using)-339(a)-338(Parallel)-339(Library)-338(of)-339(Sparse)]TJ -181.096 -11.955 Td [(Linear)-350(Algebra)-350(in)-350(a)-349(Fluid)-350(Dynamics)-350(Applications)-350(Code)-350(on)-350(Linux)-349(Clusters)]TJ/F54 9.9626 Tf 307.21 0 Td [(,)-375(in)]TJ -307.21 -11.955 Td [(G.)-262(Joubert,)-264(A.)-262(Murli,)-264(F)92(.)-262(Peters,)-265(M.)-261(V)92(anneschi,)-265(editors,)-265(Parallel)-261(Computing)]TJ 0 -11.955 Td [(-)-250(Advances)-250(&)-250(Curr)18(ent)-250(Issues,)-250(pp.)-250(441\226448,)-250(Imperial)-250(College)-250(Pr)18(ess,)-250(2002.)]TJ + -21.579 -21.735 Td [([12])]TJ 0 g 0 G - 142.805 -29.888 Td [(163)]TJ + [-500(Gamma,)-217(E.,)-434(Helm,)-216(R.,)-434(Johnson,)-217(R.,)-434(and)-417(Vlissides,)-217(J.)-208(1995.)]TJ/F60 9.9626 Tf 276.041 0 Td [(Design)-208(Patterns:)]TJ -254.462 -11.956 Td [(Elements)-250(of)-250(Reusable)-250(Object-Oriented)-250(Softwar)18(e)]TJ/F62 9.9626 Tf 190.781 0 Td [(.)-250(Addison-W)92(esley)111(.)]TJ +0 g 0 G + -47.976 -29.887 Td [(175)]TJ 0 g 0 G ET endstream endobj -1945 0 obj +2192 0 obj << -/Length 3007 +/Length 4492 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 150.705 706.129 Td [([13])]TJ +/F62 9.9626 Tf 150.705 706.129 Td [([13])]TJ 0 g 0 G - [-500(Gamma,)-217(E.,)-434(Helm,)-216(R.,)-434(Johnson,)-217(R.,)-434(and)-417(Vlissides,)-216(J.)-209(1995.)]TJ/F52 9.9626 Tf 276.04 0 Td [(Design)-209(Patterns)1(:)]TJ -254.461 -11.955 Td [(Elements)-250(of)-250(Reusable)-250(Object-Oriented)-250(Softwar)18(e)]TJ/F54 9.9626 Tf 190.781 0 Td [(.)-250(Addison-W)92(esley)111(.)]TJ + [-500(Karypis,)-422(G.)-388(and)-387(Kumar)74(,)-422(V)129(.,)]TJ/F60 9.9626 Tf 149.763 0 Td [(METIS:)-388(Unstructur)18(ed)-387(Graph)-388(Partitioning)-388(and)]TJ -128.184 -11.955 Td [(Sparse)-452(Matrix)-452(Ordering)-452(System)]TJ/F62 9.9626 Tf 135.842 0 Td [(.)-452(Minneapolis,)-503(MN)-452(55455:)-714(University)-452(of)]TJ -135.842 -11.955 Td [(Minnesota,)-531(Department)-475(of)-476(Comp)1(uter)-476(Science,)-531(1995.)-475(Internet)-475(Addr)18(ess:)]TJ/F67 9.9626 Tf 0 -11.956 Td [(http://www.cs.umn.edu/~karypis)]TJ/F62 9.9626 Tf 156.91 0 Td [(.)]TJ 0 g 0 G - -212.36 -19.926 Td [([14])]TJ + -178.489 -19.925 Td [([14])]TJ 0 g 0 G - [-500(Karypis,)-422(G.)-388(and)-387(Kumar)74(,)-422(V)129(.,)]TJ/F52 9.9626 Tf 149.763 0 Td [(METIS:)-388(Unstructur)18(ed)-387(Graph)-388(Partitioning)-388(and)]TJ -128.184 -11.955 Td [(Sparse)-452(Matrix)-452(Ordering)-452(System)]TJ/F54 9.9626 Tf 135.842 0 Td [(.)-452(Minneapolis,)-503(MN)-452(55455:)-714(University)-452(of)]TJ -135.842 -11.955 Td [(Minnesota,)-531(Department)-475(of)-476(Comp)1(uter)-476(Science,)-531(1995.)-475(Internet)-475(Addr)18(ess:)]TJ/F59 9.9626 Tf 0 -11.955 Td [(http://www.cs.umn.edu/~karypis)]TJ/F54 9.9626 Tf 156.91 0 Td [(.)]TJ + [-500(Lawson,)-314(C.,)-314(Hanson,)-314(R.,)-313(Kincaid,)-314(D.)-301(and)-301(Kr)18(ogh,)-314(F)92(.,)-314(Basic)-301(Linear)-301(Algebra)]TJ 21.579 -11.955 Td [(Subpr)18(ograms)-288(for)-288(Fortran)-288(usage,)-298(ACM)-288(T)90(rans.)-288(Math.)-288(Softw)92(.)-288(vol.)-288(5,)-298(38\226329,)]TJ 0 -11.955 Td [(1979.)]TJ 0 g 0 G - -178.489 -19.925 Td [([15])]TJ + -21.579 -19.926 Td [([15])]TJ 0 g 0 G - [-500(Lawson,)-314(C.,)-314(Hanson,)-314(R.,)-313(Kincaid,)-314(D.)-301(and)-301(Kr)18(ogh,)-314(F)92(.,)-314(Basic)-301(Linear)-301(Algebra)]TJ 21.579 -11.956 Td [(Subpr)18(ograms)-288(for)-288(Fortran)-288(usage,)-298(ACM)-288(T)90(rans.)-288(Math.)-288(Softw)92(.)-288(vol.)-288(5,)-298(38\226329,)]TJ 0 -11.955 Td [(1979.)]TJ + [-500(Machiels,)-240(L.)-239(and)-238(Deville,)-240(M.)]TJ/F60 9.9626 Tf 146.833 0 Td [(Fortran)-238(90:)-304(An)-238(entry)-239(to)-238(object-oriented)-238(pr)18(ogram-)]TJ -125.254 -11.955 Td [(ming)-211(for)-210(the)-211(solution)-210(of)-211(partial)-210(differ)18(ential)-211(equations.)]TJ/F62 9.9626 Tf 211.899 0 Td [(ACM)-211(T)90(rans.)-210(Math.)-211(Softw)92(.)]TJ -211.899 -11.955 Td [(vol.)-250(23,)-250(32\22649.)]TJ 0 g 0 G -21.579 -19.925 Td [([16])]TJ 0 g 0 G - [-500(Machiels,)-240(L.)-239(and)-238(Deville,)-240(M.)]TJ/F52 9.9626 Tf 146.833 0 Td [(Fortran)-238(90:)-304(An)-238(entry)-239(to)-238(object-oriented)-238(pr)18(ogram-)]TJ -125.254 -11.955 Td [(ming)-211(for)-210(the)-211(solution)-210(of)-211(partial)-210(differ)18(ential)-211(equations.)]TJ/F54 9.9626 Tf 211.899 0 Td [(ACM)-211(T)90(rans.)-210(Math.)-211(Softw)92(.)]TJ -211.899 -11.955 Td [(vol.)-250(23,)-250(32\22649.)]TJ + [-500(Metcalf,)-251(M.,)-250(Reid,)-251(J.)-250(and)-251(Cohen,)-251(M.)]TJ/F60 9.9626 Tf 177.874 0 Td [(Fortran)-250(95/2003)-251(explained.)]TJ/F62 9.9626 Tf 112.004 0 Td [(Oxfor)18(d)-251(Uni-)]TJ -268.299 -11.955 Td [(versity)-250(Pr)18(ess,)-250(2004.)]TJ 0 g 0 G -21.579 -19.926 Td [([17])]TJ 0 g 0 G - [-500(Metcalf,)-251(M.,)-250(Reid,)-251(J.)-250(and)-251(Cohen,)-251(M.)]TJ/F52 9.9626 Tf 177.874 0 Td [(Fortran)-250(95/2003)-251(explained.)]TJ/F54 9.9626 Tf 112.004 0 Td [(Oxfor)18(d)-251(Uni-)]TJ -268.299 -11.955 Td [(versity)-250(Pr)18(ess,)-250(2004.)]TJ + [-500(Metcalf,)-258(M.,)-259(Reid,)-258(J.)-257(and)-256(Cohen,)-259(M.)]TJ/F60 9.9626 Tf 178.365 0 Td [(Modern)-257(Fortran)-256(explained.)]TJ/F62 9.9626 Tf 111.452 0 Td [(Oxfor)18(d)-257(Uni-)]TJ -268.238 -11.955 Td [(versity)-250(Pr)18(ess,)-250(2011.)]TJ 0 g 0 G -21.579 -19.925 Td [([18])]TJ 0 g 0 G @@ -26383,15 +31434,27 @@ BT 0 g 0 G -21.579 -19.926 Td [([19])]TJ 0 g 0 G - [-500(M.)-282(Snir)74(,)-290(S.)-282(Otto,)-289(S.)-282(Huss-Lederman,)-290(D.)-282(W)92(alker)-282(and)-282(J.)-281(Dongarra,)]TJ/F52 9.9626 Tf 304.659 0 Td [(MPI:)-282(The)]TJ -283.08 -11.955 Td [(Complete)-369(Refer)18(ence.)-369(V)74(olume)-369(1)-370(-)-369(The)-369(MPI)-369(Cor)18(e)]TJ/F54 9.9626 Tf 195.586 0 Td [(,)-399(second)-369(edition,)-399(MIT)-369(Pr)18(ess,)]TJ -195.586 -11.955 Td [(1998.)]TJ + [-500(M.)-282(Snir)74(,)-290(S.)-282(Otto,)-289(S.)-282(Huss-Lederman,)-290(D.)-282(W)92(alker)-282(and)-282(J.)-281(Dongarra,)]TJ/F60 9.9626 Tf 304.659 0 Td [(MPI:)-282(The)]TJ -283.08 -11.955 Td [(Complete)-369(Refer)18(ence.)-369(V)74(olume)-369(1)-370(-)-369(The)-369(MPI)-369(Cor)18(e)]TJ/F62 9.9626 Tf 195.586 0 Td [(,)-399(second)-369(edition,)-399(MIT)-369(Pr)18(ess,)]TJ -195.586 -11.955 Td [(1998.)]TJ +0 g 0 G + -21.579 -19.925 Td [([20])]TJ +0 g 0 G + [-500(D.)-314(Barbieri,)-330(V)129(.)-314(Car)18(dellini,)-330(S.)-314(Filippone)-314(and)-314(D.)-314(Rouson)]TJ/F60 9.9626 Tf 263.283 0 Td [(Design)-314(Patterns)-314(for)]TJ -241.704 -11.955 Td [(Scienti\002c)-224(Computations)-225(on)-224(Sparse)-224(Matrices)]TJ/F62 9.9626 Tf 175.254 0 Td [(,)-229(HPSS)-225(2011,)-229(Algorithms)-225(and)-224(Pr)18(o-)]TJ -175.254 -11.956 Td [(gramming)-396(T)92(ools)-395(for)-396(Next-Generation)-396(High-Performance)-395(Scienti\002c)-396(Soft-)]TJ 0 -11.955 Td [(war)18(e,)-250(Bor)18(deaux,)-250(Sep.)-250(2011)]TJ +0 g 0 G + -21.579 -19.925 Td [([21])]TJ +0 g 0 G + [-500(Car)18(dellini,)-494(V)129(.,)-988(Filippone)1(,)-494(S.,)-988(and)-890(Rouson,)-494(D.)-445(2014,)-494(Design)-445(patterns)]TJ 21.579 -11.955 Td [(for)-240(sparse-matrix)-240(computations)-240(on)-240(hybrid)-240(CPU/GPU)-240(platforms,)]TJ/F60 9.9626 Tf 284.952 0 Td [(Scienti\002c)]TJ -284.952 -11.955 Td [(Pr)18(ogramming)-250(22,)]TJ/F62 9.9626 Tf 73.713 0 Td [(1,)-250(1\22619.)]TJ 0 g 0 G - 142.804 -352.677 Td [(164)]TJ + -95.292 -19.926 Td [([22])]TJ +0 g 0 G + [-500(D.)-200(Barbieri,)-210(V)129(.)-200(Car)18(dellini,)-210(A.)-201(Fanfarillo,)-210(S.)-200(Filippone,)-210(Thr)18(ee)-200(storage)-200(formats)]TJ 21.579 -11.955 Td [(for)-318(sparse)-317(matrices)-318(on)-318(GPGPUs,)-335(T)92(ech.)-317(Rep.)-318(DICII)-318(RR-15.6,)-334(Universit)]TJ 305.831 0.025 Td [(\036)]TJ -0.832 -0.025 Td [(a)-318(di)]TJ -304.999 -11.955 Td [(Roma)-250(T)92(or)-250(V)111(er)18(gata)-250(\050Febr)8(uary)-250(2015\051.)]TJ +0 g 0 G + 142.805 -209.215 Td [(176)]TJ 0 g 0 G ET endstream endobj -1962 0 obj +2209 0 obj << /Length1 1383 /Length2 5908 @@ -26462,12 +31525,12 @@ W ò6S&ªå_!“½SÎ|esU›FÌR™y† ¢Y‹¥ýžï­§N endstream endobj -1964 0 obj +2211 0 obj << -/Length1 1537 -/Length2 2827 +/Length1 1956 +/Length2 7144 /Length3 0 -/Length 4364 +/Length 9100 >> stream %!PS-AdobeFont-1.0: CMITT10 003.002 @@ -26487,7 +31550,7 @@ FontDirectory/CMITT10 known{/CMITT10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /MPVPBL+CMITT10 def +/FontName /SFGIZH+CMITT10 def /FontBBox {11 -233 669 696 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -26503,44 +31566,88 @@ FontDirectory/CMITT10 known{/CMITT10 findfont dup/UniqueID known{dup end readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for +dup 65 /A put +dup 67 /C put dup 68 /D put +dup 69 /E put +dup 72 /H put +dup 73 /I put +dup 75 /K put +dup 76 /L put +dup 77 /M put +dup 80 /P put +dup 84 /T put +dup 86 /V put dup 97 /a put dup 99 /c put +dup 44 /comma put dup 100 /d put dup 101 /e put dup 33 /exclam put +dup 102 /f put +dup 103 /g put +dup 104 /h put +dup 45 /hyphen put +dup 105 /i put +dup 107 /k put +dup 109 /m put dup 110 /n put dup 111 /o put +dup 112 /p put +dup 40 /parenleft put +dup 41 /parenright put dup 46 /period put +dup 114 /r put dup 115 /s put +dup 47 /slash put dup 116 /t put +dup 119 /w put +dup 120 /x put +dup 121 /y put readonly def currentdict end currentfile eexec ÙÖoc;„j²„¼ø°Aw-åÎ3Ã6Uöÿu4 lã.ÂNk©4¡õ8•DPËh>®[M†E7wk9B½.…H‰ì.³CÍS†oøZçèqO«£­ OèÖ)9ú‚îOHl~Q:ÔɾD0j‚‡— J¿¶Ñù'JZ ¶ì÷­½`ÕÖÄB 5Ô†G -t²ðbY©7:¾Í¿2úh½K@¾À,¥Š 'çó,¥É÷ÙL’~## ê¥gˆyëMŽ¢iMæç?à úi8ðZ¼‡ý+ÐÕ’~Ëó[\3©èMn‚Àû=r=_­VxÎgpÄÁ÷¢%vïbÒ¨ˆSr\ËÆ©ˆµÉ*ó£'"$¶Æ9iVˆ«ð¬2Ý;äxÕ¿£ü§`e'ËôóÄ«fz•+‹;¥æ€îZÑ)¢Ëÿ˜ýæÓ£ÿ°¸¡4š¸ÍêYŽ,Œ1i$ÂWšºVÁC™oì _±S*`6 »Ij$¨I‹Ý/}„½!9'n0%3SV´ÝP}vÖÂqôsF}¶OTÓŒ+¼œO*½6ž½¸’I¤é*\-ÌRÚ3ª°ÚNÙœ:gd¿;p/V;aZþÀ6è©¡Ü:£“s–wQu+MÉô…àåF€Ãœ•Õ¡n>s ¯­›”Éå*ŸÁHPëix€†Î¡\x6²¥u±»j^tüâTg'Dà¦i&kò|í5Üé.Æ+ E¦0E,¼€ýâì»TÙ™~Ìçô#N™ƒa½¡X®Ï–Ý.-ÁŒüEæJ:êcx¶éΖçxh¯¤N ýT#O½£¤ #k]ºFÖ¼‚…XNÇw³iD—>?„Ö·Ç×—ßÁð”ÞÌÝ5vîY3ϳ7”@)‘r¬:J?ÅlM{OÞ{s…§ªø,¯ïܽóÿ^…$2œG £›Žl’G¿E¶Û¼Šì±@½ÝD²ŒÎu°ŽÈ]÷í–ú >2ëw‘3àêYq÷ÕÖ­±³¶Ã’¯ØP[Ë0q;˜‰HzÎ -¯'.«5¾"æmÙ«úN4Kª5ËàRcÄ8jÓLÛþY·Lö Š¼ÛâÕoÔ Œï­À32p!ûqb‰1Ù6#&9èŠV _ÌZ‡­wBsIéƒ$PDdÿc‰›³dîˆG°3Õ¡,­¼E šð»~ÍëÛ× -°Dt «J{\«©]Êô/yõ~UVV$-äIfygñEŸYŽ<ºáÝ Q«J\Z¿" -¦Hõàä1ÓRzìÛUS -ÕªµÏQÛhÏM°hËYF² -(ûGØÅÒ­û`÷D áç -Ž%=ÀýÇ Œû¨t’ ¯¸Ì ˆP›’JvŠÀï… ä‘¸,q­“™:ÿ7çÀ¹JUBRºgs¹PóÆ–b,-’>·½µeÁ¥|»ÉgEz´P5¤—õ2öá~ û'ÅÚñ®ØÖ#œ÷E’ä‰5kb†¼ÃNú&YÍJ†·ïG¾7ºV8#Â3ú²Ž0gVà|¶(抉˜ âBHÞàÂ4ÊÚÒ>¶__W!ýr£©N—(ÈÌክ\,¨!Ì¥ÃaåÃÄE›Žbã÷ô$·@ÂSìè*sOˤÌPøWógæµ½4ouçnHŠÊšOq%ÍØxh6`t%b)K0ñV•Òm¬³kqèWØQºcÕM -cPJ‰]¨6›œÿïµÇ¢`³óHÄG4ê¼ÅuO§d“áœ)u_è^Ö~µŸÅF]Î/ Ç€ ½ ë4lØ{kNæõ˜åíÉðÑ»!t¸=m~æ®ìø®"v_Š±dBtúýÞ¸±ÛRSª‡ É<¹øœ¸K-Pö\kìˆô®¸ ˜ÐÄo×ý¿DCîY¥J E!ûnÞ™ÿÉ -Ch¹gˆwý‘¢}ùÅ¡fuhk~7‹\ñøhi ¢ÜH˜gÝGv&›€ Û'kÜD®²šàWÚ©îÉÄ鯤Ä^3íÃé:o"éugŒ˜ìŠløæOÈ-È`á˜MjŠãj§ëb¦íÛ×?'Ѥ”п6žûŸ8ùÐ08Tâg$Þ(R5PP½;u¥4êôÓ© ˆD]o.ì—þÆý… Ôª²×³é<è›,oIÂÞ§ò ¤GÀvO¼7Iö OƒA—TWyí¸›¬{ &š¬Šž²R ”ÄXWAN8ijŸ³ùHUÖ`h"a@Qî=µC/>±è?I¾ÈãÊ©é",F°óyeŠ5èh<5¦‹¶'gç¬éŠ+Ïï7-Ç^fëí´8Í«±lUŸ?K°’0>Ú§cDœX,y±‚Éâa=Ä'ÿ·¾K)pw£ -ˆ‹NèšlU§¨·¾Š›”€dcÚ¸ZUµf Kê#v˜G¸ø@Hw¶²sPfC@µ¥™zS J[p—éÌ -þG?Oÿ'6ÄMpxX¹†n÷o“ó¨E*ØÔ5^ ÇÇ|@~¬ßðdˆ@¼M8É€DgäÒLžKý:ñâO:ι®ÿìÞ¯¾€e§£EzEX,¸u¢8ŒáWÔ¸Õ»PŽb'¯±Ôés¸-'†3¢ÀioCÈy z,Lf¶±p@•LéñK{ï¸ÃDhþ—[,s§œîæ£Y• ŒèG:œ¾{Ò¨ØU¿™5Fd‘vJ1Ã&!Ùƒ»ÿÓØî _uºYÿ²þß´Á,©„u8´(š¹õ³íK,Í{Íf1;Ö¼Re!X¨†–Äv„jÃÁy¦pßÖ–„×¹ŽFðÙy û`'¡ÿB`XÈŽCóÝXzï`´Aã¢îÀ<Üìk5(PU m–IzGÂùJþ.Èê=¤[¯!5q°Oëˆi³·ò‘ÆCÑu,)Ÿ`–÷„q:ì×@h«]â±Ù¼º…mž’gž“!Ÿ„ßh¼Ô“#Æs–1ªÐÞºŒsÜÓî~äU5Œu] ÍÐBO8V™D”EƒbØ/¤ž -IªŸXå£Õ§„ -¢²Ç¸jfÒ&ËÌô¡NÖ^k¡?h·&ÈÒ #РH}JgC ó3•ó¼>À•ƒÇ…ÚKЬűµ—&>-áéÄÛ#ƒ°kÜ¡T‚‡ó²Úã -vÖð4-tœÃ’»u•{ÌÞ½ifMVZÂgG4ÄHb©éY¿Å\þe# ÑåÒ«ÀDüͱU•œR“ Ú—JRw¥¾‚G*“ìõÊ"yh„ ”qCÄÊií:àË“(’RÚg³gü¼¢X_žøÑ/ÿÌ÷ wR}–,.}Ch›7´\å[Þ"Û“B¶+cË»~ï̓p הؓ–l†DZâN]‡Á„ð­³ü¥ &סŽšZ˜+ªÅrçø’Øv)^º¥aÓªÛ»öyÏ¢6¹cøÀÓDøVÖF“ŠÌw$Šíyâ ¥( H:º€=.¬›1eëÜPä12)g¿¡<8üŽ Õ ³&×o]ï7»ªoµüϾ-/}tõN5ÒY#£˜,¨!Ó:oãIÙL}ªeÝëbŠq€z¶Ï‹Û5†€9ÒÆïC• -{#¬°ÈÚ| -endstream -endobj -1966 0 obj -<< -/Length1 1416 -/Length2 6052 +t²ðbY©7:¾Í¿2úh½K@¾À,¥Š 'çó,¥É÷ÙL’~## ê¥gˆyëMŽ¢iMæç?à úi8ðZ¼‡ý+ÐÕ’~Ëó[\3©èMn‚Àû=r=_­VxÎgpÄÁ÷¢%vïbÒ¨ˆSr\ËÆ©ˆµÉ*ó£'"$¶Æ9iVˆ«ð¬2Ý;äxÕ¿£ü§`e'ËôóÄ«fz•+‹;¥æ€îZÑ)¢Ëÿ˜ýæÓ£ÿ°¸¡4š¸ÍêYŽ,Œ1i$ÂWšºVÁC™oì _±S*`6 »Ij$¨I‹Ý/}„½!9'n0%3SV´ÝP}vÖÂqôsF}¶OTÓŒ+¼œO*½6ž½¸’I¤é*\-ÌRÚ3ª°ÚNÙœ:gd¿;p/V;aZþÀ6è©¡Ü:£“s–wQu+MÉô…àåF€Ãœ•Õ¡n>s ¯­›”Éå*ŸÁHPëix€†Î¡\x6²¥u±»j^tüâTg'Dà¦i&kò|í5Üé.Æ+ E¦0E,¼€ýâì»TÙ™~Ìçô#N™ƒa½¡X®Ï–Ý.-ÁŒüEæJ:êcx¶éΖçxh¯¤N ýT#O½£¤ #k]ºFÖ¼‚…XNÉá±D –Aj†ùvÕÊóìfú rÆ®–/*{âŸ,Ed),É# òƒ¶¿ñvihÓÓÍøÊ@ÂŽ áçÌ`ܲ ~êy)öΚgÙ‰Š•ïÇZln÷ñÙüÖv»´ªø×é̉˭0È2L7Š‡ÏçPkg÷¦ûøcWÇïãÏ%» Nç «|]§É¼Üûgñ¶n– ÉL¢è³>DšõC¶çj}§x›t¨7$ËÀ 1€Œìµ—ÏõTõ +"ç*v’OBdÌNÓÇ#™/Ãõ_,N¿Ð f/’.˜1fuU¤oëÄÝ«ÿøcýtÜݶ.±¹ÆãaÂÿLœEE¬Ô O´¾¼8NFôm|@*7Ȫë–q—–EïIu£eXÊŠ Hm{+¹œRZ¹¾“\ Düç¤o þºŠ6Ã;‚%GÉí÷ã‡þC`•]m%4B­Hi*Gßö§Fn‡Ó²Ý/‰Á„ðÕ.'¶…–1#F\ÚŒ£SÆÈàÅÖ†Ek˜Sò*KbÒ¯ix VA‹yÍqÊÎÜøô‹·iú,ki”X—¡¹áÕWø&ô`BAâ…:[2« ôºGÖ’—LQ2”}—O6É%ù™Ú‘ÊšX;îåäñýóa~ŠS?ø雂KoÚ»6lðúTyª6D¼È±óF‡D“ðï×¥À±±W‰Ó\g?¢""Ñv™|]íJÊz`¡“%i~üH £õ‘ Ä69Ûtö @Y¦ P˜a¾÷Ъ' ¸é› $|,ú­ŒÐúÏký¿©ß’2Ç{fÉåC,ý1ŸšiJŸ•Ý¹`zÑ$ïR·I½)ôO!À¢lG{žINÇüNù~zj"(¢:ÑË.AòØoÓnãÑߦvSÃw_#a\ ÏöQ½THÁÌ5̯EUSûOjŽ¾Dò¦1ì7£Ø³BKÿ­TÖ*u¢»ôNºEh†ZÝwïÐ…Â?’rÄ81…¶ÙÔÅ:›ËðAíðJОö»Qú¨÷[»“‘Í¡`첈ã~lÙ÷&P}Ǹ­™?p™µ?{ªêÛF§.§t@…í_ÌbÃïkîÏçqµÁí‚ÓÝ ¤¥·ü 7/# àio »jihŠ¿÷'`±“7ƒ]7DÏço·™¶VÍKN4Ÿµe˜ïÍîí©œu +Ÿrp²Úáòùnó×¼‘ñÁ!7PÑ“Ðœ€šžwe¡®ŸÇÁ(ùíB +ßþ•É ³€Õg‰»¶ì­ D¹ÂÄ +þߦ¹Ñ‡çPqË2t…•Î¾Xm7š+‹tº?a’ä­å}“iÒ/Êk±5Ýiªœð:ˆS`ƒ$‰¼¢u²”5³n2ù/Ä¿K‡ô #é&}ùî•Ø 6ªÓÑf +{a$ò6šÊŸwºAòKY¤ó0×tÒ…dšïž³Ât¸¤ åIÀZ´IÍýù+ñàcÄC­Öð'àÕ0ë@-hœ¢q—m +–÷™ÅæyšóÏ3ÒP^Ù>]¥©à†A•ÔÓ{G +ÚI7®Ðè{¹ðp™ìõrÈ œÝµóÕ(órècÐ(j|/ Gç‚ì%! ›.åÍ +.I›ãö~NžKõGd}ý ]l5Ï, j]~ "¨´CçxûÊôóCHt¥ x@¶x /ÛÄ[ìo=Î~¶‡h7 /°æOawÒ÷Ü>0xd);?uš^q#+ˆuI£Ö¦| +ĽT,ïG…f¬Ku(˜‘ÊU ’êcÉ]~n8:>NA ôn›”åíc´wŠvZ÷ý¼B3]%ãö#q×c)¸Á„!ñÿ56#ý¹D!Ø¡oC¨>ÎFb£™2§y° X˜pätl‘ÿ0;µ¸4bO¿Á˜÷³NÌœÄmo OöAgn£¨]¤µumÇíiMC]GE½ŠØýoé±nÝõÞX5ÀcI+Q] +Áqe@E¥±„•ç…+­LíNv{*Hï‘!¥‚AǯÛÈ™ð·÷å?=A%;W xBÂ6JŠ&@;éì§l ƽƒ&=ȘŠ˜eÌi±™æyΧ®ðñ‹ +.ü} ì Ê-˘ñ½xÝVJÜï{npJ¬F岨ÇuÝ:õJA/‡õ%È›šý ØVဿOÄb0¸SK¿Ê”‚ëÁ_äœõñ4C©ß–J`ÔìèÐ@©`ÅÇŽƒ%* M º©^ÃA{y뿱)æ—šØÄû’ˆ³ÿ’dô«=§UÁ'oXRxº/8ôÊ–Ý~TM> žÙ¢Lêi;g}c[Í«.òàìlýv—ÿœj˜g‡)G£©66„û[þ>kxÙÄ-´áöSð´Ã¿³ë,M®/sVðiæ«êÍúŽ‹‘ÌÒj£³jü]’wɶÿ“u/¢Ûº*º,y¸©LðC÷¡|ŽQÔø±qSA†ú™pFî~_U³—ó¯ÙÅ0øˆ«|6¹;è"å–s;1Î(—V¥Ã$ŸÅ +YGr®àÄ“çÊă‹ÄoX¬r6¡0ź3jî‚wÁ…8«6*ˆ¬Ì£ÁyáVK£­¡evëG=Œú€¢>‚N£lÖðÑNÊ +…Ñæ¬Æ£ñ+ª@ðËñòÞ²¦î–g5ƒ¿:ƒ0_D¡‚’*Ñ… açõÈ·½»7¦6¨å¾M·ÿxv.Tï­RG†»&µÚßÛ!'PM7>ˆ‡z-™ˆ~‡±¿}Iì”EH}YN8‘Ùlj"ÞS7n£˜£yN»½§£R¦á_c–êá½…›äÿý„£+˜ñ¤ÅgfQÉ›ƒì½æayî³áêø ÿâô@< zß~ÕCÍÐÞ̃ùú´D:®`12Y¹åzåÿÌ—›­ÀçK€D»|‰ænd­5Ç„à?MµÇ GÄ&¿Š”ì‡û¬³[®;ʱÍÖþ G­ŽxIÅd§‡°êXhX7 ¿`⟵ÄmR¬IŒhölüöQ¹ØÝó”:CÍ–õ‰¨æ;;EÞWrU–k&´‰8Óþ.®[Ôh% fäYBÅ‹èÕ5Q —Á¡2m'c ¢F #Êׇ žeDõ»”ÂFJ0î7Dņ¦¡¸󩔪°é\g¡W}Lj̉Xÿ€ÔíÎéwFuæè§ËÂœº¼¬³ž¡T0C"N¹^c*´<À¹’wÛ3|‹ñ™B<¢ÁaYˆ:…»¢¢ÉŇ…99tÎ8IÚEäL}w½…mxPiòV§<4àÆYzänOZ×aH8x¦m¥ùºQ¬—Í¡9EëË‚sr6íàï‰öþìTü?~N{3iÄ Lh±Å.*s®Úai9J1?Ç-B[\†SOSÞ[ Ø÷h‡ ÈŸ bïug2ã£JUlÿkí›vgÇUÚû X»=ç>CxíYEk4è Ø`½UçV©ìèˆ0N sÔö–,—KU þŒ–!ä¥ú„lÞj =òÊ”}ÝwfXƒ±"uŸ'ªÆ”BºéŠsŸµ]P{­b‚×ÿð­aáܱl7r^c¥JÝY% x~Ú@¦£„ŸtýÖ åN¿¨%½õ†=œßb½Wa[´¶pT7·°29w¦¡¦"ƒ£6œ3M‚"‹¬(žMÓÊlàúY¹Äl U½&F†ÙFãJ®å«‹™‡Çj#™È6åE¼Šú¾ù¼éÒæäMkðwæÃÑ-hÕ£lµ½%s󲘮K6‚}žu‰nYA(uƒ–nrïÎ]‚Û ŒæRø™Wù?¦8^•Áv_Ö%4ïÜÄ5…]Z÷oH0ò¢gÜ?ãÛˆ®¯½Å2f!‚¯zËq³*. Ós¨¦¿f'Üö:"¹ë5aãMóÔ“6Ù·Lcùø$¸KØtÎÌÈ…¯¶•Ar’SÞ'\sW°F¨•™6 û &™µoðíß›L‹ y¶JYÿRVRnðØZïT 8žÛ7´=ù”n4P’ëärŽZ\JÌ¢t¿7ô¦v½h¸˜P¤;?'’Á\©z¬òÀ*ÚÛÆ™ õmÕ⌟)ÿ‰õìÈØVÕlÆÅ\‹…IG/r›+sã÷íZ|Ÿâªá§ººÍ5Ë®­ÜDªàCt C ˜ @'h ézHê]/mÛ)”µzýk¿ÃtàÄ[寴ÉE {êä¥NX¥cŠ¨KyÀX—ÉþG¦R |ÿÎûØA=1WÓÈ‘EJŒÉ½<¸ºnYr[ÕTƒ ‰asÒϼ=£1Šá=‹r%SíÒ¤ÔZ6T’’Wa9¾Ý KÆK­ã®0ÅýWŸ1"mƒNÂü6d]w#t>6ç¬ïÏ‘*½^œvú´[¸S÷JÈàž†÷$ +úÝ`D/Ì>¼w³­°jêEÕ*¿*ᜠXú(H“‘ö ò¦|¬ r¤=ÌŸòéƒz¸#-1Õ_1¬kmàøG\Ëeò+Æ@T‚o3©'P’îjÞ~-cÎv~û H3Z¸êW»-Z×ëzÔ"d¼mŽ+:éäšZoǹ Oì@aØ[C¹*DôÈ3* fT,í¤1´‡ˆÑñœQÐÙĤÓjôçJ àzÔ6þh¿[Ù+J£ÍJ6L|Þö"?ŽÀ°äN|¼ûieÝÉÔsï{ïÝŒÚ +A¹ehX‘ª'v2d*U1‡¶ã{®¶¬d^ +_g\e+J`Ú¼uÜ4ã¤Éî– +þšÅˆÈÁß"þr?ÿ±¡iœ„'5´DÇ*,uƒ¯Îêžö àeòH%. $åÇfØß'ÛT¡Õìx„ ÂýÉ®3aâÐ\©<ò;3&“à.8‚±Ìoé aè­?shp/Ú`ȶB¨F–{ˆ·D1U#¡¢¦†ùžË'õ@/ÒwNwú¿öæ½O{WC°Sž2ºtP=‹'B—\n’ ‹m‹…$`ˆ¼Õ≲Xÿv¸G^¹Hf0@Ǽ„8Dz”gˆðtÛ…ôÕ×-4ŽÌ}ôýGW +Ú ýó£XêYÖ¾q¥7¾D(m‘Dç¶ ³¢‰"ögFA­uÏÁÛshB…!&«s–X¤Ó’ü}¼W·üTÅÃZÈsµæc@ŽŠÙ‰úø`9’ÈîzmÚ§ÊuËÀxç“leà :»wæšýÞn§­Êa`uú‚•ÑÃÏ©n¡ëtÕvóí87”JMS”rrdI8üMïÔƒi¯6ß?é.ôÎÃ"{O-|ä‘lòÕÊð ‘vÐ~6|¬gªÇ|P.³¾SD~nLjàyDZŠãÓÛÒ®o‹äåW™$ÃQp$³äÿ,;øøCÀ꺵nÝÆ­ó|ƶ法dÓ…ëw€ÝPÂw?WL²Iqß>\h(óÉFÚìÍS*Ñ—Qs¤ä™“Á3 +ŸÚª´ ®N«×À@I€ß;ƒi±eg¤Ü|F\•ÿ‘;;MÌZk±_çä`÷½£ý:BçY–±9 »LxqyKÕN,H`#øWè$ãV=U‘DxkU.¬Þ Ò"*4è´xSR‹è¹?õ €±ü¡§PrP#‚ä½ü”Ýý¤®>³½iª$!¨E–Ö°sG&làzÒ¿g$|oüëjÚ0ÑPˆëËsÕëffÊÄXú ô_ÏÉèÏç°»ÿ§‚ñ£^ û颤Ñþa9ïMh- téŸÏÁ¸\œZéZ³çŸ ™¯ßHzÅŒ°)XHm]cÕ&Aƒsl†¾ÛY³² x=VZ]wq½¢³ Ù¾fp™n‘v%\Ü*iš;ü2ýf<¸¢ž“ŒÙªÿÿcZ}‡Z‚;“É{,X¸]“T´ûÔ]ÐÙ4~jýr²d×.1º²…ò=ºâx%LE(‘ö“C_ÇïLлF»|zyå8ÓÅ‘;èênö—F+G'jDWTÓ2-ÚÁ–ÆhZîVÛ‡ôÐ »öcAÔA­^dÃùE¹É_{0âŸvÇÈŸ)Š‡Ë7Þèyëß΂€Lhé ³á§9‘SFúsþù¨Úàf†®³_-vAYµó}Ö1ï)GMÚoÑ\‘<¬Ÿ+Â’‘Xc?«[´ªÖl&/”ÿ’øÇ_®ï¤ÐÝiËÿºâÇœÓT%˜jéü¡Ì.2‚Õô­ÌF"C"Ç#«à¥UѸ 4?“h)DÞnkcœ7@ñÊ*üãY­ÇÄÌ7sáîùÕݹõ=°žœRµ½ï»9YçBÝ…TZÞGýàkÇJý‡`íÊž`­ \þj+@{“kF]ÆÓ Oì­ç‰`èwŸqŽrÇÁ¿1-—F8©ÿA(èpóÔžèÆ@f]‹ Å»ƒ#xq"ï»Tïã›À× +*!Ñî¿/Š°¾µ§ãñ’û½š®K¹ÝBdî$¶ɉڊêý`²ÀÅiõç‘ÉçÝjÍMz³Jü7âä]Äjœ[¹‡¼JXö¶p h¬핤>ðÉÃ’mþ@¿ž4G«ëÐ׆‡õÁ®7¤ +¥ý2‚˜3>nå–“ éjVwYˆ³»2¥8Qד%ÕÊuÍh@}y‚S€._6dž‚òw…Al‚Ÿé¥Œ?Ê¡- +4ö8ɹ ˆÁktÃ’ë:öÿ×Øý]XŒû¼|jÄ9ž' 7ˆ,¶} +öR±{>þ¸gºõ7,}ù™¬‰ÃP+Þzm5ÞÇ˺ÀHõÿ‚èU?iSÂr,+ +F½Aa´´ÿ.P’gvŸþì˜Çaósl ø¯ZBh·õ´gæÇ –Wµ Û-‹BѶ”N˜°ŠÜhú`˨[ÉW÷jßí)qˆ(µÎãÓÌý÷ÍŒþð.wYÕæ‹$ÞQ¬ÿ0±€®Ñ]Šü +endstream +endobj +2213 0 obj +<< +/Length1 1442 +/Length2 6151 /Length3 0 -/Length 7468 +/Length 7593 >> stream %!PS-AdobeFont-1.0: CMMI10 003.002 @@ -26560,7 +31667,7 @@ FontDirectory/CMMI10 known{/CMMI10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /SYFPBV+CMMI10 def +/FontName /TPELEW+CMMI10 def /FontBBox {-32 -250 1048 750 }readonly def /PaintType 0 def /FontInfo 10 dict dup begin @@ -26577,6 +31684,7 @@ FontDirectory/CMMI10 known{/CMMI10 findfont dup/UniqueID known{dup end readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for +dup 44 /arrowhookleft put dup 62 /greater put dup 60 /less put readonly def @@ -26607,18 +31715,17 @@ D jà…О¤á”Á ˜|-°cL„ô´TŸîz~ÈœÿLú†7Ô¥­G+ZÀËŸBŒ±ßŠ¿ê=¸*&ÌT7«8~‡#˜ÒàÆ¿l•Â8¶²Å½¸hJû·¦Á ¢(jÿbå*=|tŒ,cÒ“0Å0l·ï°´ÍsŒ‰‡K› @û3Õ\îSN:8¡£'gé¦ßU?Þ)äÔRb%ÑûÚoŒ·€˜è>Š6 ãÄÇ~)˜ O’ «©ÇXw5Í/"Ëá|k™¨(e$/ÞJ«¾G ¸à“ÇõÁž0Mõo#hpÑTé%Äòå-R (>¥*Îú Ò¨ìÈ25ˆh¼èïº|I.5uÁ`QP£÷Ö‚)`ñ©—QQǶé(ü÷4“5•³^§ƒÞ„‚M߯oˆ5G*`?ÍRFM¨ àÀþÕÌgãŽêpÀfÚßnþ¾5Æ1d¬¤¹‘íKÄë[L y¸q ´BÀ¶$Ã@'ˆÊ6é!Wëà -ÎúE¶ÆI¹ï6ø,ûITÁ$‡þÎy›Ù‡î“H–pi¹æÿVchž]l&Ûß¡FÃÝ:¹ÂO¤é$#ÈŠ!D:¨‹…%)aã›Mx“ýÈÇ"_L—ú?Âd*ú:‡Ö0ï2]7x(ìº4p ®P8¼*.v5žsËb<õŽ³]LQ†0ï*ø¶Kí•×+·@>e.-ÚoªÓèþC®–¡Æ€lÊïÆÑ\~R+$vÃo–D¥™Úg†ßþyùê èaˆ çDNnrè*ïÌö®z ®høƒÙø[&¦Å&‡ÀÿæÔüq.®‹Êø¿ÝÐ¥‡ÄÎ¥ëÇl”Vx3õTª ö%LbzbSzÓ´1þ¿m‚­ ÛËžpØïMc(áÐÀ»yÁJqØü^ó]{¤+ÝSŠŠ1¶Ý¶Q< ÁŠM~Ñ |[77{>º5ÔŸ5I ¥ò8ŒÄÔ”_ki®d¥‘€4ÀcÖ@NÏ!T‘m + +ÎúE¶ÆI¹ï6ø,ûITÁ$‡þÎy›Ù‡î“H–pi¹æÿVchž]l&Ûß¡FÃÝ:¹ÂO¤é$#ÈŠ!D:¨‹…%)aã›Mx“ýÈÇ"_L—ú?Âd*ú:‡Ö0ï2]7x(ìº4p ®P8¼*.v5žsËb<õŽ³]LQ†0ï*ø¶Kí•×+·@>e.-ÚoªÓèþC®–¡Æ€lÊïÆÑ\~R+$vÃo–D¥™Úg†ßþyùê èaˆ çDNnrè*ïÌö®z ®høƒÙø[&¦Å&‡ÀÿæÔüq.®‹Êø¿ÝÐ¥‡ÄÎ¥ëÇl”Vx3õTª ö%LbzbSzÓ´Hv­÷šÒѦ·Sc)]M|Yœ÷IGC¼rç õrº²ô®ÍmGý­`—½2N×q$ž +n^Ø90ªI`V<_qŽnA¤©UÉ´Ý^°å"2ض>K;ägþæB +r sb¢VØb)l 2ÅþSŠ„T£­H+PMzjÔîëzËiÍfí$ŽDáŒMkV©° Ã?ã\Ù¼Ä>Ž¬߈ƶa¹<¡ÈèEE£ÜL½¦D…¯6=t¯–EÒ÷µ€ðàÀ.…(Á%FoÜ~­´ô6€rý¦\l•ù;séñ'$Öµ…ïß>ž4¹ÿr$ï2¨DZý~*®\R ù·Yi$=Þ¡¥k‡å'¯Öå“™úý.m…6¯Â–„tY0’O$ð)ºQñ’ršÈUQ…»+¶ßªúÔ4(¨«,‡°Qb8¬—mÓ©qý±âÀÁÝHä°=‘œ,YŠ8i  ÿ‡½ª”SZ6Žöã½mØ@ˆ»`é7~â"L7\ã¹ßFdN#FþýHR´K´KHpb40 2ÂÚ~HÇKε֊úøX endstream endobj -1968 0 obj +2215 0 obj << -/Length1 1429 -/Length2 6269 +/Length1 1478 +/Length2 6403 /Length3 0 -/Length 7698 +/Length 7881 >> stream %!PS-AdobeFont-1.0: CMR10 003.002 @@ -26638,7 +31745,7 @@ FontDirectory/CMR10 known{/CMR10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /GIGFZE+CMR10 def +/FontName /SOSTRQ+CMR10 def /FontBBox {-40 -250 1009 750 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -26654,6 +31761,8 @@ FontDirectory/CMR10 known{/CMR10 findfont dup/UniqueID known{dup end readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for +dup 91 /bracketleft put +dup 93 /bracketright put dup 61 /equal put dup 40 /parenleft put dup 41 /parenright put @@ -26687,17 +31796,17 @@ f}cq ¶ØtíþþYá5`F~_›xÁƒUž|‹'X’µÇ‡„G@UÖ6I \™„çÜö>ü:ÉV_ûÉ>Õ¦9*I0}Óµ\"{Øï3UÚzÆñÉuäFœfÌÙ¤…õPߨÊ)Eˆ8‚üU¥œúw)®ðgŠËqÔ:àdj#¥¹¯ÜˆhqÇ ¥œ¼so -¹@’ KG7ã"­|QЇæªúå Zòzs|…‚}£ÄLÕ´SŠ¥:€fªÕm¼§ÂA3ïÖðS 2Ä=‘Ö[³d·Ø“üJœ\ܳ­òcìÚ€üù|Ët¾j1ÑÆ›AçrþŠ®ÀÑ®þÌJvÂ,K| ò´…Ï•VÞœ ˜ü›Q„´[ÜÞ<×nrWso}7÷œxrpn„mõãHpF%·EÛä¦ä!$š«ˆºâÎRŒJuçñíˆ:OgŠ7Ç)¢B›@OûdfzîÙG0ž%άá†u¦cs4²ï…C¢[+‡Ö ­cù½¬pÑ v¯sÛpwb»“ xr{¬"Op½ñ q!ëtXœÑ¯}ú´Ú­›%àjé:Ò_;¨´Œ¾[ ;*vU¶þ8´Cé@³e®]F‹ ÑãÛéA£EKS/Œˆzýÿ˜åKcÅ +OÞ,¨4¾•e|Ó{‹G»Êø#Rvðà©KÎýyÝ{ K1èEúŸÖýVËVw¯b´æûÙç6­«ÂªÎÖ=…»ºyeçŸÀeìÜ‘üÆ÷ uEØ>M%;,ˆš1–W}w ~hW¹ «—¦˜_èFÌâÑî9QcyƼ®E¸~ôÙf®áz\©a’:a<ÿÔ2eugg°SRŸ1ØEÜ’(=F™=Ž£ƒ;âq')^1>Çu6 +GÁ0ÿÑûòÿ¦œÿ+B°¸+›æ¸žb¥qGYÀðitâLxRIPv®Õ½·hÎÄÓÔ¥ºÒÏ‹^ž½:nwåòœÕp5¿Ð>^؉R¥Þä}ð4ü¸¿Œò™áùÞaŒgül}×^™(©t7ƒaÝÔ &ó½¶)±ZS«ˆ‘ ”¾8ÿn}@f0;­b.Û£ELì›7†%·¹›3Cª,ò@ä7¹Õï[I [1sI¬îÅXÚ<Æü[›fÜŒ¹¶Ýa¯:?yºÈç3"èu/'Ø\Ödñ ÁDÓ¹Ú=»qUü êškóκùH‚â$ y§ïQ7¼plŽ*žmhS]ADäÛäøù5Îã½üÆ«ü‹79µ endstream endobj -1970 0 obj +2217 0 obj << -/Length1 1641 -/Length2 8102 +/Length1 1688 +/Length2 8444 /Length3 0 -/Length 9743 +/Length 10132 >> stream %!PS-AdobeFont-1.0: CMSY10 003.002 @@ -26717,7 +31826,7 @@ FontDirectory/CMSY10 known{/CMSY10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /DMJGRR+CMSY10 def +/FontName /VKSUEJ+CMSY10 def /FontBBox {-29 -960 1116 775 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -26737,6 +31846,8 @@ dup 66 /B put dup 72 /H put dup 73 /I put dup 32 /arrowleft put +dup 33 /arrowright put +dup 3 /asteriskmath put dup 106 /bar put dup 107 /bardbl put dup 102 /braceleft put @@ -26764,23 +31875,29 @@ A5 ŸÇÉ[ÓLŠhŸŽY³)Øo57kßþ“#%H’\,¬xÎ|ôs¯sxí¥HžÍ†|¶;`Å vû%øÚ¯T[ícÖ]ï‚eŠ"G—Ujß«„yŠðtvxÕ:udQ‘uZy_²Td"“ª£b+çÁO†]Øl¯xà'6wòã6X²C¿DKOÁ‡f›(g@w]f¦V™ËÂ[ê~ëŒ1Fùé^A3v¬ ÇâO›vÒ¯¶2v zúåT¸ÞúË0ÙÝb#ö›[›zyÏ’Õ$e1¬ÛkÕ5—dRAì`(Å…@{=yW>¿ðˆä>ú‘¯çy@æÅÿ¹Uáú;”œ±=ÃH;Çc{–ðÛ3û0´ü£^Õ®”)Õ™-{·O¹°OÖú2R¨Â´xÈèëˆQÅôwBgБâ¼ýSý»ž›"תl›bðöz;¹±˜IyÕ\E×ÁËê‰pr¨kM ·@@Âe&À 3•˜lÃʉtS÷æÄ%í)è/pRÇøÞÙÀª"2µu ˜Ee’5ø+ÄÙ°s§Zò'±¹§õ˜àçbˆQ²ôð°SlŸjÅö zvÞ€â~%.Kz\N¡Š„çüã÷Eê,ãeV2Ò=Mo)-JcÇVà¤:ÒUÙ2Ѹ>É1ˆ×È“"¿QWº ƒOýõÑ‘4*Ozûº.c¸ójc”«Y&üßÜØØÏìê=ë7”Y™´öøÈðê,A5nÊIßFŠ•/œ?üÌŠ™á ]as.´N/þCÕ»tÜýH…G50x}eb9—S&ùœ¬é0år¾à«J§H€‘ôN­\¿?^4'îœèb©É»ÏÀ':KÒ`wpcÏT–!ÜÛ_ÿ öH˜°øŠ¹ÝŠ†‘óþ{–ÅÚ}þH¿mZto”àÞê7Ê«¾ß‘ŸQžÇ·¾Ùbi>ÜœZO¤§ „Š“dºdFÕ¿cc -$"«˜ |ðhJQvB'\³Í%Ì6Âä²3è¯J)A‚g²+TÂò‰ªÀî§ø¼†ÊrnßãLì“O™ÁN”@µEzçHr$©d(|º÷Áµ*zˆÜtiF¾ Õˆ›º&«Öþ«Œ{Ó5rT[°)C÷ =t†<¢na‹´€ï3å—³b«m ~4¢@±Š¶xký‚surGàE½_à D#D¯°%Z‘ÿúR·ƒ”sqbª®ÝÆ:sÅBÛŸö‚šB6Õ©3bÝó7:GÆÛ mšÄ*‡^Ö¬ÇW B#?-Áö`]díNgåe§É ”â=Ñäž+J#Ÿñ|zµ +²”"¥^(}tÝŒ%ê…RdøŽ Ç?É̳sûßõhÓžPaLäUÍ ºs ç™ÈÏEAª«4c[ùŒÞþ‚墶Ûë§Üm%êq³ñ ]/`3"<fùµÕ²øÕ6hó :%~üÃ>ÓàCŠ¥³Çà™ÑÜÃåú"Ê dò$L8AðŒ€x„úÝqÐ0ø-ÈELÌæ’ßuÀ&yÅ2ÁrÛ2¸È‹z–^{4µžXúQ†¾Ö­Þs5\Ë¡ #Üç~Ç"eQ|7Åt)Ÿ½qv[_ÄQÀS‚5­‚Ü-1ߦ$F;‰Ã*yøXL™@`ÓƒMuûù¥$HCÑáB’®¼ºÂ3ãáU„phÒÌåZQ¬ é;H×vQva;—ϲE–c&µ098ì”)vC¶pzílpª@¡L|Né4fÒæÍS¾õ'/V9WŠØ -ƒŠš|N´”Áü¥À'IrÉYfŽ,¶YÍ:§ò|êûA£C­fªMúw ì6:9xõ'õøŒ&KkçOªšÀCqó:[ÚŠ¦¼3(Ðr™äÖ1Åà?ü™GÑ Õo˃ÜôÓ×;‘ÖxÿN˜.uœáŠYùÄN®¦~¹“qdx ‘£N’oÖ†Ð̦îúˆÖTÃb£ÿ>Þë󡤶¬]2Àr×dëµ0D>šìµ¾‹¼2ÝØ$1u2Àkdï·²±Í¡£W8Ô æZβšI8ÜQ²ÕñÒ»î¢=)K82d 3tcÆÕÃT¶‹ÄÉN°UÞ'KMC¬*Ïû7BGrJ›!˜-2àJÃùœ™¸ÅåFØÕ*øõ• °«tÔß¾Ï\Ö˲ɑBÝðHé~¹Ò¥ÏVØÒä#½“Œ&Ôû|Ob°˜ðÆ2Zìšà$ç̘ÞRLµø0ü)²²´ªG|'é.äðšùÑu‚âc©ß=w¼;é—¯9`ëVÛ Þ_>}G²ï´>i„ã»:æm",&e1à\jÀ'ܹ ¥-Â*]Á™l€Ë ŸIcBÁœ0G)‘³¡¸Jõ!ŸDùžgr€AÊXÔj×dýCÂ۹uÀB˜„-Ç&n,Ñ @¼ô¥‰ë-Y†µ½,Yê“–ø ü8°¡á¼U>ç -¢wÏ–ÜZ÷ÅBû¡ìÍšÁ«e/s@Ê–v`œ6K¿J F-㌱БÑä åfä·‰× -äME7vÑIÉ—„Ý9KúCP^Ï9Ú³:`õO˦Šq» ³ «‡l›1p¸…ù*gs4¡ýPצ‹>±ÉO›ÿßÒI*ôXkW±ôžÃ}™bËó`ŸÔý^ Ò˜‘ø·rؽٷBŽ‰6IA(q -¢£²È1=tåR´&×6‘𨑶~ͤúTªáQ¬PŽ!Øõüì¿Éeîìj kâîŒ>á}ÙËyFê.ßâ–Û2"`µÐ‡˜ÕHÄQ«Ê÷Šæ©æ™QT–ÁBÔ„¥¼Äê-¶î’×9ðf4¸±è×¹.ôÌC—AH¬+ûûîiuc=Ý!%Éæ²è‚ÖGÈx&ßـ߿㠻@à·ÏŽïmÏH:Ýᳯp‹zÂAsÉLG† -˜R£ïÞ,fúM#›†|¹ÿÕ’–k×–>‰yé1¨#póÅC^ÛÕr=éxaK_7mG¾È¿{·Ç=y¢¤­Ë㼟ªD‹ÄG§ãiSVWÞ4{,i"È“]üeÉr×4ŸxSôHó ô™ Ó…‹ßD…¡f‚†øHº|eÝ/Ó3Nýƒ?g™¹id“CžYd£ê€Ÿêš—²’†éMš5ä O8à*­_è÷0Ú,ØÙwæÛ=õÍ#®ö†šJ²†ÿ˜ÀÝqV+Çhô÷Eª÷ÐéBŠÍy;ˉð˜í}àº-Ä%i¿Øˆ9­ÒD¶A,’N­ô %E‹oËŒw»\”ó7Pm̵z¶²íµf8ËÙÂÕÀ¤ûÈ"ô"¹ÈHg»!ýŸ¸zbª¢ËŸ_d[ KXÉ;Ú -¨Ÿ§Õ‹HD¨ÞÔmÅCT>—,Ù§Ùò“ûS?þ..KΡ˜¤¡åˆ+ù¶HšÁ“Žä3›ßÁ -ŒdýgD§æ§`(C5¹¯Ž‡€Ÿ¶W·jokÙ˜*„Èóç)º9Âë J ±XÈÊHLAÒ±, ñéûŽÅ¢l’+Ô³Aø÷Íêʦ—8‚‡|éV¨%r½ÁÆ¢¶wéSVЃìÏö{'J×™N7ÔJ£éM6¦¼~4ªÕTLìœDX¤ÄKäÕÅr÷6çHÕÁ9¶o›=åùØ«ºL$5ç^«ow¡ùiØÝÊ?© o)¤QÅÑ|Ž*‹Ïç¶Å·ìît…±":JíÛ ÜSôc6Zsº~éã øCEÝñ¯ZF Ø'W±³ZÝÍ#¨KãEÄ%¨ÙÐw^ jÔ!|è»öà W*Œm9sC©¦=[Uõl¡œÁÌV W;Ú:Ë$º×†/z{œŽïj4ºJ}üì7Õø²:]ôD‚¯µî’ÀÕgVyUßRo–'É)ƒ ¼ ’úÿ¾,;-]÷jŠœ5ǯëÚn»@ñˆÞqŒ©Ó½[ªxì…ŽÛG[Gßv®ÖQ³XËÉ`s}«“¾¥Å^LgÎ’áæՆݯÈ|X{`£‰Ä(+?wåNwÀ>y>M€ZvA  BÀ’&çaVd9»¾¹ÐÌ -^(w-Âè ÏìÔ +$"«˜ |ðhJQvB'\³Í%Ì6Âä²3è¯J)A‚g²+TÂò‰ªÀî§ø¼†ÊrnßãLì“O™ÁN”@µEzçHr$©d(|º÷Áµ*zˆÜtiF¾ Õˆ›º&«Öþ«Œ{Ó5rT[°)C÷ =t†<¢na‹´€ï3å—³b«m ~WåÒ¯Ò-ÿ¢-ã*_>Góe‡È¾ºJµö!De:ñ‘1˜´‹ 4Åæ*ørkd›«Tý^ï\ä”(^”å·|a›ÊuØÝj“íË•û‘ÇNø×–GÁÔß² ‚Óh$ èÇ™¼Op—H.'çHã •Æ"&+t +Õd[øÍZìÿÏ—ŒþEßÁ1Éøktê±èŸ/È”’Ë7ï\xßY +hÊ^IJvh>üã\/µ?1ªX½ ¶Ç½þnyWlÕ.ÀëŽýAñka«i»³Yè4!@¬ž.‰Sø§Eg_»ç’Ž;Ûâb$;ˆ ‡ëÂótO¯EN,×@ß}Ë€¡À°ù¢g~Ÿ ÕŒª ÿìÓf,ÚtäyÁÓô–¼yËV½Ðc) ¤ðävŠM‘/ã¶"{i è~?pÕ·uŠCùMãâ ÙD=Þϵ0íŽâsñίŸSÖ΄V"å'»„¬†B &ïä·!©U,)úÛö8®RîÞ’¼JRD0’õ˜]¨<±M£r|±98CÙ׫NnŸ3Œ®=Cøl·âÛ ¬¹?×NÛ©Y$£õçÌO6­¾f¥;çHÄ2¶h,°èbþ=&qfžN¢é]ŒÜŽbt¸íÑ)Œª~±ƒ2¾3ªØÈFG?‹ì¼ˆç¯[¡PöÝ«_²Nò2@´kÕ +ç=‘ûC:•æ¨ÞÛ‡uVÝÂ%Oÿ^øðí"¢….œ-ٛ冕ÅK =°‡¹G,üû[{uscˆœ–8`ÏÏ=ýŸ,†aÒ… 4` ÆÅ ú>,®Ò¶Ö!?îòy)ÃéFë°ËåðÜîá„ 6áò.sv¼¿×W«:iAaQ±Ãà²$gŒ§õp¬$~ÓɹPKx 8IöJ«cxîÈœ6ò‡Í¯MæHkbŒTþ':ËX¨ £¤ IÖ°(ý¥t#˜+Ëd‚Ô¶dó‘áÏS¿¨¸ëçTOöh†¯Ã¬tÙRŠ<%¢(•Ð^Ì0ƒ.9u¯»U€˜÷ÒŠo4jµѾ9fyï)œeYgSÆ¡’#åKçÅÚ)«šàAªÆ¾‰kW2«Ìê1#áÚE~Üì›—‡‘€"@q´¡Ñí ¼3ýÍõ•Ý|aTõçÑ ²õçýÜ=iï¬Péi~‡ô¶!zå ¿I·&;çXÃÔe”eg¥8:¨ÞæKäÓª¦Då‘K[h¾$Ñ)Zd‘7Ï’–íÁmQ¸õF*TÕ™<ƒÀ•çãDMI ÷%’ê¬w¨}Ö姞!‰zQl#9Éñkãt²_©¤#åJèôn}ƒcy6PoÀ Óq_eÝŒñU¶]6-Q +±ÄÕÎr™}—G~D"ØiŸfì=¤öóåºÄzиƒ“ÒhΚ¿(±0ÐóøEÕ‘çë17MæÔã1Zͬå +è-óÔÓ“<q­œžOvÉ™/$«ú—¶#9æuÀÙ®ywÓ ”}w|ê&ŽÏÓúÇ{×?M + –±Ï~x3Ï L'-@Zºkv•hcîåQ Ñßž†JˆˆQbù<¿ +w‘­q¤žÈ$vßKGš¹è¨„¶ù4Ý[ ß´g´<ôçæáŸìa’éÒø4Ð}×D+VWFº].ÈÙÌY3‚8:Þ­*£VtE§;Ôa‚£Æzv âäÂŽwâþê•žúT¬¶ÕƒU·ðÊT³Çd¹,lW³Þêa|‹h6*¡úšÿíA»TÒfÎ’*”•*òþð!#ÞÔY‡Ñ¬¶ù} ™©爳ܒ@ Ë\ÖÛæ mÏÈ©õÜüÍ—Ðh\‚§ÅÃ0jÉïzôÑdA7y,¾Y‘‰õŽM5?̶ÀÒí„´™¼Äðï•òa õ1@Ñê5~#uñÔÖ ð'µœYá! +5 +¡Gí´ž ¶ò +êN¨ /?|$Ŧm§®µØ<£¾$(Ò [ÎýxxGµÊt„Që¨WâÖ˳uÜu±_.3YôNò&ÍŽ”£3ôëãÜ¥OÛL'ÔŒP??赨ðÕT!Ã¥¿ÿQž2ð¾B#7<.*zÿš¢¹ÎWSá®6zÂçjÃQyºtˆH h´t«p1ÓÇ-Y£¤`ÑÊßòaÕóñ9·³Ö<‡còÊ¸í±¦½lXôüT"t:ój˜Y|… +ùÉ,»oêã%þ¥Ý|å.' õ@qÞ8Š‹¥;ÁÏ;Oƒ˜"!L’LÄpëÚ42ð»ûÓºKG||mHƒvÚ®Vka›WŒ£Ìè{9 ¦nRgÏëaZ87͈ºŒ,ÒKØçÚÐê¯`˜vÕ+ѯÊZôvÈø–˜Å^(!ñ|øÛ¢ç~†}Mr(îéÊÙîY|ÙÅ~v,§ ƒO5í³¸¤·«ÓØ€@ïÝŒzªé ?|1òá?º§êŠRâT*kqMèÜ+pª^ñ\¿u PÍoCàŠ½zž¿$Åt@ò[àB-ïE,ì9g"Ù#£.º±:N¦ßú=¸Bœ*×ÝjyqëÀl6Þ]{j?Ê"AsŸNŒÜ9 +p¿3úù¶ô4}|0IÇ•çÜ›‰Ó±ŠõͯHöûê¥=ÍÝY©É§ÿÂ`?ö.¼5‡=¯ØP?Ÿ9<¯N›¥;Içq×ÈW²s1nÙ„&mèÛÁLÖ§_&ÿßczÙqâ9lÁÉ£Øg^[Uz(ß@ °ï™üî]øbžéõ½|ŠÝæx‰LB=öÇÙ”6Wé€ÀîûÍ*Gçt.P/6œn¼ª±*Ç|›Dï¼ëäÑ'+T%^–Hho'«‰ÝÄE¬ÁÖZÑü©xøT*!i¦…ÍZ±w2l°f7ƒ*VÄÈš8Ÿîá+ÌzìïÀÿxŸâ67HÿÆ0 +bŠ|Ü<dF.ÊÂÃOü¶½£æ[–ßÈÝv°ó¿ö±O¨hgÃy‡Qì®—»å¶ ‘«h›0}¦ª»½˜Üõ,ïåè:Õ(Ñ–~¨–À–’6uü‹mXóR·VÃ<‡­Ð³JVíM\Fw%T6VÍvýÑQnéϨÿ¦:Ï M¤\Ú…Uá-:φ57M}ÎÞ!ãZ‘D ês‰ ç {$Qû0Ý„në®êÓ Ê(ãN3°Þ[( ½¶?™ÓŒoÜî9T½”ÒuæÒ«6nÀu÷ðD!qáíZ”ÞU›·ÍÜT”!pÚ¹VØžïó‡/…È\ýh^‡:ñ'.AµEõ¾S‹6P*BËY‹LŸ endstream endobj -1972 0 obj +2219 0 obj << -/Length1 2519 -/Length2 17749 +/Length1 2589 +/Length2 18452 /Length3 0 -/Length 20268 +/Length 21041 >> stream %!PS-AdobeFont-1.0: CMTT10 003.002 @@ -26800,7 +31917,7 @@ FontDirectory/CMTT10 known{/CMTT10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /UFPYIQ+CMTT10 def +/FontName /QGKXNM+CMTT10 def /FontBBox {-4 -233 537 696 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -26822,7 +31939,9 @@ dup 67 /C put dup 68 /D put dup 69 /E put dup 70 /F put +dup 72 /H put dup 73 /I put +dup 74 /J put dup 75 /K put dup 76 /L put dup 77 /M put @@ -26833,8 +31952,11 @@ dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put +dup 86 /V put dup 87 /W put +dup 88 /X put dup 89 /Y put +dup 90 /Z put dup 97 /a put dup 38 /ampersand put dup 126 /asciitilde put @@ -26922,54 +32044,52 @@ Q ˆ\mãY.n² ±š\B2‚áúûE†¯ðOð<´_½˜ø”&Ó6Š ÇCöN&Y‘Ï2·1h I¾’Ü-Ĭ±ÝDœõ§œ¸&q(ã7q{é?º:9jà„ÝA‡Ná+?>í~Âx­¾rÞlt(+—–ƒÅšÐ ”ˆ3ZÌ2 ÖJ¶ÍÆïv8ÌHÔšx‚ÔQš>w$ô“<èZn:¤Cb¡•à„ËȨx]ªøg¢¢EϸÁ_ןž©Ím„Ä(célšø0/¸µÜü*•âMžÐ|‚PF–ñ¨bK—K•1O¬–Õ.åµbyÔmñ2&Ö.@ µ_S»¨0-hB#Ã+½ýÛ¡f2œ™°=žœ$É‹¢Þ–KŸ_Æb`,¸Y–[å„©´l„(hPú\|ë] ˆöÄÀ*øcN¤À3kéìÓžÇrzþ-¤µ¾\ÿWÜöh« Xa:-i~O#kyö®Gд•¨ JØÖ@;hÉÅ-‚ /ý‹F!?*Eô~öh2zû3 -µ…*Oô3£a½Dôu°Y[ò‘ns± ì]CÝ)uVŒJI6U5º0í[ц§îډϙñSÙ}›oÁxv¯²I‘lñG)Í«Ý¥WÆÓúz=UXÅíŠYÎk¹×_žÿ’6]ôäQò^bw8‰SŽægý!rõì¢ÅQRP?B¹}’} -U#Œ*÷í@6,­=£´.7ÔØMß–ö_ƒ¸^bVVÅuÚÆãšC‡¡Š©B¥J9œä¸q'CvÌ•ô$‡ž>?üp¾>ów¿žÂû¯÷\ Šs$röHêv,¡!1 ±ŠŽ$oQðRT¹'µ'½Üñ`WN7ºÛ§˜×8I$ÔLy èÈÚ¶Ûú¨ëTêIbSYâÝÅUÔh»ãô4÷¿‡Kk‰ˆ Ðiº³Æö®/]²"–gxMÑQïXÔpwâpXù4Å͉‡[ -üý|6Æ-’L›0Q?Èœ¦»Mn–<‡=xJKŸ«ŒÆpCð‹Öƒ§ï³=wdÒ‡SÆRU^ÒúG¶PtöÛofÕ48¥—IÓÊXL7ÍŽ™‹þfVãH -;‡½ÎUÄ:G25ËY‘u¼ón”ÓÞG³¢}?æÚƦh=$z~]YIÑGy4!¦WJ Ó6éì™HþzWöƒ°%œl{Ë»79èvSäØ5[ìÆè¿pWˆ+INV§‹Ü¹ €¤ƒ›Ú^”á¼0T*ìa±º”Ÿ³ìàKWÄ7_:þM²Ê&,~3x?O UYnÓÚ-¶Öé^Ÿ¥ Ê$m¹ß‚¿€ÎÓ}7`/ëA"ì„Ø wY¹9¶¤ÄÅ>‡ÀC‰QRëo¢}«ùÉį€ðeNÃ_#ñ l_GAŠíµ Û¨š… úœD…¤Ñš+Q6ôù}²((&¦Ëö–U_]zýWÓûLˆ{úìªv4XW[H¢ÐÜA~m[Ï’«Ë?òkõШ™Èw¡÷õ·¾‹òÎj—µû;]‹Ve§òˆFÕ0ÿ5ˆáqº*kpú–tùCçìz½KÍ/È÷pè ºíÏ q’¶1þ#ºƒ2•kØhA$yå´4>Ò[hÏÿYã8 'ÿ¼ã2Ï•®>×Ó·‹"Ü2(p³!æÊMºÈ˜ë@4ú9'M.2‰¯£˜t:+0 I.û -ï©Ú&5ßq¼|úWÓøF†©¥¼‰+¯º…}sFaØCžTü¢‡ù” ž’xÔÌþo\£›{´`¤—íq=[f0‘‹cb%@ê¾NV!nñô“à·Ç½:jN-{ hî!ÖÀ-E5a?fÆÈ’XåáwJþQZÙaésTþ£:ï9ûõûŠ‹žµ.5Ë`"dè˜p´òŸ3R‹ÇßƵp~HLÀ«ÀõRhëŸvÈ6bQÚ´ö§?v*^¸ù.¸¥«…ÆxþA²Â¾–q‡?øÁ,1ç‹QùùÂóe­Smla·Šëãq‘ÕÈ*Ø¡õD`–쉃cµú•p¡~|- öÃ1}[ªÂ)þeÝë›ëµiI"•4^nÒٞݽ‡ÂŽå ãúSü§}D‘+eÞ³í¶Ü'¨Í`ßòó©Ô8=Ä>‰uL¼é”AÔ ¡:ÙMC4 ~üüZ#×ÓjWi ”æHXê‹nDКnnÑcàÈ;q†aN0ÙÙ¿Û«H†¥MY&4!® -ÕÊù§:’ÕÛïqg¬í4ž E*u"jd›ní {ïD?wCp8ÎØ"'(î Sq¯%IÔò2œÑ7c½þÍøjY€rPaHÇ°±''+½¤¸?gò®Bo< -FˆžúìÐäÔŽÉ—t~V¶aCqÝá˜ö*UOÑ;VLñtv€æHô±‘¾ˆÀ€¾ªŸ[nqðsÑÖi¿x¬8óê1@ûZ¡ÎËwS9.ÀÙ‰¡(á(Jo=Aƒ¢o㬦5 Àó£˜Äÿ´_câlã(©–‹Æh$÷¯ÅUqB'„"VL¡„ϱ‰+¢ZÀ ”ZC šáký¥“òœëô¸ÿÓè*£Ä f2AÌ3°$¶÷––yk& ´4C¤ÓëfÞ×Ù©@2!à]JX»;!ÛM•÷²,§Ý–yêÞÌÁ¿Øâg¼UQw1ÕMÝÉÌw¶ŸŽš#Sþß)vŠ™Ì?=l½úyZŽ -¨W¡!`™+Ô%!³@îUê>W¢©Ê1A7í¦8 –,ïü¬ð÷|âðƒ[œ§í%Û-íX7‘|*ç>åÛ³êØùå Mræ°öÈÖÃ3ÅF?ÀZ>ÃïÚ-ì“xñí76àÜ(Áë=Úµ'N>ü/® -3ÖqŠLT>ß6—Þ\]´ª³é1TÃØ g‰ÂZ—P$æs^îVUkb? ¤=ì€úš"ÀΦ™pü“”^b~ŽÉ¸ÛmwÖMu¸Ldͯâ§Îú:' áƒ5­Þ•+T¡“È6Æoñ;fI­W¼šúu€b=âJ`H‘;á—z–ît€žhþ Ç‹„É,.ñ†èl†‘6¶nÜ$y¦W¥J?"øš&j^d9yÌD¥£TŸ$ß´k\˜fŒ -nÝùGý¶¡&CO—V$ì -ÿªžQÅ”2QF{77`¼W§/o(âàã ¿ÌcfUTLaœðûqÊx!1 .ÀÛ SQ“Uô^È’³å‹d*ÊÝ(u} E&$ºíHŒRÈH[¦ -š̸ì+Û”Ô]Š–=Áï󈮾â¬ü«3d?äéá“„7×&Ÿp/jö¬Ýõ°ÜùÖÄ~– LQ4Å7C“Ïë!¾(^Ð- 8í<(âóSN7¼©Ð1që·`èÂ[ÿ à©Ïo,¸ÜþA -9¿%ãb.RUÙŽÓ„¾MúkéÑÓŠ¾16•FPoÃc|0Kò:E°W†óòÈ`•”ÖA@Âlºõå‹ /ê‡0;¸0B•ýB‚IçÀC©`v0\bÛr,öÁóIÁ›vÅð€”’h¥ð•Ýœ•äé‚žùê%U¾Yؘz󣟜3w[Ë驪Ï?%s}ôï”SËbÇù­Ø\W‡²,°OÚJ'cÍ'ýÞ©šƒú¼Aý -§Y³‡5*Òì”Ô„t·v”fè†MQÅZ”Ë:Hï%Ý:ÐÁx)F -^¸·1Fk5%´&ª›••Ü˜ÇDX“µÒR½x{ù9å‰çÅá&¿^X2»P¨÷·hí¯÷cm„l%r?cõYV9íêBÊ”áÊdû¸×yËÔ r¦ËÎ ̯ýû7¾ —Â××XB¥Io«g™Þpí}"µ— ®ÍÈD⎻ÀÄ„1^ä+ȼÂe*åw; Æ¿§ƒ¡ÑsªüºçÀÁù›'Òæ#fúò‹@5>#Aû 9 -D+îÄÿžÞ|×ZñxjIo»ÞtÇå+ªMƒVBŒ7‡ý®‡N1j©P?ð”NV^èú»Dé³ã,ï~ú#¨ã<;:y~æÌ6¾â™-©aŽ¼ÞœÏ¢Bˆ¤œ¢;¨…}öèݶ?ä1î•Ö®$‰2äx')Üøm{»¶ƒ_ÍíûùDT&›%—óCÔž÷”neÇÐïöBÂÂOÆ7Ú3µ¦wðå^rg{©±³QÎç³Ðç=SD®Ùå0T=DQñBoÊæWí›"Í--ˆg¿ù·™í\ê Å ƒ4 £È»¿Å‡@¦röT«—ÄÁø[4ÿs™7_-yÍYŒ+&eäe¡ýÆ3“ôuØÈ|TfІÈÒ+%‹#mC t<¦™m¨ªš)Œß‘SuôF?Emc Cµ> jœ¡˜£ªuê1Eõj·£*)º¾Ö ɧ뗂::°Ë…oN¥ÅðÌL_±+bmõÈsÐ8iŸŒ¶yjOë»_þ¼_r(wæqË¥º”tÌh.#¾rPí2ù§^¹t5Ù­s”¶9{“¦ÿžŽ±¿ðA<1Ól~ë;̹ À¸<`ë¼ýI:Âìb"èê³LKâiÑ_Úïy²–_@³²»œ¾Å–^$n Ç@m;š®k2ÏØGY/Üh|°gås{\u½m[*¢¦å=œÛ[´:â…é ¡cc©¨<G'´ úÛ5¢KèO±§m kkí„sYdðsU±9ŠH Ø“’’Ÿ‡?D÷Ño¥Ç“S:;10ÿø™ÖxÑ(n¸Uœ¨úÔUƒY«S¹É${¢]”ÎõÅxàWó?_Àò»mrl>%J¢D‰€IßÖ3ˆáÝÔŠÓ9@¦‚½±TŸ[U_4O ¼ B»ÅWOÜæ, ³‹ÙCüé¹äÝ=\MÜÅ ðâï:"5±k­©ôö·'úšõãÁ"éñ/ÑÓW5sWxsvZ_fLQ Wbr¤“²(£ñ#¡¸‰Ñ%’õ úåRÒ” -+û ¶‰évß'ÒwHÀ$ƒQ)¼—4dHç¶ÊïöR4C_±|‘×έ1ðÁ®W¹à?“)ì3}iWô¹¡FÓY@:‚©k„“Ä{_ÏSƒéŽä{C–A«XzïÍ+Ö<ÿ„àÌ$ÀæLù1^émZûm¸ ¸‰frÒ0¨ o{Ã:>¾±¢Î`LáŽPøñZˆðH»z‰cˆ”$¤OƒÃ5Á¯ä¼BW“?˜o¥Dø‹3)è0 ¾"WVñ˜‡ƒÚ¥çå§oÝ«ý÷¿"Ùð®xnð4â•‹&Ðñêו/€d;Îꯆf3Õßó·­¥™A¹í¹ÚnI§H·¦O•¥‰‰K°ê áP1šŠ…-Ë2J[p™]îO¯µõüwÉR¨HŒ,]§¹‡2“* €ú>”î*{ ”¶Š­‡w¦zBç2=QN (;û"Á.·;Qð^?¹Q7… -‘í&5k¾¶š:_NŽqëqr4›óðŽ;0D 7eŒÒÁÒÕ¨*gjLÏ»éÁ#€%šfó0Z$1¦;!N" ‘¢sL‚úpÉñ&¡¬›_ƒ{P¡¼FUÕ`=´¶ù$üØ §Óñ±mé¾&Þ‰×ãåüVtmL÷ã©®¼VBÓåiÛOêÎ?†é¢[ZPFMv'ÿ›€ÚVZÎñœÑ•Ö¥™ÙÕÍaàIà,$i*øn|ÈE+N0VˆWük—„]âtwhp°Á´òç–íd[Žíš§hpSô–4¦ø_zW†ÏŽšaØÀZ{\·¯¢ž%tNVÊ€“Qüx¬W°I*i?ß­µÁýßÛÛL5W;“Ùò~6oVeÁ€Ù¡gdãcY†ùˆ¶—Úß`¥ý«)^]7ò·üžN$JCBÂu:³¥><ؾP(á-–«œqƒk2º àQ†Ñ‹ÎFzi…RòaN^¶#jŽ…äÄÔ9aÆ\ÿ,ÙªÍójåè#Zºÿ7Nø¥á„;¾7‘ó´Sm§G;œ©tËO¯Û¼ñ@§j.½R[ ³n4S6WXõ4æá„U7Û_€mW¹°žáм_óŒðÕtk¢Çt²Q0p‚‘+äöÔA·BÕhŽÑ÷ËL›âaÆÆïñ1ÌÏ4P}ÎnkÓ!ù³Ó´ZZåÊ‘sTÍ{Xž…¹Öâ…f†ê\ òÆȺm4;½D=8ôî¸~¢K0çúfîv±Gù{”ˆQrC½ôbF¥ÁýZî4Ùìs&¨ú@L%Åp…ÒlÜSä`&;87ëz>6櫧›T÷)F/[ŒÑÊÐP°Tv£(ë§ þ`¹¨¢49g@Åjö©òßñŠu>M&AØ«ùðqK1ŒÛ7¾f‰B 4#ïÓ4÷°ª€íÖ‚g±‘uÖgðs×=·#É'Â&WÛ^2,Ÿjªb$³¬ë› ùõ„v‹L¼ó”ÇÿêÐb’!¬-½]ù -ÏÅt¯œÞ^ÊKöuBvp ƒ`fácݪõô\9!¾{„þL ëIƒ14A€ð ÿ -6q!ܾö5Ï\Ž,éŠkÿš­ÿ~mWùGk±}!Ó¡¥ƒn-}6 Ð.z""XCð<ŸÔ±¹‹HÉq¦Ô4ÍÛ‰ÚÍ¿’ÿœÇñùñ:æ&d°±‚øUýŸ;æ .‰wv•%a¿Ø Å­¯»Š"ýÆ©‘ù”¿Êoᕹ‰ÝûÝ­Ò’R0âƒ{\Qf)òF<Žt7@²p™j c.UPXñuÏÓuMe/–¤žáÒÛ‡âlä²½üø½2cïòPÊcíb!&&˜¹?z;ËZt®°ëŒªíÅèØáMÜ„÷ø/UO$>‘«ªhÜãŠuλ<ïÊù‰óà}a¶”§š6Éø‚-‡Ž‚b£{O²TÄV¬à+í噿äDMõ_ܯ¡Ó݈àm’ € ëþÙö ‰¨ÑLlýÁòÕª™ [0/l|µDà…Ìò(¹ìáœï¥AtZ’ÑVÓÔgÁJüM‡ˆjk·7ñõøÚ˜¹Ñ=¹ÚKÈÛ®çÙ˜øl‚žš²­Î‹E‹=;aÇ}+£jX;ûô*´…Š0aÐ¥ââÂÄÃß~¾YDŠ ›x³A…S¼¸ÑÝò‰‡»4áúkôï­Ø¬K$¨Ý„â -4ðbú“»é].ÝÏ7ãÆc„H–Wòíbe =ÅíÁ ™1}û~£âgb÷œ·Ö5Bãcn"-£¨ymƒíðA“Ù¼¤j³óp@¯M¤Ü`ü/[ÃTÊIBŒæ’üøÇæ½Ì5x²áòr•‚:–;¥p -¡ËÄVmž?ÝöÀÆ^å-wЖÁ_SŒ‰#IÛé.œ›Ý½Î‘ð‡ ˆ™¦øZνÒë°Š·JOÄ­Ê8,¦ž„ö~Æ‚Gò˨µjŠË×£þ¨Eç”dvà1ûœŒ!Oè20̨ޥIÂ+L¾NŸxê‡:È ÍÝ Sübó‰8žk…Ä9g4&mXQíG lÅF+÷ÜýdÅ“C¡CÎVL÷Iv ƒ.rÝ÷¾ê/1d—Vv—¡‘£‚æaDp sòà®R_ ON1ãsb;¬ æýÛ7}N¹fÃÐ4J¦Dkg‚DÝGaaÆ1üù¦ˆµ$DY£ ÍÖè–Re»ËׂL.½Mýìsê{´eHÅ^g>(+1~žþÈ÷½(¿»­Ñ¾P¯¦êP†Ùpœmäw ã®ù¸(*ÏÅW9XÝmÅknr•æI9‰òõ 0º”mš»¦ÏLtû€×:ý=PSÌß³tʆæ/CÜÃ#1ÓÓÄ: -$äÕ›Ä(ŽÀ‹EW” ¬Äƒ\^ó½|`__ú*™±h¹j¬ \©¬/f©;&$'™h—,5gÁ4 GïzçiVWS#ó9ñhP¿a Y¬ «ìÍÈ.…¤ˆÆŽ –#eÂh¢öPqd('>öš±æ,}ä/h=NÔj°›æÿs­¶]Ÿ¸€DŽ¶Ð¶¨h–†p¹ðízk@hO°.䶃7-¹‘TAÐá&´:Å´fÌá ¯¹5‘ŸTþJÜÍ.îÏ1ôFB”ÌgdD¤$‡Ðt,ƒSÊŒ'®>«``ó|¨ß˜ŠÞÄ(ì /Æ~=wpsç¿å“¶qd«EiNüíŒg@å)5_¬Û 4Ú¥ÄN õ `ó*)†1ÐdÉvþ™}SÓ©TŒ·O!ßZ8¢ï €¸È¤OPý¼'~êSté~%>™¥§:Ó¥=wè}¼ S9l„VùñZÐâžå”¶HËQ,¼©Šˆš«·Aœ ©å^tú¡/ìÀÓn‰wsµN 9ûaÞIŸÿ™‚¢ ®Œg¢~Ú£$9pµì¡›ö 6QCñ—­J”Aá…ÓÉ¿ -ìÍëγÃE¾6=“?ÚÛÚ%È!U ƒhø;‹)`“t¤q|â§(Ë=M)Y»®FŸi¥ºü´ñ»¹ i{^™È[Å£N|.$@Ú9\P‹}¼š/¬¸DâEû°T é šŠ7&y˜0x›éÌÕÕ×½ p~D`[ -u€/3Ðå9:¡º”ü|z—€(i­ÅI"ÄQZ…—‚á•X"E¸bæÅlr¸†=qX¤HüP*¿UNäÇTA¯b¾YÃF[, -yæ ¬€Ê?0-š7oßPÿ°ßÞLá Á8º‘Ûh¥}ažÚ›ÏðÚUü¥£s¢"Ñ8FW_´â‰LÞ(®Mù1®ý4ãe6D¤:¡žÞþ]h¡tó,èò @`ˆ¶úœ'Î2³f‡‹¸Ç?²ùvn½º—¡Á6–½@P^˜Å@mÙßÂnê¦ôÖX$µ8I-ûKŽ·A6B–4ävg`“ØGlÇÚÛÚ½g;ÿcû€«¿<­k–'ÂQb#U, ŽÈI2ÆWg] õùé$ýâ½’DKa°‘lX:úZ¸àþ8yÔ¯þ„˜«–È-îKÔ$s؈B˜Ðks¡ù¯ è[ ß™xŽÙ4ðÝå%Ÿlyé LåLD -@¦Ÿ¬&†/ú–@3†%׶9•£éÇì˜çûÙ Eÿ‚Œr’—ߨÆßKÍoÁU4 kó¿JŇ ’h-xh¯Õ?ZB1g&눬‰áÌ]‡óXÇØd0LÊÉ>v¿rËŽ¢¥É@b¾¦â¹].`xD$âYÇ_kµ,KÄaȶUŸó¯•¨Î[á£Áúb2¤¥©·ñjÁVUà£'b±Úûv~^zÃ|óqÆÓÿŸõeò½A)!Ü3= ÎGe{!£}ÔB÷J@vA_<6]LÓÂHÚ ^¹¥ÝJŽ¼Bø‡S[û`ƒ§4I<˜ÍÜQ×Ã;-ã7àh k h>6F”·OClå±nÄ/‚Ï3‡„è–Ó{Û~ßš®9lûìû\*o„ÃÇ‘ õ#Q¤© -õ`÷}ßy$êˆñ7Ÿ±Ã7Êì1Ž™­Ë׉H‚*)§0„ü8;¨ºñyé€è -HÓ/ƒþIÜ,%ÚY);Ÿøõ=ÇGñOOž#€p1);»‘ -Ø£®O_LJ»%²¥ûêg’åvKãý­t…Šfõ¬Ýj‡}殩Ã;TΠ½4§ÀËö~eP÷Æz«üÈ­} ; ¥È“j¬R£Z£¡)÷'¾uZÑœÅ3a€ÝövÚ¼˜c8'éãâV·*£á=‚³o”Ðõ&l)*ÿ­b¶S_ó“ÔaxçR·Ò~Á€D‰ñ•8ÔmX¹GEnßöëÄüv¾ -—ÎT‡¸ê[9Š4m®X’;³é!xŒ¨»cŸ^«}›Š™°·.™ -*/'%¨9„s…kÓËogž‚û»€DA(.†/9 %:Ø(P¼árÈP2‰(õå[Š5é°þ Òl¯<r2 –yË'/«¹Š=ä<Ä81Õ{ïƒWA‰À­Ãµ)žTBÌãç}ÒÎŽLÝ_cN?¯ku‘pcG<Èúbvt¾£`¦ ¤Ÿez_Ê5½§³?Û§í;)áa·SÛjÎÆý¢G_,“åÅx([v €’P’¶iw|ÒiaIRRâݳR©Xu%iò¿cFHˆGT€?.=TyØ"+$Wög½ŽB\ÇH™YP~æ¶!UØO–¥qä<ÄÂãhiI$È!ø½~h_® “ºÁs†Ó`r !ãÏQKÉ­cÿ4µºòáÐãvÙx?ο'0Øý4×t@£‡DXÎJÍ›‚ãüÊ‚uKyÞQZUTïêŸO‡Í¶Âf¾{k(ànL™2Íøàž‹ËŠµëqéÚô\ùc°T.ë TÂOP€ÓEfT×ò÷g>9•ÌÅí9÷%š;Y|ÔÉ>ÓQ¯\!ðc¨èæëSý®H1ë1ú4çÜ00 ö!·=ê$R'ø¦U?®p~†™m¡§ñ±'´v‹¦‡Ñ( ? ]þÛÄѽHwp Y¯¦ÁGƒÅüÜ謮ëâ²Óôü -endstream -endobj -1974 0 obj +µ…*Oô3£a½Dôu°Y[ò‘ns± ì]CÝ)uVŒJI6U5º0í[ц§îډϙñSÙ}›oÁxv¯²I‘lñG)Í«Ý¥WÆÓúz=UXÅíŠYÎk¹×_žÿ’6]ôäQò^bw8‰SŽægý!r+ól~ÌÆg«ß®¢…šŠºa\æ”.ڂˬÍs °¡Ô;3øzN¥‹oöý»PªJõʤÁRãçooªä +æ©õ ÷BeÅŒÔCD ŠïI2ñÜ·­£$ꤢþt#ÿCë¤W“¦õ1ÈÝ÷L 3ÝñƒÂa\éÀÛ•(çC²CÌÅþÏ@ +Hõ¾±¢ŠìÜüŒ©˜¨jlZ%Ú$è·*G¾Li#*‡ƒ5 +xÀ*‚z´~ šrà·Ð9ËëCˆt}'h–g½*æ6 ¹LÑì§ÿÑWM[”çXôüŒRÿ]MŸC‹–ÿ¡?È+kÿE!ÈK±µ,6T£ùùí ð姓j°a-C¥ç=‰òémô!Nz™û,ÿßYQÙS½’ËŒ’'ûènF4M1Ä…ÜøC„—sã¿U2jUxüE-v/Ô}%Â3A†ìÃ-ÿYd,0N ^(®B·P»SiÜÌ9" ä@çÔCúE͵íÎŽ:Óûß‹¡üìo|Ø‚®ŒÍ sóªlR-ð¸ùÀç~5ñÕ»ÁVåiQÚ¶”ÇxÔP«ú úÿ9Æ@/ CáNöóZÐfh!­›QëvK+Zœ–éxSV%ÍyßIæpûô$À{XØÄÿtûÛ¾Z¢Ò*Zâ×ɸÌ0¦Àžü9B«+ù¦œahÁ×ót9c)ÓÉWž‰¦œz§²þÃ/¦Å^i€lYˆo»gÅkæÅ(oÄQœ”4ÈVã~2>e°S¢ê¹=Ÿ°àô®ójHò'Q±í¾é‡ðßêà™ß~ýNÚÃuÏŒ¶ÿÌèü‚øQ¿Q+ÿ¦žÅXÒ H ÇÖàL7¦æ¿oú>Ã}¶'’ †¹Ÿô¶>.¾~çQº1+Å%ïcÏ»uÝ öݽ‹­WíQ5C³Èf?Jk!7Ø#ûå“÷—'èþÿ‡®NÊÓ\Nɹo­4,¦·rè©Ë®uñ>\Ê‹«EN"ñÈ€´2„Сëûäöä¸6IÖÛFà!YÞO#ÿæ/Ì’ ôÏ2wƒ0e œîÙ°ˆÓ +pE4S¨àg¥GT·¬œ"`šýshÅ´š¥bש¿/“0;m‚ˆTtѶ¶÷õPDA;Ú PÙ'<·)U”aŒ”˜w Ðf]XÈsUßSÂ&ݦ„]×gÝ 5{¦!¥òoÊã÷mÐô¼ÕT+tgì„ÉÄÊO®<¦¼=œ{¯ÁÝ=ù»1Qµp­zú/ä=ï>•»±»J2¹/Äٳ [(·¹Öôx/ m¦D:¹¿Ûá_EèZl‹“í.ṁ×ÀòõÁæTÏ+`*õAAšI‡&^yŽÞÜlCñ„gl1#;~0-bFI\ºÃ%¹Oî³Ir{¨Ê­qe/ýn›æZVDIœ2ÝÒÀGml$а Òà“}pl2p#cu„^æëSíK”dø=ã®&æ÷wðçíÞ€ÔÔ6_<ËÏ3m‹¾«š[ûû¢ß!þÒá`AÒA¤òØ@Ð÷DMÝ£¶iVj³©j=PMи½N/â#/Aã?ûÊë¨ ý͘Õ[^zM¶è¯ ·Å¼0n +Ÿ:ùÞ)ÓÉí+c;Æó¨f×N|G·º¢Ó‘èQØÏÞœ×Q3!e.«Ž#¢7%ˆàä:k(½më÷_@W ”:Zºîò˜R˜C¶¨v|h[ +[åó->#´íjá×md}\.âY²Ó„60“k´¬ÑfÈ%^h7÷ú?>Ãda÷XwL€÷¸‹ër$ÒãÄÓè¤w3ã]üú#2E2¡«hùñŇ–R) ñšú–V±%‡Êdz§"š6rGzšwv€WŒåJñˆ¡ú/7V° Hšíb1óõ{ãñ~¨™Èw¡÷õ·¾‹òÎj—µ‡ÉšæÝÌ$×ÛÊtèÆ@õÒª_‰3WÙ§u*yÙ3›ÑÉ@ž-9•jà‘u„§S>É$›ª§ƒPf ªÙsê>­ë5Ÿ¾g¼Q¿vª·´W\aaéK™üâyL,ãò‡É]f‹£6®>â’×íQj%[êýOê§Õòq©â\ÛÞ"ðyID»B‡fjªgÇ-¦¨(Y&tÖ³÷¯C‹€Üí’`Šušw­ð°&±§Gjl¢4ǧ٘7W”vN5NnmRänZ[QRä:¦ó»8ÛØ;õÐrS+üMàV\B2 &»JïàíÐM¬¾¶Ð=­÷¡R½^aùm!Þ­™@Ð3ÌSÛÑ¡ÜŸ9Íî`<&³FÑs‡h˜¶¥pREãVWÜÊøQÇ÷Ží­æ]± ÿÓŃÆp½H›H¿ç½14H×þ²•y¡Ãeœç/¥}z¥` Ìé÷)•èo ÜV$r§|Wt¾´ä‚ HÃÃyá«:ì†b>]ô1–#W/ŠµŒs‘•ü-ÿ~*Ÿ&¾YèÄêKÿc º¡Î¢U–˜¶”Œï^¶áÁûÀøèEBz5‹8Wð°5lçl‰T=.Hà«úÓ”òûhèqw“û•yÙzœ1îY»*²ÍpŠWù«PøÙ,¹^VJð¬Ù·hÉ{_ ?@‡ªšùÈ™N!ƒü•4—ü“žá.3ÇéXr%'9*¨û‘‚žAÉ"Jß²@].˜oXc I ï1úÅHè¶ý?õóY·öÑ,H9¡ÚYb/ Á²½l£åéƒRÊúy´çcЧ#¹îÑ$eŽe«1;]âkè²Í›™™ñý§W9URSß+@s +UÅ{d™úæh—P¿nü;àö·uñ1Dò„ìÕ*þ+˜¬‰lD¶¥³œ¹7ä[($íש¦j—?„ä¤*(àvñ÷hoÂÇÑÞ¢[Ò†®Ÿ¾/¹€­°aQÿ1U—l®³*;B$NøxQV§È*JÌˈ„®cæ“íËÏxF0²JÜ{ÑOz±²tí×תàñGØÒüºò|éd¸åX̹8*fÆE”üLþ¡=A91ãéÛüÌÜ[æ‹_óµ®ÎUõî«#˜JAû@aKgì¬}•ˆ'ZEóóu·C¼XÁÃ×®Â% ßô•áI–8pdM +âã÷9{ÄÒ{jpAû¯½o.Çx]ùü“|±$ýx},-tJ¡| ±Èô|ýÚ—åé¿Sµ0ûKý­°“¤¡§GUêY"™?DdøurO u€»O@¼ú$>Ñ·]Ú\H>{*X`À"gˬÀDw¦÷ÞµV€zÌÑë€ZˆçÔKqHì‘ž&À\Ö +”º}<†ñ{ì‡Yãp1‹³ÔZ4Þ–JÇÓÕ…UwÞ.ótž—0Š0sÃÇ޴uŒá$Ñcn×-uà£CQh@xˆ™â`ýÂßrÆeÊâ“H‡P؉Þñìñ1‘¶åa9ÎòÆQuq]Äé˜<°,íBò>z–PÒ €SuQŠÓ'n/3LÙV-&'o–†ºl va¼y\ä¾}É8Ñ,¿¤…ü2BÆUSÒ» ÐÙ8n>û1H”a°½Wê¥}$ý/$,èáú%žÞ´¡fÍë*¿‡s·Š©Î9¥TXJ[ž\¯ô¾â +ûøU¯¾©®í&„¬a‚Ök3i!$¿pÈ*‘ÏgñìèL‚„ ­ä—Öð/þpwìòœŽØ¯@Vó 3…=,TåÚºámc´÷ÛðÞ‹ÙÞNÖß.Ÿ4øŽ$ŽVV¥—7¬´6¢÷nø3î:þS$Õëò‡f/ +Á3Y“&0h8ñà2…æN©—xj®°ËcëSþ$G‰¤ÓcÒªY‡.yÆiG€C¯¸?–ÞÍ^¥”bþÀ^¶ñvsx| Û>­tÄï·õÛi1ñb¿„×^ÓÖÂ>ïù¡<«Ý¢l¬&‹Ûå8@é¿Éž\¹]á•ãÞ±¾húf}™Š%DHË E”AP¥•Jä•÷ÓéبÍû©™€Éƒ›Õ8÷tß²üÆÊQ =¨HEc°¦ΡкŸ(ɇÐT`ËðÖP2šÉÿ¥Ã8Iñÿ^.{¥WhõmKX%B´¾U©Ôä¸wÀ¥Á¢oá/ˆmòÖz{·#?)ÌV  s+Ò7yÁ +gh$àKq|¤ÂÙ¤c¦;nB\°Utû­âUÑI&¹¼š‹¯f}^`„°1Çšœç#ÌÑ~cA®,Ò0pW¸g1Ó=D¹C2;+o'}¢g9^uÀ*à•Ó×€‡ümµJ®ÐK‘Ýz;ËJÊd–­*:=>éjE‹À§ã0ˆ^¬sÉÂDëH—»š²DG2̆ÿô¹x‘ÚõâJ#O:»åé'J28ˠǺÝXÚI¾ té[1ÆÛ?WÇö‹<>/ýi¢Œ…$ÐÆà aäÄsÇc-Š8–ÑâúÊæ‡&jlR~>ün@ŒíízB÷ú9‚ÚÃaÛ÷R¢¼Ðp¾ïXT ^ÜÔJp4©Ø,¿9ç5Þ[~ʦÀä.·3gÊú‡_°²~`þ1ÒNq”5» +åe·ïC;#gŸ+¢†’ é”Ô<ÞóIK¤‘º19ˆÖÿZH¯ý:2Þ˘>DRrÄîaV òð{àÃL®*茌ȱ‰,rÄ °µpÀòX_ÞX1<ù’¯lå]ZZ µ¤ígt°oXgZ:òzk£÷8ì ŸÂ5šGƒñm±Ÿ^_Ú5F!Úð…X \OJCNU¿ÝÞöU”‹Þ艊âHbC›¸½;ï.ø X„:†V^ñ°a0ŠfÿY¯õ1Oš@N3%=FU`»¦.žÔ¶Âªz€Ï5¼À¸û©wZäyävbÈ#EÙ2k+gd]{èyüಠ+&‘»;IGº{ÜÞ‹ÇTbÂWþC›6QrÓ‰Åÿî1fZË ÷ +ÏQJ¡ü¼Û**û~]´û­ǶÂk6tõO‹+U›þÅлr3l92èéDòÈåWöl…£‹Uh5ˆçÖGÑ™iÆdÄihËzœÞ¥°‹¶Ãºû˜ÞÝE‘ôH÷‡AL\Þ[ñ/ küp᧼Œ¿Kc”úîðÑB ”>S²vvÔŠÌÕßìùšœ 9\£ ´`Ô-–Nú¯‹LéùdÌZOïTs†*£Jg'r31 8++g³,@döïr‹÷3ïmzRVô¯ãÚ6 +E‘‹N¥»Ò«”ƒsŸ53ñ>„çë.ÇFª!Ž!`h:þ›fbEêãü¢ በ+ÝI_+qÖ§9)jzuYkéëË­¤ùO+3¢N~;›_¿Ÿ/CÿT; "Y&'cAŽšð¹ +3üù>ü1=ê¿°€²á0Ñ¿ê|mÞ¯³ gY"æTjÍõΫÚ}YÉ0ÁyÆö=²|ää~Q(š ÝëŒTî<þÅôœ± ¢7ߌ.°ÿR†ÝÀo³ƒ>`F«áy©çœ?—dŽ·y=íñ‹ò +ðÉ%Ñ“/Ùš3îN9šKU[áôÞ¡¡}ߢŸ +ƒ…* zÆfÝ| ­4þ«µäÒœjšˆ_¥ðîîù´«B—¼¦ còÌ_&ÂDÛûLÛ,Äå†îœ@´„Zþ˜Ì—ú wg0?öºêy«‰]·ÑÒ×fêMº +ˆ'y<úb"{4pИ¸w÷ÙPè©-¤ýlû¢¨ÒÓJŸó›­øÆ›Åìg)#KÏöˆ/Ìõú€ÏïîPÖ]Åg×`Ø#)hËóÚ3þÙu4Ïí¢$y'`¥·+w´H°ç[(µU åñÐ$j½WÆÒ:hØáçìC`—„ƒ×}%•IˆB½‡úî •úßXÝò ¡ã)vX~Àûgÿ¥œI8/Ô©œ¥T¸Íëœ{IêÂð;’OMÌnÏ…6”{ÄAUv¶ó°$jGª`þQ3œR¸/ô§mi­—Ï%ÉTCS_×½~è‰Ï ÑBSʜƓz)©eBÙûvX— H}ºV_yE0­¡ Ý3wl>êjNN[QËÀ›ˆå¹VHåjLù–€ß4Á“ZuQæae8Mƒéiâ>Pÿ„D„MX —’~®Òâ¼ôv4ÐXAß Ç… +U<|>tU¼”­ó ²Òò®Ö³?SNˆ™½‚Žúl´=ÐõQœ»¯ùá~dMÑļãmhÙ˜dŠªœ_'u»x”Y¨b %ih»ø©¬î’Áõ˜5‹®V‰16^Í¿Ý#‹ƒCú½IžŸ¨ÜÊV%K>§PL¢TR¢éÐxUkݶ¦ÑÁ}\Ï7=+/ÙÛ£ŒÂC¨Áº¦ðJ”Vdà<Îÿu³™GÎÙ~^NPóÎ…Ñ`4ð•«9³öÝîð/Y“T~ÆgEAÏÔ5}º%¢üµˆ9XqèL¶îül¦Ì'Kk¶xCºà¬6BRšx‡ +Þ—?4 \PlL‘³Âü” V—ö,@JÖv]0¡ðì„RÎèœéßvz١ﬠS§8÷'èS%ó-g¤‹jfqÀœ‚$UÁÖ…5 –´DÈ­|5»ñÖ­nÏâ¢ÉHznÕæi;i©­®ó ›ˆê¬Â×Añ‡ª¾ÞÊWœ«¶+Jó:ô†0Æ„A£U9p¤öl¢‹#Ô4vv]1jâ8z»;’Ð~Ÿ‚ ¤̹¦Nóœ“2ôŠßg÷$)™¾Db¾|3$9ÔsÇ‚`¶ZaG®îh¤B3ˆû[Ã?н¨^¸£@\²[7)Ü@”€ÛR$Ö[¿bˆ]Nnc,ÇÎOH¬ñ#‡,ªqZØ°YP7=:cèÒ4ÉnݤõLÔ3xU_™Ô?ü=VJÿ©Í¦&é±Öø²ÎØ$÷Nr\K·øðÓ8?(”ª›'ôˆ¯ò»æÏÊ¥ÃJ¾®ÙFê'ŽæÀ8µ§Wåo@÷¾Æ¾î—™s›^p¾t÷!ÎÖ°œj°x¸í·ÖéÂœ.eÂZÍÎ.]Ã' ÍWvØED  -–Ê|cƒ±§‹‘^¾^iêÛŠ8lÈ⯷Ksú6{7Ñáéã´ð$€œ…Œ¾6D¸"‚NªK +¸2’ @GsŒ/´ª,óµBéOÏ_•—ëù,³•§ë2;îÚ~”Gêê°#ÆR““×ßÕÖ—=}gƒ¡!Äê™ØµKééï=¦f¬®ôïh–nëÄôýp˜Xô_Äà87»ª³%úd?¼Fü¬«*šîˆ|«Vv£8SGZêeª¯5MΠa$úd!»BúTJïZõ58wÔ¥Êâ6ê>©5XjG3Í Š®bù€~=¼F¾Rik0ÒúÁÑþHa¿ð½C[¸x0*yè;gáŹF)²2Å´-‡Ámr•u%öÀr›»ôÙ²«A4`"­}w;Q +Òj7e²ØáµÆ»ÈãKr ;Z‹i¬ÙøYp€F=Pðg)³K ã|K¤J¿š%Ï4*…^b̈‰ ŸEd‡·fæÅŸ¿ýàZ ‘6&±!Ñ¢7À~ZuŒÎwxЃWuñcØe¯å¨¤o=¸Ï<…e̸¼ˆÓS CXØ¿ò)/)<Âm›'õ[3ýT%߇ž3Yú÷Š¿èV€’}yóÿ=Ò}#YvÞN›°ElCc»¿-Ÿ²^¥7m#ƃi‡q?Ö2Øþ» Ø,™/ÁÓç1xHcuà·$éEy±0ô4âéj3xÃbZ{Ÿrï‡O]èœ÷Ô­YEÝg¦]éZ3yq7{µEüñÔÌ)cçDLnóZ/Žè›Tà¾û˜a4.IÖÍÙ«ŒòäÔ³nZä*/6ìÜM ð f¶â& ºŒùïnŒ²,*ü*ìÛÈOŸ¡“|[…¶·/î2ý÷|äÜ §Þ_ ?d.†×m½â/ÜчÅ!И¾®"ºP¨F×'/ØûBÇ ! #MËœ8Ùv\ÁŸ¾a ‘¹ß •¬8ê>]bE0ò¿ò,$«·:—©ÁîÛ¥(c™•ß"¨GU¹fí9©CéUÛpBø^ )]tdùÞÖ¬îFHñ£TrÐ4§Vc'"ZºPÆÝ"Éá¹ãX»­†n¾dïÿœæŠn»æ0ð%Du‰ó‰šZdñ4Á"”©™¢úE`«ú¨P²cãÈ ²~þó¡F1%…´úÖ·‡áâÑZ/ wr,uk¤iøÊë2ÃÌCÇmA¸d²^aÔ ‡¡öRÚÿ¿[:@nˆRêSZÕx•D©ŒPá‹vÅànRÕr +‘²‚îWfâÚwÊHC‹×µ>[âBÉô.¶ßµw0?où¨CË‘àÔžELB8òS?žFDtsÚ¿éTnæ+×½éEe¶¤‹ÊÎôJÆC.9œâÐHØ`~¸à$îÊØ;`¡¼|‘-G,é'湿zÐhGoMå´¡á‚ù= yˆ°©·þ˧ÖsÑ;¶VÏ9Ý¡P±íæIªo„Yü8ú}† 1o(EùØÉÁœÇ' +ã5hIõ]3 Ã^å'•qLÚ›ºõÐa‹  ÕwcñÃ}$ˆ7ÿ@.wEžöË¢Qëu—¥Ü ®Õ‹µ¨§Dd~ÆÙ8Ó§ñwmöÀu4åáwè­!ö,Õ¶sJ bÿnV˜Ž_ÊouÿQ>gÝ»bÀä!õ¥ƒ?µÍà¥äÔœáoâ¼~]´êÅl•%Ôd! Ø~iÕºø( Ž>¾m þNF3.­gû2 ’»@Å ä³Ió½º:ø´Á»DzN_êðTš`X©^^ôëág¬Mc›rƒp—•F»|ù„7¢²ßsÂT¢|Óÿ±å ©zHƪ Â1ăá¯00¢¹„qCÞ£·#S–˜åÆ@•ã‰ô`¬êí-¾è‰ú«3ÕŸÄñš¿Á…î°G1?ùßpýÒ+Xý•ö Ézfñi\ä%Æ#[Íl+udzä$ŠkþS)èý-”fÛ©|øˆ] OÔTÙÓJîNd̲¬ÊèØï*Ë<Ý‚Œ*`v×7«zá¡ÁŒŒùéY šuøؤõ?ÂÄò×%·š:$vÐØ×8KòPdµ W>úÔkT‚{ïYC³È= +§†,„eÄƺ.7š0q{sÅÑSKy„“e½WÜ Ç­²1Ó|Q0^—µf=Kúè©÷@=À}&l™/|óôô¶· Qfs5»FÌò8ó<ôÉyì¿ë{2xrÝ7tP´¯É^gžîúl¥d.…AÜ3xé# ]›Ù9ÒMÁƒyÞdŽÙ$sšØå7|Ⱦòtדּ逼:³(3{GÖŠ«’”±ätTäÁ†x¸1kæ;ÉX×Rà N%Ü’|z²: àDßgèô0ã=!…̲ÊŠ¾€5þ_¶€_:4‚|è*T=öþjyr2û›ÒIûÒÃeú%F².Ž÷‘OÞŸ§Što]UrÖÒT?6ö‹Ò>3ʦ•Ì/TCX¡U½+òÄ ú0tÜ’qÎÃ.Ö ÌgnÑIJiNd?sΔA±Í’À`« ¶"/åYa'e^Й%(æ„TZBø›X®×ÍÜ$m*5p°ÑBëNÛ·¤GÚL™f¶F •cÀ™Ø¬Ê9çÙܽ¾Æ/ÜüÞã¹ì8þoŸ´WO°ï¯zkxÇ!xAÓ‡¯o;kPþÆ7_ŒA;ä°% ä1:™º,ó÷¥¿›cz Çò±‡¿” +¾¦ad!¼ôoƒEZ]9z㹄öƒ‡”(âf½Âï’KG±Ž•ŽQ0¶ tIÅv`®60_÷Ä‹8•Ùxã1PÐ÷‹¤wq¾61%Ø÷ö.§}/idåYvK?Þ‹@_c´¹ÓNÑ/®áW§š^îT2Ó¥œÝÅçJú&zÏô2¸ÿ›IÂúè@jNG×±)à:Ž[Þ…ðM¾½suïT…öþ»ƒªëØFRË”Þßæ'–-VŒèK^¾ä×*6Ä¿7aTb­¹Þ¦™”tjýÃ…¹þWV\Ò8vœž¢ÇNÌ[9 +…õ°u•R¼âèWjcÞp«›0¿0 Xq-Ñß•¢yVPRŽñC7XIt†‡“EªõZ£«‰fν˜TFH_Q5YØ°NdâfçèOSPßibØå]Z”€¿¢j¸€‘L‚%èJM‰áïØD³X‖ÜÚ–Æò!ã;î‹r)#sG§ÚwŸ6äÞ⹑ۯº +Õc6Eó‡@›h‡N +&¯cü1ÓWNûêHÞèZìÏŽ,Í9kßO“ê(/c/o6ÍaÕPžßewcW­²•Íˆîù8ÌzâÁ +Q%“? +ËŽN¹h¬|ÇóWò+@Ü4–yŽקËJ,n +ê.¥§FùíÉyÓrÖ›Øt:£iµ°IÁ›£uLÞÇ*­nüOC|e!êU0Âñ@ö7ÈZx§îl«¤Ö¢kÚÈäh†•¶›˜!-´'¢`RcAl•sCmía +endstream +endobj +2221 0 obj << /Length1 1494 /Length2 2555 @@ -27033,12 +32153,12 @@ currentfile eexec &Ì¡‰ç·Y5<1B îdlaà;À«9^¿'l„Hn,³ÛúºW˜÷ƒ{ÐÊY³µ¬Ô[œvñ endstream endobj -1976 0 obj +2223 0 obj << -/Length1 2068 -/Length2 12106 +/Length1 2299 +/Length2 13894 /Length3 0 -/Length 14174 +/Length 16193 >> stream %!PS-AdobeFont-1.0: CMTT9 003.002 @@ -27058,7 +32178,7 @@ FontDirectory/CMTT9 known{/CMTT9 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /BQXTWV+CMTT9 def +/FontName /RQJPKO+CMTT9 def /FontBBox {-6 -233 542 698 }readonly def /PaintType 0 def /FontInfo 9 dict dup begin @@ -27080,10 +32200,12 @@ dup 73 /I put dup 75 /K put dup 78 /N put dup 80 /P put +dup 83 /S put dup 84 /T put dup 89 /Y put dup 97 /a put dup 38 /ampersand put +dup 42 /asterisk put dup 98 /b put dup 99 /c put dup 58 /colon put @@ -27092,24 +32214,34 @@ dup 100 /d put dup 101 /e put dup 61 /equal put dup 102 /f put +dup 52 /four put dup 103 /g put dup 62 /greater put dup 104 /h put +dup 45 /hyphen put dup 105 /i put +dup 106 /j put dup 107 /k put dup 108 /l put dup 60 /less put dup 109 /m put dup 110 /n put +dup 57 /nine put dup 111 /o put +dup 49 /one put dup 112 /p put dup 40 /parenleft put dup 41 /parenright put +dup 37 /percent put dup 46 /period put +dup 43 /plus put dup 113 /q put +dup 13 /quotesingle put dup 114 /r put dup 115 /s put dup 59 /semicolon put +dup 54 /six put +dup 47 /slash put dup 116 /t put dup 50 /two put dup 117 /u put @@ -27119,6 +32251,7 @@ dup 119 /w put dup 120 /x put dup 121 /y put dup 122 /z put +dup 48 /zero put readonly def currentdict end currentfile eexec @@ -27152,23 +32285,43 @@ T ÖÜ­“a’èŒîjfnÌUØÛ™ŒÃCž›¹a£t·/ŒÜçÄæ½òvÛÜ×ßÚPqE$NP^_Ù…‚dùèv±¸ö§"TŸC[ þ¿|¨½¦wÒ1|7ô—B%Óÿ¶èf‚£W­"dÀá6QIa½÷Ò†ÅE.á<¾SiœùÁ—Õ™)Õ+µž Ì%ÐÅY`; —Š®5!‡KoLS»—NÕ’öhPhäDðÚ¶R‘™-¾„®¹Sòéæ‹„ç»? ò¦åÕ¤S8ûŠ!ª—,ÊÏ „«vÏN²[¼ `É"Baæ’Â5ÖÂ#ŽdwºéW%Yl‰¾~:/´‚>®Z¨;eÔFÁ{N# {Òí}Ê?7yøg ˜hS·…¢ à`tZ¦" *èQC&KsulÊÔJ·š|6×iÞ$rˆEÖFXggξß&vÊÈH_I«¦)³ÇÝLRá*Ž±·QÂgÉX¯ušV,Ð’"ª%_©L΄¤"4vܹ¥ÂÙ©v€Ã»ZÇõ! åÕÌîûßY@øLŠ‰{Ìvߺ‘+‹ÎN*Y“Ë+qi]øî/^L)㊴Š’myß1=óˆ‡K굎 ™¯~{iµt{ÒnÚ?'†‡2²d,µä½›ÎÐrˆît‘ß³É÷”ö-¼FÜ…­¦Š¤ˆFl L y‰fÆæF?Mè€_¢‚Ý*aÁ–Œ¹ØÄÁ5ÕºäÓ½•ÂŒ!»¬‰£d§ 2‹ãΛiú¨w/"Ï;qfWò"Ž—^®ã0$go„ï¯aWó¡vêm.üÍ¢BÛÀ/c@ž{:¿d"€Aú·vÏø]P}Cð*&kxÊ;ô¡k€6܆ÒxɃ%ºÕ ^X2ÑLŒg]¡’øó…ÌVm¯·­¦Mhx÷äŸP&ï(Ð{Ÿæò‚gû¢Ð^Ö£Èý ĤԒË|í±º(ìdkÈå¿ì/¡ÎܺH€Ô”qÏJ'w¬Bî£t®½§€—žgOÝ`O± ¥ßcÔô¹“ ®\œK*ó/˜Íû´®¸™¼g²9Zö<0väB‘·ôWK=—äQ‘]a9<ÏÀV<-­Meb^|Þ€£q£À"Ä^ÏÌ×&l |ésêgiÿÑQ=¶ÀéõrÓå m[Ò:rL(Ó¸ó…„aXÂœšÃIlµ«eŠøÓý€wƽÑ?Z®Nm‚CU†´úØ ü‘Ö•;l€Àc;^=§ª÷¼/ÊE#ëà…q$SpÕ TÀð /.qKAEÍ æ6–ñ­¦ËêÃ`ÍQDAϪÐ'Ön¦6O¨ýGÒã$áDÍ’ô[Q»ç8s X¼¸9ÓÏ'¬qQ@µÀ{ÄŸ§ðn™ù઴œÎ¶Þ×Z'v³4쓪ӕDºúîÙŽ»-GºîÑáªJÀ8šÊáðY+:âóâA<ÇôF&úÜÑ3'Ë]˶qȧ–·¸òÁ·¤äÕù 7׬ÆLà”ÿ O|òg¥dÑ–ÓÏè6hç0ÿ£Ú%ÎPL•ðƒSÂñañ8ž R>?ü6æq¯Â…$¤‘ø‹Qø8sZx5í¤TÒxo†$3ÂãØ^5ê t·§áhœÚ¾Ä›èÒ~Uƒ¥f”vRÊq{oíY“—00¿4¶W2ºö‡n­D׀Ƹz¾ã§[›žlÔÜ´ûÑ;qÛ£ßóâf…}å²°¤$ñçÄ0¸}Œ¢PgR¸ %Bîu0³Óí”Þ»i[(wí4À’(G§¥"O#õÅ-_ª‹jó5E]WÊÍhíˆÒd]ý#ñÓå‰_óúȋ˘†ƒÿ"ƒ€Œ·´äëõ,ªª Üí…ÑbTlyu~‰ôCT…C¸þî•Qªyb9›6›jÍQÉÍ$1†Šq¿JƒzÕ®Š°e4¦ëÝvþ†ýõµÈ§ƒG%Õ[)”è¤}›º@ìh],l…u2 ‘—"t%åãE=3jI3Ï?èÌÕ·8‚›G[Ñsv›ä³hÊvì!RÔf1~ÂãYA Q Œ77Œ½ú²5tÇc¸Ó.,ë {ºr]‹,ú’“õxôæ›wÈ€„¢X.ð}Ãuç= Q°-‰–U>s/Þ‘˜/;v†e)Q/ñÀ?§üê…ùÎuRT -«ç>¾€dýkWÇþB¯øë¾&¢’ÃèÚR|S }Ob¹Öƒ˜mLjH'ØéhsO’gëw¡7ÂAeìÉ Ì>M«¡©w§ú–_bó'(>+¶ú˜:ù¢hÀÅ*þÛYÛ&Ã쌢côômØXá݆ŠÇ@åˆ>ÓêÖŒoÕ*‰ˆQ F¤µÒC†'L ¤Ò쪕Ën2¶s)|LêÊ"Ëóë–²ÀöTE®°=1IÄ(j臗œ/ ªLƒè@„Ò„ÙVŒ §Ïù$½'‰IV¬­–Ž‰É_ø:ÐÚf;z8˜®cë½üÕ›Xè^§@±nÓp60×Ü8׊…iÒ1”4®Áæ±Á…ýÎçZä…×µö>8¤ÿKò]•ÐAÖ¸û?HOEW/²‹Q´æû5NÒPŸÄo;&[ -±)D*Þ Œž\ÚÕÖ¶d@aTÁ˜À` £Ùk"¶}EÏi4$¥žK‹ÖüÈx…‰»Ì8}g 0Ϝǩië}ªüÔ ÃÅ^½': ÞÇ_óyZn*Š/!¦‹{0*öÒ™üA`èäEÃT:‹ñL|Ì̬Êÿp\cL7ω²¡ûÚêU/’â³ýè•yE¨¾jC9ŽÌHAòèw4ßÓìòÁ? Ø—þß•¨“øtD,wgþD¼ÄÇ?æ1ÏÇM ÅVSÏá¤ûGáºx)°¨Í÷:­Q†É­o^ÕHžåÎÇVƒ*0Jó:ÝlŽ+h³FLÛ*+Pd‡™Uc‰ÕÎ(½öoü A•[ÅÀÇùj0p$8æˆMFFi+’+ßy•Jq‡Í ÙÆIÎ[-¼þ¤—i­¾¶m@M9õZ€?û7•QE¼­•œü» -VÕHÖ_3™ÍoZ¬NbüIGÿ´ÁëÊ'ûñ›ÁB©²g ˆn{ ~‘]Æ/JÒL kY©/Ô½˜3‡¢DH´®×y¿+äS@ø4}¶'±ysv·žMÄT±m7@ÎJ†rŠ ux¾xŽEË/Ùª.©E3綴Í=öNCÙG¶QvÊ«‚Ôb8ª¼ÍvwC®GË™£ Ý$ví™ÈŒJ› Sª*ƒØÒ«CTenBüœ*þݯzƲÝS_I‡Ñp’%PÝfp²J_>^x~ Ã8¤RÑ‡EZáTåyª¯„>ê±°&E-ŒlÁ:wc€XDƒp¹ -/Û’¦ìô‡ÕÂN}ÛÖ°õ²qÑ:ÈUC-þ5‰È¡Rº®§J©¯û€ù•†èf2¨ÊÑ>~É;vM:!ýý*â5¬Vcjpp]Ÿ13‚¡SšþƒìŒ]fyÖþ:Òà KÆ@ï—¤‡˜’æµSèÕq›D¾’^Éõt*! îY¸˜ÀÅ/lÀðïù¹WÜ"IXxNy=Ï+ÛfîE´¨PzõD‚ÿiãô°®² è»>Ô …aĵàq[Àåõjåm‡R§Èke{kõxe]‘;Î_ÿ•Ž=J§è YiÄžÆw/*’íaq°â_ÇLÓáqߨç«õ52³ðŠW U%r7õéq &ŽtE»ÃÕŒîHçyß QMøcÝ(­jg¹u7ñSÕxã0p®ŽC‘~°Ò èüÍ _ “|NêåµÛûæ2s"@hò æj¬S‚î1›eɉ#ÀIË4R'6ég¹,‰Ñ÷”X+Ê)¥•Ýãa[TÌâi„¸fdà+›8¨ÜTÝžõK¼TÅlTâê6Ëø•Ôncwà-‹°[¨’E䯱¥¤TìµGÈ?Itr¨ç ÑCe1K³¦²/Ô0GØ'Wí„¢´*sÅ•S8Ê-H;×Ö`¼fméú#3ªt^þŸ”U<Ý;YãÙŸŠ£“?pɈM|*"ÑÍ÷ÔÐPµuÃ%v!&ŠSÚ Õú»µG×7÷ó9œ·HÅÒ¥Ph›õLXGÓB“—Ë)xTIøÃcó]Z¼Ÿî¸p0k•˜®›ºÃþb|Ë ¿š£„¿h?%ÍžÀ…2͸t|¨î -ÁíôÜ/S¿.FMË„B4Ã4at&š øy{¬áswfnB–Rî%Èv—ðó2¹ƒ«ôI¨÷vÓ V‚±ýÿ9 ?…Fú!AN„ø,?˜uê6¼â¡uÿÜýUö 4…u±XÙf†Kæ»ZS"ç@?uîãÿhQ+sët²'ÁéMš;ÜÅ|¬ÌîíÚË¥NCÞ9 £Héu -À|Í=ízÑaǹ(b•œi¤¶¢„Ñ)¼­·Ò¾ä aÇþÓ÷±¾xªù.,DŒž f{­V) ~?éû EÏ_Uñª•ƒ÷ÁƳ«GsÀ±Ú ÒëMbl«hc{sú´d¡‚ðrÌ=-š4°)L<áU\éHmRî¸7Ñ™=h̼¢¸K8Y¢Ï¨”KÞ§ æYâ’IÊ¿·Ø¿fÍ™=‹,ЙgF;)«îxë ‘ÜŸ¿¤ŠIíOG Çî;6Ù±*$§5Gi‘BÀPîe©CGè÷‡£ ¤£íRÌ?Œ=Œ‚_ÒçÊ>„1ŽÁ¾U²¢ oÚ7wZ‰){åí£‰Žã.z‚×G£›`„cÒƒC&m8Ô¼w´6Þ¸©…´œYØ Ó2$bP:àðʼnØ\èG¦Y‚›Í`©™1¶TÅSN±ë»tvŠgÉ`ŒzÌâã{ Àfkû)¶ íüøÍ`!QGrUžj9°½iiw¹k¯±Vw5iÆ $Þ^òðÉè’_BíªÏqþçî~y;4TúÕ¼äÛ¦¯†ÓÔ 2† 5 Ã"ÓåÒ Ë•ŽU±‡\ÊÌjXNßä¤ xÕ -Íf‘­åÛRôùÉXJ5&¶*çuìfX¸m1N”é=ªV.•c¯·S¦£ÄpW ïìrËÆ¥ 9ýžþ>–„”Þºµåk¨|‚ -ƒ¡ˆ‹˜ÍtºgŸ9ú*ïv*Q' …wwEèÊ alAòã±®¹@ÂrvÏ·:m{C{çÛ‘8aný}>¡·lNüI… xY.L ô>*««¯0Q¨Ø¶àŸ#¾ÈZÓp((œNŸQý2E@ôC¯¢%Kn˜¹!ë|OØæS3½XǾŸ†¬öÝ3òï´A·©£¬ºãÞßÒBYƒ˜ª³Ùìd1î>ÜE‰„$5ÐE3ðúï»Ëó%?˜  æÚË>4Z‘Ã|mcX<ÆÖju—pe2Ó ˜¤¶àVzA•ûʲíMqœ¡]~Ž@ÕÝÝB¸Àö—ððöC×ÒNapP¨ÝMVf³Øå>÷¡®ò+´?+í¸^;¿+øÜu,¿ ¶J(De¯5È?}wTK\{ñÓSá˜ú«›dšÜ¡e(¶À¾zb«þù(a/÷OÌ­æ`Ý@èø¢4çÙB:ÈAœ9Ú,.dƒWÓ]ÀNÆ°N0aÍÌì}ÉÊûP* i}¤iriûÀ­’ÐtePsªÞç.ˆ›bé~z×ÇÎHòáÒ äøO x5ßµ:Nºþ綢Sr%–¹ÿ\¦#­CÐ=úÅä›í(ÍØ_”eV­4Ç›§ºÙé0"½âÌhÑÚs7::`Á¢ä cü:(à;:O}JB´Orßµ KõQ'·Bö±* Ëy;8 w¯ÑÏ©4ë½>@rÀÐñ8ìv#ÅW6:£̃d(èZ6ÔÚŠ#ƒ*³ÓoŸ»,0`ãdÕÆeÒ­n…¬½È‘˜- õ³„CÊ)„P6«å¡b*& Âaâ9·oV‚L“°ðH³hý…¦¬Ûfþo²™>¼t[Dš)i:ü|Ûóãói0Í"§g£JnwÏû«ën¨ÏàÒE5 -ÿ[Ð_s‘–]júç|#÷f>[6³µ…Ü@@¯Œ<×ÿQe ½ÚE /@8 ÿ€RÊìdÁÉ´–ëÿÜê&y.¥MúŽ"C‰I”­ítŽËê“y—Žã&³ËÛ”Â2"ò§n}ÜÙ’ÙÌdß‹ÄiàÞ`càFƒœõ ¡JßC¹‹ EMãitÑÑLS~ðNÏŸ%Ò8)&>ë‡ÊVtâWë£Å=2êfý P@ÚÂË)iå«>êÁ2Í!fQ²6ÂÃ!¤àNëkb ×N´iXWƒpu*˜½bá®c<2: Fþî’¢ÍHùŒˆ¨¢MH„2ïK|Õ§›ªµøX©^i?HÁûÏZÖ¿6±kÜþ^Öï°Ê ôšÈzîâdÔŸQÆrÍÁ°²Ä+±¶äjŠÓÙ®¯ê½× iƒWÍØU ’¥ýÿxj™Ô\²„åf1»×¯ÛÊ)Oðq[T7NÙÉUÑlŠï -endstream -endobj -1978 0 obj +ˆp8BäJ°ŒUõŽå«mš¤3œònA™Õ&é,oXØ4 ô°+×A£‚ÛÍéÏÒ¼4ßàÃÚ“n›µ«]âÃä·ýR½¡Ü½Yþz»ÂxH±9V{ÐR9’÷=3®zʵ(6Zå#:γ“žÆ,a»Ï†×¡ÃV!6"ÃDiu…™×‰Î•q•`$qöìv ËN†#/˜„íy§R!Ò£w!ŒúŒäÑÿ£¹BÞbðNo¿ËaÛîõ¾l—ûuô£´¾Ö±ã¯$;z×/hm Ä)Ò×gJbFòLaÌæDËlаÃl*,¨Tóò`ééúî]¯™xBfÖÌøÕ$F™¢Ç¡÷OŽÂO–Ëih$ÁGÓN×ÐEk³ë€9ˆöæùˆê 4A†<Ëc›ÅÖ]¹t©öÄAYÄ'4žgôS¨d9`š~tµL×}¦Š+ÖFÖ!:¿<Î(rµ¼vÈû»EÐØ ðÆÒh|˜ý*f®Ü/±£?ðð£üôÙ¢k1S,ncðà[Ìúûg̅ЕJE S=‡á$Mz#¨Öß\[m¤gçV¼ó¥ÊÏóùé Má`¯¾r[t’ Ç$;ä¯=·o‰v7ÕCnu€F/¹íÅ7Ö1nVxÎî×ÿe½þÐ;úñϦUBä Pìðò}àKJ;(lî%G º±Ø¢Nýù^Wé»P¢NõKê(3¼þ«b2ØV¨?ɯY” ëò¨%@±¢4^üÉHü(‰`– ·eJ.¯K,W?É¢áN¿¸z˜ ¦S†U#Yw¦_ I´ßMÛlÈ&,•pôcK”l”©S3TªºÙï®>‰Ï2F£rïß$>•©.N©ãÝ:;f7÷ã0\ +^M +åaQ±u`8‚@jûk’¦p–XDS›<€–§µ×¶nh×yãDIÔ#!“‚MJF. 2E7'GìYN¡±}ÖÓN4Qs¤©Ë)­yÀ*ÿ>YAV`°upmSà6œƒ„ò)¤3F@ +k·Èqk‚‘áüû~ ¡©½Û]¦ +š¤Ù»”cÁM*i»[S.ƥʨ ýÍ5…M/ÕôgꦘÅ4i,ô–ãèü&ø=¶Eºáò*‚<¢» +&~ˆ Žfð”@ºa-Ò¥!Eî=3jŸd+aNé@#˜‚R/ëË甲’< öyo·‹¦oæ?”M˜*¤]ÿ¹ö¹„± +,ƒ™Ûsw[cöUŽørô¼IÔáû8#…ð0i·a³ÉFk‚4•5û7÷>ºók0ÒÇ v*êDÙi*”Ù°zu;åœÛÌÖ °\ +¬ oØ´úñó“ÐÑña;rÿÞ( Ö’ÚÈÀwz«KNB¨á˜“'Šgfä¤W?»Ä1Yr!k¯U`)´Ù˜íùÊ«µ\®$<Þ“€ŒX¡<9)ÜAžÈV”(`ú¶å­Ï©r›ëÞ).áUߊ]ýò–ëX/^¯Ó:bå²[BŒ'’‚ðA§’‹ÖÁ—«+u×}ƒì¯Ï"‡ñ +pæC3ˆ‘q'z}SdÉxÕBiؤ+GøFÅ.Y2µp£Ø8þo˜ªv}§áü@ÏÈëµ*‹QzöñI°DV `Ê¥ˆ—Ñkéʦèä.œÏ=­h9µ—8nA‚’@$` H¬Â@0ˆQ-HË1gýÜ­¾³«—уvÅËS›=í“áôç©]m¶¨;Ƭ›µÝ–V¡´Jêd™<*§ ¥Ov¯æIë<]_Žîcô‚K.Í&Ø CŒÈÊ­µE­] ÕW¿÷¡îšz7Ìöá=/ îè¾\­`ΫTÙ(~Ö°Ñ.øŠ)lšÆ$üÊaésZyñæ‹Þ§^ÜpÙõ2Bü™Cö [Àb€@ñØÎ1£X“<9µœyr0vƒË!êSsíÝK¨/æRú'ô»ÖÞââèÁ3eÛXÛ¸Øjótoåž+9n{Á„šcg@f)8}ÔE¾°€UØ9Ð_²¶£uð-¥Íyœj\}qÁëÑ¥Þ~¦'qêX™GÞ«kú`$ðq¢pï×vq•š&¾°"àŽ<Þ½¼ãK¤Üñ©‡Ð.Xo|%’Ñ}ŒÂ¯Ä|ãžû5®F½Z¹ãX»­ƒšKbµ X½p3Ÿß¨{é÷Ç/x+HˆvœE pº («ì +µL 4öæ–m¨(J«;¸Zs·r÷°%ÐPVúÏîR0©²?*ˆ=€ÒètÞ‘."¥¶)“¹$y8V+"V_•ËH‹BtUñ2.ÙßÃ?-–B'fFÐÐÛï̽ä"]çT‘+l#c-­Û°º´hn·rÁéQÈ‘"`Q4èÑ¥ˆO§Þ°æ†cþ¥7’?ÄÒ¯ÜnßÞ-y°šc¤(êœÝÚ¡gD¶»ŽB^Ù„ b~*àÐé 9 E?Å<‘‡mðdŠ¶Hö1ŒÙL'A­iFˆ»¨áIß_ẘèîTÃC¤6ÖôÓVÖ[)ÒB,øXÛv¦bÑ‚ð®$ØœPÃŲ w£«µ˜#Œ}&ˆÞ K<9èª^Óù—:tj@ yw´d†¶cW§+ÓBúqDå‘ ?ß+˜j3ž ëK,dd î3GLÂ÷¸±Ì‹e„®M$â/Å¢ø"gÉ7Wë`q`a¼“¹¸(Ãl²8ܹœE;ÿÕŸ–£º8ÞMî·ÏR‰€LžXý2d¼(o‘‡ÿ"„µ‹z¹c„û…æã/Ͳs\z* üeýô7àÃ>^mÁqºYÀá‰FgÑ€ue‰ºP/6þD¾úYÿ½DPßq‹ûæº|2I¾ÿSƒ¶°ðöî¯HFÃ1`<ŠÏF¶¾_J‰ÍÞô[@kÆÄ|m˜A]±ërˆÑßв¹ÿ4ÇZ'Låaóý‘DÅ;–6†[à!uyçd9/¾ÜP€E²–½®…KŒ}!n|<¬­}±-P{ýš÷°K¯–‹8TzY†‚Æ²Ê ¾9%˜t¬ÿz0¨ÝΤ‰rƒ¿jWɃ>¾¸¼Ž‹üvÚq…þV?Ç›²ëŸúÔP9~D£üèXʘøyˆ¥*0u¹˜žê&$G>HR§spXÏÅÏØ3DBª K[ØíÒB8ãJ s+5³t•P“ÝäíÙ‡ÉIw]+Ý~dI± +75V¼$.Ùa®drTàªÂúéÀ§$g–ËÃÐ,~â·…™°m0íx;éYfíõ‚£ØQÊÜÖ%¦D"IGi“¯×™z{BSá³ry>•¥j7‘üw<›÷OŽó¥I ð‹^E+G¦ÍwSÊžº‰Hq¡kiÈæ艨!ÂÆÃfÒÔ³{üÃé>±úLRÁ„®È-ÁúŸCIQ·m.NNT·Ü×\ò²Êo£=ÓuÊì~šðªÿ †ÒŸf*Ö;óJ._Zp¨3¢T‘_Í»rЕÉFÎÅÎhŒ°((t\ÄîJH‚ûå´zˆ Þÿî2æÑV ðèéQ#Fþ©U18Š8ü¡“'¤ â¹´üú.ÃA2eLfÚÙÌ@¯_Ž`‡4âýp“å2èÏ¡º§qˆÆúÐ  3Ñ+Í~ð*ߣo‚9•(ï>Û”T‘­Ž=_k Jâ9¯®ÿ¿¤+ø^æ}Ðîm{•~,÷1ag`;7TS.ãßÄòcîØW(°DÒ,Ê7q3oiNZ'-É8ÂÜI”t"öÏœŽæbߺÿZUÇnôíâ¡I4Æ`0jš!íá=¼L?Ò„*¶|?aöÈ’¤ëkü&T¶þ§QØwÖAñ2NœÜq€ðvAÚþénxb‰²]¶-¶uº‰¥¥ áÈvÚÞ–§Vï>ÉûxE.MýOͪ°RУÍÚ.ÎþÌašÙÔLŽwj;ç¶Ü ¶Â73SqhTlM‚1¯ˆv•RBaUbäó«ß:›²HŒÓüª®SŒliD71nœMIr oÁñwÛçxø¼Sºí×Ì£é@m Ó·ýì_&m•­ÀE†g}•`”Tt AÕS¬ú¯òÀ(^²ŒÜªÏÀí=5ÚX­¾&„NNqƱ¼s7©„>ãT@H¤‰»Á7a‚ôó%ü}Ǥ¡l pMØ…¸\ÔÐDÁ· Ÿ0]æð²ÍAŽ¢Ò[°±îv¶: +A0µÝ¶KÈ•ô“E5“7X·¢Ã|ÙŠ^åÖ’=“W1iqÎSw¸”V½]ò6KøzbêÔŸêz){ ë‰G`õÈ;Ÿt7Òzw"81ÕbàøÜ Õ¨,Oسì×NÚ¢CBÜ ´[’[¦ç;-¼²´Ó\é{;,ŽBò‡¶e,-:¹)³*ì îCÜUDÃV"¨/ä<{ÝÕ]ê;u9㢩9’^ ì0ÉùZM‘ ËVR£t+>¨[qh­wƒ^fôó®ŽÃø‚¥”1šuÆû¤=<)bÖ8Y•÷6”²¿“ü;û=U¦I<ºu­C»±Nö_ÔaÁ•–PŽi9[€a\PUá¾)ž”M`ÉR})”¿ýe>ÚÝ2VÆÖì¸/¾ÖHÂ’~“궉­S@­ Swu¸¿'¥¬²{—/$0UŸFÑï:›U–³³Ë°‘[M¦öÁ‡…}ø†‹(©ñ<½mõ «b˜xáýÂ,7\0F&º0*7ßnW3©­îýÿ»´9âF`Ä…v~b…#á»Ü~éÉ$€é_d¡ Šª6/ÄúÛHJVñÁe J_Óš2ÎCCtœO¥÷ü> +õNX¬Æ ¸Õyö W¢ÐSÞòKb3i:˜0/\©Ϩ†äH½§[ß/7]Ýreç;‹B'§Ð×ãåNk¼ÃY%>€ô¥)û‹ý€#-•Ø°w#“ÍÃø1Š¼\;ÝAèÐØË'‰—Ûâ”D¼u¦sÅi”ûßZqêö¼w§›Û=à@w`€Ÿxí†àüÿ$D«œà#øS>å´Z T† +V~éžHMÑ™Bëg÷?º¿;žµ®Ð"^m +ýUÖ÷æ +x)Ýw"×+öêyêý™ü¾#àÆ\ˆ¹­Å1«æþ¸3…W£âç">¦LWrô¤DKñ9³Êc¡êEÈGoÐE¾µY h +ݼ~§)(÷D(Ñ’c{/ Z´X(’&³axÑe‡pû¹-,ΙåÀSÃ=ÎLŠÈ8Ù–/GUÙx^2xˆnÈsO:‹Òvºì5ÉÍ3ŽŒäδ +cvö,wúª:ôØ!ü¹S?Üp–‰G!ìr–{ðŽªàʇÊu€c<ƒþ{ É*F븙ᦪÓ]eTZqu\ý“ k~dáwjRÌX6€;+šþÓû“í¶b÷÷:V§ãÂ,¡÷£`£:G>mcF"!«Þ`Så1hç~ó±€Âæ¶iÏ’à1îoõªèNÏ E,iXn ï +xhΡÔÞíçh.²›mdI-SF3œ†¸hu*:|çÊB{ UusC´®õ©Ùb]= +Q, +ÇŠ¥ì´þl¶za5ñ8 µAÒphÞ(Wü\‚ÿ¾™2×t©·¢wdOe³%(¿2 ´EpD»¬ +Š0ão6ˆ–Ž±NÇÝÍñƒlA·q»qäã”NÎ@ i®é@ÓÔ†Ê0OØ8sË\–¯}šrÑ_ÀDÈ1ŒÏ£RaÈòY _4„%L=ì‰õ<—±¡ÔÎxgUÕÝ Y¹:¶GÇE–*ºõ£6no¿ÌÂívµõ»á•ˆ/Q§Ëÿ.3dwòøüR8uÞæ"´c¿IÇãŠGZÝ9‰Í´ô곺>ÃÿEJQP¿bTžë6‡Ý–:¥š<„nˆUYÚ–†½Â±£tW_eÐqÀ®’[,m+7?Ϻ¡Ê&]ÓDN½ú£³BåÝÝS…’Dçë“zî´Ñõ‡?@!}öDÎ#“/Tò‘€¢5ñ¹´ß ¦ï@†r°ŒöäKþ®mEŒpè1°ÖÑù.HòFo$[îgt‹µõMBU+™ÑQŠ€:áúúH§¢´H¤›¤ªðªáñÝ“Â{ÌbçpyĆºÓU|ŠYðÃñ”AwB™ªXÜ{™ÿý±³Ã(2ôœ‡±wP’ýᣓà¦Y™_|«w Bµ¾$ÅíxæI6›öUV{ܕ̧$clàêpjÄysÝÅñ!¯Z 猇m·ZÓµ os‡» ^/ nT˜L…¥aó‘ÔZò2’g³ú†ï &Cºx5÷KTZr,«goHBN “Ò­½äÛ)ÇèØþç»Ó®ÝkZã¡Øê"ð]@Êñ­ëœ¸¬ö( yfŽÕ…¼HgË©ö}·T«_RF]»»Ú´›º»5K©Q™^Ò8D2%¤ ø™ ÆCý…q`V;âD†x?gºâMAÙA.Ûp·ûÙF2Pm5ŒÀœ¬Çîë¦Á @¨rµ-39RR#Ð?X‹pKµK¿žì¥‘*äU¶Ä°‡àbôÂ<Ç´p{â­úŠ"^Å‹¹ÌîÒsúC Ÿ ”šá‹âÛjø››ü•¨OÃýcŸDȦ„æ6#©”˜ínË"€¿ }w_€^75lØŠèÒY$ÀôÚ¡ãh²Ç“ìK¥ c0U¡ã+‰ϘŒ÷ot_ƒ–änË“'0”À*ÒCè2†4lÊ[(äEa¼Ê4'yPÏÉWÐÞ$[‹d½ùš£ZqšsX¼ ù<·XHøêû‚“î4>ì­Ô‡ÒÅöZBééýœ©mè/Ÿ¯Ât·`ûÈ{«NwNåçý¿¦"ñ¤˜"Žu3{±³¦1уæó18 +û虫Í ÇDh­œ]Dü¢Ó~ ÐmPDŠveY5¹Qö>ý¢>®iØVrSí~/qùï>U¢ø)Ÿä,Àþ'¤këAqòU©º‰Ó\ªc.D¼4œg$›n=b˜,o®e©h 8FM¾¼‹±Åg1°‚/qÎ0ü +®òÙ­e€c[Ö.¶?Rj^âÏ„êeàNx“l0•1Û‰<YäóöÍs—ã7™ ‡‡ï~]ìg%ÂüO#AÛæ½Ò”¦çáyûŸ˜-©ÏüÛ…áÐåŽÕôñ™â~” ¬X0‡Ñyö²n¶æ×\‡ŸK³ŠbF¹ÂåT{ô-aè.[DÃü}ÜÁyaºÌ0-%Hç÷E±5ªÝKàרß}¯“¼£âÉùqŸ†Ò5yG¹hÈïTåW=0ËÒ}•t(÷‘o +üÝ8ÂU+Œ`3|ÁHŠ¿q*ˆåG!h5v*b>›¼ƒ FÌñçVÇÔz½Éˆµidvî]ÁŠÏÐ=Ü<¢Ð8i¸ÿ²™b yºœ1*ß 6&>‰1ÐÄ·J¸cì(` •2±ø&a§%&]‡€œ*T:vFÓoi5.{|—í¹®2Ðâ°¦ð“Ø©&Û|TcýYRíýçù¦M3W\†0y8Ž÷¢HZúb7}íÀEʈ"è| É!^Ü©lf‹º‹ZÛÂ`ñžÈp+nøöYfƒ/3@/À%ïzùüÓÕ’ªF:}Wˆ–Ãoÿ0U!†¶nŽŒWꜵ¯ªŒ³ξä[ñ~KS™çÔ[³iÜ)3ŸMâèƒ(¾]7—_üÎçp0ÊÞzþOùšËà맕8-÷ýMìåbћœLåü|¡¾ÍüIxÁcóKo{ðÍH±hÅ=¤œÌ&;0£ wÓY¸3 ¢1ãL™'Ÿ³f†de­ý-«o—åÆO݇^ñ%&¹_ ƒë•¯=íI&Q óÞÑÉݵ¹¹÷`½÷ísj;zø Yã•óžI4/Š! P17cß±9½÷:Î/ºðaSøèêÈÏ´ØžsE]2©€ÌSCð1×Zü5 þ¼~šc‹8܇QOñ¹õ€ÓZ_1/Jå‘Ú£{rokÃ'Ñ&ÀºO`ÕŒ4P;@Ï/QÕ£­¿`7(z«ž öèS ]þPÿr?^h¼ã|è ÕedÒMÞ–á=“«åˆÆù-3ž5Ð’¾×u¿s ‡…嘗Ÿ7 ö7Š¯&ë¾r¹æ"óåc.µ#mU'Ö¹áZÓÝ?Åy`Y†¡!ÛxÞKc /™Z (ì-2å³×i¤¯ °¤ý4¹¼¶›hYË +CC'{¿…i‘ýF<Þ<±"TåJPÿraW?ëæ׬ð¼ ô*òõ5|ãË ˜¾a*ÿÒ‘^~!ð^ìF¤ÒlwÔ´u8éÙÑßXZÌ\¿½tHHKXžTí&àë¸mçî<Ì\Hnh"8SÉ 2v|§þRµóq”Å‘'¸QìÝÜÉû™=:.I¹xy»ÏxµdŒ¸W¤*lü{u™u®µJrôÖ ¬ÙbD42vi‘"-Åì“7K¼ÐLw¥@Å lÔþ4²¸×%>ml (£«ÙSÃÐÁ“þbc‚jøVÙÄßÀ©g¸þÎ7hð‹|ýt÷Át܇[ßhÑ"îì‰àº Ž ®"oÖ˜ç õ'œ—ÍTÁKŠ¢EA­2I”›ð8Ó€PT +ö@ã {ûÃ߬o[wr1žà÷úªå…|l‹¯¾§q8Í¢ê/ÖYxIJê›Ã”ÇGT×]2d©·Fô_8Q‹A¢~Cü–ñ1·ºÛ}ðý‹s»„,XõíT›/ÆôµÛÞèW%)¼ÐÁhú‡þÙçg¯÷"“^úêYÿ°‹(NPåÚ鑸øÛ Üëu÷ñ)Æ£—;–^|/êñª>¼‹ò¥§â°£(ŸCdŧŸ”bïÊÃä³Æ|t¥f‹pmÏûË]ù£év½«ÓT‹³äØm‹H›¸ö¶3!ž¬–40èêï•uª²b\áÓðI>ÞíUµ›ë{l`ÉèpÙàùµ„‚ª)ÊZ«òñÙßÙêÐ%š’$ã7Á}zqQøÉScõáR¹Ÿ-ñ”’º)ÃÂô”©g׶ iR³pN +îG|yTC£¢½c^;™[6Í,#«Œ¹33$ b4 ѧτ…ƒÿêóX¾ÒLÈAÊ’,IÞ„†Ø{Іϭt¯ëÓâqŸC<ìÛê5?ÞßHš˜/+ÀÔõ‰Ü & +Zƒ¾ëkþ`¡bŽ²ƒZ}ˆú utËÂqš;¾z$,|ãÒu\ϧˆ îO?Ú"ÍnùÔâºY0 ö2»Å{,˜yc-Œ#à«:Æc©Éž{“t‹{“)üím}}Gá\c q·öÜõ2ÿqAÒà"‹}¾Ï$‚T&š] +§f‡Ë€ºl²kü ßwC·~//v—¡d9øñèÄêÄÀ]ñ%íïmšfß-}V]ùe¸…b縥Ñt+î0  ¾Âõ¦Ç{ÒÕ†/J%,Ò|9*«™yBuŸ=M«ë”ù­×Œ oƒÈ¡”Õí>Aµ‚òMwOÐ§Ü +« ó7;Þ_:d:>OªoDÖ +ß\4 ‹Îÿ³?¯yÈàÉÝùÁò€ˆ–Lèqœô9ê‚ÁŒÏuýÈ·ÅÛ  +”Ä^ùF‰v2¥Áp›X†ò«ô !‰,x?‘%øÞáJ;·I†ù¶ò ûWdøéÒKC¾”–Ú(Éc?•”É„Üû-¡Ëy” ý^°ÂÆtQ`§-o23b$FíÞ#o”®ÐuÙm]AUYßwŽäሴÐÝí¼Ô rDäºiÅû@ˆÝþŸxììë„Þߎ\S%Ù oOb¼ç÷EÚÿ¦ ¦ÞÅ: ›ÝÌ}*±—ùEBÒ™`%YúyX¬›¹>+G‘ d«LeuáÁ„…“¦³Å…eÔ✓®á¢ÜâÓ6Ü]6&™mÙ9€Œ=nP‘;ÔO¬,^µƒTÃ乸s¢^å ÏVÀTK +P¸^à0WÇÔݶŸ'ñ S¾>¨üáPúŠô £CÚÝòØï¯í#tp'çJyB¦…v†“;äËKó™ØWÖTëwÛg ›k¾íTÙz¢Ø7ÃÛŒPB„/zÒœ9*)ÎåC³yöÀ€J¥®ѽaXèáX1ø¿jÎm‹Ù࣬1ÑK‘¨Ÿ¡Ê‡zT겟j&Š¦–[TtOáu‰¹¥ûãÀ!Å…9‡cÚP,c”0°©‡9׿»¶¥º5ŠÔ­åpáéL+1ÃCƒ † Óº ªõ@¶ˆç-Gø‚QPŸï”H$W”D%ÞôÇ4ô¢Ä{ ¾ Œ$9ÕMª|_y"×=ƒ–ÛyÝÉ·1ë^[ˆïƒN"yMc޲ܡ_~#†–ÿ 5"XÕíq¢O]xevKr +endstream +endobj +2225 0 obj << /Length1 1173 /Length2 2916 @@ -27216,7 +32369,7 @@ s; ƒ$ü-UÒÛÊ3¤³# endstream endobj -1980 0 obj +2227 0 obj << /Length1 1188 /Length2 2740 @@ -27265,12 +32418,12 @@ AÖª^ 6» f žÑrº^‚7£ö1ë5aj³à¡Ašº£¨nWÉ©!µp© cílov®>ÕªI8©3!ÕÜ/ª˜ JÝÚ?|iî$dØ_ìñ(°/;Åb÷üÒ49Ãï•Û%X¾eÊal¥Øw픋v_Õ̹ûœ@dr®Ëp.álä§<çêÉJt~½ŸM«÷[£†yœ ä™û3¹î[Î{ endstream endobj -1982 0 obj +2229 0 obj << /Length1 1614 -/Length2 21998 +/Length2 22531 /Length3 0 -/Length 23612 +/Length 24145 >> stream %!PS-AdobeFont-1.0: URWPalladioL-Bold 1.05 @@ -27295,7 +32448,7 @@ stream /UnderlinePosition -100 def /UnderlineThickness 50 def end readonly def -/FontName /TVMKYN+URWPalladioL-Bold def +/FontName /BDDEWM+URWPalladioL-Bold def /PaintType 0 def /WMode 0 def /FontBBox {-152 -301 1000 935} readonly def @@ -27309,96 +32462,88 @@ s(kz Ó¿EâLGFô_™îßxÄiÞ¬‚Ç5, +€T üBÁ)þ¼‰äŽÿ©.s†€«ÍLÁUt …µÛ«€Ã±$)Lè¦ä xM¹ó`Ñ0õ„ß^üÓº[—U´[éѸ\Üî²™ P0¢þØ“ 뎇aWºî³¤ê;ü]´Æ ̨۟eÞ&§6È&þ~sD3ÈR¢œ=‰çŠR–Ck Ê‹6ïá>O2‘S;ì—z250Œý"¯²)ŒcèÞd/ArÿÀÙ;òK8+“f^²Ñ9ÀEŽhpFRtgŒ,,zÑf€x0±œ“5¦z êÉl“ʢжˆ6\¨E+PõOCêj“ðˆ6=,¢œ3 .óh­…aD¡ YW0þ„šÉ ‘†…râºGüÆßÇSì£W­=¡ûø˜¶k½¦cnÍÞ0¨7 ?Ý›ú`÷ߌÖD©çì«D}Ò£€Ó$‡!”g *ýV¬µ#ªHô;(ö˜èxSG >Ú|írIúÎx„:[[O£eÉÚ>8‹^tûíÎá8Õ…\-áwÐk(lÙ.9FÌâ&S6¥0J½r‰#D‰’àÂÿ­^b2>PèEoó¡#­C@—Hêg @¢aˆm°1%-½~@RÖ@,DÃÜFXÅP¤c*ûöõ®~G áf ØÁ »»- -kDMu¸‘‹¦Xœêà2+m˜ìQžMý'¹é…õÎõG’¯j*4×<#BL’Š†²l´N‰ ] ´ÛÂK’[ÁŸ^G |ãhLV8œ¤6K[ƒÜÌS¢…>ü‘¥êÓïšÀHÛw[ísýa…jÿ_:@³A"ÓD"ŸZ0­†äf„Ú’æ»ç®¡v¼rãVkÖO¹C5&Òl€<Ku<¾u{ä-›¢z’-ËGFø·ª“+PiêáçCñq@ʳi¾Ž°9h•BZØ´Üch>ݶ¤7¼§ÛÑMÉ -9³ãÀ»Å Lƒ3À¬}1W_^1³†º}R;ÒXÓs)qP¡½¥d¥ˆª@šOPó-r8#ì•h:kûU¼öphôu`p±)=T•·™ÅìÏ/y<¦h>Ф‘«ë@¶À×T­Xûù&©Rt´ûõƱ}…ÞtŸ^}’‘>ñ!I>5?ù/²Èñ•ä>á’) ŸVwǚČt±¬¹¬´¯†7ȱÏi‹†®]x¡Ö´ÃÝ/X˜!U+–_(¹ÇÎTåþcmç^ú*·µ!‘½¯7›Ñ¾©‹Ø˜Ñ 5ÕãÖÛïæ“Òwù?z¼ÖžnkD{xKúr»ù(ñÙ8º·¤°Ÿxl‡uÍgéõ0_dŽ Á%et®ˆ'3y8Vþ»8ñ:¥˜.qkIË;ZPóÉü©’(fœn œiÖO<½SÞÃxw±,ñ„Ê^~Nªå±ÊåY×°$N¼Ì©ÐÇXV+)2O7=eÙ½¨ “Ý¥õ núY’öÛ3;CpKý¶lÎ#OÝÚ>¡¤4÷Dã¶öô’øÓª·â’Ž‡D=oàiuˆ8Š¹ºÜ6V£¬4ó¤ònº.Zó³‹Ó†à¨³ä/|¾Ãfæ An&‹0ß&ü8‡ –VoIÑq'ªyÑwZBqÛ"Ìú%cœý­«²nn_ `5,uZ"uß»>‹r—g;¥HLÆÂ߇ðñǘ‚Ö—€÷Ëå·–ÝðifãÜæHÀiÉð¢ m²ƒéI§Gò°ë˸$eV ÒUõÐ -ÉÈ.cK±?ÇÕwžhY°æW’ £G±t™-¸ÕB¾Æ1È ó¶ó£Ì,ÃЂçMEÝLÍkGÍz´m¢™1pÝkøEMcÖ׸‚² …#HÚ…_ U>¿a‚ñWGMœî -åñ0Š#ºë$kaŒ¾H"^@¨ 7œ–´dl3YMŒáov©>_‚V:÷8 y§PK¿[¦±]W6‚=ŽìПٯ#5ÌÜP+N|ÏQüÝ;ÞUO*CÙ$_œqúýj©¿^†ñíÞF]~Îdb×½0ïGgPêâ(Cfƪò²’|ú)4¹]~eT€}Æ’8“KBƒÂ3ÈtOÕì„K)f÷/Ú#ê:zPûÅĨÿ® k•9#&w@“{Zó·)µ~at·F•2)2Ã0ihª?ÏmÏë[ðò2•Ï°`\¶PËVyÅè a¦Qþ¿Û í¥“]ýõÁVÁÕ 0ƒ¥L9{ö{"ª%º€Úû#€Æ©N3g|›¤s *Œ&ÀéÁ;t“oDû³››vÿÃÆ!¯³A)/™Hý¯Q5²3(@0nÚóA%5Áã -ë A’Q Æ6l`}­î>–½~Œ1ix¶¹ç3㊓fÑ•Q¶×?®pN¯5/Ñ -üðÏ™$ËfE‹ì²±¯fƒå VÒ›JC¯ÁãS㓘]k†*pa…§ÈÉm‚ÙœRÁ€pn2Êt¿Æ_K‰Øº•®)TyonŒ«3Bt æ´mèO‹zäÉäfbXzÝBeq¡Suø©±Ž;Sq‹Ê§n¥ºü´ñ»¹ ;°äo3‰ ¢3kî(}Oû¹Š÷Çkˆ jGª,ƒÀ­#7‹úXª7î ò’Þga°¹â*GÈ \ƒJ³‚SŸ»Ø\âù—å3Íó¾:ž%ÕÆÁoökYב‚Ó²5´V"fA –¾p2ºBÞ?[‘ŽÑåWX|€T­ä±–ÞžaÂY©¼²wÚ$KýȮ왆IC@ÿì€+–°XM þ£8›¨dE ™:v'½&•Ë è ‹íŠ:©ÁS&rÜ”·µ}N÷Ť§ø­æRB€z Ö›\cW0 kçù@^æ²}¶¸´Àý*$h!çWþŸ‘úoóiQT›ºnŸfµ.Ú®¨Ö ×–½ë¬“•GçÐ5‘—ûb®“Þæ@zjyö:]o2>é*£FÊù[fv7s“‰Â]÷°±‹ -:W‰z­ÏzÕº°|ÁçîNù1›ãá‘ÃïƒKÞ^ÓD÷¹uyR±t?ðƘ¨û -yvvýûAæÃ)¾*k4µE~nHXè-$Š n€íÅŒwxUîÁIù\…> -ºIuž„ó®—D»‡D“2š„(WH˜¤C`Àb«m‘%›¤m­@QX´xÞJÍHø H¼2ŽH¨~P>c{äCzÑsþÍKnðéÞ¶uÑD:¸kUZµÒß°ç¦Î‰ê|’àÆõ’·Kô‡Üèz!Ï{ž%–í-ö”è»e/’ëÐõOTª¢ü:Z—¢Á³©2 …… »³Â÷O] Ú×U–*lGc}d+w•ŒK+œÝ‡ö5Ù3¡ü!Òžƒ®Ý³Q¦¶{¬ŒZDÕJhöç÷z]r{þ©))ÿÓÃ#"Qjg,éiÅOݼ -Ëó@ ±ÈÃýÆZ!àe½Yb[¼WÞxí“ã‰êXZw¤nOÈöwUë¢ñ -->Io£¼;fNÓ²%Z3‘ÉÒÔ‡Ð3mø*ĪÇNA—ZAÀ\Dãæ…9HžÆ$ÿDÒl7#9¢¶Æ¢ï?õìnLs´…eÿª -jåw‹*˜P‹”ùÕáR×mžzìQ_É_A™í·+?c ¤¢%¹s4W½¢0¶6âO”5å$~ÓªV”I„hŸðm²P<Ïó':|Íó¶Ø-NŒésÒF4/ì‰ÖO9®½\)Q±mtzûœd‘õS˜¶"T&f—&ӵǩ5¡Ujƒæ—z&.Û1V’Ö¾)KÎêd·³¿ÜÍÎemô²ô>~V\su2±k +é•EÄ8fv_ -ëê>eAž2þ Ïó.$8‚ÈçSM¼UÞµ¢a·ßBha=¸Ò,i–˜?ããÓÙ–‘ytûR'@ÅøVz›kb,Ñ•í«F™»ê›[Šlpõm¿Â™,»ò¢åª¤{é¢zt‰B§;Fé¹$ -1å†+B›û3Ö‹8â¨lH’EhÁ G}ØQˆV_ Œ\ŠEÆàßÓT2¤1™kÔX”¨ïe ©I½xüW7¹ ö;ãö’E‘Tâ«,oM‰z‹èÈCí"HñŒ/$f‰Ö¸z¢÷½]aÉ¢æºOw 4n»¸™ñöŒ²›O¥½ž}`ü’–Ëlyb%¥—g(1†ɫ@ül3'Hঃ¯ì>«rÝŸ¼—ëmÅþ*©H®s‹ñá["Wúô˜ÆòDìƒp%÷팱ïó„B„˜¼m¹2 ×}A~,òŸÂJNzñ÷E`#Ë”ÙïÑ}’@ˆÖ’VaµµÄü-ØcM lÿ)»5mbÙ›“Q3ð €> »ËTJy©KÁ: anYUª©ÚeÖ­¥>3ë*3p(±FâNpŸßß∔Å!tû_ -(&¾åýC4|®–¥iL[9YûªÝkZøAñjÎùŽFi6C+Ù‚¡®k¿7®ãZÔë)éh¿‹§¢ûÒ5äu•ëi†P‹j™'ëdp{SgžÞ <™^{O“ [ß~½¾)üæõo“̧ïÄšæZCãYõ ¥‹O­­N© žÍ;ñ’Ï™o«|ª`le+ædcöxÁhÕW¤¸¼Å2ÒkÎñùÔ’Ö7÷Ü‹ÕØé -MKõDúè¨âˆÕÖUO+’(¹…öˆˆ¨†U ”4bEKq”ÉËP«š-爠¹f¶½é¯"çæ¼o71ai­‡ÚG]Ó}|sï7žly–V«ùÞÚ¥YéµeŽÜxhW¹G4•¬DË•@—I~ë8ð./,ËÖÀ©ðQNE,„pñkÛJD¡5„å9K»þ` ’Iží%`ûh+#‚V£ …øÕYmè4Ò¥¶Ljh€½ÕØ]—†2j%Ú#!þ f¢`*Ž_}‹ îe|'8X‡ûzÏúœîßàÕIOÉ—¢åuÖ`kHuåÊ·N£ñ:Î p‚³GþD‡0ŽÚÜ{Áÿ­O9ueÏ%ˆ›pb°©ëŸF Ê ]¦¢Ê.Ôb’= ÙÛHà®2!5ù?•àTgZYÚû7±=IDw@ÚŽñ ‚É=Üäh¤úµÉ8ÐÇÅ–BÌ è+b&W -9Ö7´Áà{qQs¾b)ÁZˆ>`S÷/¿Ï¬¤ÕaÒ‘w-á4{¬ô…nvŽ=‹Â'}×>ÃÇ¡~6~ßžº€eus#!:„ÄÏ -FÓE®ŠïÑ.ÉÈ?*Ecžše¹ô§ °VjAÜ{#}1/«`„ô›Ô{Vàùþ>L¡tCØÀCݼ‚‘uɦVU§ð¿á+@ˆÔwñØ„³jgð$¥ÄÔ !mµÌ8í)šïJ… -U+žÂçr÷²e¹p4‡o|v2¡‰ÏD % »p~´Ž»§4© Qaû¬C<‘Å{óÜ>l\$r½õÅl+öºB?„ÄiSÑ‚Ï–êñAíÈJæã+X«kVà£Jˆvøø;wÑ•ÎIZlÇÑÿÜC0º9þoù~®$.üç¦9íºdpÅYÍÇìP±`% $“þÙ’WNÆ¡éݦáub|óoJR¡~Év9˹ƒ‰]¨÷陂O•½Ps™c–Ê·DÐÔÏ,݈15ÅÆ¥ãRÆtwÚª Ų$™x£6‘츙s`+%ÜJyxÕ÷°í9"Ä=¶ ÉŠ…-°R¯õ¿¦NÉj–çƒÊ„qæAŸM±p¼±×±•CÌ£Š;À‡ŒM.äøÓv’Þ=ô Eßw\jÖ&I3µ‰®dì)q›$·Q“Ä;…Æ%’÷$Ýgº4;ÕçBTä;V¥¥Ü›yôU0\:­?šz2Pµf&ÄðöŸI^œNq˜’ S;ù:¨M!UºÎÀϺoø‚Yª€¡G4C’º” ŽÈûø‘†"ìM]h⵸D“Tí…=xzoÕóPÆSãîmz¬«wvÝö¬áÂpã¬çYÅ6äYá( ÔzÔŠë=„ ˆÊ³ö ÌESQ1f /˜*ý2N0Gâm_Þ€s€d‡Ì¡ <ãV}FȲúH£Ï£/A0‡0eÖ2bèFkHbñ*ÿu)ÞÕ5SxV"dÚ)Ç"=Ñ@ªfJd²ì½m 0R·åŠ ÌÊæh·Ý·W"Î5ªE¥j²«´ß>òåÍ&«åÚ,Ñï /a©ñ8óN÷À€ôÕqþ;$ÊÂ$ƒFXÈ“Ô–bf^\ã ð„Úƒ+~¸ Üq…ßø|p¹`ã^àv¢ß˜Ì©*É -÷1ÏàÌj…¬"ž,Ú¡&Æqä$?ë‘ -¨® °ÛÂPª˜ÓÐ'^»â¢€Œ pBx•¦Ä"c׳Z¶Ð+æý´¾ZvÚóSM ”½±x˜’o?£bÞu˜¢ÂxZ }·lûÛ7bæŒ%ŸÊÍå3û¸D—bö㣘%»)žÓQÍÌú¦ëû$(6JMØ+¨€§¥@¢M킯2nÏA‡ ã~‘d6‚ªÚNËø‡ÖfŲ̂ß|V5w[†ûÓ¿}:ÑÛ•c€g¾ $ŒŽ²ù@§Ñ† .H/eüØGiÜ3gU;1c *´[‹ÖªÃBB_…Êëø®o3šÊ›>奮œ©í‰‰T™ý&ÙG@:¬½aH…‹<Ÿv]ý™Ö+røˆÒ;ÊZëAҡѪ|ÏÕp(Nîë#w_@#AŽa½we¯@ÔGä[/Š ìúM:X™ëÀ¹Ÿå=ÞB1ÆéÀ:á‡9Åf•ÐZ”‚ÿ­¨Üu„}”­º)R¢".I‘š ÞDk åºò}átCëƒ|X›Lﬕ[ÐOKËYÖýPS†wôiÆ5-eŸ¦óp?Çš -&-ð2Ê ê¾!9Ú_½UܤŽHË˱™VÝòéoÊ…°…ÊÛùÞÝ £×¡N$=ݼ±/&û»œCÝ{÷f]\‡ì]›‚³-sh앉qöÝ]—éïðU°j C á'ŒUBmêN£2ÕM!a¡}ñ8óÙõǯœíBC'¨/A|eâãû­;¼Õ"Îì[|©ìü;iX™±òÌ iÇ]ÚcÿÀr1¦ªÛ ÓµkH\%v‘(T^´É¢@É«æ ÂR¹WßÁ©}R,°ùrÛ–·rùµ%(à—Ö.¼gD¢ÕÊ\ë#@ð&FíÚ¦°E—}#!_§DNÂü¸ñ >=Ü擱fEÓTåNÖÉ~Ôõ‘b6EÉ’qh•ûúÈ— ¾ ·ŽÍ„n½G׎ÇKECõR¹|s‘‚ÇÆ¡Õ,úv˜˜ùOÜF5/cìä°†ä€ûT’úêHe@=ì|)µÃ>ÞÓwìXŠ×[epïtÀ‚YŠÆâv//tN½®“’%š/Óœ/0q)x–ä'r®6‹Fÿp/ý2ˆ}2ÎÖ":Í}ê>`ŒTÚĪo(ËUÄw:B– -UGE.6†köNIó—oæÿ¬Š­L¼ƒšmªÎF¶<”Üçƒw^E§ë…T…šÃXå‚çÂÞåa{¿ìÊïý\û´ŽãÅ=57xÇÜY“î‘MÚÇ6Ÿ;Òp4=™?"Eví’bŽöU ¶·ö»Ú qØ]o7Ò½éáŒJIî{¼‚Øë¦k¦Ë¶æ²;Út¨¹z?#íû‚ȉÝâQŒÚ ÛÕ "ªò°$sÕcej“Ïmkí¹ïðô¼±U‘M9×UgÖuÞRJqF6ty>7D¤«>N"×ïVóå=szÂƾ‹ªé> Ç—¢„µg­œÕ9aêÑ¢hC¤pÇûªŒ-W㉓ˆ~·çí‘ËC#o?T#=eÏŸ‰îð …¢l– ûÞ¯îj ðfçжMÔ±å,¾ï:.I ÷™_l`×R+9†ÐeôÀãM3€i‹c<ŸòótÂö¢;¶-g]©ëzç$¹¹¨Ö~ôÊú&)”>¬¼ ̽…'ð“4"æ&¶äåâÕ}ØÉŸ Â=ÕepÐð‘5iü‡p¤·c†Vj,)D+1åÛH¹°”›|Ýy\Ø<û½5üo<7ðñý»¥.«0ÍaÇDF|¸ê=ê´ï´š§» »jp™w,½J^&*G=Gfr^l"¦óÓÆG(cªi5ÐDÖØÔ‘KŠù£sTÇê«à5ò¡šÚ蘷ðlåfµbÕ²Gq2„h*T€ËÒç1Ù<[p #a;…049]&°þ²f¨.þ%D6öÅòoN(„°…c"µ2E¡>xl4„1‡°º_èwóðÍв¬ÖN<猘gõ¿ÍC~ yÒ½½¦ºœÒ1; øvéš/䥪¨”:½S åœ!ýuŒ´'ñâËTcÓiŽÕ=JÍtb´áµ(¡š.PîÌJ¿ivªM€äû,dÇœN{w÷ÇêÕ…Q¨TýÒ߸"‘Êþ}vpNEâ(‚í´GþÐHÇÚßo/ãò6cÝ=/5¼e¶ Ct˜KŠJåbý5S/Ìe^£…ÞI+²v® ðÓÔ,«#õ”%xDú‚:s;²û¡]ÝðÎgOŽ¸+î݇ñv‡¦á¦˜ÕV–äÚ§ÆAp¥¿%‚O~® - `‰»‘ù=êS¦†ÌÒÔ1ƺ`®¯9'{±;°žh?Š¤¶Qé"à]™5%&i'V (ÔkئÈ󨃫ö 4göªy‰Ô:?ü í=‹7k!ô;Ï.7‘ª§3ÞFüœ ÝRTþ<óVÎѪ¿…¥e&¡S¡½ç°zÀZÎljtx–Mïà³Ïv§Äq·3ă6f“ Ñ’žvîáê˜ko§aÝh„[‡ºøºûl]†Îˆ†4˜BüºjÖ»`"whœ {'ow;`÷8±øÂQg6Ô´û1RÃQå'铧æRv iã‚Öti7ù¨Ã@N>¯…Oà¼Eî­rUâþ~¦0ñ÷£—„<7Ãf"ï…Á8£ÐŒQÁ5`ñBª–\ãæÏ7”šïú¼?Ûâ‘ŸÖÞÖB@‹âw˜Wj(¢ØfúuÍ9­tzéNuîºÒùð‰U3ÅãR´×ÄÁg‰!PCÙf%ÿˆª.uã}ü¹ÿoÛÝH6­Žõ;æŸ;º=ËÈêl Ôí¿@Dðâüü,2»ÜѶËÂŒØ>^6ͯY…Zö[ -’„Þ3ôdýûðŒ‚« =îP…It4[ÉΓ³õóä:ûcøG¦ëŸ—¾®+êFcOñvíšYxÅùrùrǧ‘ò²•’Zç:Ô€!p U${¾Ÿ¦$âÛÿÂ<¸žz*m´¼±¥˜¼} FF主‰ Cúu”rù+ßÙ‹<¨ÆJE²ÔÒ­µÎîØ%ÜÌh}·öpAǽH²¾Zƒûã6ØBÏL×È*V¾”ck3 L,JöSöù\Ú‰o錋Ð1^MDætV@ÞF=y³E‰6¹êÁ}%)¥y&?bÓæÒbùs‰'ë*¼§AµW%h†cBMNƒÓ“  ~æåÊÉjrÈæ'ÂB^fdF]Ÿ¥z†òé9HAaÙƒtõŠpvzçÂTÓ”Q6y7„Ã`dIbfô¯<©/享]0€ËÈlÅÛ&ÖÖGºBä¸H䥭‘9éØ,½)óò£nCp“‡O|<`x{]¥|$K%›73|7Ã}ó*±‹ü'žF=<1ééCøÄŠŠgR(­bÚÙ^iNtΤØõh\Q G™§»»¨É11V[컕ñÞ¿e8&» ©÷a 66¡Ž%µ½Íp`jŠ€p¸q}°î{/ãEðB)C_ËÉWEè‡ïìWð.R¥9â–²°tiˆ(½ƒÆ­ž…Ù¶Ñæ2’Hr7‘n(f™“>ÎHZ*êUaqÄ©b£ºMêW -œ’õp2a°›òWI[‘°‹¶ì˜Ê¯·™;§Ì‰{¥$rtÑÆDâBlÉZÄsˆvöõ\¤:+y\Wª‡>¨7”¤¨rbcÛý–5±iõM7°–!- ˆi £-¬Á@Éäêsé^Щœ¶ÖígØEæ/ÉŠÖê¡œ'›vÝØëBD„»ö1¨¬«ªg·D ƒâ¯N7ÞB )œk(¡p1®/AçöïQA"4™‡=–âÂÅg5%>eÖÕMÀpü9áÕPRÈÇ ìƒ4ízéÆ“%žÿÏ…o2g¯(кü§÷çâ$<2Jðbš&›Ã¾ˆ¼oÕž#¿r(¨P«_tÞ(’*¨íO·¸ èu}UCòq#Ô†'6ÔeNñ"ÌF›9ù[×íÿLÿ;{¼¶ìCOpTq]Õ:ê›?vÂFS-¬È´YóqõöôÑ7¶lZ­Ž©A$½Å†õv{g„APl™_Šsï˜oy*‰8IhhÒñø^ŸnSUT#ÔÃloœýiXXiMô»4Oi•ÿµÖø»ß¬š>¡Ôo1=‚ùúÛ®Tš"UÇzß”emŠx–†Fà v»þïý\ -Hkµ‡ÖeîNÂà…>ñÀ¨J´ ƒ÷MÿfÔÿûOŽÅ~ß9XÎTЗšq>ŒÑ~<óú|ÄÅ©7Ë¢Lÿç©Ã™¢VN—9‹qpÙLßÝñ^2vÆ-ìÈ÷½p‰ æO°*Ê]ó«VUò¤k–/`’AjæÌ( mÊ/¼³¾[NµñjB¦G&ƒ·»„13óˆaòÅ\÷æ}Ý;¾«‘çÁ>ð›%Òx{l7G‡*±1LPòç…hTn‘À‡] <±[ˆ’ÎiA±k°\úT ¨ -JŽÒ³†ã©:Eù8 ù0ÿŸB§ŒÉ®-4‘þ¯SL¥(åUƒÃŸíE÷µ J»~­¾õ@-sûe[›Ä¾¨Øú@Û©-Pܯ„@Þ§ÓØ7 -‰kzxm²+ -™g‘hÁ‚5Ÿkò°~ªââAö| mpˆ ‹,‘½}¡]j ͆jÔ`N…—ÿC\&¾%JçÿÀo¹ió™ép€{ì׫(÷¿2‚Õ$ñÞ5ÝØÕÃÑ-&Ëó,»·êË]6C¿&ÔáO•±•nh§ý‰ÊÅÑVEÿ€nP(4G]xÚA¹ôͧÈÊêOîÛ\vËïòé„ê”Ô9i›á—Û™DØ[‡ùÁΊäzÖ£W'ç}ÞÔºÛ_ á§n8•¬ -ÿ–S¼¼|;€ˆ® ŽÿTŽ™¿6)Q“]éZßâ«÷A€-1ùª@f5VO2 mdn:“\Y·sáóýÇñ›¸®?í’DM`Û•Iß"êé²5êÔÍî–å“¿Üà£é"*\¥0Ò]¯Q¦R6µüjBÁÏp?ªVxš|g«7Yx«[þî1íŽÕO¯ï§ç9îâì†q!—ÂÀ’êo7bW)wRG¦¬eÞûÎeJ"D“òò¡°Ñ?Ý÷æG®(² …Ü®¸ü:¨ß=X†HGvj¹?r­i7Q¶¿Zèº_JáÕ?òõ.K>¬ŒvBÚïgÈ ÎôíWþGåÂøw––ÊWûzù„Ö¼–öô•*¤§© mCœª -aA…D4ñ9Š]yóóqJLÌ/ƒ°ßíkhºr¯†º!äÔdŽGApX¶—N ÉA@@4Zß_ˆè°tuž^÷ QMR N"ÚzLpPyrl/>äÅ7X–TB×gª)éVð*Ó^uÓe2<)ª$"Óv/r}HX†óe—ÿ1¡•yIÏŽÃPFñVoY^d{ 5`àc‘½é %Û›FA¥¶¢=¥F.§Ie¡QèBƒÂÿÉû¨·ùØÝ|¯µŒþY΋n;{6NwÚ0ËùDƒÁ×ÅÎ×W#‘wl8M=©{üsŸ÷ç2Y–kF5ÿ8˜|ÝLÕò´Ú^?1a'a}*Ḉêw"ëB_9g„(‹QŨ \r8«ˆ>>¥™Y£d -9ÌÕÒ ÙÍ ŒØÍ¿£Äs/Î\«N|fÉÎFœ6LiG¾.d¦ÚU¶Ô@¥–ÊmC>LñhX¡ï€äÊŠÒ˜·\îo üû(b¯¬Õú¬×öd×j¯•ÛxLe`=ßÞù,¹É¢_f?j(¢JäMxeÃÐZ!~ÚýØz/ÁZeÃõ˜"ÒåXw`Ïþ¼W9>IØ Y¬›pj=ó.õ»uäÔȨ’þ×¾¯FÂWv\Hûv;®ø¸g ño®ž_ŠbKÉaC-ß° -ÙF˜»ú„°9ÿ¥q=éÄb;˜¸Ó·~•;6 ?¡î¨AåíM8g›îqZVõ½OÖU õYñ½†â³ -ºÓ{b|ñÇIþã.Ò‡ AŠ-Ã>ì¼™q“¾À²žOC°XM(öè ÿ•:Y@àÌÞ2ØÝøý€A©ý ΢ü°]ÒQ™®4N_ ù[}ïcBu{hÕÇZ1?rX É¸©ãˆ44YêAŠÑµXM5¦g7Z ß6Î2Êüg±FÚ’ôÚì{åqÀ0½y‚³&Ö5¢ åXø™|å¶D*6„™±–‡åñµšgëß鵎˓9åE.³lÈ÷Ñî¥Ú©7z¥¢Vöœ”P"E}õ¿ OæBr ¨¡ÌyÓxï¼x„"upœ±/à>‡%¨s×½ÏZà45œ‰Ç;uz“íêÏÒIññcºô럗Ïݦ¦¸R‹Ÿ+¤´ün -·%ôbâô:ÛN]kÀ;aœOÊ\­;a¡]cüY–äQ._R¯Ï‰µÊÒÎÜ‹Raï[Ñx×HÃÎj°Æ¿^졺A•Äp›¹Q²HÎNîz×Eíó›Ñ26{7¸?RßE'æz†ôì\ñá;ÁKÅR¼"$ŒlfÌß3Ô•–Ù€ þ¼7‚LÀü`֛ϠÍÀ^EÕŸó×(3Üp,GŽìØÛŸ;¦¬îÒ‘6 ³if˜g“ÛxÎgËæ’Ò$—ˆÁ[x &ÞÜ°$ß]쫧­ö‡°ŸvÉ\/<>ôBœ^D_wÌDE¤¢ñ°qB‹ëã¿áØò’Š²u|0‡7ÏQÜÚáÄ´È<î‹\¹o=š4‡dTþ¯ìÇ—³ÙdA»°r¤îœHtƒ ¯Ebœ]ͨ4 /Q6kAiÛ¶ÃÞÞ.Ç=äŸ@ Ü~.“u’ŒbY9 ¦›è/;çàyî5U îŒXâZ6¾`¡s“‡W}gÛèð}Nch—ä˜Æ*ð ß7w§ßòéq-øS;+mB®¢Šø¡,NbgÈêm‰`z,|`K‰e±–ÏFÍeÉaߧ©×X -Á[ÈfVl³,?ºšæ°®Mƒhà›ªºÕÞòÀèTîÍ Û 0˜3/ØÎl -9:±½Ë¾"NÃ\CÞ ZOÿ0^ÑTî3º¨íh2+ƹ†œã¾n…Qã:õ UV¿÷Ì„K+q…˜ð‚\ÙtËz*)YÚÊŸŒh|k¾ij¦s-ºœBr8•W•¸µðvõö×s ~öy -÷¶—¦ðz©_ÂX¹ÃÁxf"b²bÿÏ+{k<å &NâÿuÉΈÑ}L0i{€™ò<½Pv'ÚdìPP:øø —Ï%ˆñ¢KÛ8´Ì`dIƒâ&RR!žÝ)˜IBÎ>ZiùŸý÷}…Ô37Yƒ¼„rù¯?ùéCnˆ†­·¨È$#˜ixWh÷£7!õ£ãª/n>ãžÇ¬o·@øîÍ£6%› ÐË?(§Ý˜•H+骖˜¯ô&<`™¶Ó´K6a§”-GAÚFâ–âÒÄMn½MŽT.Ȧó‡9ìncÜK•ÐNS÷‘Y—XËZM$Cë: Ôº¯÷ x¦Â¸˜¯·Kïu›ßͧz`Ý%i¼¹ -„ òcF*U ê’‘Ù2'þc¡9`åj¬EÊOlÑß\ ©Ÿ…Ï—%zXrpð.F[«œJ=g° ¹ÄìY}®<±¼ÝA à…z´W~&™ñZçáþº|áw¸f­{§µ}Ù–[[B–*I“PepücØ.豧ÑùO~ÿŒÃ,M1ƒ’“£¬Ñ’­Ê^ÕÌ«Â˳ַÁ`‰ç^gŽÓ™‘Uä¬\šcYE­gþ7¼slõ=ù­†}Ù;h»ü·x³v]B‘^%çht°xLžðyT'² ƒEî™íF€3yþ­|žÌIòÀríÞÅL!WsÀÒ‚ÜH¾Ò)¨óXf»xTH:lé5«ÉU¸HÈüÓ¶íŽæ=‰Ê±¹#Öp[@ˆŸ7oø?¸¤Œ`<×þŠ…ŒÛ£09‚wwUjLQræÐ×Oô¾¤É8K³·I#E³ZQ>áâåhÝÑ®™¥ºkKqÞ•4ÈGÒñÎÁáPõSÙ¹šbIgïÜnlë€k>l“t¶§—ïiÁy®¨"Ão,) èKgšK-lî}ži̹ª–ç3Öµ¥1Uò( ×R¹ºMQvÐzCF‡þê<‚¿}©ËU¹…³v•¢¾ˆDý`\¶o6BiáÒÄŸ™ê*÷Ñå앺îNéÄ0gŠÖ©fœ‰Me´3{K¿;&«^ž@oÒüVüÛ¦wûç˜92ßòêb%k7îÊZ)r½±!Ã󣂅ÞXAop…†ÇÙÜTF¹~:ø ëM¹QvMÏmÃeȬòïjaam+<祒ô™¹.ѲÝQÞX^ —,Õ‡£pðp¤|D¾¿Î2ɄØÛh U:é,K…uͤLÐÓ€S\¤¿‘‰]ûâ,©³32Oc¥/Ÿf±†ËuáÄäÇ¢Úx¾øeÈ漜„)nu -~Àg ®|ñÉ\ç çë´VŽíCÚ(ájòú$‘p$Üšåž,°Ë•XIzfÎrIs5Cv®÷7JáÔæM°CA§ºt]=óÝ Z°(òꡤ{.qä´ñ¢­/,»Ñ L­7_tøjS¨¶!S?j8èº9àunµ¯Uu"]‹äB3äåLŒUÊwª 5`‹²õÞ£*ÄæˆKþfAÀÓMÅ÷ë7•G±Šs){ò°Š€‡Lð؉(Ga°Ú §×=] £µºâC‚Ë:¶H5¦1qÛ–JøB -zV=y]Žž`õ".sCôª_ìêÈîˆÈƒ RnÙ&MOU¯èªEˆÕÞŸ3DË„LÏð¶Y™õ¿ÛÎ^@qªå ZÍË!c™b™ú%çS•ª¦Â"­mÑ^]ãf<­pÀ´>>Ù“u×uÓˆ'Þ†È@Ô!;ñé`+û!aq,b’ª¨Å}Vùu…‘êÊíŸu4¶) ”¥ì, -ƒù±ŠN+Ù˜/~Òói“9æl¡MnñJ •gF¤ØÄ“¼ã·ÖÅ…÷‡©ŒPù‚'fL8§M’:áÙÃ131Nãêöô’øÓª·aã¡BÙýGÁ%”â“ne¼*4 -•Æº÷ÚÊßD2vɧ©{‹ u]=áqéLÖ¾Ö]Ê4¤¦$[Xò¤‹ÄÔ§Vùƒ$æIùü‡t\¯;<î¨MQæÓ×oëŠ×‘œ;IÃÉMÚRîI³¾œr$šÛ!e†‰­'æÇ/mÅy’9a®\!´ÔÿUg A¾·=Þ¶ŠM ”ÿV0co>„(h—(GÏT)¯¨×¶32±=-™Bú²-fõɈ\ª9l%É£Ú“Ö¾|Çþ}‡8tMˆôvzÝý¦¥ÿn:O¯È)N„»žHí -ÈGÑcFÚP•±^×|b‘±! U4™–?ûà›/¬µT=i?˦[æUÉÀ0C¦ž³XcS.è?®48‚.ñÁ‡i÷è†S³ßÆ“‡—ôÊ´lHáð -o‡ò'kDQ°sÿif½“ ×g3‡ãn¬$sAJ®×þ*Ç%$1o‹UD ì½|—“É£‹ß¼[v«§@|“ØáX Ë3©bt‘ 'aO=®Ÿ’ÕÅUGc)ù­”Mò|ü¯ŽŒa7lüÇ,å+Ûý»ë{ÀZáP{H†ytÒsù’5üŠwõ'4c5^ŒFÜó¼Ë¨ŠŒ„Í@ƒ2ò5~#ÐfÚ3‚·Pš;Šd„ºq>ŒT“èþJdš ä¿jáeŠUä+|œÄ™]k­HÔ_}ÔwçœÝ7ý°q1~¿…áL(7iRsƒ‘Wvœ‹ê}Éö˜AÀf‡µ½—?{ìù B15Ý+Ž8Ã*A½j ²æ„íøìâ¹Lª”ùè \3x'ªEbˆD¥Btÿ.Z‹Äúµ#Óž­oþkÿ¼JéîJð@ÝæØá[)\ -hj:úå¥*ú^_8Võ;àk™"äL–£€FhèJÐ툙ø·±»Qá@¡I-o W¾ -©eϼ£êhäåéù4ŸKן‹5‘ÉϽqk$FŽÛ2¥ÕlEÆ€ù´ƒšÞ#8§²Í]áî ÏØvÈ4ô õ,„S÷‘=Òɪe–40*h˜|{±{ãµç¦:x"«V&$ÆÑ稜eX eŒC½ãß½é’/+¸÷Y;åˆUvÊ{tƒ)N4;×ÁÕDý¢NÑ[¡Q¶F·¬ÊR¨¬<~«=nmÇ÷¶¤C‚®Q.„6ÓΗž› s˜E2&¿{@¬Ä‹€Êf5Hlp[Œ -'}Ò#ôoÛöØs ù×ùŠ¯k#­_Õo÷+ Ÿ€UÒÅ}'ÂL혊#ÒˆÊ43_³#TÝJ¾‹7çR³ÇÅÏþòñøm5¤ šÏ… Thû:ɽôS­Q¯ã}X¿«$î e£ñQ §^÷GŸéÄ›öÄ8c¢|7ÜÉÙ`6Û9T>û´íV‚¯¦G†¥…eHÞtÌvmùÎZ^~ic}læý²ÎÛfcË?njj¥-µ–W’Šºù$o| ™Ü€ýè9 -Ðûÿš¦>,ë>J[Y´£9^C”(ÔÇFµs.»2’˜û~×i Ñåsö¦Æô­FM¼Ãˆ  ïöoݲq £ÂÜEcµRâÅåÖjp­A~rA›ÈTŸ£Íý“Ìi‹Ô¥)êYmåõº$m¬¿ ²™óŽùŽ÷ï¯:ׇ“¾‘ª£Ì_“nE.>/ Ⳙí7f†Sßv÷ÆÛœH -übˆoª³,ƒxÈñ¶yÿŸ‚àÐìë^”ó­ßWĔѽà³ "ëaéíøxýÅ¡ ßúñp>: áêžVUqIUG¬ô‘qÂ+âsa•´ØŠÒ›tåʦ˜zëW…ârÎÖ $ªó¥$RTH5nXÁ4Ѐšo¯†¢I…WÖg€s3´¾4i•åìË™B©7s‘ ¡ŽÂä»MåÅ~Ô { ÜÁ1¥*Ž1‰ˆ˜’¥:x‘ËTýhï]ßd3¡*ù»â©­ŽAQtbQÊ ÁÚŠ/M&Òùž ÿÎdÈü+èψؠ«¶@r~ åv/÷Ñ2 Ê0ÓxCB¿Oð±¥<Ö3zÜOWÌ÷¥p-´ÿ žÄNˆ%3p–ê‹Û`]­éÚÄUÌG³ŠœgðÁ&ŽÃ3ªJ" ûŒ¶H:e+¥î\5]ðàÊ”“¹³=Z¨ê³B)BÑœÇó¿æ~I§èøþÄðº\zwóü®PŸ+Ö´âµkI:¡»® ’"˜¹û„¯’)?ÞTÝ8“í$Ýr‚0qíÇzTÉ m\Q‰W€ZAäh -Ì£œ‰7_ÀX.!¯_èdü}gSTÌùþëR#êî…¸e}Õ8㥂Z£Öx_ùbÁ}Žt Im¿.¾:'éC:FÓ0@µ* ÔØoÜ$9DƒêEÜ^e2Ê–DÙôBü·ù4ÎñÁ¸¿…’‘U‹É[6üýüƒò—„›ÈñV`}@H¾¦Çõ‚…+À׬n¹'ÙœAŸw,xp³%ÑWõ®59 é»ç!œ¾4 ?„#V’!»$Ï~ñÈIØþóo(ÆMn>ϯ¾(“å"¥ÍÒh±ÚÁ¥pÿ‡m¾Î‡’cc\ˆúŒ -[#´óÑŸ~h„ð~倃HÀˆòÇd!,€+{òöîŸs†{ls}¢òf°ïÇ««æÅyŸ7¬ŸÙ¢vß…,w”…bݼ+LQYùÕ+ÅŸ1oœ‚ýgnøœÃþš:º;Û MŠðÄB´‚­} Ä[(EI:‘Ä|¶¥ê%C”~Qj¦µ=@™œ­Û^Œü&”A.MÆÔ™ê8àì·6dX ò›ýñË!±Òþ-öúpÝP -Ú^-³kÎ3Æù§óå®cš1ÑakÉxÅY8$\Ñb<É\¸çDÛÎQ(·½¯H Ðn -q·žK“+‡MyÑþPúþ%£=Mz„KAyÉãŽ.ÝwôwÿÖnñò(ê×¹ªöPyá:YNqó‚Iqй{ƒ€´Gf~§w–Åùðâö‹p„ŸCJOtTŸIâ2ûµyûLD»j}êyfp9x‰ÏQ{ò]Êø‚Óíù· (3¯Ün&N].Úáy\.J;²áBž0嵊Ò]¥3 ª¨¤#í=H#RUÝ[A?«5×gˇ”ê¬Ø‡kŸÙKã¬Zð²K€³&*œÎK§u±!L3æ—\åLoÔ116¹æ‰$¥aõΣRÕ:V™§íŸ€*wisçOø%^ðç-Î\ÝîÇã·6nïòñ i¶égƒŸÄÅé§?4eB~ÈÓ$Ó44œîCõ-³ôÍ®%Ô!déÿýÞi|µþ/+™ô=ÍòÿGÎ&à=Ú!R0q’å=Mž5à‰+9ŽíÁ:+Z纓‹ñ¾ÈÔ™Q.“†\–^>EÄkf,9AòUÌ…êÔ4_Ä R#þþû¯°&Ü‹Mò•pVü‚^øI›³(>üéún¿?„š6š¿“Ye¨Ä¬¯Ï‹ý(3dæÓÑUág#¿îµ¢Úœ†®Vvžˆ?B• ÃoZ:Ú6®ÀuÓdÍ‘p>g«Ïº¼ì¼&v“k¸ˆ5O¸æ4¯ûfãˆüþàðHIÀ¹Œõ~†²ŒŠTW0=1hÆ‚y²¤†O:ÿJGŠ±Ê¢®«ãÌ´_Ð?B×\~2BÐt¢jwË}f»†EpÿÀ¾Wcªjüçä9Ç’PV],G92"~§ !æÏKÒ§¶Ä!»èh;ðºmXÃzžª‡ ¿†Ýxõ]N·ŒGz=?„þT«ˆº“it Ó!^ù™¹SõÖ†ÞÛSµº@Ú*ñ´NªŠà?Þ «íñÉå]”Òï®^òS¶mõ3(AÙ»#δXúpmn÷¤´‚ˆ–Oõå?%ƒ™sù¦Ô¹Eÿçù$ü÷²>t›Òä"š¥à1;å TŒ$ •~k®ÒçX|“*l%ª#ýÿBAX³…®ß @G¢ú»z™˜ö³yè(WÆè÷8©ÄA$€!`ô²,Ö4ŒäÞ—xłɪ«‘"kZh- iOf¾Â¬ñg¦â‰Ñvî:RëWp -—E]÷mk’A¿ÖÃ!«ƒúêÂŽW‚·ñ3MÊÛCÁmÔD“ÕiÍjb™šm^B‹ûz¤[=ÞÎ÷Ž0J;Å+>ˆéÖm7\§²Ñ—yAȆ†™˜ Öc©Pdd)´à®%Jˆ˜ºêÛßCƒì¸D¶ÒU -"Óü)OFF‹3_ÔeѾœóFƒo5ý9ÿ—2H{Ü*¬ZŠ‹<˃£Ps:»; -Ô½Yðt -Š &WxÖ™ _ãqüñR²†Á—oòëqðÁ8p8¤Êiøö¤ŸòªÍ^ë3|î3ìï†ù¢Ÿñïû«cz½™71à^œ‚w{ÞA("S…kÙP2ø=9r¼·Òì@ëóÃöûç ʳZÂÙu -¦Hð@ûc¸ÛË"ˆÍ¿ï ÈûO‹ÖÃQpzýKT¶5žœõøÕÂζ¤LÒÏxýj±Jj R‹¦'‚ âçV–¿Oj:Ú¬+jNvV­Éu_h¨÷—8Û ß½ïÇ>9`Sh¿¿ðï;x_1¬TéþuÇS~Ýb®øX¸çO¨E"Öœoë꒢Р)”L7Wù!VZv¦¤Ç±W®yð«ñ ñÒ⥕dèfÌ^È!ÆÞnÒ’½ËQ¢õfy@uñ°ç?ÓÂTM£|tbñ xHå^’ìÙ¶#¦]Åuþ\ʆêßÜòÃQU9›§g--Ü›Î/zUÏ0/<;;§]2@dÉÖÂyýnií ö'¿­r­ï Â×ĈŸ“qøPÝâ­á>¬pUIǼ5kd õ® GåÆRàâ«î -‚ÆùÜ!›hEñ}¤ttÏ°ê4¡¼C´äœ“'7çÊÅÊ~é—-¤©6Æ>|é¿¿”’nnö³è>í‘FJ㫱ŸJ4XbuìÓä»PDèð99ťȆ–2ZÅC’}?¸^—–ÝQ%‰‰5~ÏØ‹—;~¦¡Æ#“ݸ^'ÆdZî]€Ü ÂOêiöFéuoP´1ì •s½s=Éh[ãÞ‰#ÒþHþ¬¨‡ãµigH—ÚH‹GãùCt¾Kâ}¼j]YŒñª06)8¸˜ošYjKû Ií•Ú€«´`Ì"=€£ÁŽÄP=?Ëþ„¸xfȤž§»Âã™™OvŨ¡^ÖãTÏóCŽÓ#òÞ/‚,G/†Àv·‡ZŠêÿvyÈç H‘O)Rv)Üêð'©•\*¥M‘É09ÂM1uáóFÂÆ°yË_U[ŠØQ|Å4¹‰Ó ,÷×»îÿ51ÒL®'°é”¡Ù—ü,•»3_îÄd_à—¼¢Rª“¢ÿý÷¯Ý¹ç\UÙ71×™œdI°g Çà±iÊòººÒµw,Äç;J…¥Ë”þhõ'hÆÔ¿Xè®v0æxa/û°ôBæšQ,fßL—k¡‚J&£‰€(,qø(ÆÂ=K¬;lâNNö‡^>ò«9*>:¸ÜÙëÅ{ÿ·qEÉǨˆ·èv9àÍâT´¡šÊ¾Þö!QfÃE É”Åh=ˆ ¬Z “MLØvZrò˜ø“E›2Ä—Ô|'’Îf@¡³çé6“‘DüÎz¸ñ’iúÄD‘1 Eȧì"“¶ã™ßyV –u¼ºÑÒ"ŒJ!ç›±‰û½¤žÂWeMî÷¿’:*~ô³ßöR£ñÒî¯â݆§T1…L4;|({+Þt´nÊh%»Ú'àû?J]!b Ü'dÁ„ý&¢áúVi³T3pLËó{Û"§/§{7M˜â\ïµ rgkË0ØDç]™Ü]ðh¿w{ŠÌgÄÕœVOêç]×îöM³ÒI\Š -UÅ­| t·“åfkÍ#²¿Yï%”«7òûÈEd ¤„¥fw¼Çwy}»¤”!|žè9=c©Á1ôåaW£ø©#×cQÀÃZ;€§ ÉaR"/7@ðõmDCZÈØïªùr¢î‘R=s€ü5cØŠHë5l÷åИœ"h´; ‘þ¥Ôïsòm*µÒ;b`~ÌÅÈ–ÜTx=ÞŽ:ÓÓã¦+Ø-R¿å|püįbr´®ÄÛGLj$#‚°ô-Ó|>R‡Ï˜âá5ü*räPM[;aS,@ª83B$|_T¡»„|Pž¯Ç®q^kåf3‚ˆl·<Ç!ÀgmgÝæ½SzîÂ÷Ú™ñ!O~WSjeàWÏçGeyÀë4 ýêÜFây8£ÊHx!øÕÚ²AQƒ³è³†ØÍÕ8aVŸC8³D¨÷?D3h!־σٚþk óýœ³ý/TM².t!¯Ô¸?r¤ 'Qyuë…öë0PwPF×K8[5ÚNQúÿz0ÑÖh¡2"J]5ž‘‰øà y#žÙÂ<á˼nz²`AŸµx¾ÍE2á«©ù 3y‡U­Æ<Ö…*‹ù&ºã±‘ÈjB8½®A’䶲?Pê­ÿ8Mqà£F*AÑænÌy”vŒ:"‡B¾úpXRIµoئê7’+Ývã7ú—Œ“°ÂAÉ7ïGYB0 8!³êZ é‡^f¬’æ¼Î7ç™zS’*jÏp'âçÀ꺵xb¤/VH[ݤQ€¹zˆšÑ]†'MµÜG =µ·û©3ö*Í´0“åáÏ·rJ®kÔŽxÃÂÙsƒÐ©Ò©*²eÂ/kí>0„#§\}à&äÐ+X¤´r“!>d•“­àœvíýùY‘Œç5>…Óív¾Ä¶,ýgmÔ3wØi½yŸ™ØìÄ®În/¾¨qü6Üœn/”äL ³CúJ£ÔÁæ†VDÇZGhXiåÕ™y—åÐq˜ŒÏO(óõ‡}I|)DÓTéédšé§cÍ4jÉULÎTh^Þ/þ×ôþ¹6ÙÏP7Èÿ †¼ ür3Ç\5Š'3ž.Rá_ƒêɦGÀ6CVuÅʾ«/<è>Ya—áÕKˆò·ì¡»ÏæZ\ß{xúçíÄ<é&SéÁ­Ïúø:Ai’oÊ #}»Õ(ÚÓ¬E—-âé…Ì-¹tf_½ MK1ëXIÝ¡¡d*Á´óŠDÁ‰¿»ç€H ã”Ë÷,aÝ„?Âݯ#±:)€57[íf=%^–ÚxÌUþñîû‘ÏÅÔ ÊÉKêáK¬{7J,C!lÜÛùuU¢Æô’A#ti:Z1!Y1Ç|X5¯…ÌËUÑžJÀ“‹Ý¤°IúÌ7ÿÞ›†ö»Gp×IՄȹ×L+„Ït|LG>?yC€¡í¿É ¥’JgSˆÈÇiZK]`# A­ëÏI­0Øâý1Gœ™dƒ,ÆírzÖI¯Î£tYÝÁlQîHpü­Æ?ÉFÊõl(¸0NDï°ù>XVV8µ[þÆžd[î ˆ!¬§.3C™f"ÇÍ h5æ¥øɪçgL*GXü8ÖIÁf®¹·ôi<¢¸gu> X©Õ‘KÖ°o‹à:ü¨ˆ—ø‘: "ûÅQRT SAàl¸ôƸ hÝ£ÿÀÙé%ÍME©)…zJ4Õþ™ð¹«êúX&\6à9ÙÔ%A¯Õb·$ö?gFw/q_èõ$,Kj‹æ0·ö¬„¤ ~ô ì´íR;ª ¯®ÈŸ²G·^GÓÂK‘¬ñŽÄËüwçád-`¾9Wláðå¹»y¶ûa׳ æèŠ4’ZæAVŠ!!.Ñö¡³~ÓñÜ6׎5¬YÙýwéŒM£Ÿ«¶Úê2u0¿»Ãæù±¸Ø&!Gïa˜=Qy*;_²WÍÂÜU7¿_è ªð$²¦õ.Êûwµ-ào·¦µ÷Å [§å‰5¡ë{Á^k~'Ù½nîØé8ÅWÊÚ#2%õ5³1Ì'êAðn½Ð@Ï祫ê×cú9»'1výL-/÷(Èœø*)cí% -=ùG{ùÄ)óêÿíñôIDJæ6ˆ7©ÐÑÊË y@{r‘&m»ŒÖ/EßHP8Mj'Ðj Æži1•N1ý ”²TôYß…-ÆC N1J ~”ƒ]"ùÈãÉðŽÃÂÌu{¤iýu×5MíìØVY!:“ -M -F_¾e@eO23Éß4FQ÷ïèÂFi9'Z’dïÊk7l݃)?ñ4Þ2i¡S"Ç({x]çù_*g ¯&¼ZÇeª—c$¨§µj$Hwÿmìx¬'à$³Î¬XrÊôÁpÒG· ,g¿@ *5YÆ®>u€#Û/¿]çè–ÆÕ¢Ã.zGefÙéð~N7^_‘©šV3{¨.¶vMüãy'½Æ8‰ËÂzüº³Ao%.fxÒß.tžx8VêG‹FE^Î QíŒÑD27”OW…[(e6Yü³èàtK—oü‘¥êÓïšÀHÛw[ísýa…jÿ_:@³A"ÓD"ŸZ0­†äf„Ú’æ»ç®¡v¼rãVkÖO¹C5&Òl€<Ku<¾u{ä-›¢z’-ËGFø·ª“+PiEÅÄB¢Iÿ³†È šš-¨ýu»TŽIˆ¥€Ý“ò„±&ÆáÖ;1ß]Þ'8\Í1D·zë¥çÅïÐ9?âÌm&¨x\±¡¹ETØ"IÜy¾™ëÌà“÷ÄõŠdÊû-µIæÑy¦S´x/¾zñ¡Õ­NmP+ü Nl-d«íRpåÅo&Q„´gö•áðE0pÐ2XÆ*B¤éù6]‘Mé¹eó.0záV¥î´@ aâÇÅÏÿad¦&ÉÝf‚ÑV»+r +ªçJÚrZ»n–€žÑ@~mÐâ%*ë»—þ^AøÆ6%ªNý!“Á_ÄàÌx˜/Nj¶s·ìõêK.‡6»l0M”ÇÏq ¢6 •à›KùÎÌoC +s}Z4IEŸ;•Øi91_ª Ž`~Ÿ]{»þ:U-ðÒ ПÐÔ<_ Ûìyl>Ò’ Ï~F»FC6ÓþÓXUŸ2ÿÉàl©&F_Â’B>ßô…†|^;¡•¿WGy)ˆï÷4Û0J\ß ¹Ÿˆ$›âÎeàÌ_¼‘¼þêh:OÒ4™mFÓN°§#îYsJëÍ|TÛ©øÄqžoÚ]Z÷ð£ +°ïCQÇÓ–‚Î$˜»¡÷nÐl…Ÿ-PýJq”̪àn3 6ý`ú ç° hµ/<oLµàHX€Ä÷Ú8¨U¦Åµ-xÔ#Êe:zaÛ—~ǽÆÜÊgf‚òÑAä´›øì®&–.)OŠðrºcä^ +mÍÜB‚·4Ú„×+¨/tá>IB´ ¿K ·ËÕõ#}ô¾ö˜ú½¬œ…× ŠâíHCöJú È/÷©z©W5…ÐgàÉùYÛOë‚*‰W8¡œÉEv(ŽK³$ñ!!NÓ11™ïwϬj3«Û’|=M‰¾C!gp3 ukZ¸G°C²ñ”Ï7©V¥˜#+OÉ —õ1Œ–ZÒðåo”‚¸¯š?þj`;Fù_~ gœ _à. ùªEbˆD¥BscŠ‡Ñ(bû¶s‹'õÑ›­UÀ¼^ƒP¸ôƒÈúY®è=ZL™2®ŒCñ UÝ'ƒÌ‘@º5ò4Ò¥^ý±'u%¤3Üèýåþ"Uþ¾PÛQ²ÛÂPª˜ÓÐ'%¡Y5‘ºࣔÒB¼°š/¬´M[—m–IùX[H:¡× +q6ïÂ|!mm9IÓõïñnÝJìÞ@Ã#ŽÝ ½§s{·‘Ñðd]ÎlËšmÛƒ$2ÒÄp{êmÛ}§T+¥ˆ9›®ÝEÅÅ,7h3̜ћ°ŠVŸ¸ÒªÛǃ;ý—x%™¿S¥éˆw®»O+z}ŽóU|ØGpG‘ëÐÐ+z;Ž*:E›…2“¢£I‘ÞäCñTAR°‡Ö«ƒ…ý6iVùã + Ái)É>ãA=ÓògöFE·[ïK¾/R?½WŠžm˜î[pºg:­t¦yáUÎx4=üÛÙ•˜tÍb¡!Ëü´éJÙ NÈ6=õÛfâ¡Ùn<…ÿø‰<ÄCTÊ +"EAPüaÿ&­¨¯rÐ9l ±ªá‡r‹ù¹ë?Á ZBdʱ~3Ë»´‹Õß\b¬«Àa ø+â\S?;Ř¾Ó6§?ñ#‡ÅWÖ¶g­º".ÆöäyU +ný<ÍÇŒŽüêN;0¶µý•ès!  .‘~ÃäŠE³86ðo=òËm¤>¡€Ê:ø1.C,]9ð™&ës<×yGÿ#È`ÂL™±Êµy!œ´c2¸Ò”=3,]ÝŽ—»´?ZC †šIö³Ú"‰œŒòÌ7®c[‡g Çùâgj~ù°PºúØ/¡Ç*ëîƒÏÂS6‹ãÅmIxÑ~ë\+à¤2Úg ‘­1Ó’x:ÿÉ2ç(øîôä½÷LS&Žg}ãàìPõåùõV{±ÍqCCþ­…Í°»V›µUñ‘Z^ò=ê,ß¾Ô¼|ËU¨O~¶ÿJŒö„þjÐa¾ëâ ÓÃãe$ !Ç:!î–mÀ +ka·,´D:øm¾ÆGÔ)¨ßãm4†!‹|¡¹ß4²CY1¦sXHü¡f°•µ+ߥyƒälú8áe»ÆÑ*Q2DSðkŠ5$q¥ÍÜ:¯4~ð_÷÷þž¤Ê°èÑWö¾Ó°° Çòn`‘µÔû²Ø×£i™ˆzÔÑ-Úä\ªÿÕ[h©‹S´t\Ý*z|äTFl»è±çêX¹aÇKl•å%,¾­¨që×ÕÏ×òKWv !ÐWYðS¼E}è©:­Q”&ýZºŒ /)ó™“n¡â‡©4‘BÖÃñüNØT‰e,!"ïš bJxœÎ¥«>1qUMáVäB‹¤Êg¶`4S@¦ý-å²’ÙÑà_z¯!$‚,•¯‰1­Ú¤‰…›&8cr§”ðN8®²¾ ³”dùaפGÐ\WªÊ½ƒú¤ÊògÍÚ6CȽA4 öJgðKËË'<ì¿©Ýwx$›÷̵«Üj½è§O_L{´(3‘#çrM/î {¦ +C+cÞk¢¶ÌSÁð¼ðQjDùjžqK¨f`Áœ¨ºïð2õÙ›ûXDËC·7(¶´6h íå²ö„‡§ÌÀÀÕ„›ö/¬/.)ïàqêB¾ËÕꃙøÂüÁ#=~Úmü9Ï&’¸£‰ £èŽëÉÓ³ÃÂdÿjc„AŽÌQ¼hSôÔ—£Žiáçë²ïØÐ¥Û œ6±]ûgCP[Þ”‰ÞXi£Í©gÎ<+³Î¡æ±ÿ–—‚Àm¯V‡)Çï¢ ƒ\Ö‘’Bï˜=IÝ--Ÿ^}»í±¥Üh¦uM‡ŠîàäÊF¤òóžE›¸S^Ï_‹IEýy J˜[¢E¿¹/I{Ù3/´uIWX~:ñx…!=D,u^JxúÁ÷ðÌf¬»žÊ,ñÏÒorï¦Ï\MÔ_³¿PtÞËÚlBýã]ÙËïê Ì£Œ Š32…í´q×Òƒ†c°À>#€XÙÐB€g ðxÝþ×ú­“ÁbéàþâgðÙí]m  :ˆ±¼‡k߸„šõlS2&­ŒÕ¨ï¨¦j'~‘‰‹\lÅ[GGÚ:jØO„»Í=ÖÓu!Ç£¨Ðv®…}™»¤·Bµ­ôLže†¾Þ›AIñÔ·Ò®žžæŒEÃ+5'V»º`—*Gx!”Ðú{„³ÉŠpä4GïÂå +V@ÑÄƈÜÕìG‹bI¡%¾Úò39öö$~‹)QõrFe•òì[úÑ«â{Képc ɯE$$µÇèº$L×ÂÊ<%þž¸;J±.ßÉx³$ze´F«Â0»4#îþ*Go«±‡XJÛ94¾ô|ÊKÍt´­›ð/;%I£Hå4âfHJCÔÙb4Æhð\¬$3vyèX ¶ÈÎœ¾K€åÆRM—IˆìHÔm_^)7É[PÐ TÞ°2ü@¬Až¿é"ÜÒnL ö:5Ww~¡ \xÅ+²†•¸z(0GnÕÙaY›™›úš-L+M$ã³"Öu£ù›VEÔtL$?¢,¸¶ówë®+Íb]O½sò[kvc§Ujé™ùa)Žc(€uÕ®gæFí²”§ÏW0Î'ÆÎÒYÖCØ"%++• ‰h˜bJA¹¶é +£h@&×>rWeQ`ÀSMé´míàe謺v¬ß 5%kªVÓ¸¿nkÁh@öèÅÍ€åOš$U¡¡£]Ïû@ŸÌ¥ÈÔ‘q8–ÑÜ[»zòTGù¸WÚž¹xÈïhÂÃâ ÞEæÆ|i¦h´:ã ¤À[2Æú†ùÆžg‰;¢rßTŽÿ:ºŠb×YºÌƒ¾*í…OÈÈÌÈ@¶ jNô?¢Ó…ÊÓ–sÏ€fœ%Û1w„ +ìsí?ù…›¹L@ãB;DÈcM7ΊÀì™V)ïj“í×ÕKà)š÷+GNÝ_ýð ’• +¯[U@¿†*ÌRo>_Ÿ^”¡Ü×Û¾miz £IÅxÝ0'õÔ›<Fÿ–ÁA˜÷=U¿~ž´3DËjó]*©eï*9=7³Lo-É)<œÃ¾*LþìåÓè‘»ÇÏpÀs3~(@ qjqM†sÅ}™éæøþJ|YìžÐKÐyƒM&ç'Kûór„6ãÑç“S ŽTH(|øõ•ñUZªfœ(´w΀ðk@Ùd#z]ŽD­ê¯®C&"¼Y\Š¹& Y** Sp·?0¦¾ ƘuRú©x“ª²ái‰9Z©QÜ“½®ø@&²ÃPw”¢i9»Ó>L»®x„!ˆ’ôƒ½‹ñ[ã/ËÁ°.¦k&GæŒÙ—¿7õ˜¸WRh´Iì.Cœ>>ƒ\ö ‘¥ —¿¾Ä(Õ&Ö¯ø©¨XÅ$ý6~¦ÞÙŠy±™?ã; K€ÝëWŒsª¦ÜëÅ)ì±Eĸ4+S¹çß<ž¿‘BšÎÔÅìí>Ubq)S6ö×Àî‚J*.¡›²Ð&õÞâÃ$X o´ü4îŒî.Æö µ‹Ö*izµƒ\Qª}qPF§ºƒ9]q£ Á$ Ù‘“2ç%cê‰j]”Ú +;é#î%$6üØ>OïÑP(}ú nœq!$Òý“OXÙöjM/ã„A…Žz^ÐÊ$kq;²’á8¬œ©„ƒßzÎI$š‚ éüÊRwŽTi™öÙXIs[~}Ä,dQü®½ôc*¦@/¸”J ƒÂäî^l¢©Î~ÃÞWA ¡.p6ÂÝ#sDå—ý äoöA™ŒktÝ åþ ©ÎÇÝ¢¼Î¯*Òœ‚f3'Xš~é]kJ€Á¿¨'äñ^b:–úLFyCĨN$=ݼ±/&û»œCÝ{÷f]\„KûvN¤ÖOЫtc5##K2z¸.'p+ø3 Ã¨šÒ¥à6Ìf뢨¥z·òÔ^3—›Ÿçñ1 ª< Å z%šó/LÙ³”pÙ™<¼:½¼Bb¢mgžÔ؉²Óÿˆ®ÆqïY™Ö¬žÖøãœF¯{j- »²`çÕ!7½nå0ã2¡›ñ«8mš4&5Œ•sRHà`†ÐA¿û•å¸$Œ(Gú¯¿ P LãYšKgadŸ[»ŽÓN.³ú8!übÃËÈ!½ó nõ/ #ü@öÉäŸë°ёަ`UîÈ12r½  Âb#Q8ï:“ ÔñA~½‰õyuøåÜË‹3ØÖ_Öi‹+q ÚxÜsúÃMßãkêlÉÉ΋a²`À¹IQâùlÈmÎiûÿJJ÷Ðu½6óÞ*øܘuÂ"ûùÉ-GnÉ8PÎÜ‚ÀTñ—"j2w<tÈ·S½(·×1R5Ïo„õ=¿íÚ-I×$ñBÄ™ÓjéÂyœR@´PÒ +· +‚ó,· ˜Xœy?QHE|®Ü<ÇŠoR1<4êå4áÅ,!¹éñâY§ßk¤ fí¥?#;™Ó»0¡ak:{•#! êJÕøÈ_÷ÊÖa\ÁÚþñøc™d;Ntæå”sî >8Pú«œƒê¬6]õ÷ÌÕaÜz¼8E9BB9ÆñÙ/g#椫Òƒpl×rÓ0Úy.<<©º÷m¿µx=³ÀöÖ4¾YKe×i·äˆœ#¹±Ô©o>¬]âŸôæñN„E…ø>!W›£jÆìÜ_:†rRu8Y‡A¶Ò<3)›ÍÁ˜÷™ìÓ‹°,oG0—¿‘’>„èúýk ¢^ǘâ3â½TÞÎvúíÐÙCˆ¶®³ó öHkE³9&Ž¦\ÏE{o;¨–94çÊòj7Í̓ˆPйÁ((É̯7¤F½¥‹Ä-6Ù†QJøF|ÊÁçåQ'[pû5Cfקvôë-º£`QWiBy}‡Ï0fz1o ¨E2Ü °PVp*–loübÀÚ‹:J¼êmå¹kR¬M+¢OÊvñ0f¨1 2§hô_bDÆ:RÖ†#Ü÷˜¶w^¸5Ñ4¿aM…Ñ*êµR7ÌÝA‡`µ1ª¶ möÚq'±òGÿG<_‘¼N#5{b¨Ô„r5^r»¬%%7ã=y·æÉê}¬Ž5OêœvÑW낉FÉ3©L\õ:zEø“(Õ󯚛šË <Ëü'^sqâ–Sw˃×.áïÞç²6 +¦üg±%ÛãÜ!sKBÔ1Ce>w®¾_êñÕòãÖ¡»¤þXâ¨Æùpä+‚a·4œæîR¤uKAüû_€†¾Ù'J‡ 'ðÃÍ#2%lñ*èy}S«ÐÖ‘›Xù‹Èntw"Kv!.L!¨^Aëc‘0̸ÙIâ]}´>ó,âÌÄ¡ sûŒäÑÿ£¹BÞôÆÇ”SúµšÀíÆ ñI§2Š=åííëõ aÙ pUÇk}aÙÁ1I™tˆ¦b=¦ÉßÖÀ04ŠýxžœÈÒ²8­:I#_ó¸Ë%‰8søKïÓ n}èˆùˆßB<`û{6ä=ÆØwP>[ŸŸâåú÷wÞ{M‰õÔ½‡iÖ]ân¯Å6øäð4÷6*þNÙ'3—Èê‰ÔM;óžÙ6Ö°M„ì'*…bôOÈ”óßþpÄM–1bM‘gî³ÓÜ›þ„«]C~ƒo´çå6œž j •©¿¨Å½û¯1 aƯ^$Œýr¥S§ê)³ˆ¸Ã08IúËoö CŸÒ8ÏÜÓCKc +]=@°BßIÍOö`íÏäIÑ_ŒÓ1Gð®Œeq„Mß*¨öߦE˜åûSø„ˆHƪŠÍ@j†‰ÚÍZ×cÔ¯—UŽ1>@†ÅqBôÈPû©Ú|?.}i¯k÷­õÚ¿þ«£ÀÙlÖ–Ç Ðw`dþû{9ÛI’|á›üã%Œhm€}Ì8Œx‹A™Àâa›Î|ÁºáÈàuR¶oüjc×u§­2e‰‚ƒ4fôVqÜ‚I#Ø'—£>K4Ìhô M‡ª‘øUòbŸŠžÓªï(%ƈÚâºóµÔ§Ÿg{~‹˜ÿóÌLT±{óQ!XŒÃ”ý••ÜXfÑFe5˽؉ħäò0ÀöÀ`{‚›ÑݽèОµ5±‘!ÜïB»÷Á!á©÷†²šjã±xÕ +­üîD±Z¿ŒÏ[LÂâ­?Bg8Ì&ú±¼„!~æ-ùÊoŸ€FÉ)! Òùؘz¯­#H,K[Òšýÿÿăed*eDœ·š®Ïð%@ÿ3Ë™épHݾúzÓÒŠÑn—í®ç»~Þì—"eüA#Ïê†ijz=ÄÛà“véE«ž¶n˜FÆ×»Ú¢£^±?!¾=ˆÒno_h…]I~'.I–»O˜µG(r{Í/¨ê„oá +]mmgf³Ù£ºï\ÛleÉ!5¬cG‹”ƒ´U [d] ô`ÁÓBÉC"îA‘ʸ`ìϲ¸˜,aÆéo'oÃM( ÐØßVÙžÓÛ> +í)×ß=1î’£}¿Ó?uÎùµèa¨ p‹ç©Ú©ùßz%沃ࡠ+ø{oJ¿c?ˆˆiëi’‰ ü˜ ÌÂþìڰ¢Z¦„­wÊ7mORwl"l›Šè¼™@3삽å‹CF÷¸33¾hr‡ ÿkªVmç,«Õ£gÍp1,üb1Cã²c”d©$È´–õA`•`hã.Ë1ë(dKåªUs7#‚©zYVdièk0XÊ»>Ñ"ó➤Ԃÿã[ÌW£}×-ZÍ !hÂÃ:%ýó+–Äò·'ÓÛƳgfä`»ê!wÈrã>7^EÊ|Ž¿C%ìYäc0¬ t'úh/©œûÝZ~~y…AGñ7›W n*È:Ú÷QŸæª.YW‰,.^¹vÏ×ÈÛ)}J ܈è…)ÿÆŽú-ËÀÂWp/MP÷£-©Œ@É"³´¦®AHl¸Àpf09X”Êè­9 ýXØÖ)A} %T*ãPi¢wisÎ<-«'Q­:Ýô ÖœÀ‡8Ì¢Z¥±â°tãÆ4T¯² ){, ¬ç<ª¢‰8•3¡þÿ†.ߪ–ËÌ䌙¦U¨A½Õ[Ç-´ÿ}ÖT*ß›J‚†ay&„ã ²'ùÌa¾_$1æb{U±H#ï†Ìz¯ ¢ªª—Ôf"ùÔ‘Ü~,Ú”ß*Àù-9‡¾ 0—cEtEO™ÚVÕ^í_ºÎ’8”¶\0G“¸.ô08(2³ 4 ä+ç¨âô ™cyJØÎAz*B„×#ªß åÌ™d5 b…@ä a!%ú°M¾‚¸j[§t?l³ð¹ç 3É¥Þ!ÜU{UÓŠ–/á@JÓ‡sK€V+!Æкã"ÒY(c„)*.Ða½"ƒm:²wp_Çš‹òËbí‹ÑÓÊO¶¹û ~\Â^æcò‰¼B1uëÈ$°¾rå%mxÀ’¶øýy$‹µÕ{[ÆîÙãDïP. ÷‘fÅ7› +-[ãµþ^P‰Ö¾?M]åÂP€Ló»ØÃì˱9äý¤Å}Ër8fŸmQK…·¥Š —û¹™gx`ŸQ J»7Ö7X…ÈEv1ûN ß"PK R±!×V¼ãʪիkÀàåì ÌHÅ5חǽ-¸NOÀUÊ8&þ“ØãX;ÛE +BmC":ÚDw|ò™¡²FYVf(÷ZŒfF=b«Ì~C³fw§nꘅãä ðõ&i^«´)H’uWkõüëP`š…KÒ(%9©»´ /:=°œ'œ!/™ß8=JУÿ=¥§9r¼««}<Ž²‹Ò,tMD{p°Ì?äN k·ÜNvò7køª³’õ±{ Bv?³²+ƒhAÞê’Œ=¿£Ëî—JsÀ«€?iHÐé1ŒkV—:Ìnð Ï‘èŽôrн ³â–mÙ.O„FæD‘.lLçò ')aãw¶¬`t¤ˆ¡ìA#;•FæÝ8€~ø%ô„ñHšÀá;×I›AZتý™¨:ý®Íà@n:†Ú(£q';yûÜ‘1j—lóðÜâ BáBq–'ÚÝúÑ܊ﱪýÆzÜ>|Æ÷ÀÍkù‚æ¾Ç=e"ĨåK¯ÕW +ipµÓÁµY-§bJÝÏwpvõS:õ4óøn¸?,b)Dós ‚S®‹¼¼è­GCŠ g°œÆB´n*X]Ø¿€Ö Ô*f q[ï¦}NÎ yZÂì¹uVìÄ@GmÌLϾ¥åô:÷É’0oUý®àb›€k~ÇBA_€ë°Ÿ‘M¾,ƒ–¬˜‡¶8ű‘ë–½¤m6è×WÜ'µ :§¾£R¾ºÅ~ ¶rƒQÕO>é%³*Š]¹eŠ«|x„ë|´T".ïsiX­7”È“¹Î—¿•ÒÄ~Ð1-%·«çÐØηá¿L+ÓÜŠ‡rÒ ¤±ŽôQxpÍÞ)QP9ܬáúì;›§´æ êdÈß÷!-q k× Ø%ÅÑm*c_8pI ÃY!RhÉ¢ˆãËÖÐ,¿öp¥e¹I¢C¢„-$âcŒ¹Ø•Rɠߢ>b Mâ\G0" %²0RAê·&±Âc´OB`)›zÈ£cÓZh4|HF¼ÞFFW­ùø Lô“Ë¡W­×îÍdðLld»Qßéï챈0CQ9(O^½èÉF:ŒÝoíż[(ÌÏuGÝž"q8j +ˆ –íz.F>ip"fûχõ–HÂÓ_A ;–71{òø5h¤Td}³yÄåyÖ`ïـ܌¹©ÙIž•‹ã}TZ6Íð\“öAºÒµw2ˆ•Ã¢º£T,bÁŽçÔG¿"¥Æ­øÆû-ŒÝd!ñ¢èlÑ™OÂýHY˜\e&þ£ÄuÌ=§âùàÏá!(54ùþJLê¤Ò¤þ0°öRüzá^kRU“Ÿ£Þ¢u÷I@Ã,ûÎóí)ëaàáªÓ4ÊÕ‘+{(På4y Q¼®ß…\Êmsyö¾/õ^[iˆê?v@ðé"jÓÄ„ „\Z+ìµ¥ û“/!Ù_=aSú8¯‡¹Ä +¤Ükæ+ǶçV óåiýwíï·ÈÈÑ5UŸük"@!ô½‡ôl6›3Tìõ\Ùí»ñ õR‚ÀŸuŸbÔ¿#;ô¶¬E âT;ûõü¢G§9›5K]Ý“zŠöVVE}£jHÛGô+¬w>ðÝž÷ gFÇàý'î8ð + +eª\Ã6OÐM9Eâ´yIW¬w#û<ËŒ)Ùp%¦¤æŠ¯þq\ÎDs“#¡ÔWô<Ìœèèúõd̨ßHJ«çVkr5›QŽ¶«· ‰ ©£œ_ùç !=;¾2t?!fš÷Óη¿ëÞ¢€%þm®.÷õÕwËÜr¿j2ÇXÜš,Rªm¤Ñ™«%mè•rcâ4ç€2—(2^œAªÙ¸IÒk™¬µrí»ru®¤,G}³¥jЄ)¼î³ó<òˆ|ÓÀVäZXô ¾ë‚SeAAý/öªå„<”™R>ž¤)\:qPš4±…= £a(uê3©1~:ø^ðàZX»Æä+½8iù7_[ú™ÇNòþßDp)„3.©Óƒ¥L®›µLíÞ\ñΗ³}VòÖ"Agýlvâ²33«òáYþþ‰Ênˆô>,±G`ø¹¡ZZÍ +3è¶2šx}]O-iD¦u¦VQD%émÙpØMçMî”ÑÎß-)»C†Jµ·p¡L˜¤6ãÕdÖ1A±í æÿÑØ,äЄ5.—ð•ïdUšS¿Ç̽®-‚1»|јé|u“Ã`óÈî:zåœf\ÖBõñë£F=ªˆ.©jKǪ¯ž«Ö»'+MÚ¢jþ»&õ×æ@ ÜÑ^­¾Þ8uR<ßÿ>¾.ÊŒ°w¸£Qƒb 8S•uÝ€Œ^#TÐ +öO1PZ®ÉÓŒ$|%Ï!¿)OãÎóz¦Y°F1òH/$Lt¥èâ‡MùšßÒá“‚Ìßû%æo·a+¿ã×™ p"D›vz£©ë.\'ø±`+žÀNcéã¥&8‡{ìe‚œo3§Ï¶Ö˯·fuL‰4ù«s‰0ëvTÉbƒ¯Y7^XWG–Bïgc§n[w½µ¾àÄ9´>š§\µT®·vöžÿí'nÓDŸ·Ó ’­qr¿L¼f§ˆÒ Pü*/™±‰»…¤ÏU¸º‘´wˆÈ¡ˆåÉf3 +=£c3oy&˜Ü8¸|1H‹™Ý#g&¼ñÚ49TV]5•Èæö¬8äê‰mÒÚ¹Ù†ü]Ú±MÆF ÜUÃìÁ¸·# f°'[Ö¸´òÂK¥Fˆ‰KÑ©Þ͸W]Ú¸¯“XñÅÒ:"¦¿M=sX¦ ^&Xµeö‡É§uÙjIüPdóX—Tå =Dït œ‰)¶QÈ©ú¦˜»";垎ã«ù†:ò \E:Là—#‘’šEøÊòGXÄÔdvß@ºuúìqÑçùæ”è?—Ó§"áÝSØ•}Ö"ó¬zåÕ—ÙÊo{$i0Ýûé¡Ïˆ4æ/Ô pvÓ”þi혤 y±×¬¤ëG±ø%­¼ͯ:ÞâÜ‚ ØÿƒãH7l#Uÿž úÜ*+¶X*{ò¾§NÐ,ÛÜRh§˜ŽR[‡ê!{´’>î›eÎÀA˜FY"Fªžƒ9ñ“Ÿ•ø¿é®þ±É·²jÞ9Y‰Ã\²Óäö_Nœ'W)Cûdï3²ôB×LzMLN óO…X[`˜®j¶H5¦?¨æ³MR‹ÐðäìËãNîŠÐ;>#G3c;¥)§`UÆvLfioÉxY’õ‰¶&;„)•QܱÁ®G^òýØ"›H¦£ô™™>jHDkº.­”_Æ«Ï>MÁ&:0–‚AhˆýD@CºÈò7¢’æ A›ÍámIŸ¾T\€A×Ñ2‹u¯‡lX6`òçnfmÿÎ{_Ü0´‘(.ÜW—ägcmþI÷ZfƒFan½dÒÅj,SÈ©cyÉUâèЕa`•‹‰a5“]€*Ü,Y\ú57÷©–==´5lN,ŽnÛÕ·-Ðk£JCq¦µ +÷÷éº!á ´ ÏÃ̺¶¬8ØàC‘L'2•£ ¡v³6Æ Ç.ßßÂwŽ”ÈÒ+·¥Ný8ö"‹@gbp}&©¾\kš Ñéî‹9¦Ñs\ðž¿FxϫѦÓû¿ƒWÔÜ'É”úðA(øcHûšQÉÅò¤”Š¸ëPeB½[À»>¼âçÅn sulÍ®¬’¦Ú7"^Ž¡ü\¥V4É8ˬŒÚ‰—W¬ÀÏO)Ã1ÃtÐaDÒPãQ˜;ñF!Œò6'O布î„ýô6­;dmýõgƒÄfYúw1ù`¢=PºM†MWY"hã“¿ Fêù¼=†ã`x2ÐåÜ:ab(™¦Í¡¶ºwÏE%é7ó óùðj¿dm%Ä©¹7úÕÌ0Z6ÅÇ@¼[6ŠŽ}e4<§ºðñ:Zkn¯Ã¼¡Å°þðÜ¢“ðž1,´Ö½enþG¥÷Œ4³Ýø:ä#YL*GÇoGÐè‚üv›![©N»ìë c l_Í2=çu¹ôëø@ãòÙAa“Õï´,y§x’ˆÓ¯p +\¤ëøýô_l7´&Ü$jdŽˆ:B»¿IPåÈ¡qJ+ôµ7Ñ\f¶Àîº .»ì„ÉØ9ÏBbdìbJ^{.B²Á¬œŸ{v˜Žö¥ûVÇÅ3Ä(lH„Å|ìà«œ¸uçô_‰X”™5âNûŠHn‹½‰%üL›÷ϦÀ Ê»·!5Ô”´Ö¾ªØšÐ¾ÈSÁ<ó£m/|£:QJåo>'û(Ê: DX^U¯Y½ÌMÊEõpy‰>Äz‚ã–µœ@€ëH§wØš_†½‘ C UýkºýoZ°ç&ÖÜnðú¶s£?¤ëHrª0=3ÕG‰•µ3†K«ÚX©¡‚ ĶL›ýQÂà? TæëÂá@C6@²OJ¾4]j:%<–{ÑöÐæ΢tÅT¸„ý¦€\m˜1«æÃÈ,uò¬îU ‹€kC%ªôW^(.ãÐãÁ£•‚Š·WÖùÜ¢-!´ +½š|ü AûqOîPïGˆŒ…íèU\a¡}Q[&IqHñ†¨2Þí\Ÿ´&5¹C{;§þºéórÃ}K"bw ›åGºÂòàÅ ÇGiä—ðó Fæp•• Ør®æš«Jïy[¾xÒ±š§óÌYƃْ‰MÙÀuÃÜIL€ê;¤`b#+ŒMúNº±íùŒM$ÒP¬?k÷×ØUêà—ÃQ ÒzaMò|:r±i¨è÷a×ix¹!ªñå"°E'Bi—ÔC@ŽBšù±Üi/…+¶—éÞBâx/íL´ŒÎaaÏ_|‹F·¥pËV1¬ê4êºðÞL"6Ó~Ǧ¸îO/³9e`äV†.‰Až°iY–¯™áR%!e +ïšéñ¿Vxúýz‘ͪc6P„˜2)toŸÃ×0š v;XMâÀ2R’wM Û®ÎÆÙh*Ø¡Ñ{ÒWP‘<íUM3YŸµŠÔuÍK·VÄaÎ9 <Ä•N‰Ú +cä°(É?Ìç#ÁºKô¤o~L4~¡5¥ó^EÄZlã0‡“¢°{ïc6VböD|&ëS^‹;’·o½ù`0´á™™­¢3y€òHÑ â° êñR½HÇî;c%Is>7Ú}v)‚n˜]Q8ÁŒÿ`ô­ö‘Í]ö!$Tá(ût6gù[éòYSÊ4Ï4k-mž³câ[*FÅmi¼«ÈH­w#@™ÂRš÷ž£©Ôo¯N,vžÄÕ6ã`Ké}_PÐâêí8µ`¢ß3¯›ï™––ïU—HE\'=êk*·¥¨KìBŽIjÕá¡“êÌãX%Yú¦ýÃßß‹×=Eëõ[#6•œJ9é°}Nªwß1û4Yåõg‹)­´6ù“Ëu…l(ŽêDW¹8qâ¨qB~8^~^L)®?Y½š6?eö8ä6îºIÊõë©ðÃÔ@ÿ«RÌãÏorqFán¢8íؾ¹d__Ͷô_ÚÙr½J!Zn¹G`~«|ØgòœÅ¢*Ì^åcD>Å-2Ý_ý€â_c„öõˆÍ}òÕ)p™†ú#µŒ<¼U‹z¾!JÐK÷‰@NÿžbÔøòIX²G”ŸJ°TLäÀíuT9F +Ý×(c Ý•ZGkðÉÙ„gÙ\1Ù6‡¾í.f¿üIò8¢ãŽ£’dJ¤ ûg×=Ò8]„Sò5©æ98·+[?KM9MÚ%ã?f#lTðEø ¦¼—ŠQ,j“0bœbü9s§J.¿õð2qìĤ÷Àx!RˆÝ¼ÓæÜÄ0²:e÷âri~¬ìè•ÏWcœÕOéÜ®µ§ãIÄKtå\rU<2—»ÚT­Ä%®,­8¼Ÿ ûìqftÎGéµ/$’¶ÄqØoc!ß]AV웚Y3NÕc`‡L—ºå» ✽õ ¶ ~FJ¸ˆ÷²T€Ž^L õÀoþÇ%Ýʧ9NûRýŽàì3`x¾z²…«£$Œ‘Oá†r&;b“¨¿‹îÄUEÍLÚ<~ôB!ËÏ~½võy8ð æ’ V'ñÜ­˜¿öÓœšÏv„Ys†ãŽ\öË.˜ü4…D=KxŸ;Y8â¦Aù ©ü_zû1ÃIP¤;´S¨l» ÞLi¤}ˆ§Ê/!RŒóúp¤:g¼¢Ÿœå·† ód"Á•Â˜IÍ~•©?è$a:Ë"d£õ}2“žž±tþäE®Lׂ§.d øÑytïa+ïÄ@çlY™.ðêb‘ЯqN Ö@èŽ5¿¯äâòwr†e"¸DÐPX›w¬Yî0m»%2ÌðÔ +5ú»$['·‚Ͳ„c´V"ƒÞ¨à@õŸáøì‡ÞB´QÕm"Šx”U²¿™’„‹Kéa­h†rt]«¥›Õ +.p›˜]&îpO%ýßW¡4Dß}åzà²fîÞNL[ € :û1SSQLv+"DCPžîooñ½:LBygnUÞœÍÙ«×”[®žûúeÐ`Ê;»¯XL5¬–ŽÖp„Ôq-b¼‘€‚²Ðezï,¼xY¹-)…í==£ìø3E~‚a “ûSŒ'NrûL8«ÕÌŽbZá¿Ž¦—9D£t-íÿ2RÛôÍ#p£YÖ9ËÝ«…ݧŠcUÀƒ«ÃãÕEå‚ð¸æ­j²™%0C¯H–ÈÄÂýÐÐÀÏ`s„¦ ++ö3^9zî IÕ×¹Œ¬}èŽn“†;ä}€¢I5>Wuí´æÑÍ„8ÏoÇ+ýa Pˆ0â!ŸM§¸ð þj“ì"Sk/mÇñ’M¿£¯NbÁRäZù&N•ì|8j5ô¾¯E;Á£A*¾‰-›šŽˆî Êpô›èßÈ?Sœ¾ÓW÷ñs*NÔÒ`9Ÿd$dô1ÍLZkЙ± «2ÆÊpñN:®Ædk?›‡ 0fêéÝðÛé4 +rätÌU©[ øþ‘«jC-_¨A›ÓøÙœùxÕ'Ôê»vPµ0çÛÃV±³äÉH06h|œo3‘/rŒ¤ Æ×k¸3åÓ]‚ ßÂÆ$I\ÓꪢóðñåÔ£[Üêwû羪$Ïüâ Õ6\[_%ö!Ã+߇–ïgÛåÌó»– ®î ˲6 ˆ_uª`‡C§õr¶®¿[Û|¼Ÿ;ª{| Ç­ûúú›îÀí7Œ¬QŸè¨÷0û Õäßáhæt”Wzn˜0œOb`E~ñìK7’ 60Ƕq~‹yJ²ÛjÜ€œŸ·#Êö5wŽ{4c²~¦ŸKìKØ#”R1°Oo\+Où~prИ¸ôlà–á&„‚@±Ç?‚tÇoã¿!Õ÷ŽhÃÁòœ {Ú¦CòpÑÒ0Ý„ÔÂÒ­B,ÆÕÍgåÕœ™ +|¡5æø–Qš€ÀÏn™kŽ ÄŸÂ™òûZç e›¹´¥§–.@to½W¥=Ñ’]u3%pÞ°ó8Cxz°Â:‰ Æ÷·a>²É …ßÈl‹s÷«Û¯L²6&CÓ‡âè“”Bú.Ù‹Ii Š¥U9CTƒ#¤( zÝœŸ(K•âá—á8u(ç¼qCS)ŸO6/Åkªúôú«»ÅßuZ*óì6lðqÄsvjЃôk—_ur5OmÙ{(UºòÖT‚wO +Nˆ Ãl¶Iì¿ëF WR}7¿Åù¸±‡¹…Æ[ãCfô0$ðäõñ<Ño…¯eóg­ÈÓyÒñ{p…jü»kx ØÌõjÛ¿K Uæ(–ǵÇ'Äzñ4÷èýëò7—ÿAóoùR§@á@ˆÞÅþÌ9`á¤Z凄Þú / 4˜otñ©?r]êiç|Ïm1«Ø0OÆ›ÒÑ‹ý^u²^}™›¶ºÇ‹|Ç»ÍßKËê¸ái¥ñ,di ™¤üµÓÃ]}ÙE¿H‘$,¤:Â]_³|ÑŒØ?T 3¼ÇETâã&ýV.Øçìù"‹ýüŠø "‘4@·ôÃÏQ§ÕŒT sHß"&e0‡KÍ"|=Ž…Ø7“Ñ®ü‘½°†G'íÜîý”8b"#UÆDL¶ ë)­s|õäÂw>…Ñøº’}3}PþqÕ^‚þ~|#R¾™ZŸm,Ô¦ÁÄB‡à£îÙåâÒ é™Ú¯¯¤¤zÝߥ±–©· èó\JñYz ãçhRϸ«é 62ùâßÉúâ9ùuDˆŠç ‚»ýrÖû#‘^W\T¯˜2ä<ÎÛÏÜ —ßÜ/ +ù¿Ü ú•8f©'¶ùÀóuÚVñ’™©.d§Ú,i ¹ú+HTzw¡ïõqFÎѾ™N¿le "…ºèX¨¤å¡’æZ˜ ’q£ÅK‰iߧÊ"½úJ¦5¶Þ!î¡ì#ü$ ž*+ J×ô•|ï)åÆüQÅËÅvŽH¨¡÷ råÙu˜üŒ ,Fµ Ddÿ`qõÌùܼu„Y½ íž4šs +Ü»ýžo6Û¿A_* +×õüp¦ÅG½´Ra$Ñžl~ð«Þ9éÉGâa‘6l{Í„ÒiM×oª~Y·+‹ f¬F¿KŸjVP3{ÿ”žÃfò¶S£ÞñBSïçP7Ðîd!Õñ-lÔèl;Öá—r³lz¿'—Hhb»ÿøû¤}Èw²ñòj\îgð”@€nÔNà qúÞä» ¼6¿\ ŽmuX"øqßmÔ) 5/H^ì#ìï<”º‚Z^O{DtË+ó‰|ïÿÝcÎÌÒü±nÓþ\3ÛNsñòÕ¶t‰Ã-…˜nöÖÅÂ{ã±bˆ®÷çÓ÷óˆeBý’ÐÀymÊvȉÞÞ#ó¥¡þ‡ö!9Ü«“u¨#x!F¢"†8{Kpv¾Út8áÔ’öã3å%Ü×&43檑9t‡IlPhïb”w}‚.½BÑÏEûKj€Í×°‰ÎG?Ý“õÂÑÜÏÜÒ*ØC„yR,@dØñÛ¬ ªÄI€‚G~ÝŠ toAvAö¶Ö³ìçß<.ì‹ÿA¥úîsúâŠoTµc§µÌ‡Ô†,Ì°"kÀfwá`ù?ŸU­8ÿ·á‡ÔÄÿt&ë½ ùÞ:nAJ`uúdã­êìš'øýuØ`S6©¯Ë)«èÁ¶3£Ö©›ãâ R„‹vç"–ÂÌKE±+2ï*[h!œŸ“…m"Ž5œh:gó@uF æ\‡š¥.X»(øµ¹Ìãû,Ôà•<­ww ý[p–Ê–Ú¢Á‹Zª‰aF >vC%{¸Êÿ*z<$TÐ.Vyñ=K¶âW;"%Â-goà½uÇŠwÎ+´•ÞbQ?Ú,Bö¯1JR¦ïYn±l/;]ñ)5j„é çÜ>m¨îobê¡Še/¹kY„\¤^ÃmæüF. Äåe}ûDÿÆÂ@ƒUî0 ëaë„ü«[˜Ž©¤IÄßô¶d|Vg`ü”ó®PR‰OEÑ°¶4¦qúÌK½cJâ±WM‡Vô@íÖD‰‡±êƒÆ±‘Ñ%5é»ÝcÞÆ°2ûö*¯éŒ«¼˜}úÁ¦ ;å:YxöFJ4¨«ô[ ªô·3‚•Ï’/öÖ%¢Eo™rŠ—µ?=Kå­B b/òlÆ òÖÅ,£¾šq1:ð?„Þ˜ÝÜ(r²áF°’„ßò±±Kþ)îh߈~ÌfÀÀ8ç~/ñHN°æ&ª¼Š?µÿßëq±z¶l©üý,Ö˜$ ¹Ó~>€_Jb߬J_PJ´—tø';^˜ ·*·†¬}½;àÚÈ "ÞówGñdU:d ì´^3[ÑDª\›t‚&þÛOq“Èscy„g2KLSTºÕHKûÃ;91lZ©¤§˜á¿‘þrÉý…òê%¡[ +aU[ ð¼ñ"cÂ8)¿²ï‚\amFñjËHão0.ÜV‹óE‡ŽçWI×Ó&ô±ïUëSj—uXÄãÃqÀö%kE Çíœq5·ÚŸÆ_&Çç,IþDzíýœ3×HÎÔ2„À ^¢%µ’>*ã1H´þÉA[Ú±Z$`NÏŠ§ö±§XmNm|ê#{â€7jû¹[øXä4ë:ÛÕ°¯YŒ6vOO)†Â»›Ÿoó`ØФ©+¬Â¤wU/Ì`»ÙvúÁ¿¾M×Ü WŠŒ§2¹‚/ß×Æuÿö`¥Ç©`1ÂMé >¶Øëòš{»ß„¸ ~Fp‚Q6'‡ü6Ô”ùýÔ³qЛ‘òWE˜:Ú‘daže#Vip †çô’ñfJÞïjs%ã‡E˜¯p½”y½€y Ç·‰yG+ðôó(Â-ö¬0ZZvO:c]ßçSSEZ¿·7®Õ`Ù ® #"’t¬ +xÞîBSÔ·HÖ_f,~·&ßù@þôz]ˆÕØ!/öð-¤j7Ò½éáŒJIp•kíþ ­cìA/çRÕÃ8·ú©Ç•°Ç£ žH:ÜžÓ?Sû#„`%ѤMánóÀÑÑ;’êF8ÿøU7Ú,r†Ÿ@Ûà³==Òò­€ø¦I/³ÅxÖT8)&fþ[ƒ_‰}Y ‡L¤žD9ûÉÏ9­ŸöÂrÏ!hX¡“/w4xz°Ÿ[qXPÅ[< Á\¤Y.Öì#©GcõØ„?`˜°[Q°`¡>ƒúa<Û ¥ïú¸u€~€µg0.!ßÖ7Ã,Ò%á޵ײ+Ùοô$G±oˆcJ+‚m3ëµ;ÞuA&:è@ÒÙ|ŽS)ÞÛ1!,¹ÖSjÜÉqÔµ[ñ:E‹W: ÂDYçe»zê³þmFeg7l™sø]gÍa^C¼&IO÷E¤»àoŒÖ>ž47¥K»m÷üzüŠynThm1úG cg@%‘ +Žψ«"û\Õ©>ö.êÆC’Í"E5#Ëæèkrò.aÓûsáóhÐìî‚ ˜“7UÿÜã´¢p VÜ\Vï¦ÂH'½ ±Î„H¼Æj}lŠà®YI½^~yHׇ®¡3ÌSóXx¿/|Þå7ž[…ËŽN ò¸(FÝLŠ}ø6ŒêŠ¥o9®èîA®€+*†Ä÷½c÷p‹ÕãÑ+¨õíÙ›Ë ^tÃð +ef,éX¢µxdc +~¿H+Ø« Ô˜'rè®c´¿«Xƒ7Š.]¿—ºOì‡q¿”D@QTp %BfË¿lôòÁ75¦Çy’ûÚÀDý6·l›pme¯øŠÆ©`ìb÷õ.×)V㪾5°™鵕Iœ‰é?Eù–AÑîÀüOö-)|µv¸æ½1²_+, +¬ÂëÖ±ð7¶JøǶңâí€æ.›¹§”åmG£«XlÀ ì¸ïzyD.æébÕP/„îÿûœFrP4|ÇÓ~ ñ]®õšfïóÞÞã¸|§§üPbKû>픉ŠÛ5žÔ<‘ ·¸ñW;›…¦fÏaûÌ +ôQ å¢Ä©Z nÑ?ó~ÓªàÆb朤qÀrªÇߪOÓ#ïÆ#éÕríé¹–cà¥8ÆbE‹’í‹Áx¢9eðJÉ{ÀDÎëžÞ™K´hVÃ¥G¯=,qIM„`ÌG€}ÂÆ”õFúp[ö„¶C=y‰R¤Õ¸8­åm.ð(õÀ_B²œßkö}úò“âý{ 2V¸¤â‰x÷]ˆðήA>Vi^ÜóúyíH¿Ñ>Òõ5æôAÌdf/“¸'Vß„EŒNvï×à©PÈÅaF×Gáå¶böEPË'K½™–Pp'¿  Õ,9©QîаBIŠ©H{1¥ñ<žw%*ÞWK³o¹…ÿòÎÿ:rª¡Ìfð! Ì¡–²â(T æ"Étx¥L6nÍ|9‡ƒh¿A‹`‹($ZÏ¿ÿÍZŒºF­êo|ÉvúÐÔ³à_9Ò°ÚŠ$³mUËì=Fh®`¢¿5ðg®c´ŠZ½qŽ‚ž»®r'áÆml¸ŠÔZ‚l¡Æ`šFk; ùœ¿½Ââ0ÏcoêI q?”kÒÇk2™‰°#™«7²Ë¹A0ëXႲzzµ¶S%å7&mùâr!©K¼Z¸y’î¶K8¤ßW=²ÚÚ0¼ªãAÉû<%Ý){ZQ*ktªñ³"~1[‰ÝÜOp¹ôé„ÈÄäÇ›‹µ¿×ÍvT³»(4ZL»Œ+ønUâ(‡0µhDqgÅä.ÿ¤v†ÎÈà.9–Å…ÃNkao”|—å(îËÃÈÉŸË3œJî]¢[YFñE(“}zÑå•XF©ù Ò2,‹<}|s—ÝCGÎb¸6ÚtÛù*lrny]Éüð©÷Åð$-ôɦbnF'1q#…P¬!¯jÀSÑm£Cî@õõEe¿3O3É•ùú=ÀÑ’žÛéÌ]ü•*€©~kæ²f$»zìNGé7v +s‹üs¡!²¼wfbE¨Ý^ìŽP/vV"çæ}£ +þ·"þÅp®®ä€&DxÀ¨0µR+Ð1DÈ<4E{W"†Ñ¸§.ENÖ>71,[ÙúÙhPèWRI‘¯ò¤–„8üÏëý·éUìø"M—È,½þçmˆ P0ñzKz`´>¾µè]}Üû£M?0—j÷’Lçž,ijFf}¡QJ‰RQ0N•-¹@pd[LÄ¥ÃO*½À™ãå2ýmˆÒn"C+.GæX“7%¸RÿÈOŸéæÖÞù:úx‹cÁa%>61jÑ'ÀÛD¯x(yð@†ËhUÝ-fð÷z2wêæ;¯ÕÜçB1ä¾&&Ág92ø<|0/•"×”,"jܒѱÁqmÒ×^7û߸úC™˜³‰c0ð/ÚL€qè¹)¿9lcé‚ aƸ–¿PzNȲÀ)Å®þ„Ah>6V-ÂÁwœU?so$]v¶Ú÷rÝ&¾[î´ž”ÞŒà,‚níYlO·lž5«¥ˆ{ÝzRiõ«N6_™¤doÆ¢SNÐ ž zoú.¨Û4S»vªV „H{Õ>i†9dÞìæ÷GwjVâ*Jß.öÖz2ñCðÌVÞžH[gçeÌÑlÖŸF¥bg’’Šw\7¤êu%1d¥F­ºCÆJ)ô”?x¾AFÀ­¥¶o]ÅÑYÏ_¹ lüÕ:…áí×7´®¶·/0F€Ô(rçí+¬€Âsíw)Â'>èˆ7q‘-i‘ú¢µÚž¨Õh†Qá ?Õ$%LòÔ«fÕ²=Ö¾!4ì !Aþwr¨ô\ ÝNSÓ~ò8œÉð¿JŽiNJ…¾píx¯¶„›X¢Žp ±Â4sÒ‰bÆ;eMnÏBBê£Fº˜Ð&éUù]v¡’g×ÌŒ|z k/•¹3;‚ÁnËøé4N\%8 འþj|.ŽWgÖˆd¾Ì4º£ ¾ÆVÏÈðò#Üž …ªÈÀh>NFV)†5*uQ”ð™v˜ÁUU#,Ú?Ô ƒ~·ÁÞcõ®Â3ÚE$rn›#§È•»AúÑé÷&"ðoR¾Aò*§LÌ)WÛ°A«I Ê@ÛöûÔE ¬koʾ|,]îõAªïò]€"7M_Qc®©Ñ kÊ–6«%7#¹Qãf›Ik $.ŒÖxˆpŒôÊò Àsé+¥tB|¾ ìS@;2‚µc x{baTn¤WZ +êeä¼>ôòf0à(hËh ñÙžð +–¼Æs4[œR‚ƒ[âÛ^ý*FØ™e£^™ŠbÖv†Çþ¤ˆ× °Ÿ„ôj©Ñ«|_¬ó)heS.y>y•*·&6ØæP`ƒèª¸ öíÂ7Ø8öM)IªÃ5&À¶Å1@Ý”±Q¬·˜çµº({8ŠµÉâÉ¶Ì +éØžzBX‡—¦½ƱÂöî´«ï.~K55>=&-†Ú7æŒå.‘qòù«JÇ‹»Æ÷uŒãäiêþþÜ¡ïOzP\'L™½[«©ÓJnOœÅ|Á–“˜Æ6?ÑÈï±IËgA 0mÊPÝq•]c)?çdºС–Ù%¥$J¤lÚqâó?ò›,õk=Ä¢¾Ý·”4RÈâ"ý£PXëcó¦0K\J‚SM_Š´é“¢¨é« +ñëN^Ï¢ÙÒ«=ô½É¨ÜØNÙÇ6(°ñÅêÊY¸.h–Â5Ù‰£][ÿ4ø·ýqwÑÙf ¤Ç"/ÀX•hXd/ìvI»ºm­Ýç¥GfØX÷ +ÿK=ö² ’á(ÛHfUïÎãrÒø¾IQ$W0LÇòÅÕµ8ŽÅX‘k¯ß‰ô¹­JÏMÐz{äPãÀ÷–¢Ô“‹¾>$ yò§ÑéÚ"Éjrº½¾‹Xf¯yÓLEœtnÖy“ÉŽœ‰ö•ƒQ]Ex.ò¢B‚…;®úˆ[„çðÂXánÍUîOýÅ›1Nd É¸&÷ïÎŒþ +endstream +endobj +2231 0 obj << /Length1 1616 -/Length2 23613 +/Length2 24418 /Length3 0 -/Length 25229 +/Length 26034 >> stream %!PS-AdobeFont-1.0: URWPalladioL-Roma 1.05 @@ -27423,7 +32568,7 @@ stream /UnderlinePosition -100 def /UnderlineThickness 50 def end readonly def -/FontName /TCRNJT+URWPalladioL-Roma def +/FontName /GLTUCO+URWPalladioL-Roma def /PaintType 0 def /WMode 0 def /FontBBox {-166 -283 1021 943} readonly def @@ -27434,98 +32579,96 @@ currentdict end currentfile eexec ÙÖoc;„j˜›™t°ŸÆÌD[ÂÀ1Æ…p§³T¤¢€®o¿˜ˆà9«`ü¯….´Î:þ¹yÕêpýäJ*åÈÀl'¿–eî¡}#)Á¢Ý&»7+å‚/^§ ™ëWŒ{ïÔLßZ60VåáÌQR^¦üλ3rï÷)€#v¢€$öp~c—²¼´°£êë&“è÷ö'ÄÉÇÆÅ¿ñÁä+ž %;a~•ê!”ty`rô³cþúv5øb‘/¼W©¹™zî²#gß|fà‡ ËÞSS°CŽ¼3R>›NnÁ'Р(ÒeadÌ/³¾•BÒ¡ ¢ÑnKs­Ò¡¦ï°êß w8H3øETzðÒX‘H£W‘}ÎàæÒó±EpúÔÚÅsg“¾cxHSAÑB±Î]& *Pí>9Ýš.‰ÎTZwŒ½¢^±q$ÛšÎÊS²Ï ¾·QÂgÉX¯ušV,Ð’"ª%_â /w!¦•ÿjßTšƒûaù­¼ÝJç@¢Æâ„Pq[¿vÜõ·ÐM~³ïˆµ­ZIêf ·†îÚèè1÷N4Æ6{ÉAö"‡Ç€ôŽ¤ò<Ãhú8ôØ›Ø)¢²¼˜Ð—Âæ¯xl¸aì®-Etö T"ÿ5ñÚµ^1z=Ê̸õe.°ä`•¥Éâé`̹–å,±âÀl‚Iº þõpïpšäЮ#QçrWJ^URòlUïQ6ë·O {Àµà6OQžD´;•Dn -¹`]NèIHΣ{¦ü+¹à×Î#ß逞(BUndê/’‚¶*ƒ ÓošâÀxq¸Ýð{ïo1Ø1«€Hÿ(Áä虸·ˆãÏ–à ¢Ÿ•[äävå ™‡½½i•ïÓ'¾²òj"E`$•Ì·…ÄИ8K¦°iÈbÚîjë{äÃ0eLáÂUluJP, ãiËÙfÆà'ÛªhŸÉ7 › ³®$6â༊:f‰W½<Û6 -%)p\àÏçèeþjÊïÝa—ß\" ûÿ9!œg‰Ç @GˆLmúÂs0½_JEn|‘&$SÁ…Ÿ·úÐ@È\mèªÿý<؇Áu®Zý=½]¡m€jX “æâ}i·¨]ÌP»Û¸ˆ]…à'`€ùL¥Õû}½ÏY>ªÔ;h÷ñŽõdã¯Èg ¨¡û‚ol^k¨‡ i‚âŽEb¹µËЯ'ÕA´vàÅíYqe}¼±ØnŽ—PÞÔA 5Ñ]«X]a¬î-ÎæñÞãš]3ÎÌP›ïi1{#>â訇6sàIŒæ·•Á„÷$G(ìF´ VèŸ[y•·}/ônÂ÷§òñw"Ħ¬¨Ü"}©y Ú8çUÝók¸n‘éÈ'zØûaâñyÄâ5vóT¦g¤g!ï]®¥`WÙn’mmòº7Á[ï]‡lÀ•’2N>|k6ë—Ù ,Wã:á/lÀþv_*ùpñ¸ -jÛFSÇÊ­h1`Œðwým wèÏ¡>¦,,ÕÂ.$aGª:>kÍc½ÐìÛaF‰½VG¨»bF>#ì-¸]cÓÝÁjRÔ¸úŸâjrÃÜxñËTæ;{ê^è1ÿïÿ €æTão¡ ·)a_c]Î#ƒMÄY¨Ò%ð™"#¸áoyÌâñ4g&Ôö¶²è’)^Õ_8ƒÒô‰üÜ‹KŽ?µÂa‚£•üáB…òÔY¢ù.cñWRó²׫Ä°ü§Wãœg`Àöó ?Sù[U Òh"Ï×Jwc|µò—=¥]T$ÚX‘×{ÄSHNÄ£d«´—ÍŽÏÚ£½gÍy7 íü œcÚÿÿ†&R/4z~TÉi  åÏ?\u(ç£Û­ +· üñÒô/oªà>¥<³uY$éÿKBžYxrMt![I¬<èæ²1†:Ê—Ç+uªnÓ=ëÆ(-ž¼˜»lço¯ÃÕìiŠûî$ФûÊffd¢ÜßCVìÃQ4}2\qæiØx–¸%Z¤ĸèÁQLZl)ëëã⟜n¿ñ'y¹Ý¥q5LÒn83£V­ü<߬ËÙjúÎÎ7Àf>„%6Hèdu[éJú ר@DlîÐôgóë¦q¨Ÿ‰a맪¸.^©óêm B#§ß&7ô¤ÍpNŽ¯ÇWoã.®¿XHà -7Òu,š_ !ñî´DLÈ‚Ù™õ2ìmTÊ»V-íMV‚2Î÷7?„Û þr™GŠe->4paÓ¹Ù2ƒ=ƒü·|Iö ¶ ~FJ¸ñBE=)?Q¡»—ÚXbogèsó“V»=…’É"¸(LìÔÞWþ¢ˆm˜‚I%Y -èoŒ1Ü©imÜû]žiÑâeÛ/R¸R@ö»óÖG«æW>QÅ×9Ú+@ì7?±7ңªZÆ8õ_ê8ÕódG¸%1ÐBå ¶ 0 kC[}Ëdü{žÑ“Z[x’œ)SÓÿ#WÖß³dÄš¥ÿæ MdFÖ,*Ác‰ÿ=uói2D é왥–©)ßcS6Ò:œ>³–èª+5xë¨-´6â”ñ%’k“°Ú0³áu¤šÂÿCÐØ+Ÿ ÷“Ý3Á¡cr?nf®ä hj÷áãoÔ¬D“a#/],mE(SmÔúz¥¾&*›#™$’bmÅÚ0ÆEÉò;×øÅRº?@zx~ÀY樜ª'òÂ/:Ûœ2ar6Jžš]Ãþî[‰àÑeýÿÉ9'‹÷C¶ ΤÆäÍâÁÿOžQkšâ®Lm9r ·6œòé¢@gSù s‚È¿¢[ùçˆ0U7…]:çaª÷°¹§AK h…ËS5©²zöL…mÚ3IŸ ±—©ÔUœrRxM›×¼,åQ©û Õ2òµ¿Ë"ËŽxgDôõ%¿kXS°Ö††äÍF5W@^“jYªÅš80l‰x?ÑRè%@ßõ1œg×±ày _¯¸Í”äÓäœ%ãg¬×gƪÀàD‚Íüîže„8%MÒùö\è§a݉ão.ª³·vT$R'l.1UøX¼²Ê¬ûºlnþ8LÂ!ë3ž_±'”/˜9Iýêñ¤¯–^|‹ŽcÜjx¶ON‘Žø”ÏÔ°’‚QL%"¸“[‘>ÇAñF -Iâé&†¬Q-RÅ Ö Žø‘3©ã žlz{%_> ù{—Sži¡:–ʵ¼’åë” gò­±BœM“î¶ÇÃGiü kEªÑй©ƒÚÑß펎V==ÄážÈÃØè[vû±@ðÚÞ嬽»C0xSøI¤'¥‡4ãhÞÁ¦¸ÕßtZÎà!îˆ$ JÅý¢'?›²9yA¤'òפÜ3UŒIi”r0Àƒß‘å9Â8)”JQˆv‹O0ŸÎ<{>hçá«ò 2CG‡¡+ý>áÛôŽ˜%„EâÓïðÛ<±N1/± -ꃼ”Xp>î¯ýXþH¥™Äž¿'e?¹pµ‡€G(¯ôCc+XŠI¡\æûTÐb“ó´Xø3åZ$V£ |ÜþEsPæ± r¹Dß"euˆÝ(ï´7áÌ ö‘@¯¬†þk¡ -<ÚzW&üíY83LÎOª*¾~ïb¡÷ áʺpÛÝ;¼‚Ÿ¦Î…véMÁP+C -¿.å–³+³ÙËbßÂ<±™‚sê^ønC¸JÎ4Ö7 ðû¤%ú‘‚žAÉ"\Ayý„&7ç;Ÿ ®ù_†3'‡)€j2á1“oÖGáZþ>iJ~®G Â¹wÊ@Ë‘!<˜‘ôï)‚½_?Ž=Ðÿo*›|Ib\§§W$L)µt+Dq—r­3Ö°ÀYl؆5Ú;”á­ˆJ¢ö¡„ä|ˆí5¥9Ü ?oU -‡ÇÃijØ<¡P™ìß:'>Œë2 -‡Ÿ’D°n;Ní»’Z¼µ¬n!¯êïM' ²N¢ çq Aáçý -´K±h94°˜·çˆÞ¡ŸeÙXŸ‹ >ξVŒßw‚ƒBy6¶±µÈ'Ù¶3ý\Ó.D1ô”ö{ýæh]åK¶ù…Ç\ÕX*äØJ7Íämºó Î »QÖ[u«û.žÄ§Âö=ñno‹ÝDD<Ý FYÞ£ƒ aí—äOÌ4õ7gI–yN¤k\Và郵´¥Þª4¶`X~ª›]c»g·¡7 -š»¼EyS°³F`Õ “Ïeä’‚ä•…b¡>ÞÙINtÞ& Û«ñ×qv­è6¨¨~°6òèóŸ·¶{FeDlZSVFB²nRTPŒ¥Z/›èõ º -µz`g­ÆM` -ÔËfØÆIiMæ+•4Øûÿ5€Ÿ j¯ýœ:ÐV¹ÍÈÕ]S_J¯HA‚c¤‘1¨úzw¨¢Ÿ#ôgœò¶Î=›°+Uk²(â¤Ã™é 4í}ç•ö7ŠÖÀM°Êæ^Ÿ[å©\1d´ñ<‹Ï -*½­ê^òºþeu‡ËŠVYlšMœý›Mü;ì -WR¥„ÃŽè°û ½]\ ÒSZ|ÞPÅ?Wu׬X“È÷.ë±î¾µj\ß’hž„S¯WC¹Ï }3¥À¢ ™‡Œž ŠÇÐR5 ¨pÓWà îf"¿Þ#Ègr¹ìjE?9ñÅNdðBl¯µ>M§†ì£‰šú‰ û"¡^®HÊ Båõd&¬³Ú²Ì}nŸ3]ÓkÔñGoõP–¿eŽÜxhW¹»LFåÀÜ*$(]fµŸV\s‘ا/ÓzhUSíéÌÒ}è(㎧õ§õâÞrö¢3Š#:>²G£#{æë;ˆÏe»¤³±MÚÞ¸g¥[ò4º;‘Їf T:¾˜î»û:¾£Tjë˳%¬X?1Z¸ÂLöbÌTsÃdbUçûG1ñ,uµÞq ]WÍ‚´Þ—û¤]¿­ù°‚€|75þé½Ïs;ξùE#„~/AVx³áÚ&ü¼à,ŽÎÊ!ÓÚ+;±¸ÃO™¾Ù6Ó÷p<⪳’Ыÿ·xq±fdãéu³ »[‹›Gf <µQæŸ_$Ð=nûqRüGÕ ß z >pÎFù}Ñ òóÎÎ]¬ð Γd “,ŒKYÍ2s6âäŒÄ‚eÐ&¢Ë|¹6'šÒcœÆs6÷º•¬.’»â×ëk• -œ÷(ûãöJæhnë¡}Kÿ) -–a~ß̧½ù°-þ Œ·LÚ9î'*ß]25RqŸ6gOu–´»‚~I΄NJZW›1›Tú 0*ÞðUgÞ??¿½9¯ÈŸx.>¢AÊÒˆ¢E@Óâ’eYûáØB8ùncç±$ø†Ø¬ˆsí_N›õì]__ñKú«ÖV…º>·wqZÝ,2 ;YÏÔíè«À¢5œöІ‚ÌžÑi³X©FóFxbº×Óçøž ·ÏÏvê,,ì" Á-è²W¬9«ÌÛcf¨šü®X=j*çŸUr~J{<×Ɉå~û ¾N /ÅàÓM¼ÆÓÍ›* tÌ*ášh îøÒJüu3GÚò¶‡ÝŸ× .¸*óû|X`‘D',âjt/ªz&ÔéšõœÒÓö,¿‹J¯Ø“°±®'kØZ¡rÎóQ‰µÃéO®ÃÏödž™Ø,ë ««ÆjWþ:G}PM”‘Šï¼ì"Ókò1qô‹ÜvE¶‘ñLT'扼šº «FŸ,O"ïTN§çscF¬gúÎ\T‚ûøÒ”#äzh[;ÇÞÎÎÅ-n^ß^¸œ=Bî·9<Œ\¡¿O.ðóƒßŸ_0ÖïMðP)7:ž>¸mR§7ZØhКcÆD}|˜{3kFÉ"¬;‰ñ@%æ+t1Â+©ÔªJ2:ìI?— C(§ÎòîV³Úç ùa#û­ùo±ZãU½›J 8ßyX€cú\x€-Úž!7»°Pt~}Q/œg)l·5ÄŠeO5Ž~lì”äÏÕ.f¤ÿx°é$±=È“ŠUŸ [òÚÔAr^☬qÖ7¥hƬ7‘Øå B(p$¥M§:8_šønöãõ!¾#Ï¡tz¯ `¹G0¾äüê˜ 1‰LøÒJ <¼=®F{,0jk&ÇG®j¢·u\Kα…ËöóbÁF¢Ú?ZŽ®–{g8yzü÷~\J2’ͨ!#ä቗xk§.‘º'Yu>–£'¶ÇÌ Ö-½Ê^µ|ƒ-,Å6Jµ©núöEd]'ܵ iÖç8ú´ü¦Ã/¼%¥ž†K¬ÒárqSžT=ý¤'/Ï“î:4Ãððá]]&§/H—ŸŒ<çÖZ5_ÝbùŒªìÖL}„hÚmÏ'g§2~IÓZÉñ8y`û„>²ž;jPf›Ñ¤ƒ†Ù©Ú -h["šð¸X­Ð£EÍQ8,ÑšÌJXxd5i檾5°™é“n›,„ˆ¸¸úmTÉÈJ`§9óÍl‰B¶Ò.LEÜËÑÐ< Âó¿ŠÂäÎó?jÚiS"õåü® ~¢kšL^Š%wk÷¬½Õˆ…:ùß@ä"¶7:˦ð8âþF¢(Ðo.•uàæE_ô%A¹V¢CÊ ÿšN™ÅäPKp–àaVi17.DFr’”7Kî%”a}´¨;¬»È"¸°ÂÊæàщՌ¨LU €ŽŠS(tv/,,å€ xß8¢=Z/ˆmWÁ†hÅú.ˆ‹† -×Cs/%ò„pµ`¢°u7®® ñ ×.®4@,99媦VȪ›Xc‚œ8Ñâc\2‰öñF]Ï3½.›~r ß|ImÃ…9ÔMãÎ@é¶PæE©o£šÁy ¥»/!-]HPIíŒRnãQ%( â= -€`W²‹a"N¿$ÿ}ø$Íít«ª@ì¸[vem Ùgùd&òLÍžE*“Lffp–10<‡,šûµíO‚"ß÷&ý-½uƒ‘ê"ª¤‰¸zHm Û´:B×Õ[@4µ)ãåÒo¼Ù:$¬^”V›¯68À¸ ÑKƒb±õ¶,sà ŒÄ½c>í†Ö-¿¸&bWÝ­f'ÑÜ?âÀÝ•§•Øˆu{5oÜVƒÁôÚHí°ž`s¢çK•äl9ë K7Šæáž8‚)TbNoNÙ(!z¢v:ƒ—ä "ÓuçyîX-ÒÔ½ŽÏàh- ×ËÔFÑe WnÑâ1lÄÖŒË"3‰C•µHNÓw¶¹ŽuUëG4 (|—Âל}´à±í h°ÝÉ'3˜`«3³iëú«ÁÊ-Q4ô])¦³O¨Z¸K²Å=¿i@ä™UÃ3ðÎLx–¥h{Ùó=2vͪ, P‘ÇkŸë츜k~"Æ»]£û‡jd8n… ÝFLfÃnäÏ¢ÐYK²Ïlî”-k$Çh¯‚“YÙX^d9 %Þ¬_ŠG€%Žô„<9íÖMYý”æ˜Ð¦P£ ѳ[k¬×°€¾wÛ/‹œ Ë©%y®r7ö{1=]˘ó“yùæ"GßxË×=aM Ñ‹.¸@áo¡Öû gWeW8$î$é¥c¶ØØ‚À—·¼÷Õ­d -%¿t*tÿŒ;,kîOf6”RuËJF’9?VÃ9_m›Šäý î¬ûW vÐ ò/Mj7D¿[¢äYÑ]ª,mÕŸLµQºãÄ‹ɇ˜Ižc½YjóížáÓ´¤~Z°á¿d9C¸Þ÷ ¥È™|Z|ö}†mÌ’™Ä¨ éªçŸÇþ狵Ó*8pÞù—T¡ »…•š)ú‰u*Á‚ÿ…Éïa‰#ŸR7—­6—K‡!n<¨¶ï‹h+ôEå§< ,·Vl¾æ«€R÷ 4CCÎÒÃЇ1࣠™ø v×T…ÜHÇk¯t"]ç2Ÿ_ÅÐà%ûBÞLbR“Ÿ{QJ÷–¼Ý…”#öSÈyÃ?6¥ã¾)~~<BÓéWe$k7‚ýÜT‚|™F¸lœyS•tÂÄž#ã³æ(]æw‚ôSíÅH)P5iNy˜@Sü›fÁýÐ1Å¢HóÑÑ8Â3þÔÓ¥Æ^.£)ö¦ØšpfœˆJv]}ühàwÒqò}bÝG`ÞoЊº3·+âD!Íöñ«v€Š6ÂÞlb${ ­:â*š£9‚&³«JeWH­Ä¹å<¤ú2 YêÏÈþôJ€i¾—¾;"ž_ôfÇêúðÒ˜fͨ‚˜<¤èþXIöBÀ€ -‘\ ƒI¾Å “ÝP†¡¼‡Œ=‰P¶ÝÖÁ²ùÐ7Ÿê(Üõs`;AÛ⊸4ž2¶Z”y¿U½6dmåì r‹p#ÃïK¸_¢&LUÖ©˜6ˆ`ñ-V Sÿ€CÉuâo€WöšÞÒ(FLÀ†Mú® -çù·8VÁ‹cllJ1 Rí®ÇÖ€MÍË›0µŒÁmõ: -wiÀ6›}›ì;@|:Ñê=ÔÖÓïxí¬"éÌz2ßù rx¥– cmÓlÕiµA^a<ïÖwPæ“pê¿=ÕXZçš.B­d•z{$ì¯'hþxI’'@þìr3[û,èeÈHïq×ÛPßþo–´Øô¢]­é±½\ñ”ÎÛ›*žxt)o ƤPÚ‡gûIOf rö…º¦<ÄÈ{Xp¿®É²¤•î$P N‰ôÍ1©<1.¤gG0Acîùï}kB<¾÷ê;U%™´!?̲a?d/»ü‚ ,j5›¦IEf¬e8þ„ö;–¯7Ìž.ã—Q!öÀóLÝ¥ÆÐÇûŽ±²_P//8Çáy¥ºk™”žË xN,ˆ®÷}Õ¼N™,%&%õ¶ãÔûÖçݸÀ³ÇwN@«`ЀÖŸN÷¼›Qõ¢qèIåþ^<ÍnªÐ¡b‹câ1ÕPh'†8@~¾¥ dˆöJA=±¶ìHø³°ÜÏÿÝÈÒÑ«€´gqÛ -'ø‹—¸7dÍKË=ù÷¤•-¢ãÐ*U(ô`¾ßÝÓ‹üàvÓKØÿûªcAHôb¸¹H`£2@æó¸íÑGƒ¿OFÊy‰Ó+Lå¯-1¤«Ó94Pdeä¡·=DÁùC¶Š¦¯?)«Æg Þ?)§nטQQ -èæ".ÔÕÆ_p.T¬šdFÞcV¬O‰í>Àß‘ÔyT&³ãÉí?±©n´†¬›aÐH±.Àb kÔVÛ5-‹få›å>Ú³Ú}YÌnúWŸŠ7Š 0Ì鍊3óÁ55á|ˆ‡Ò7Fbc ¿À;<¢Sf&^R:üÊgýÌC‰ób•¯švz£©ë.\'ø±>Š®—pñ6Cwå÷^4_=`ØÅUº-ñí¹¦Uî4Ÿ VÛmbhÆÏ °Ù>\¡Xí|›ùaXFî6n2¥fÞJÚ(‡{k†vÕ?ÖÏ#K -X›1aÃH´E±;lÿ•G=0H#õœíÛ †íœÔ¤žFÛDkjÉäEöô½k© Él™®B¸]´˜‹ÃTó™§jV©”þõ‚#û%º‡½à¡—Pʔѕ<ûïùT‹äÔ -À¿.' K?Ë-¸ø|ˆwAÀ¢U¡mêHàŽµp“-D\ÔßÌ„šp#ñ¼Ü™;1Ó :¿.±°êÅÖ ÊOÕVªŸ±ŒÓx¨ßmc=Åvqà vü_?1 q‰™/Ëk9’,+ÃæÜYþ6ãà!ÈÏâ¤Áœ‚mÑ´Õ¡õÓW&¦¡'½«J[2LÕ´ÕR«é«›ŸI´¶¦»(¾_ª} Øå))x=ÍÈ%‡fGo°µ G¼>ÏS^ÅF¨ÍbAfÖÇÁíKÎ@îÇàÈ?ff%h…æìoá}Œ 5WyY×ø¹]ÌŠU¤úÖ?ÝúóQªçK¦û¿31ø{O5t(œÆ*¼EñKEz‘J°wÄvížz~ßv%¶s+å;A…´é:ØA'*ϯëˆâ>ôE…ó#jÏêÒÏE´3Hõ~½8¡ËÞ\¬Äx´Ãö÷T²6k9¶*çÓ7í´f8ëã<±ðCŸÂB~³°àrö±0 -/ qvP;*¾Òk’ã‚ë/¥å³úœò]žFÍ8=8Wñ>„„¡þa TêÛoUw²ÒGtOpwN` ·_[gÕAC Hż¨D%ÞJ€ ÐÊÏ(¥ñ^+ÓPI úÊÎèÜ ˜ÝmÍlRX£\/6ÕŠáQÈ)Âo›ˆ¬œ ó––fü^¿ ‰l× ø „‹ŽS<ܧäT’0¾—Vþ#”[^{£Mk|ø¼ë´iÔ 9§%ìŽñžþO‰Q}LS°èJÔ…Ìçk“7¦°¥£›P¹/ I@ Mš‹êPØ©õAº<C¸tÏ·ÛïÑ›¨½nOY¯×Nr­Í{görìxúãì~Äc2gD€ß¢BuÏD¸@|+O}2M ë4±ã±ú¤,2ÚM$8Cµf˜bRÅ A'æ?ZÕšañ¬Ä¯§Sm“…ô–mÑKÛ­ñÝÌ-(¦ÔÉÐûˆ*À}Ò;›˜‘$é–e+&ìÝÇÒô/áI§p@C›;mFôÀéeQq){åÅÈXxüdž½Níhá§ÿf[ïpçmŸòY±HEµ³AUøS™‡Àƒ -<ºÁ¨Fjýê#÷Eä/÷)¨3\Çcœ(ÅÀŒ†r]dÖOPR2È9´øÏšT }]â(­Wæ¿y(R?“C- Þ{³UŽÇ­BÆîøJC¡æÚTGúP )D|"6Á—rÖ­>ÎËu*0 ˆi~¦rS¥Zé©Ùµ…ùÒ1òø‰¦‹²£ƒ-÷Lî°!VÄ6¡þœ£kF±¤® )>AôÆGü -K©’Yˆ ê©>ñÃ'O0r¶QÆš…WCÔ%ÊÂhÆ» Ó¢ˆi+3yÉŠÌbh¶’̃$ŒÃ‡zmT'¢ªºtôná¹gJã'EJjI\%LúåùôøJÍvb •×P³.Èés =)¢U¤½¢„¡ µ‚Ùfê>”„€åí–ˆÁ‡.at<ÒRí劙 ã7xÊRxQD¼³;j%À_”·®fRI쾦Ž+ÆpDè|¤¦S…ŠV kU«pNTÌW@;Lµô—™¢ Áш֊ê2º÷°O¯¬Ím ´ŽL “é«]ÞÒ/“Š÷ð^b~Bò·RWzYTí°­½Å¤ÊÕ‡æ]"Íùk€#<•‹w¦=œ}¨üØçF2ÿŒ÷9Jµ¶E,ë†t®#C`¶¬²év¬Ó'èGØã2N*Ô§ÿ£HÓ!ú7³\ÐyZß%m$\»Vi2SV3¦à¾UBÉ©q)zf†o–ï %4Þáâ§B¹ðòvM²6„\ì)®"&T|N¡wb­%š°6UèÛžYjžžzëÚÈ}^%:1‹¶¸¯ •-ñÆ…vûDé²²£0H ßR>~“ƒà_´8¬“f½×°ø{àG J·c°ä‰à;ÉÈ€É?XÝ,ÿôCÅ°*ŒS EÛC´Œ#)¢å:¿tÃá&¹‘xÜÊ›F/áø¢ äÕÉ¿0"8t -±Ê”ó‘C]l˜å–ðol½I^†èó¿ØèÞ]Xº“4âtâl›0ªîŠ7»Žëùé HOÈÈÖ7ööÀn°£¶…x0ümì­µKÚÒýé!qä„2»«•Ž¢:ù¹ <ðh—â^ ·YMåøeu…z4ƒµ, ÏrFÛ !.qÚ -]q‘‘ƒ)Åä’™›¢ Ô™Zb+¯~!z.cŠÝΈÎPÌÈ€Óú\l;%H£‚øFÌ Å '®}q÷qðfXßÇýiáfæ¥<ú·+©œÃòö -–ñ‘lNú¬eó‹­ÖålPlð‚Ž›‡8 Ô[Gnm˜šS¢˜6f wEÔ$›ŒÍú‘ˆ‡‚¥$jÊÒæš#',õ™1U„#¥XÓ#Ö (Œ5Õ¸y¡þƒ:¸NçF'â$YNßtk“«–)Jz31ˆà@W]´OˆS5Ë(mïu‹Ê/é}Ž=/’Bv®‰üÀý•>I·[îÛ½›ëIÂUý]Ö{6-H±05>¬Ê—Ôæ$²MDí=?v(Í-Œ§¡±3§EØüì2Ñ')f$!œ½áG!†O)ÎÐÉFÍeÉaߧ©×X -Á[ÈfVl±J;Á5ŽÃú"á_Æóì` VË–>n¨ BÄjq™ÇQ›*Üäùyvwpÿ–ÌÙâ?rãõ -‘Aªüæs;7ó`5å5¼F8öÑg.ã7ˆm €áÊUÞxu´äÈl÷bÈkyQšû¹¹´%‹R5ZBbÉF{–£õ 3ÆÙþL„¤ÁW1¹zߟ–‹O÷]ã]4c/" ÇÂhâvWÏÕNÿÖjõZ¡Jñsõ­;¯‹7ï Kkåg®ÝÞ§)E+eÒ¯ð³iû~—iJúvwOÃúZXïø¬Ñ2Ʋ™ Í=¶§Ñšnõ-(· ëËÕ;!˜—PÂÂNi—ÌÏæ%šõ½Wms`®¦YŠÚÍOº -LÀ œ÷¸FcÕ/R"ÚÔJ-»™¡s|½”W¯Ëš¤*Ã?bÀ1|Ñjçº.f™‘§7y QºÑÔ ¿£í üW5öÌ5qÑ Å } X´¯§6lO(2œ:>";òÜv,…›Ü¨hŒ‹z0ÞE:8¿ãn7Ãœª=§áº¤@þat&s‰©°c¡h±†Ù¬,)›=Ä.ì±1Œ°_\26=ÛMNiÈçO€þºþ\5YKœ½$O­ê‹«sy_øìo9È[¦‚ÏœÆRg#Ÿ¹ý§úê{n5,fZÜ3ƒA<Ö jÚË’é-(¤´M`‰= Mçœ_{¸^£y=w÷R¼º ´5ÁøȶqÚ·eÞ‡”¨Ÿl æçf3Hº‰ñ«¼ä ön¸ÐzûÉ9K©1¿V[«syÐÁYjN(ç8ÐÅWì—ì¤ÁW1¹zߟ–ˆ"M™}àeáŽ(ŒŸ¨Ï{»_ë,K^—QÈÈ9qšÃž.^Ér1’bʦã|ôÚ{³¹Ì9¥a¾¢7 r ê*çÝtô3<áô+%ï+AH`ƒ-d¶¾ -“Tm€þRîu°]fi””±m+~&ÜO$‡!—OƒÏ@Q´y(È«ÜÄ„%|2SÌ=•*éO0À #a;…07L½ºŠÚf•Š ©AvûÏÉíÑ]‘îA•¦[ÀùÏzÅ% ,ÞS@›ÎÊéDAi ÕêÿÃËÀq‡9-"ÿåZI) dDÁ[Ò]Q(==‡…ÝÖ2£‡.ãÇkÙ\½jcóË=UBóÉ–™?ñuü²»Ûó¨iœ¨NpK¼…ÖÚ]§Æ¡Yl*cú:*@ˆ*<ž6CìY9É¡­„Vv0E‡¼Õ~”I0ÜE¶ÈYlAQß}]µ­w‡©b+j6“yžŠˆÒZèÊ 1[a¶¾Ž/s¢#U8·BÏfÈmá°d¢µ›ê—,¸Ê>YŒ¡¸CÏóÊX€͖ʵÑ“Æ]õ™ÿí|sn”_„S«¼ ™êèΠ®Ú¬è4‘#ïãQ=1Ýq ¼§éÝ'ÌÝv»›‹†{ïaæ]y¢Iª>ËP1ßÚÄ‰Ê±å• KÒÅeƒ}ËÏSU=‚”+ý;­Þ)ö#ïPɱ*0 -dC¾ü5 ’1ºHÂ?¤|)QÐêyµëIg´]™$ØñL@<ÜüÇ9 a2H½)¶WÖ+©P!Š:®£Gok¯ÒRj(‰s¡“RÙæÖöe3i‰‹\ê õÃŽàïNZ‰¨G×—¦’ÄK>"(H¯UóRÒ«T)¿vá§SˆÒ…´ä¬0Ñ°žu6‰ŽüÑb6Yœ·ŠçR6—ä åÑ_³Œjæ¶Ç²_ÓÂ&\‡0Ù³¦Eší¶Ç©hÄÍ–šo߸’ÑímÍŸïÆÝÂAO+(£oý‹z†ÿÀH§ÄáþË|¼J›$5Ö ¾¦ÆÇWWn äKRÐÕí.‹rW6ˆOevc¬†^~ö#BÌ'L$¶³¦„Ý5ìT&Í-Ž »Šº ”Z?ÓÉìËÚKÓ¿Øj¨[ý¢Ù&‘×D*wsgÞÐïo’­Lxõ\Mm¨bµŒ,»»…ê2#I¡¦º¯Ä&wO ¬¾Ì‹ž´6+ÞOÉæ½±\ï&¨‘÷‘ê4ª»9‹„ŠËM¶X˜2©K*Ø™´oJ2ü|ë~æ&Å0ÕSèÑ÷WQÑ‘BO5Šïµ‘Ítcgj²WPš†Ê¾&…-V8Á¾zÉEóøTuÅâ¥Ax³é7ËÚ9û‹ÑÙˆÙ"œ6lC»Eü§P‹y±g&Õ>„ ¸þŒaºÔ©ÈÉv°-–]jÉÁ±|¥¼v²ê}AùÅÜí–zçšåM˜RH'#ÆnÆ…zBèSÞ(oK8;õ_˺“Qju±•=wi<2TÍsôÆpDõ÷X `9Âi–ÕÚ«ê…W‡ †’Eh]v&<íÝK ¿ÙNÁv!D{À$µ¾)v$µ‹|fÛÚ2Õèêö§TZÀkv~w+bøM*òø:Ž%úšóèA»ÑrÌæÍ+`á6ùš­˜„ÂÒEKñ=âõ®¶š‡ -”ÌAôÓ›m_Åá˧¼Îß²"hg#$Ÿ\ïá³Áaà–[¡÷1}Rõ¨„V›G¹ëâ×?¦¬ø´~ã蜟»g°E¢.‡Å)qÇëçD¥]ep[_;¾ ÈÛ‰ Ø4iص“ ¬{ð• ù¹.WàxNåö-¤%¤' ]œaꙂgûQÏBî­{°ª@êÜú1Š®¯œ&3ƒiö‚™‡Àûƒ˜D ~` ¨ ß‹K¯å¼Ét¼ÂŽU‘H° øøvíÌO8@Uý7ÅuraY<”¹:L:eèÈ á„+§‰ðUD)å]ôÍ,Ùšå­"óyq(Íñ „U_ŽÙIRwN ü6îpñ…ØT='ȱí·Ä¿üx¿»ŸÛä§âF«kI'$Uv[ÿØ#Žf.Ó” SsVƒ@:ü3‚¬ÑCÀÒ}!ø’¹®”Y@…bYMÿžúºqÛü›a?ÇGn)´Ö}kn=F·JŸ¿½ËÏÏÇ?|®ýÁWœq°‚߃ýãÐR½.Ìé0]MlChÖg– Ê‘`j¾˜Qnë”Bcm2^y÷¿¬á9¸†ÿÔÃâxÝ¢Ô—ðšÜ4”‹hÔkD qB‚¾!x˜Cq'ÎßÂÁ!Ô\`dx¯Ï0À’ñݶöЋ”KG2‹XTD,_¹·Aõ²åþǧu>Fí^ƒÅ]cÔ.`=õ®Pµ_ATÖc8œKZˆ9ùù³mc=rØ«ÜËÁï…2?û'*#>žúÕVvûÉ=“&MÛ¶›¬Ë+Îp{Ó›HžTEÿhÏ ðàzJ2dÉ ‘a鶛-;±c`¡è¸/þ®ºnÞÅþØvº˜ß\´¬Ÿ#ÏU’м~ûÕËŨΟ7N»qïóƒÜXümª1ÄuåÔsÍ\»%)a‘Ø©ð‘a¤¬jr|é£â,n”8äõY1f‹i‹<¾& C¾õŒ.P1îz_38AâÝsR%³¿úêKo@Z“u¿yjõ èïºæÃsú*Òˆ°|)ŸiÊ€‘õè,jÔ9< _ .Ã4¢°üùR…%|B¦í·ŒqYPç -¬Œ®±ž«6Ø µ¨‚(aGV£$}sÆý_»¿ÿå…²GFCõðÔíõã"K''ËúŠ’¿$+È•óšCC6]|6è*L5áòšÅþº…2>#6ðÖ±òm´¾ì׋Šãµ¤I¿“™›U²«€lU¶ÿpHÕûU‡,Ùå[.:â‹Œ_MqûŸóùWcE‚†ß‘á~ï²3ø¹äø'‹ÇÏ3˜:sÞýé©À¹š ‚uñ/ð/pÌ~s¯ääô8jÙ«úN4Kª5H~&Š£Töȼ#°ïÌÈNÏPFlp²6À¡_Tre—ô»É&É*>ðÁÉ’¬™¿Õ¯ -Âb`…¢½³êØlsóÉVÁ#ƒƒM%Bା¿ð­ÐÌ`|ze‹µ¸†mYKû½;3àPø^P„!øã4g„„¡oœ9õA†ž:éÌ{`‡ÉópâC(Ã+•Ç*$ݬÝúØRÞ´¡ò#a ¥W:XÛAfæêY¤½0ù5»W5>¿›WSlú÷Ž`Â(ÍqnG,(ŽßJ¼-ëÕÃÍ×ÓtEÙÜUЦhu†BÎzñe} ‰žèÂÍVfumŹÏ‡»™»“Ï#Ön§œ>Ì\ÝGA/}ðÊÙîop†…ƒ)ªÓDZÛÝ­áŒÈ—Ë0a¥±Þ/ñÙˆïàD´ê¾4îÛMg HF÷óÐгñàIc­áÙB4J7gÂÃ*-¹áœxÄï/Ÿ–Ã/]\›-æ `#f8íK‰n#ÝNgv#¡Ó¬u^0Õ¦1Ô'ùG~úx"º30æì7V´Û¨MõΗºS$7ˆZõWÐ’j|³µg¹‡vù]›ô­h7[úŠë‰’³Ä¢ôèbÞ:ÄN%“Ué4«H§FcôüÛOÛ¤|õ1q8A§I;Ï­b p;¬;«¨Õ^fëÅÕý'-K„4ƒƒÐƒÜ¥HÔÅ"EºÍâK4ý¿Üó¤Ï„Ò*_B¾=Óèw`2±] aŠØ4ÎI eâ¦ÎŽˆËtzˆQÛ‘›A\ÓI¢=ƒ< ‹u)îøsf­_&‡‚+“£ú¼8xÀcI‘m*çhùÈ -Ö©/ê™ú2¡‡îB˜Åà6^AĪĉܴÊ3,-ų=@–û2¿àí /ÑŠÈÄ< A…ÌŠNðµÌ/òXS©dÔ5LUî×zl×õ¸Rëýí9%d"‘ß&Š]ÈóÑR“,}e…Aè$[}…–ž|.g¨È·C©¶˜´ˆaGoqo<_o­>J?*ÿB—œèºPõñL¦ä. á‡úYØDàmž uõô¯#ZÜ¥­9[„ªp€Á¼ -™‰kU'¾ eŠÌ-i4¸³<‚‚àn5ë \Õ}@ru}ØM[G£±±¾ß*Ï4³~Æ葦~>}ƒ=’ ×lØJ¬‚qUÔª õq5ÉvâéÎÄ>‹¸B­M“îxš%U*ØþîD(®><㹃¿ß8Í%ŸtF¶føòÃèѺÇy¡Éh×cÊ/ %o8#rtÀr9/ãÀEUÀc¨ëjU“tb'ø!«—ds¯Ñ2mÁeÇÛ{‚¹Îb. µæÃ!¸ÕgFë‚VyÔR/â!/™?Ëf>I)tItÃÞ^ÛDã~sRg½gpx Ž¢ÚGÖp÷¢2„ÚÝ©±ý<P«p{ᆛè†6á<:Ó¾ÒLaJ«À×HßÂòÐ)[ÇA0"Ì÷ž’¾ eèå~&%«ï)}- Ø:þí%ÏÕv 5˜5Î~®˜Ô;öùq - L¥C{€~é>ôd†ûEÅ:C;”æ§.‚aT/«VÊ/·I½­tŽCf c7y–¤è -qqv~Þ…[R -ß -•€$\GŒÊËŸ~LSÇôßFÕD² Ô2ûR7«NÙ­œ;7½€ÇÏ¡Á.È2(€†±ŽýÀñœ4ɦ—’ÌÞ¢“;d!òêãFªMº€êvÈ®>î¥oˆûæÀfT„h>›+/,³=†|T¥¶bÂë°Ù-ÝJ&ºú—tÓáÅ‹å8ÎZ„lí8§–Àæ £,wÈ$¬A•šŸÍɨ†ÚœÊòÜ7v"HàTr¥nE­^r¿Òç­]…KOÓÉi[áøvÒÓ*‡À¬K0ªFoX;îQŽºrË`Ëâ¿KÆØCóœ¢<–ظ†D¸â˜Nk¯èáSDAôš¦Ïm… ¾\RR@Ô§U¾°4_Ó®÷˜«3ç¾ÁŸÿö&}YbµqøÉÛ§òû ¤ìéS *Õþr¼ QŽœC¼ø@ @9Lß”VËßÇE iLI&T(ŠÏØÆÅu3ÅÁBCÔË­góåòß[Ê­8zI?Í48‹ÜzìÀbÜÎÛb)u€Êæ(G÷TÞR[¿ß_\Ww‡U‘ÛÌß²ÇÁدGq3ùò#óØÖÀwCé3²˜† {ÉFíë°ÖA½¬‹R$¨Œ´zzÊwØl–es? cF+Š\ôqïh€-“áÆ´[šH¦Q8êõyÌSé,ø´‰eí÷ïÈ°‰A"X‹–î ²',Šj¸RM“,›ò¶)0ËÀtýR` ƒy®Û“^õóšS™b’~6¢ä¦Vã6?:ìz»fùÈGýÅ7x9‰*¾ñ™oÀ`ÃïäÑöé 4Âê”d¿­6±úƒkOËqEKÌŽÍÒÑ-À¼][wœh‰f„æ9•È®.eÒîÿ!îëRîÙ*•S1ö芷†1üªŠð`‡]ÖfÁ,zÒH˜Þ“è3:±×΃Rìv¼ï}òññ¨I-C ZKp*I‰4ï7`Ø+ÒߥE§²%ÀZð€ fÜþNú§ÕFVJ¬°Ç¥ƒ ù(d³KI 6s)•OY5#ÝPu¸x¨þ©½¿cØ e®PÚùžÿEu×d­d‚<ä"<âÓsánÛRÿÁÔ‘f„ßu`ƒàÕûF:w5—oá«a©ÂþÓÍ™ÙL 7¸×HæS-“ý‘hD¾;¢;ÖeÑl W#d¶-L¨Qû€»ÝŒ›¯è)\ïòò›…DãÃçRjG”ëvœ¼™¡þk3°ÿÈrÊu;Qdµ°ë ±­¦§½"#eô`[ªÖâÃœ ¬´³ó©JÁ;ÏДs,ûC] › -QeVÕ~ %rÁó¯zK’FÞ—¿||œ.W{¤Íq`S~iï/Èõ_«—©ÏwvêçË6T¡nªÙ™1[aj¦:“%Ö˜<½¥ÑV¡ò:* -v ÄWô(V¥¦ò=3=ía-väTÓÎ.볫»Zü¯ít„q­ƒE˜o‹ŒIZûfþ,)ƒ¬X& /ÏØ Iw½RÀ8¬ŽZ¹$kÚh’[ÉNXÒpyL8e–Gï+Xh¤òW"þýs[åp†pÿ$9·Rÿ°eÒngÞGÌåÙ{¶õîK5­zsÆd‡ ga1ëL7Üd”¶Ìiœî_Jç.°Ç×L§(¥cr¦ô¶Î•”Á@„ëT VƒN5Ó ÷Œ;E"ž½ -ëm³•d6U2ñ‚…M^?o¾ám#"|Íi s„•–Ñ  -¯ôÓ7’Ã9˜®ý"u)B­ÐÖA|ôR}Šyzi—ñÿJL}Ä{úWNËu]¹¬Ò¸n?Ê…+ -ßc‘çÄ»}tõeýgZjI„|¡Úwñ€Þd¨4raäÛÚ æ+ù_]bq KžÕ}‚ਪó?—}ͤ 1»ðÐõOxÇ «f˜jší>LXêÔ³©wêßsPñEµ‚‡"ðúu6ƒ¸èKFýu”ë¯Rë‘¿xo³ 5M…Ç15:„\uå—FòÅâ$´TZL ©NÔx§ØøuÓs 2¿_ÛèaTœ†;²˜Âì/{¸s”u½®­qg]ô²›H² fbÒ(³-p܆l®u®;.y{”¨,Ÿ]Õåòûþú8ÙY -8¦ê¶Ý눡ktðŸ‡Þ‚0ñ±ü‘ĽTÏ@¾jŽˆ·K£©¬i¶¬9€)B†oJÆ e Oì,V}]IŠƒ)Åâ—‚63 þÕÁ -°4®{«'(× (F¸¢SÇ|Ò[Á5º§&¢ ÚæÚžu~–0ÓÐz1fë²Ad|_i ÏAižšîîKƒ×ûú½ÙÛ¤‡rýœåž¾-Ít§œ_ÑûFˮݹW¬vѳ-÷ÂH§[X¸¶ÓIT;ŠÀ§íÁGPèÿ–!¸Gžq~Œ#§p¼îM!ììeEv_vžŒJW–)4œàÁ(šo;à%Ícpºn³xx¡5; ó€c¸ £9ÄŽ‹-<ÃFób­ LÛ²R 0ÔÀû–@&„³”uÓ¿ß¹0ð=-®Q±#Å Óúe°õæ1¶”8jŸ·6¥ûß@„“VŸZÿzβ/ÏêJ($É””Tr§ªR‹° Zìáùp !ãz"JG@jŠa@lÁ%‘O5(¦GfåFÌF¾¬ÑêÛßÃ3jt©ùªÆ@£÷m Æü" ¡cgÈx‘ÎÍé2Æà‡}D?9 (ߊ¬Y@´Œ[êª×ui¦+[÷¾Åˆ½< ÌDIÜ>…¢•Šèáù‹‰(îz¯"hh1a¢Ò›'ŒPÀ™Ÿ|Øpú6ßå|êBÆörýl¿™í?Àd^V'jÆ)÷†p=nÐpî´÷Ô‚ ìÛÏŒ„=Êè¸nÿâu¯ Ä3rD ²N° ǃO¼m¬¨ÒXèw©ççû -þ4žºDp]Šñ`ïZ=áDÔK¶CTœÍuÒ‚®“°dMÿ¥ÂJÎ’à±Ê¨ñ¿Òã`ûHK~gO”ê èönEµ\A¨pC8»gÙO?xŸ/¢dáqúCé¤Ùø&äZCßi7¯^ðºû&ÿtÞ—$CN‚‡òÁÿË™ØgP+ÁRÕü´H+¸þÃçâ{qQßôÌ0€”Üm%]߬8m^çŽþÊ}Â~+Z:-`ãLÓ¾<­6ãî—ÑD—~ö«šlFpzðY¥›Â(üÏÇC7™*fë fš£;&áôµÈ‚™ß?k>ô˜iÀ[ۃ܌Ž(+:3{&ú'òè©Ù%óeÄÑ黋ÞŽÄzõq_µÙÔwH±wyŒëAȪ¿ZyDí3.«‡Ãæº]§ädžªk¼êMâoŠ‰GY­„›ÖöªâÃè -à¨\os é‡SM5E†YÍbü ¤¡H¨'›éа‡Âîjë®ÏJŽ‘ăhP§iÑp|Fƒuˆ¿®Ïѵ¯ ,•;Ü>r -¡•I#“Ä–³3MÌÈóXÓéíþ+À?ƒgÕALÖEXÁóÀM÷‡1ö,”9ü˜¹9|nË­Àç~ps•›£v>An^N6ÙÅz¨ºæ£1*ŒˆVØ—e_9hÜ=n2£•BAÿ×Rêy]žcÁ2ÈgX`M•#X@>êzŸÉýØ2¤éB¼$èÇt—˜Æˆ[àjD5Ÿþ'ç‡_{^íÃh™ë /¿‘-Ž¶I ŠáŸÚÜ:uÑ9¾¦Ø´këÁÜš§‘,”¿k¢Ð‡-"6ß-Zû9:7/ÞSÂqôöjÛ¸`•wKçj{|„\,UíÃÄ˦tcaÆáY‘÷ ¯*5…‹ -Q ÄS!1m†w¿ÖªdT¬!˜|{¨¥˜ÿjküÊ,Bo+;oUH·B¢­=Ô® ÖUûdÖp T‰X§Âúz§k”¬kÈ$ùè˜YÐ<ºslF¼j»‚ÖcØÂ!c.ΪåÈz{PŽøâû§îqÍX½%;³W£˜"K 8Zãù½ƒðƒ¾ÁÓ6€Æ~X«ækZÚç< gõ€Î¶/ôÓƒëo8‹5ÛB€³ši‰ÈÆŽ~¶µF4ÞO{ŽÝW–Ô…')ž_À'Ë6ÿ)Š-`Í/­£®¦íœ)‘%¾ššä0ãàß­2d¬ÙÒ„lL²a'¸ÓiÈq‘ÿ¨u=ÞLYu'~=rW¡©š?ª:Y¢ô&Â%3Xüªo¢Š+_¹e‡†¨|MîŠîÈ(ç†2äÐÜ´=¿Ô|[ðHÐœ¤Ð•…EÏzÜ}Š4ãB34ú×êµ<‡ -Aò%T>³9¥Cë¤=ÔÑ Ú€c…¨ÖÁè+=ËÖýB‚ä†uæ‘Îd5T&<)çÍÕ¿MQÅÈ0T ä×.Go"TàK‰ìhô2‰¼7aŽ®ñb¼9[£‘híßÛ<• ƹ̉‡ý™ßSã}-ÿð‰fY„öäâô‘ä¼ìëCGiÔÑ ­€Ò“³‘ÄÔfºè †B‰ÂDCŠ|Tø¨vÁ‚·b2`6 m{Q'Mˆj:¿ê2XvÓ‚IËR,|µÄ|\~¤t-)Õ#IÈA7 ½Ð´€Ø|ö /ät†eÄáeg—W›óÅÐk²Nè$Ðlo~÷v &^i …庂ÑÁ7»tÜkÅÞF­©,›1ßÑî–l×­=µ}ýÓs#1’uÆÀ=ò÷³ºbÖütˇx—Æ›Ì”Ì AÏuZí·©˜äÆ=Æ U èø/Ãxû’„ IlŸÿ\Ù¸ì¶ïOŒ¶†¯=æ;>Ù+/0ûðŸý¬ r%8(øxF…µ#Q^ÁXˆíõò{cl{ {M^‘=¢°y^Y\¦zÄe±ì×ƈÿÐÍyÖ b_ ¥ É­½Ìâ¨t@ÖOûÿR -º}‡/ÙœfŒ7¤úŸjã-M2ðÀ¿!ÀÑ$rÇ?°§)/2Ø][Š¢Õj1 %Zœÿ‡š »)sS7ùXŽÈ!¡VJ–l”*›kT½0| òò)i€MZˆÌÏÙûØ_Ôý[Ñ$4!hôv+Cd -'kÜ&£X¦ý¼–e›¹OOâïqaJ:“ÔU xXÀ³Þ^BÀ-7H $ ÷nU«¥ -ÙÖqìéÑWÙŸ™±- €4äg,òý°*Ïùà´A%äm«*aRHd2ß+*t¸ã¬õØ rLlöð ‚~´¯ê?=s=ÿâæ5HT -vÌ•£JUYGTy2—›æìLüsƒïðúÕM·ty"-åÉHÐ=]7×B$K´;´±jíëÍ3ÓùN'ïÁ¶”n^°hGv1HŸ ±2×Bå,É{šI ­€là‘ål„}ýa 8|kZ¨~üèYÂP§Ú¦f€Šªg@<ßL\½É¥QdEuÛ„®'YL3—)Å`…ñÚíÛLÛ"vªŸðRn î;‰Ñ-ŒMÍ×Àj6ê€äöÞï/p18gcöù§aPfРŸLÐ HáÚ†ê#:¸u}êjV4Øw¬8×z) ´Ú°Aõ£œüÄmúr½¾ÌÁ0°í`ψš‡ÎFRÞ¯j¡È| #ÔÔüȺr\&´èL ÂT"·s¤®Óé ÷aZìÑxg¹T7ï0|Kì¦mñ¶M±bjþ…³,ŸÍìÞ9”êfrIFà¡ÔØBù(­Üf+ŒOSº³±TÓBÑÿ7ÈóÂÔ#< “V‚Q¶¤<Çô!»Ê8œ‰tõm¿Â™,»òn§¹æäW=ë÷ÁUÍVžI³¬¶Xó컲s¨bϽË7âvz{ôEùH6NÏfÙ*¼wK9íUn*¦0’ó~Øe“fšëô¬,3¡¶Öœ ÃeÕbºNÌÏîy¹}U¨ÐuK¡ÙG0?8ÙŠÝÁ$ Dâªæ=2[ˆ¥ñÏÀ„=ß+s‘‘ƒ©ßüeDæ(ÓÆë‚$‚8tªfŽkÍf~âÞ|L7‡¶'4?=ÊóV«·¿Ei4$ƒ‹K¥ÚF‰nð<‡S'ʲuÿ丸_&©›f@KR±†`ëµè™%'•Z²TJ•NL¤óp?<PªžB;2ò›˜ŽNEÍ?ëòdÜÇCØ(¶-£ü@y¦$ý¢;¤BÈeg-2HÔ¥©«µ¿rª„Yl½uþZv¬<Ñ;V,^áE«—Bµ½k1oÿ|›™§­Àõåôgb¨P%À_ûUüVî‡Å˜õÏÜÇZ3ìh™¤Ø\Û»¬úK猼üvÑ$úÀ…¿EÊh¾Ý=oÏôÌ"ZjâdKDvqΡîósÕz¶H&ö1c$ sJˆ>Ìõ„D-qª9»®¾šÝ{‡ÏkÐŽÊ:âaçéäGb±›÷%!ÚBÂiŽA¡ «Ùp©°OjI¤nÍð…”SŸñƒƒMH»iÊ‹Îkºµé°e[¡óG­NnÊÃû›1ÖÆ%¤Ì†Â„>U–@ˆ1›mJ˜­DIë+} -â×¢W.#]¿ïƒ×°§šg™ÊåÂÜ[L,z¡†Ô×ûÞz¨ÉPV>ú.PsþV1†;2\EŽ¦ïTv»Ü7»uð¨æŒ%vñ1\òTg û~“Ù6 ºå 9ñÕŽ2Q”^r4"g -UƒBO£œE+Ó/’dIƒ‡¶W|Þ«1¹%Òïslpˆµ9c±“h$¼ä|¥ªƾΑãî9ʬfúÇ^€Ë÷Í~Wi-Œh û9G:ŒHä Ca>0'èé—]&{¿ÑÈ;OZärJYLWÍ s_(Š©97¿–ö“CUýÇ›\¦ðµ‰¼îÊ ”/mm¸~fSÑ¢Á_|­×d³ºû舘DÃn çF½1Ó(§úýë¢ o}ùR—÷ƒŒÝ&865IVºŽ™hÌçª B™¬Î -endstream -endobj -1986 0 obj +¹`]NèIHΣ{¦ü+¹à×Î#ß逞(BUndê/’‚¶*ƒ ÓošâÀxq¸Ýð{ïo1Ø1«€Hÿ(Áä虸·ˆãÏ–à ¢Ÿ•[äävå ™‡½½i•ïÓ'¾²òj"E`$•Ì·…ÄИ8K¦°iÈbÚîjë{äÃ0eLáÂUluJP, ãiËÙfÆà'ÛªhŸÉ7 › ³®$6â༊:f‰W½<Û6 -%)p\àÏçèeþjÊïÝa—ß\" ûÿ9!œg‰Ç @GˆLmúÂs0½_JEn|‘&$SÁ…Ÿ·úÐ@È\mèªÿý<؇Áu®Zý=½]¡m€jX “æâ}i·¨]ÌP»Û¸ˆ]…à'`€ùL¥Õû}½ÏY>ªÔ;h÷ñŽõdã¯Èg ¨¡û‚ol^k¨‡ i‚âŽEb¹µËЯ'ÕA´vàÅíYqe}¼±ØnŽ—PÞÔA 5Ñ]«X]a¬î-ÎæñÞãš]3ÎÌP›ïä7úÞ <ÌA +³ó£¸NPl…G6³/†E†ø¾’?¯‚]Ÿ`©#dÄ'ñ¹Û  õƒ†~ÊþxL¸sóÿ%ˆ*4d‰^¯|‘Q™ÉìµÍúX½U¬ú=ƒŽ=Êh-lvŸö_ÃxÙÉ÷…±OJk܇âi Êé¯DÙ‰¼®Ì¬íÄ 4ÇÞò—”ý ¢trBÜœB§=Ä,vÊ© ÒTÙ³0$íŒ <*¦qkp¸`´h1=݈òDy.Ubo\dˆÔŠÉNí„ÎÀí"¾·6>ó”ÀU7FäPö•ðÄÌb Á–ùäBbŒF¶åK×úêPå©°¨»Äœ›oŸ?&D†…×Ðœ 2áE¬´ž´¦FŸLXô§³ÊàÒœÜã'(^˜­´H¤?l‘€þ}ÜDçO(ÑÇ¡¸UI.2 µ'ØmåçëbŽ™ßƒŽ#Xíö„¦²üÁîâ‰ÕærÂÏX¨"b_æ£ïV¶ü`)è­…1#ǵøð±uRÿ?ª%ÇÃ{K·btèí2ø›ÈÓËÔh‘h˜Ðªwr‹Aqhîœ2r÷{ ¸÷…ËÚKÊ^û;8ˆ‘á`“JÑ:;œjâ€Gꊚ1ÔZS ßÚm¢AO4óøS"ÝÒëø ˆ—Mq|E9s-­&þPF þ’zä‹÷é¸k|¨ÑNÕ8̵È}–¹·Û€aLCó³Â‘ f§‘ Å»]´Þ©Ch  ‰àL¸á²á7Zg§Zq†ëÊ‘°d“¬®¸ô;¢æãÈñ•wöäøæµ @I&ytŠ±R€­7X^í=iŽ?+¨Cð«bÓê+©îwVªŠÝ\0“±S´ó ÈÙâÆü œzK|V– ÂÐÐnz ¿…qݼ‡É³i;Þw ½vð@%ŸÜC±(º,$…‘ƒ‘ëiŒg2A‹-ô³CRu‹9à 0ü*o‰ ŸÏ@çè¯%«1Ü­´Âÿ­>Ø'éÇø©¦(fôè×¥Z[T[DùÖ™¢åÛ Ä>b´-¥·\˜‚oU‰Œ­k•÷äÃøÊ#Ê’ÉÀŠý¤pÇûªŒ-TR!šõLÎr«³“ÍŒ^¨ÇvLE·`#â3¤Á½°œŸ jÏhØm# oÞa|•…š[F«¾Ño¹Žo±š05âføŠö´iw¥-(8íŠø/žØ/2Sj²¯·Zés8QI@61Z?eA*ÌÝtêÝØlù –¦+#?ÞñښĒ|“™©fóÙ†>™-`›sjòc½ -"íèØ•ŒJê#å†!í³6? ²w­ÁÖ„¶ºn•‘ð?) Ô(Zжu(Y`Xë¾å`À-–Zvô¨k@Q0Óвu“=½–\œ‚Fà +ù÷gî˜J­çQíƒ0}б“˪~ÒÙü+$b%Þ]ÒïMr¨»ÎñŸ@]Ð)nÕYÐÅÇÒǶ6›;µ=wA=š4vºƒçìg5š«Â!OwãQu7Ž~8ú%PM½ª¡Æuì°Þ0Ú’ +™«;.}0ØñvËm¤ +w–Ð,²˜‹„´o‡(8—Úd½MÄ$Ðz5jŽ äó%!¶ßuÕÀ´¬ 3°Npü#ÖºA ¿4û„ó[<•ÛåwUv5—ÖÌ–TNÃÐæ˜Ø!êìÑšøây{µ, VñÆë-¼146r/¦Êó’^•'ð¹•4ûI;ë§Ú€þf¹tÑœâ9™ÀÓ˜ôHÓ*-÷èZ¢?6ο–È] ÉYÖ„Š(8g¹k‰Œ©FgÙÄùP”ëû,Âj±OÆiæÒ2ÄúAfG=×a¯;6R°T„HbGmäºe¶‹îÎÐòHÁ=NÎ`±ÄBЧãn¼¢-nZV—OIÀf@ñ¨•û\ˆvª”Iä‘Ë]ã$ÍÃrÅï 5Äݯ¸=t8ú>Xe/¨‚z¾"›©”ûJ…ã€Sg˜ ƒ?s†sf ÑÇjWKŸ‡Ùz1—žqë€Ú…¹ª mÅ7ÇÞ‰øW2Wø亄I·šXâË[öºhìZ½âÄý3ëPVL°~€àìõµG¨;#pÞ:ÂlÑP¦ «uA·y—»ö‹%'±Z‡~e¹HõÛš.¢­/:R\Âi¸¥¾ÎqO±«°ï§¼)? :°„×p59I½[Ÿ3aLŽÆ°Û]Œ˜"ˆ«À¬¹&WvtèÕKéí‘dTmÕZÏÆuÕ*îm·REàDD+9 ߆Ê?î™Yø¤\÷ËîÛ‘Û®nP7­šÃ¾úX¼o|X=¥š@ PÝÃׂ"~‘eµ62p» tõ+”›AßæÂs–íz ¢—y0yË«vNâËÐÍy6îz_38BÈ6»ÔÀU§šó@êñÖ ¥)å<æøÀ }f·’#@Ñ=•sߟР•Ðßtè9ê;éÓfÌ©u:ÀÆšàñò¤óC.4èVÄìqú{zÊ!pf;ÐjÒŒö,yv€9ïÏÊØ|>È ¸ý®§cܾîo ]øœ46xØáþ/Á©ÚI©`®Zê&+d¬#qI„”ýDžg¾H™ ÷”è‹*K–ƒgðº)Ùá<ïÞÑ7×6}¥)øüŽÆ ±RÀr}þ49óØtÞMãc±^$KK\ù “ù~®Tr¡ +f€[äLì]Ëøu%×Ò7¨ú]àµÑUšP[Ö¹›ñ'Ü®ëÔÆݺ¨øÞ` z†a^œéMJ‘ÊñyLìårsØ)›*ªzüo±Ÿ‰ùË£ä~ðvj1æÌx³ÒRò…B‘æÿ’}æJ挋±Àáo<¯\;Ù ¤ÊÐdä>P›—ò5ÑJ¿F[nüÓÛcøÅÏf!Â6¯|—ç—ýÚïõt{9µ¡¦ßÀfGDѺ ™¸J'Œœ²é´òNŸÐûÛ“av Òî„1Y¬Ú9·% 1Ò •ŽÁ‚ ˆM…!=BŸ«Q$E<æUÝÆ â¼H¡è‘†õE|Øï†pqapÅê}·vßgœ ×z²N†êûxöŸ/Lb©OŽ=*Ë +8:ð&¸?)HªfKù])ºœÜÖ î†E`å–Bdl·á~éÊ¡ŒÈ?}1Öp’„ÈïóV³—Rñ—‹îã>P ÿF~ß¿‡ÄôÌ0ß ÷óæÙÙ )?'Y`Ð)‹sR¼|–Ï΄WJüG!pzÌEäÜͧjc"ý.å_=5½E +*‰„ýkÈÏÆå(Ùæ‰z–E6ŒÌbäû]q@øù{ˆÆö½Cþ[®/X%u±Hä[Íp[Šíå <Ã2ܶ±ÈfÊ‘ N2 ÒŒ#_ËÌF @hšyw¢Xšå6ÕÝA\‚éDåèš……Š*%-Y»šB¢,G?Þ–ìžó~r¬òÓ*€QUXÍ&^à ®70l¹ûú)¾Ö(„•škRË(‰ªLxöµ†X%yÄÇšŸU½x.Ï™C5ù˜ÙÅÄ!ÚÍ•þñàhùšžJ-ái-_á*¯îd +hA¾:kLÑŸGt‹¥#Ïú‡Ö°Ôš¼ûçøרåð¹á¦bP•±ÿ¦0z/„™¹#¶E\µG·qððãÈ;]-µ"j<ænø)XMŽîI¨w \£×;¶¶[êZ}Š]ÅGáúÀíÜqú~G²AÓ9‰ á#BîÓóh¿ó2§Qûý 2I‹×ÿ/ÑJ(?Mq”DÌ#¢?õý,gðÏ„lå‡ßý?QçÝÙÆB’®^.`†ñ~þAÝÆžC)Ý—¡ÍNÒãßÑš:BÔr½]ƒév^/"ο6!ZÿÁN–DôÍ{~’‹BÜ:ž–ý{6aZ£ÈÏ«œþ?}/ÿªƒ–žT>Æ÷P_ µùÅ‘Ë¥‹Ø7i±«CU´ÙuueÓ!Ú \ì‘“3ëàë²*æô¸S\nE´u§ßvÍL„Ú uiÛ +]’X‡vãu( ü>šÊz˜—Ï.–@º¸éÏߣI–ÊE«0‚³÷ù#ç:õ¬(4ÞaÞW/47-ø"ZÉ4ò:מ„[ÆixHÖc„žBœÍ± …—€ãH[܈¾”C;R £Â¨XÙw›ž\ÌFŽàsšÇ ௹Ê0¢ˆ¹¼MÓ*KÔÚkH‡pahwÕ*¸gÝÞç¼æDQ}õ ë5ôNk)aKËA÷cü Åðb“·wî¸PÇH6žˆù‡˜:ðþWT(šZ[]†îÏÊsÏþ[8¬‡±ý† ¥&§i{Áwÿø(N”eåBv÷7;F’¦jrÑŸàÂ+õXf•úXTœ[\GT0EjêŠNã%7­Ù+z¬c1<²^OÙF «tÿ%TŽ{}A@ÒÍ‹óE Uk¿›ô]Ì9é‡W ´jðî#2ãY߆¼§Æâ†Ú‚ZÞùí "®#"x4Û¢lÏDïRõœÏ0i<’W2Æ¡¯üš öS@“/ͽÜÏqÞè +²8`×\åØi=ñuhº2&fö¿YÆø”ËV¤¶vAx.2˜ÀÎÖ_Ý+ÒߥE§²<€M¹(\-¯F­À±°S2þÓþÍmÿÅ>ésçËì¬ï¬Ä|§%eÂÇó:sL©ÖWáË)Úõ·ÃNÛˆßLq;´UàVßZPOIõ;ã_¶ߥ¤Ç|–}»ð~å&ÕF½é’Uud\ÇìsíšËÙ©ñpL¹+ +„æÓ%°èw[vÌ;e.KgEVÐ,À0¾HÊú­§}íþ²³€®í…±»’Ëõy ~S+‡µ¼úZ(Ý‚ e/Øww^ç·+ùk\¨ÆP<7p;2 foß?”é#ÆÕžW1í† +c ­mxþK‘\©Y^=F3‹0ÍÇB»—‚t÷•½OŸê4x&%–xàX¢F|ƒúΩ¼¢üvsßä_ê5»õ¯fþw.R/z€ùIÊ”U' V$ËÎ÷ÉŠLîp©ERŒ:†^E>ÐaéCó+8KvWJP5Ì]§SG€8‡ÆöÎ%ÉܽVèƒäžäù_*g ¯'SÝž«º;ük] +æ¶ûÂ}ÈÒE}Ú³Ç梷L^„†Â¿æ&´Rûû+§V9[„ö- ‚òÊyÔ£{œðn!5vÌ£èTwa'ý|ôÃ9tª‹É6õ,âÛ¾?f Zâ32¨°ý„k(‚äX=Mâ‘©Q›…Æ]’Ù⻳‹¦2×R!c²‡Ûõ†'p«š•¬CîI×ÁÌ•Ëç?/V3:Ò4H}“Qææ×èJG©…âá`[ãÒò-á}‹"%Ý…7ÅBµW9ŽßÐIôM§É6YýÎë‚Á[Vç7U¸×²’JðM‡;” Õ7t¸ðqÐ^Û¬OLu_H(ŸïNWê~ŸæÉÆò>( –þX‰¹Þ©Z²Q(ôU ”èÄ·ý¸]´ÜP¼õRb†'sµú?ðQµ¹ -NöÊÆ×Qd<±Á-Á”â˲ŠðûšZ–7¾„ZŽ&£|¥Âè¥ì2\&ö0Ž¨Y[å-éÖ¯g~läÝ´s§3îg +Ô„L÷“á1êf° D®r o–èÐ[—óô\û,Òiv™2¡I”É_RÁe¤‘ 'ŒMî­ˆAÜ­ùX¹Äuì8ôE’ Ì¡×KM9ÁÝm°ÄsA–µk7ä“-Ø6¥’=>m¬Ú‘}¹nbÂüZº¼u-¡cs ¾‚`—™È‹yÉRD(›pgUšbô,vb",¾SyUišµàô/o•Þsü‘[”´%VÝñé§mþ ÇÁ `fd“þ*¥«¶Y-ÿ]Ó/OÐF +P$è.Š2_'9Îñµ»_€TcŸoìovZƒE.ÇÑIEu9·oõèüÃÝ$e–ßúnY®é‰ >™~hU~5z¡PßäþÒ–ìÒ‰¥›onI@ߣÑ%#àKâN +-¼SÀ!CLÂýÃZO܈ãeô Wg´Â)Ef6;‚ …{3O&Õ]Òéz¶kà÷xâ_¼~]Z*eÀ}O¼»¹Kà]ÿ+?½ }×5áá†ð~Ëï³´G÷­E fF¤î\©šÔÕùuÍŠÛäj¶¼Å˜š9Ù,’ÀùÔÄYoÁ 1ÌÕ+­¦]¿ ë© )“{ƒÏk߆¹2Ðl?Ù‡¤ š½­‡‰(g±›†Xë +žd½û _Ä}#ûZéM„µºÊýS³µbÙ6n +»GŽ|'Hèr¢³ñÄyTÊl‰ˆ0r¾¥R°üN 7ô¤E?tæB!–6àéøy·”@N`ÌŽ£;R¬%–)|u‰Þs-³ ÷þ@aÆ쇗 ].ì|7 PäÔàØ#J˜‡7ɦXU%ÐæFO?©éº’Š19ú ÍÄž*ÐÝQ{oõéYôhàˆð©¨œÈWÕºaòú˘T÷ìŸÌÏÅ#øMTpý—}A» Ë,‘=v‡ï„GÁ70g™Ä V¬õÞlÅ4R;›×ãCÓo£‰a‚/¡åƒ !¹O´Æ‘kwõ|tÖ•Åð<©YÌÔŽþÝ ‚”‰Á€>I?¨lùØÇâ?ÛO|)9;µ,²8KƒƒHö@.“D)µZè«& +ýŒ]¢ÔÌzÂ’‡uPÞy·ã©……ôÔ¬U¿µ7Ùí{‰SxÌ\ÑGÂø8Ç”Aé@Aøžçƒýç“j£üƒ$ÕýnA¬X¦ºi G > n„~¡”OïÆ2@G0”Å懪+äB\æK¹P€!x"³ Rö™V7Ó` xŒÂûðotݼÙ_÷atœIÌ•æð/ërz.†U’˜+ÆŽïX¯¶j÷ÌÌ€ù«àQ‹¶òí•0ºæ u‡õzäb›ºäŠª:ü:€?7%ߦÜrûµÕsC@uT¼×Á? 1» îüÂ2_‰rÎHàD•Ö*¹×–“ל³Óƒg:KKCÇL §ã&”z‡™æ4"‘F©sz³Ëåo]¢Äq6æ›?C3Q6ûóè+”Ød€F|§À¸gM"3C,+TÕÊåÛÆ"»ëÛ£”¹Ú(ïo—`ÍD×»rYÿ=tP3é†<$¤Rêǘ,?i`µ3üÛ_ü í€±‡ÚIÇÐ~“UFèó)líüiÎé: þÌ÷šôQô\îpõ½´•ïùè^â"Œs6à/(Á#•¿IК2Âòí€â³ +àEé(Otn¼úVsMÜBQ6«Æ‚€÷fËÔ8q$eh1†/ðˆ~•ðĆ|ÝoÐ7ྔîíàwÃh¦è„nS]¸S9V?÷J¹m”^H@¸ÝT`åŒbÔ\©^cÊ<ÓêN¦Œƒ½Á¥I7³=†ù¿¸",Y(G!â¯Vóƒ-ÞÍkölàSK7\Þ<¦ÝqELôª¥±¤–¯iàÂ¥k#ÿ=º$Ð@,ž+öô` ©áÄHß‘M'f­y;Ð;ú0¢á²c« +Ãm‘»;qà@¿å#!¿^3‡öƒ&æÅšžvXQXnŸoC{ç‰&)É…8{ Ü;Ʀ¦ +±Å(N=çÞ‰ÂiíÞ‘Dh]1îw!Ÿ…}ŠA‚˜¿½Fû¥/б§#Ä2´´%ñÿ4ÿÐÅ’'j`Ñ·)$‰Öùqàª: Ë™T¿ѯ2ŒÐ]ž<¸c‘7?gädØ|^ñ>·,®ºtüßN…⦌F¸¦3è./Ú@R&æX ÂáTÇÊÐ8ü›Bù>JÛ°UóAÝÇZÆ«QUôƒ~å† ½£ý&Ø ¯qÏmgråç}j‰¿zÅþìYgrô>ª?±î?CsüvàÐôî¤ è +¾‹ÂÕI"8ê/½Ë©n§6¾VF‹w^]*[äFî À|¾>²IÙU£:&Õ¹ÅwÔV˜£ÈnGËáÓˆÆï„«ˆ4‰¡LŽ®Dá*7i“)T¥ìÉ#-%ì9ZæÚwR@éWå¿B/ÁW)å7Š?m”wû=´"óTë` aã,cNÌý÷yOEoÌ\PG÷Áj¡Í¼Ò­2ÆÞ«A ÃÒŒÀ§7Ç =.ò3?0Jº+;ÒøߌÁ~|úU_¢”¶XBå"(~¦Ø) yO{ëEÆÙÐ¶Ì ™Ÿf5—îž0M3žooD 3œö£ ŸIö¯ ÐSL +C¯ë]ó$""oÁ™Òa£Ò–Š!#\û·M¬p§ÎÚ-í +0êX¬¯3´ j1­šW«©©¹%&AŸsùmâ˜]ºCÎ,\glh©/Ò&GðhÚZ + ì(Âé:è~1ÅÎÔ!bZJ¼xØwLSïݘï'Ü ¬át7ÔeÅHÊÿiÕâÝݼ) wAM9Ö¨2O‚¹áÔ×õ I)õ},=’cÓ¡LA½ºYJ…ª<ýHl…¯ë–íLŠ§"ÚW˜ÙX— ·ž" ^½•?Îœí÷×ÙVððcèyÑ"ôøÓr‘…Ïw +c.áËŠÄ{$Æ4_>–Ø߈B ÍæÚÝáfðF3K€pý?‹å…4a´“1WŽÏkðmÂ%± bgÐOä©>wí+u5n‚^ðfVª;^]ÉÇ÷µ§ƒ5ϯ¹á«8-.Våïû=·‚e_~SŸáø¢ƒ,vÅî©€]AÆÙ²0CG\ 5%ëW)دP[&ùž¬r¨rÃâD¾@út²°ƒTì¾²‘RNI¤d.µY‹ó±ú7C«tHžjÃpdµ?—u’PežPµW[.¸~þ ðƒ0†ß¿>6ª[Á2nˆrn-og#R¡^ß/e>´õrRùUŽcom&ù'Ëè*0úÌ‘ÛÊÏ¿ˆ_Ë¡ãÈ\qS“Äq çé³ý“H"#S>4“H#¦AQc›½\Fx(ìº*Å2MÜ’ï ÄÙ(q—¬…Dí¬êñ9îšüéo ¶×xøüø2[*+/² 鈣†:”„5›ÿ甧[°}TMØ ¹üñ8nwEl±ËP¡bhè~.´ÀWžA®úW«eÄLÀ™ˆá¯’lJ{ 7¯/¹K >ÉQ:ÖÌèŸéí‰J§7›ÏfP.šŽÌySÂC±2€ªÃ /DØûÉÖædj«º©ë§^n⤋2¡h»ÃÔ˜À‘[·Ï?W,úÑT_S|Œ)aFP–K¿ò öˆÏ4*Ñv Œ*Í.üèX+L˜·60¹äØ0‘¯#ߟß,Zð6ÜN¶÷*=僔Oë÷›„‰FìêEfò8G?„V:Ò òâ«ÎTÒƺ¼‰!U$Œ1w<ðxÖfXØ7î +Z_<©:lþöÌyw8†öí²®€Kaßëµuåà•c2Ñ9:ð†#ö®Ý ÙôS¼„äšA›™»³wÅø—«IRVxìèªMƒ„…„ŒCÜ•`äkcÚN“?-8Ý€óÒ&‰bÐv=8àùaRÿꜹÑ÷‰cmã:Òÿ½È‚€O G¡t·‚ÛîúJâ©ñ•aÚbÚüv%ž%† +7-«¢¼œ>µgòé%gŽádY,xĽÜùxFws—7åaË«¾cÚ®Ât×è­£qúåHg¡šî8w­3Ó”ÅtÚ¼™<(ô·F©t%cQeÑ[ác$ïET£-§·éʲ®÷§;ùñwë‘«æJÕV¤áÒÐ.'ÚKÙUµ˜Óèxâ“ÿvuí"÷@òÎýMxZALê`L"[´Ø ±ÛN"~èý3b²w„–X×sdËÓJAºß/šQ½½"Lò xZoFç«d×(7IÀÕ)wÛñ‚®’_/¥ÖÇ'Y4óþ]9Ë/Qfÿa}bß-°ÔußœÚ +’obÖ#k…m5P>cìNˆE;äraŒÚ laÓ´¶°þS¯r)xÁekæʯ} ñßòRè ã\¸héã™ÓõPbù_VÐì‹;ÑðÙÔÁÏÅâÿOFÍc¥&%á©p}OY3ö%" Ž#+sx­b—bïP‚ʵ€cf_wTX;OŸ¼ŽP1e„ßséÖçÀµ²él[qYñ+ÍpÜÆÇtpsË ²0Z’dR‘ +·ó¹}ÙØå +s®»jXª™'MùW! +@E­úÇ! YRg:qU¨3`0O Ó þHJþü|z7®#íÿ8ê ŠPú2ª×¸{2Ÿ‘Ð+`æ‚hÌZ•÷m¹I3®¬YG¼Lغ!}¨Äm ÚÏG3n›ùKј¡:pFµ¥í”º©Ï÷}p^^°‹ÿáÕà,î·"åŽ + ?ëÃYžq•õý«ef¤òsŸÚ™æÆ+æõO=3y(VóïPSÄ.Ïw¥ÉòÄÿß;ø•ÿÍ/¼9´ŠÛïõX¯fÁ[ÑøSü¢6Sás9@;É4b.k ýZuÜI9ÃAŠb _sv­ +].#i}Å\`f0bœdfwJ T/ h÷KÝ0rØxü’°ÙOTγzVýI®¥u’Y|)ûÕlnÛ‡êÛ­ðÕ[ XÐ;= j,i ÎNa°á! %¿5€-ÍãNËhÐ{…ÀÝ1‚¬ÝnqƒÅnu‚ì‰;"FÍgœL{rÓ[Ó?¤):˜|‹\Uô ’îÊÔHÓ6¢†(­¯GxÆ12d¶*zœ×Dùrëúý>¬‹q&‘ë +§˜]§OHP £B_VÂí*LµŽñ裎A 'vvuͽr‚¶ñ4RËR¤Ý@à ¢o’,çÚñ×b”þ>¥yÒkm¾ˆHCÕZèµ Fc•¹ŠgË]—ëîÂÕüBþH”Š/—’QnKfA£"LFŠ»“<“ØÜ·Ÿž¹ž€nä1ˆ € %Fúû¸y\±õr{˜¹anX<É­nÛU³Ð:o¬"Tè ½WÁ¹nO{X4½="\9¦KÞøïR¸û»£^KDZ‘>H¶=Œ³EÚGÝyˆ +N¬Èý„Þq¤ˆ9ÞTxE;ò_æäòÂOšš6¨¶fDÑ!&™Ÿ'™³ÑÂ`óé&®Šë÷1ðe"8ØA…X’2ʺ~> —änpç§1† Z€5©¿£ø²ù€Ìç°RŠ.]íËöû„査@càÚé’@i\fÎ(鈿R–ŸðyŒçRòËë(gÁìÇGÃnëÁè’Bœ¬Ù[;Šç<ç÷Ê]\ í +o9wü½(Nšï¬¯Î§ÙéÚ«*uˆâÉé8ªy-*5òËÉ£czVK<÷ºp^ø17Û_Û3[Øn`¨s=×Y’Ö['@‘W‹È*êöÑT_S|ŒªÍÜW)i†Høë¥%‡"WRE׌´/b),Ò©G[–s¥ZÙ,®e¿]ìÎ÷j–ÅvË:aÍÖdZœ}´/Ý_ùî_©‹/Ö˜ 6SX`R;À9+ ¯ø7Ç嶌˜7„ÌíjK aô·p(A\N‚Òm6n1ú°Nëú¸›M¬p¯´Yòæoý]FPiÕ_–Ò 7;ÞF·8‹6@ G+Ü´¹&uüÁC®lœÍRü¯¦é(µI¸,ªŠ"|«eèöÚ7Ô8š—}ÝEíYBYvvb±IÂ#c½nl¬ü¢Ê +7›_½e¶LZ†o¤"/ߊXO„ò'¶ÊÙâ¶Ûø×|Å2ÏõñÐõÎ]ÒÊH, † ºÞƲ¯*¶Ë9»ˆWnÿ\´—mQêšFÀ´ó¢Ö²Å>0W¬9žü©c|Bà99óS³åÏRÝŽ ¥a#‚וnW^{^ä}¤½d-úعð¹í¬iˆc&i%÷{5¸Áß]'Òírqñlä‚fa[ÄÒ/ºurY¥Ó*­:jéF2 “FRùo7½òÿ¶:‹ŸAX?u¢Ñ3šF<¤?XÜAeMIuqýè¤âø]¡¢øJšoÏ ”X}§©;v =jl…R¿¿>;ìÀ<§óAdfænµ°Q·Öºõ¢Zõaî~ú ÞÐ)Ò'02áȥ͚Åå,Ô;‘ œÀbÞI&f#¯š´»6~—QŒÎdÒ”d÷¦jÀxr˜¢Î°ÿe©¯JÅÆŽ··¤5f1Ê‚U³?"§'¦£-^4yoϲ"Å°ÀôàÙc¾ÜpZÕâóÃ8äÊFéˆZðJáñOS4H¾ëjOÀøSö‚Øv}Ê]HÏõVû¥Êþo§áîàõƒø–Ñ{÷…oë¬Äj‘w×SýÆ5aÎy¦êß$ ucÓ柼̪úáA’=2^°3!#½I$ž¸>R#Bu5ì¢ûËáIöH5#ˆß5ÔÈc)rœ5DG'ð*¦ZfçÓõ·i[/5R7Í÷|ÊSÿ WY½7ëÀÖ¢Ô¾î!&ó©bi¹ŠGx\ï»MÆègrRBÛG|çg™IAé¥×ÚIÈ͆öj¦ Ð%d¸ £òÕ’‹l–Û3)ðoßÊ6¿ê •~O´»<ýHh½¢Fg9 »[±fȶµ#Õ³ªüÀú$\g1">ŸhÑŠ')üZõõ”³ÆöÅ4'ù£Ñ¿ïÔø<ëÀòÙ.vçÿÑ»ÕÖúK¹¡;~šˆ3Øx1±ö*Ul­oCÌl-×GäƤJ5^¾J|µ 62¡¥¬tYñtôúk4%üb´f%)+ø§.s+Ý~U*[á¸ãŸ'b“ëûï þ3§Åî½´LkZݤÉ;mÂy9…q Jª¹§d OÊ kaùx3P+%[sÖÐ8u¹ü4jP7,X!‚ƒg2âЪž–ÑÔ””dÍ›­J©h»à¶CŠ.?äˆMˆo;ëƒÛþ8-stßœ!—o G€¦X|öé¶#Sé{›H¿oOõE˜y‡@ý€Ÿ,pãQ˜êų½y6OEÝž×cmb¥«‰…6 Ã9x T'±¾Ù4™“ŸülÅ;Þ1šbj2¶äIä1ó*ý=lž….OŸÂϺÏÊñÃ![67@oýÜ5»¯5z2ÍN_ì q’%½J°åE¤âM*¹²¥Ù0Í1œ°l"ñ•£¯ðËïYó+GÔƒnÌHÌ®·Û»n×m¼´ Y¨ŒšádCÅç Á’CÖö·¼8U£‰>JoZ3¬4 m®^‹»š[ÖTÿdtv3pîÌž`Y†ëLKË=ð&JK%žK¬5\ÙÑÝ o·DS°ÍÕ®,ü¯:ñE¹¿þTiNëwþÀ!H®™ŠËòfþ·Ú'ÏÀß«Gü§5 ÿ£{lÀ­5üýÛ÷´¶Â¾iß×,×3ðí/¢ÝnÙ)ò3o”øãb†¬†/Å’s “9xý1Q­8þ%§\†ÍjLÚQªu¥›¯†ã9mz…ÓÓu5ëýÈõÙôÅ&[Ò¸|¦žóØ@¶n*FœTÁÜö61âÃç¶áôuõ…²æ™l½ÈKÙê%$p›9fk_=ž·¡RÇxJÆonÇRÔ'§ ^>»Æ­£ç}êܲeý"ˆàØèÂEü•ôH?í€BKÍxly°¡öJö¡EÝâá1—SÀÂÛ«;ÙqÇ"hK”=‰Ö9fü˜ÙÊ2¹y3„*½Ï±=^1}Ó¯DähþÙ|¤R€jí!ªäŸâÒp—°‘Ü •=JÚßÞ)¿(Ò«‡@†¨JR¦ýfò^ä¶þ3€Ìƒãò³Ik!?4†hÝ÷Zý`A#& ecÚj…×ܲA¿f%ŸVß¡uŒ6ûOê4µ‡´#>i0Š=t¶Ýûºy닇œ×/bñ4¶æVͣѺ·b[ä±Ñt¬°’F ¨¹ˆ`ñòcL7…³Kž>l@ŠÛ5Jd‚ŠFhkÜìÓVi¾–(”ËÚ2ÿ+p2«‚(M ‹Ë¯¼jpZœwµ!†õL§ÒàwwöÑíR€…O@¶¾MC©Üt¤óKgÉ—_œŽ2Q”^rù£3û`ëZDT`%%¬ÔçÆdÂjòV–U7ÐJ”‚GÃêwÀ1N5[šN +P¥—NÔ³ǼŸß–§Š°K#>ÚŠÉcn³ÿ°ÛÙ)(<éd!Þíz*`ûßÏÓ. èÏì7ÌÎQR6›-VVŠKÊA8bÅé–¸…dy Ê\»ì™í(Ò´§¶Í—¿‘’†À;n¨v*BâíÅ—´s73] ¿^PÁ(ÿó!ΣåÕÕ5.̶‡Z¾+•J¬6ì•B"´ñëF½k­ë[ C‘Š‡~J¼GXyo0Nâ¤TØC¾$‡\RrƒbP)$'ŒøÖPÓ¶àS$¡#ÃrøMÄ{T–ÀÈŒú)jj&FȤ'+žÝ6nkô˜Abͦï\ÑrÍÇ A ‰}9±ÂŠ„ê-ßDqx×QG’,Áš‰bå˜óî®Wϼê’yIg¥K†Aqhîœ2rtØ +×'•šbÊÏ_¥73w¢¹*] +fM^¨pÞsêàäÁbèfÜÔ‹¶{œáÐ×À ²õ(ûG„FLè¡Iï‘!ˆ)Óû¨3ÝQ¦· '£•ŠfÔr ÛíÀà…{}›´ÃÍ•}L±ÛÏutŠ~ÎOÜÊ?ùÄ8N¿)ˆZiØÕGàŒKZ %_'\Ñ÷a÷‚ÜÒ<†FÆÇ +[•„ˆR +FµÕÖ%£q¥øÜ•Cå3b©éÜÎW¦‡‘ÚÿY™¨ ¢jF”@ÇZK­gï»`àLÝü.›äp™VX?;žï äÔ›£åšÑÌwc‘ÊÏ—µcgïΩܖ^V„tÆ ïÏÐÿ¦±Ì£‘¸÷ ~£šÁy ¥»¬a^d5œÃr¾÷…]Gw`/s-ÌÚ·²b¸¨}ýšãe3ö+NW0©:‹ÄùŽ¥ )“A»º8…,t±Ò|%hL;—ó€4àë ÿð§÷Å.e\éNhôéî]"óXˆRñ€Šêi ®J¥³9Á‡õ&0µ# ƒO&mp%?q×DÚO—jâ8®èâu¦£ÊO|pØ>ÓVž^Ý#Zº!Œ‡çôì¥>sÀÃG#N…}†‡ôÐý(ïb¶lÌb…e4¸$¬Ê<ÕŒzæ>æ=4¦e?WÃÆ *>¯„]ÏÖv´I_•öD°8 £ü’ªš¦ê©Óýà{®¶`²ÒàÉpÑû´Íë_Ù}¡ûúó©$K¨´\¤B°/ÚQUùÃÙÃX´+ÙËÁ@õÎU5ù-¢—‚¡ênš­s¡ƒ›i¯0äþžC¢Š„\º‘)š„‹Á ª¯…`¢MÒIåùù÷ÒixDàýnw®áÖÐÊTC†Ò Ø<.ß·Ù¥íÔ·z^V±É-Æ<ûîX4òÂô‡Ê‹Àiæhê#óÏãõ1îCa/׬õÉC°céäšú)q¼ýÚ ¼Ök ï &†â5b5-c° æNW@$#¿ZTÁÝzñKÒúbX”ŸeŒ½DG'ð*§ʶµž”2bòæï²#²\ð¢'õy¥|´ ë°±œÓÑõ¸h]tÇEMšÝ-FwÍ AÞ¼±üO;(…ÎCq5pv=$êÇÿƒ“þßþH2§:òð‚–'ô¶]…EVWF¸îÿûRÏôƺUÛ¶(ŠÝ‘à|æ,àÉf43l=C +½ÉÄ¥Œ+ì=ï½pÝM#»x¬-ÉÁü·Ög‡Ýí—ý|/Ù|åZÆ¥•«Œ(_" ø09ÑkpùÒ:²“.ïò'æú§ÛÖ–z“mè}¹ýêEñµÌ¦%Ò§éà’¿ðµÈr¸…@êŽÓ)íØÃk×it³s¡žé¸ÐÑ7i4²€Ü~t¡Gn}gÑ­à¡›ër¼ÍqA©½ŽÍÏ…‡í°‡JEÒ’os/W­gg®´™÷Ö !à]&&,Ù½¹ÒZ匾“¼cð´ÓCù+]dÚ?Œ‰j’ä_×å…¤Š,Œ·÷?>Å-4c´hê‰4wþÔ(~•ÜÕ¿¡ƒ¥™ŠÞVÜÖž!Ø ÁñÈ »nê ©uIÓL Š¹ÛN£ŽÑ(JaŸ½’½yqŠB «“.…“L“‡b…ßÐ ”éKÖc“mI‰SÛ>ôؾ >h©Üt©Ò”W†“®ÇZÀªÝ °^*¼õ2ó‰[á»qgú$;û7®BEr¢Nü$WB´?KˆTS}m”R3o…A¶3:–­ ËÑt6¡ŒS¾Ó’äA™gàÕi[Ž/Së&QxäYY½}<ÊiļâÕéì”_ j!zÁa72¾I»Õz¥Ü(^£lØ7§þºéórà ¾˜ÇÏ$ö‡Sg›‹ñ³ó±ï$—MÖ‚JŽ3Z·ærq[ÙsWŽí5ÐiŒË•q",Í)W61EÈÔ—E³‡¶.gz¹LCØM^ï1t—² äw¸U5©5Öõ[€›ƒX+0ù¡L½Ç¤ÑÒx ÕÚ»{Çx?•Ý-ÜÆ4ÓÖ4'Ž½I³ËÜîü¬ r>ã ANì ‚KÓó4ƒ!EL·Y³Ø)ÑùŽo¯cû«8–ê©ŽB챜{(~®b̤ÓöÚ´ní@„]šûT:ã¶Ïts©ª¨—Y¶ú£r¼K…••È<½Û% Ñ¤‰ë ±iØž^|øây,þ[ƒáÒÝ3åø\R6eè›à…q$Oýß²‹ª¿§žI-ÊJS €f?3ø}DsYKÄmÊ…‚ *Kd&*gS /sîÛØéɇU0㊮FÕÝL£›PÞ_oƒiŒË•ð§ÇÞ{Žð°Â2 lÉk°nfeÉŽ0+> îf¡Š1)1®Qâq:{rÖ#~R—"[g°Ô© +üï§öㆰû¼ã`9¥à NLÊ·5šØhñH8Ê’àº5¯F;˜ºJZ²êuÀ›2×ðéó± „u§É¾˜O§|6¥îêÄ|þWZ?Ç€ßí¹‰Z³! Lc‡·›«ð½üÖ&Žf&¸žå™f)ž*Uñüõ~'Ú"íÆGSö‡BüIºLÕÛ6†ÏH®Ïˆú~?ÑÌ?E +§@Ø×8Sö¹õˆ X}Ÿ^.5n=þû¢-´&GËúí¹½5G’¬Ø÷LñzXp`Ô;N]çûÖí‡æ$ÐõqCQñ¬õˡ׈à‚ðTÁ°O+½Œ@^xô÷óª7ù8É[S‡ÝÀן ”Añl™WÝÝ%OÞ»šLÀ™®Dævý®{±”ÿ„ ÙdƒkSêï8s<5'ûüýõî¾ü#”ë7¿™õ¯ò8?Þ6³9†k¬Fýu@n]‰žù/8³Ø¿@ 0‘E¶û¥ð2Šl}‚û÷½ä\†BÖi93*ö¹ÓçÉÕÝøY—4>zÞò‡é‹vôûʘÒeWŸÃUÂÄ8üIà&›õ`~¡z`ž6:9㬟 Ð7;uòsN‘ïK(Ÿ,Z‹ ä'¸-Ü‚-@ºGÜø!ÿjiZ*j(ðñzhÕ»Þ”[nØ­Vù$X½ÀŽßx&šý¿2ç!ã4*o’‡+ ÉÄ>/ýÇ ›Â¯Ôà\²¹€ˆ¤TžkVç`MùFª6°Ùm§¦¾2¥v9ÍGÎÖ؈¡›ý’^ QìÁŠìº¢Åž*æ‘2Þã`ŸLÅ9*ŸnܸÙç ¸/)B”–¯K +J!œ‰×LÇJ!{¨hdqîžÃˆÞ±Q6›ï¶Þ¢ òW±Wo¸mR"Ø÷ ¥‹n£ê#(íý—¤•ºÅÍÉO}Ù-V¹àZõýçãĉaŠj­„ü<®—çf<ÙÍ…8éû¬X<1œy¸í¾GÃáz -Ñk\’lˆbƒ?suÈÝÚÕ.g§Œ¯WilyÝL&Ý=Ùßgž€åæ y +ž¾;ûéh1¢ª¢ÒrÝ°;¡¬1½þIöNÛå&I'9ƒ_øw€€zöRW²;¶ØQ«°•¥l§SÞŸS†4îñ %^ç¢hÞ‰Ð7c-ZwVë_Æ,éž«±mb!Î’Ø®IY¯H‡3ƒß}^ß‚ŒÐL‘^°=%‘* + 8{^¼ßy¤ÿ§! ŸøÏ(š²ˆÏMšÂõÝHéSŽ+ÖÞÞ?%ëá02•¯pfg%@s_AÍ«XPˆ)ˆ,°~¢C*0þ茸Ûô™¬ÌÔÅ@nˆZqtTMùÂZlòÄŠ Ql„l¹LÃÃF-ÃÍ‚««Ú[¿ßË}>œ°Ÿ\‡ÕsËÝ…Ê›¯‘À®²‘ Á—žamξC·æ¹Ût&­<+c„5Ü. +ìveMN:kí«v’ÕGùÍ&~ ¡ *TÓ¹OÊÈÑ?÷梆ÀhÈfF›;Cwî‡&=[·•ÿ]Í`_hεC°w'2¸åWw-p[Šc’ýO)æîVðxN&×KI³¯‹óºÔ¾Z2JóÞÝmU<‹TŸ(¢®Y²^›>0}õgaOzž´ô×U„LŠhØD•üN‹%ÑîÀ›…ƒœµöÁè!é¡u×V¶­«<Ïìc2€_ØÉzvÇ‹»×Y£¼Ç&–Þ@ëi=&ú¸\áDׄQëTC&Õ¶¶¿@û€zÊøa³äuŠèI¨£^B70»Q¼KUʽh¦ ð®k+¢³nÐ[s½=)¦[v¤²ÓKnÞÐoSlra”ÞÝ.ïlv“:"R+òÇO¿X–ÉmMÇbM¸õ²|©Ó¨L+dªóØt7*Æîv´Þ1^ ?«ÿI–ÉV­Å—í/o¼áhWúèȪt#k5ûà3VšÜ/å–-Ãøâ’ŸC¤½ý/04ˆU´‘ïL åNžñpþ0,%G¹“ËÐTŸ +^½Ø¸;i†šݨ ÁAmé|¯òÿ±‘•v©±Ø²‚Rã›øÉda` û"8‚‡¦ßá–”K$i-{ֶ󢀘#w\q$~ùøÔð ÅX8 ÿ%zÂ\N;-50xÿ÷â…¡ÙB)èMÖ½ £²êºFìe @Zœzx©HVa‹7Ë8jÌ^ÒÆKsñO(\ÈY­j:?ÿYîËq›c#ÚÊ;yƒ„ò{÷¡4ÍÜ‹ÑŠP<ˆðŽJ`ºT-XþŒánÉ÷Tý"𦜌vÇI$¼õSJñ—ø%˜ƒ·Àžs™7ÚMM_P¯}”BÉÒ8ú›Çš”„Sñ‡nجï‘h×ùÅ‹IqD…»5gêÁ5‡*­¯¯fCBvwè>lúøÒœdS»bª:TXº2ùºîýŒÂü[/-=JƒÆq ¾uúGáÒ¤wàW¾™™ºÎÂM®o2IÝ\þ‚žµúªèÿ‘šÏÓøzta~½,“E?©P Á½}-ì°¬…G)êÄ€V¹ê†æ1ZÊï Ý®|oÚ-jßEp,@áÂõ©,Ó YA;½y’<ÛGËÊ‹M؆zËH¹Ø”ö‡p«hç×’a¡/!¬ %Þ+P‚5é5—D8¾Øb°$)B0hru@ZÄ]Ë(áÏ0û”‚²5-é™ÑIëuœnò„”¶úP¸§ƒeØéUýê‹ö€Ér;8¦M”¶Ë×oÞéÀwrbè¿Ôñ–‘øây{µ, V©ù1VÜRÌC–æ¹²·5Sÿߥ—@_XqYüÛ± dY±½12G/µ«5e;.©GÅœ¶ÔŒ0éUd½‘à}IȸӿD`ˉ‡JÌ{Ãg/ØEØÜO¬¾u:lÅ'»÷ÈÚñôõÍÇÄ‚}iñÎeÌ|¬±®eñ¾SÖ”oîþ»< py12´ˆi¤€©*N»„´߬A?*½ÛÓÕ¯ÖZD’¯Q]›ç0§‰qmOÀ½!Éá‹|ÂxäÉÅ;’ƒTBtâR‰¯’ÉöíÒ“©nOgfãÁU†sɸ|¹ÙÁ¶Ó\Év:å!žÌ ºÐutÙà™SmÓ'èW®”К›O~,£½Œ mß ˆ” í·jÂóë5Ѷ·ßyc@˜¤Â¢ñ#V—4/62¾<·y™>‘A§r= CŸx«‡Î>]¾[{ÉIrç• +$@ÒW +¶?þ…Ò0㨤 +B¡èþõ+ïví06Å?ŸmOu['kUôS1ð+”ÞõÌâ4/஋±9u7ß$áuc‚Â'\—ËÇóe¼¾€[  †57É–ÔâQÚðw +ã¸YS8HG½1GÍ09 ‘ Ûzö¦ËÈ£÷Gr/nÇã·J]žÓÀÆ®K/+©ÁKŒåƒ¤ËÁ'îÙl^-ðÝ¢ˆÎJ¯þ¼'•@Ek–ˆí®n€Q‘,MšÝyMtc’$)ïôS ¸%실Y ÓUÀF 2 +J·µXnŒ®xÁë”ƬÌáBqåÏv†/Z§>bÛÕCLòÍN`M# ÖZyìz^µCÿˆ¨Cp÷(ØÆ}‹!n9ÀèUÁGÔt™áWµ|!å|?ÜòñmUá.“·é§¶A0)ã_sêô>³A·F‚Üð´s7•@ Ek†wˆ#îÐg÷GNÈ‹_sr9·2wbâ´k Ö +—Òä—a °ö*‘Òë¢gÈ_Ô'IÐH«n”íNŒcbñùÿƒF‡ˆ†´Ô‹–_­“{ „ CâxÜÞù¢ëm®a^¦lT/ÑmUºËß×: ™&S„S”®¤JS¶zÌÖçƒÆV 7¦ùœaqbýƒz-ï¯2Ô|£Jê¥æË­{øŸ–ˆ›‘ÒŽ!åÄ—;f@~æ(+ílø¥ûøt%v?'|½HpH¤3r)+þ‡Ñ»U!&ú•*‡¿òŸjYwÔGnPû¹ pPyyÌj¨¼>åð8˜V¿s×.'_ï˜a  Læ F,‡×É÷±éeÁsàôÅY°V7N­†!R‘\ÜAvgF¶a/%]¿¡$æ©+/Ë +LþÇ8D*û2Ò:ÛIŒ)úuqxaÖœë!¢q3y—Š?›2þ'#*YEî+LžÌ:O«žO|¢X:ñ?¬æ.ñî€÷1žB´ñ¥cÎlŠ/ZýCz9!Rz=ˆ²þ3²`e¤T@ÈàÕÃNÞ´¸0Ê ?2xœk쥖ٵŠh}Ï„#=ß'c~Cu’Ðøô «/üAXSä~$ +5r×»òqyÕò8[Љ†êMôXôÃzÍÉ–i´—ÏÂŽ@„à;8‡#Oªïô¾ä(\Æ™T¡ +™9| @%ä¸ÔÃVb#½>Oè‹«Uõ‡ÛhsDRÈtÎìsî¡¢767ì˜ô¦÷ì(Æ£ú·‚! ·¢(Õš|5D +€ëZ;ã*b.ã è¿N'Üue˜N‚ŽLa%ò' »Î·V0š(2pŽΤißЦ~ä ú»œdc‚`@ç4ø ¯W“"èøÚöM:EgÈ0÷‚MÐ짒$«Cç™d¹sᬬÞlpüàôc»B†á¤ÚÃx¾%á%µS«ñ –r—%œômθÍ(ÅçIÞéôq9.ËÖì“ôŽñ亥͎➀TH{~ú/Õq™ˆ˜O¨(á^­4J0YšE¶XO*_(cª%²µÎ°”ÓÞKœÛ¸¨VL,mÛ¿yD÷oIq—CCv1‚À&~ª±¯¼®$;ßOŠÔ.0t™»‘6½Œ?Çù.VÌ°ÊÈRUœþçû,ÍúrÃÅk‰XóèöF’=“…Úû©†›Ì® Ær>V \½kŒm¤|I¯üS JpOÆx3eBb“ÔΦ§8 þïnó²¡Ã&58‡NÌ!J5øç1Æ^½Ôþ°[ú|+±pG œÉ“‡Oý©±g¾*ӥ ‚Æ«è 8!üëc´S m“«D&»‚ÕkÒS‹yO}ð->ÖÙR¹!Ó£¦¢öÆ Q¶#,Á8ª­½–#€&NdÓ4\Ê>\ÁÏq˜ù6ø6Œê§CÙ¦ìÏ"-$K¹´Jh}Nµéî ÒÜ“½~EUFH+îä"7’¤"…Q¤¹žš]ßM¯®P*@nuÎ* LÞKjði¶6”oÕIýsr¹gWìÚ^,cÃ-LAßÙäŒwïzìØê”JC‡2 §M¶»zkL9l%_°ÌæVÔd•—Š-ptÎÀMgÆvY÷nt&l¹¥cNñ÷rœÛt:Ò-6NY;èD™ øU®·Šu:4r¶âÇJg•'õ›UÜó(?“L ÔgXï`•Á¬Lx~§[|®— j0#^3µ˜ýŽUúûÝTÎüÐD»¨ ƒ‘µ[ÀŽTç° T õ‘ñ‚2Ô„T·2ÞRuª/ø¼dÂìУ¸–9Û;úøSª:OÅ«2>çæîü7R9×0ÒÂJ¡J–Ú6Ö6¨Þó=Vx&NÕ>ºzŠÝ”ÞÁnA)\ÆßVº0jhr®Xì½ZfæM."z{9û”¨î_JÐ gcã¿ûQìOrÀ)ôµåÄVÄ £Û‘ÉÁR@ »‚çé9'šD)²“Yó±¢dy¯Iä™UƒXÙ“©RiÀô|–¾‚ÐˬËëI;®³ÝñŒÈËÿZP-TîÁH`ô<í¬1{Œ’b¾FÀõ.j´²÷\è—ZöØ^‡þØäìfì«·Såä/IÕZÙÂν~xï3d‹ÿÀ=õó}†CTq{õüDQU…‚énÈ€Äñµ礑;çPb…¯RëGñ‚g]#äÛ jçno/áFø“/lÀ©3’?‰Öèí¯zü6®/þ]®úš2´íúPÄî¹›7öEÞ$Æ;u"ÅcgóEΚÊý¾þ/î‹qcúHŠŒYÅ×üVåÅL¨zç«2 +f{[Œ±0Bãàô\ykÈO‹1rÖ†b­Œž¿ü€a·ªð…øs_7ÛOª3_b{Ì«̵Z*Ï~ÇÝ;ˆVo…£†Ü'ŒÑÄוּ…Æ{\¾ŠKš(YÃS¦dOîðx™¢>—HsOZs1¬óûÚ둇@zÚwÀ=zÎmP³ivyDªß6³9ª´§ë žÛ¾æÀô¹ó5†~Aów£Ä7’'trÇÝDŠ§ì·ª3}Ò7Æ Ç>…û£ö=´ Ž›'¶Ž yi\K ÊsA&ŽD~Ê{›·Ã˜IaÂù®¾½ý²VEÂ)×›eúB|9®/ÙŽÆïó…´ß.n ÿ¬åþ1ŒƒÒ!˜ç+&[×}öŠ÷ž'b¢g/sž"±ŠœˆRM%å< +KW1fM[n¸D3„ÎR‹¾ÂæÏt…;“$W¾­˜ ƊЮ™ÃK(:ëPW¶¥·3a/²q÷tñ妳Nãu—fE€)8Í°kSŽNîVi-†è;ˆþžåìBÖC†œçﳼ›`:ª£*îÿN´P-%wœþe°)‰CRÂé¹ß§Âc+9‘¦¾¯+!Êo0 É”N$hŒk¹ (ŸcfÁlRX^‡õ ‡0ûÆ~!;€D–fjÚ m;4ú @¯r{*Ü©òºMºSAÁß®jV|dÿ¬Ê\–Î[!ÆÕÌ°Ù“ =ÐP7· ›—”ß«¨¼H ‘çÝÀ +G±H^.ÿh+î+*àlýñlP¢SÉ-Åo}¶Ý [(¥@¬<¤äLG¦w­7$Ä,¸ûÛÅ9ÕIt”Hß­Ã~–J’ŸPu€—yžÖh)L‚›öõ#û 9:Qæ!ü¸ò×gì¥$Ä„Vùy²¡5ÞüötMI2û¶pˆ¡¾™ÅÒ¤£Ñ¼F±°?Û„»U~'Ž€ë^-\åÉÑ»gœº¾Itcú1nœ!×ó’P%)l=퓈HÊ[µŽ º=C4#@[b_‘æWò’2ƒcÿ µ v*Ê8fú“;úíê$r­~ؤKéU5jÍ«%·†ÚJ8àþäâ½ ívΈ±ô9âü×åB(†\ ìÔYé"¨öí:ŽÖE¥…é½Êd…™¸_YJ)Ùõ2ÓÍ‚| ÍêlKÛË#c­<í%Sa&P”èôï¾m„¦KÊ÷’ñ|—NpòÛ×Â*°ýu?´ÁÚíóZé½$R”SÿyøÌ\6Þ?“ ´pO…(fØY¤®¶¿ÄéÚû<(†bóÅ*± 9à2Ç¢ä¹S3S –×LΫ㱸ߔ žÌß1sºpK©VÂVT\æoObêÆôG255¹' B1Û!V÷dçV¢ÁÚò.[7õ°½‘8ª¦––kNÒzÞ® s?¡(Ñ¿†]CÏrìÂH×e"ý†ÉH¨H%ŒZ|veˆI™É ¤´, ҃ǬŽÏaÕ©Fô}–Àšü…‹ÈkØ­%tÒp€ v ×Ý»ìÍκŒÓzIoŤKJ0"û†¸ö?½æ<¦Í•èì¬ãp-Å ´‰´y{¡:MöüUÍæôÓ¯#`ªÙcô“)ç‹y)+MLECçÍâuÃà’„ÁµGÆ5Qh¸Ãú#£)&‡¬‹‚ˆ+†–è20&èÆŽéTzÒÏâ:„_O€9覅‡ÃøÀzî¦L3X[»ë—(v¬ Ak³ù`t>“knµHçÀ¼W³MU§€L›ý?FFâý·_¹âÖ¿j× Cç@{šb$ #Âb +½2äòMá…: +涡KžAQ‹àÕÕ©×i‡Ž©¬Tñš˜€÷ÅŠ¾½„1b¦¿ú­ŽöÖÝwgJ‰ ¸Ô¿(ý-_æÐ$œ…@Š.5È» º_øÃäGËòT `·1A:GPD®ŽÓÀŠuXS®¸xSÎÿ‰Œ÷‰ÀÂœíÔ:ð$Ý—›µYÒ_1?Hö—$ŒØ–‹ô§¹ÖôÉY]o8å™i:±C÷èh„`µ€-ˆe”©j(Wö`‘¾Þ¯RðųóøQ,”ö±ó wúÝÕ\ÆÚ= •ESd‚Ë1€ ½2@ü#$ƒ埣Òæ—9¥&Üc})k>Ü;’°•cm9àGDIrmEcÚ‡¬òáó-lu%z:˜A ‘,z |Y"•#çÆÕÃðD¦Õ““Â4Þ Ñm2²o|eæQ<û¼š2ª¹3Üf4t+Œ¨Öc»Ñ»´9¦É›Mu Ða^òd¬¨ôJk´£O›Nã6¯{Ö)‰Ì‰ˆ×Ó Q’”üo…ºBm:”R¬ˆš”‘Ðv•ÛD¨f3ó¿q¹‰¤îwyÂ}¤(q²Ðûy\sØ€úFBî6¥ºÂ}¨¢ªârÿ€.çŽ*vÑÉX?ÔïƒÜ®–%òóOÜHä(åøÈÛÊ‘ä<-6ÒUýçÉî@xñ£rbÂÍÓ«CÇéýw»ÜÉrj §ñDYä~—Üø½ ÞçE’ʉýíFŠßîúŽ¤WwÓæ)ügÃýž|…Í ë|t‚†ÛÆ€êŞ׎ï®üßÆ]òä)ÍŒƒØáŸft%sih2¯ª#G£ÁñÍ¥„"Ì€/œÅ`Ad5#¸Æ£¿´a.õé¥e§ûb¬©ò ^ö&XÓ^MÛµëŒÄ@s¾å·¶ªÝTýCtnS ˜"véâ5Ó²†P`ÃÑ +c51Ù;^ezy—ßk þ7Y©AÓr¤Ô¨™¯à%—÷*P }1°ÝDrÚ©ú˵¨°ÍÌÑ4€Ô61NÕ'ÃT¯«qò_‹8zb«þù(D]•‡?•ÑºTjpá÷¼”ήƒ´^ÏÀ†<ˆfœ‘«‡ eÏà$]·{¥Î»í·i˜>„k•AÞ†‰Q¾-9Ï 7ÚÿÙÿß¹F"àùqMü½öŸyÕ¯$uä_:!ý¶KWØïá”f +zùr©=™2&¤4ýª")îÀËm † Ïòê÷± Kë^2‰ x|“WÙˆèOß|r›rXÇbÒc=ù™Ô;JKŠWó-] U bÞCV™§%7îIÒ»ï*Ú±=Õ:îV@Ñî¿zW®ñáÜ2±ªåã!”¶U]ø‰€”ýÖ<À Z«[àZIuó_KþŽ}(6WâÁŽO‘3ÕŸnK’^TÄ›ÃLîk½ u +AÌø2C0ˆ¯5§Œƒñ¸ûoÔ]}‰I(&*㤠½;Ã@ar½’§×@ ž\-@óˆ…Ô|†5J¾ÏZ¬¬Ò¿ÖTÛÆâ¼ .áç1åT–€"aõJk:‘¹bÄ–D»| +endstream +endobj +2233 0 obj << /Length1 1620 -/Length2 17956 +/Length2 18673 /Length3 0 -/Length 19576 +/Length 20293 >> stream %!PS-AdobeFont-1.0: URWPalladioL-Ital 1.05 @@ -27550,7 +32693,7 @@ stream /UnderlinePosition -100 def /UnderlineThickness 50 def end readonly def -/FontName /RUEFYH+URWPalladioL-Ital def +/FontName /LHHPET+URWPalladioL-Ital def /PaintType 0 def /WMode 0 def /FontBBox {-170 -305 1010 941} readonly def @@ -27571,448 +32714,1710 @@ f ¸=0׋kDTOk¤ydÔtãÓÃÔPö{ýVta?lÆx«hÊ»âèp n¬.Â×2V«rœ%v%i§å)à†*–‘¬™ÂDà/çP800…g†œ[Ä'bSÊàJŸŸûäðæzÄ/›·Ò$½“é5r¹ì5¶Ó˜]¥”–0Þø:N™ñiB¼%…Ä¿H)¬ÎeIÄ”€ØhS!ÈOX£Í㤿ìÕO4 ¡¹+¹2­®== `™ŸøÐôæëñ_4ýÙ»N’U–þMÚ“lÆ%ãŽÖõž¼å&°kaŽ÷ ;› ÉБöz–››¶›_vÂùKôt©f1F[Ñ“}á:ßF9>q0(Ñ"ŠÏØÆÅu3TQ á`cìµMŨ”Ø’£<Òó²Ý–ü»°­ýõ¤É¦Ô!¼²pE<"+¼ÚÀnÉLg!:˼ßAb^ïàlO²0®ÿÔÒiS rþû~wÑâÒ˜¥_ú™g@¹“ÇŠŸ…¦‹‚å¼Á‘:nfâ–Í%õ$î77JNÏHIH“VweÊž·%¯.¹ŸÑŒêx ÕãPÓSÓ!·Þ¢49$úÞjúÊ«Ó)@çÖÍ  <¥´Ê,­ÖG—KîÅ5‰ôKlÖfÄ–rÝÙ%LYÚï7“? ™TÄ.¡6¥?~ F 4R¬£u’–4] AÄ%”Jïë ;°âGù?è›òLb?çþãiôµØ¯(á4pÎh@©0…›1·ÞSUœ?>œÛœ–mÚÁ l QÏaêlM¯6’YŠi^¨GY{G`Rse­ JM®×2ÃŽÔâK^Œì~<™i¯§÷ά™¢ Ê5s®É7‘ä~®û÷µ%,tÅ{w[+¥a£¥:èžó‰ªª˜Jlºž““Ú,¯ä[B>Ô=<½ï&N3ËQ&aÚWY†‹•Y|ˆÔÍÈï˜^ŒhfÑÌ£×Ù0ñùè:«0:ÊA!éIeñθ û%ìü¯çÕ…övŠ¤ßÎÞüö¶zl u¯3km%Cñn庞xãotÐ0(sýU$o#j;ËtLïÀ@N™x¥o3lNùìžw¦ÄÞzšô–Tê3(RØ¡ÂàÓ°w‚ỄD&;¢¡ùþÆC¶[óÂáÕxòœe™(÷Ú®ŸCwÛ[ôšH€wï32€Â|xÏ¥“mk”Z.·ÚºÙ«ÈñMÁˆSš‘é$‚>½“ „iF²kÝ!´:¨Ÿo¨¡ðj8½U4afs74?i79 b‰}ƒ4{Qù•NÞÃUׄiÂN5š¹ƒ‚ºÎ ž.ã Á^‹›ÿƒü ¢|úžíõˆLæÇ¥À0³œµì‘kv °z­Oxu3qñ5¡ƒ„fÀšV€!Q'z#Äaœ9$ÑÒ> ü-šeGÐHu„‹4‰Ñ¥U*—Ò"U†ÐøÍÒP" ^É÷.EåðbÿÃ&uB%´ÒªÒŸ[æï˜×È®äfòJ)SC®È"âÏ‘UCö£rí{ÿ½{<9ʼn7 ‹º[3SÚANSí §çÐOø€ã̓ÒaW††[9¦£Ú­07L ”1m‚þ¸µ>)ñüÕ×öµ\в•n+tmþ; UÇ~ŠÉ•:5xÁq>à‰V£b¥dSA陼æB\j gf—CX6_:Æ` ’Í÷o3U6\<(L÷ÈÊT±oÉ´îóÍê7oDDÈ‚LD“˜ŽŒ™‡ÐÞŸ8D}T ¤‚öJЛjœÖ&™1vW `ËúçFE2F¿Ó8ÒEï>Å×Ãþw6«(ô¼Ó™ô}ƒ0ÆÉOöûM@×9àùôI»² qS¾¯ð Ik&\}R»jâmòB´ë=Ûk-óÀw-½7õ7l d–w¢p7BNúDÆ9h~QŠ)¥’•Ûuåå˜,í&¤O1+Ûk›‚ÛRJWþ\0 oArœ@Ê‹þ«›P`ˆÛ<ùœxa:ö T éªÝÎ6yd·±iªg>·‚âuí¿àÄ!º¢ûúÖ(©þEÙŸB:‰û¶î™­΂«OŽ}€ mÆi”Zè†a§Nv‚d ¿Î²i²·D·qBüÊ5šõíÈŒ[$œXÎÕÆËÏc´ý"CäXL„š›ó‰´Íoõø6o6ûv®çÊªí¾˜ÆŸ‰ççtÏÇQ_søg*ë¦Vå´cdžŽJ…7ÓØSÙÔ•koÖ»° üÅÇ g»XgôLÂ/­þ†oX6  -;æ çw$‚’¥n^ »3@|V­c@Ù¦ $þràîHÓ¯N­'õ|ÀøRÖ5O©´@ÆÝ WóQ×HXbÄ@w@_ðW‡ ‰‹wþƒëËMÚ‚ƅO÷ÍáºqŒÑ¡SÊg@Š–ÝÂ…ØßüQ × Ÿ}c’*Ͷã~µV‚m¡QB"]|5ÿ*|P58ùëd»)Ú$Üx¯Øy½vÒ:÷æ2 ÇP6<Í|»ïë™ÿAž;G'ß4oå_ÁÓy“¶ýË5 îÏ-D+ÀäpxpÿràZ8;IK\ênNy;•&v.IˆëŒÖq3fx^¨ lá-N¯j]q¼úïÒldÒ2`*AÕÂÔö½ç$™jm”z„§p´—X„5¦Úfªc¹I| ö²Ù‘Å\®=+\xѼ<÷’¯ýg—Ó -BUù9º¥l˜>Ð;ñR;š`EƒžškÔœ'^—ȉïHC¼NJÝû¯ŒÊ¤9ëlV4ù”¼‡f¤þý3n”dt/µ ˜é>¹§cñº3†àQ»ÁÇû‹\¾=)Z2Ê%Ü·ÀzŸÔ—<©“l|ëÕ^µnBPø¿‘‰Žý$r± ë1ôåŒÇ`º÷>Þ– {ò®èfMjÒ)Ónw`’"ì‹:œFî¦sóŽ®\†/n½tµM¼Œx¸~]¶,†‘íbÐmšÐC¦­²£ýÞ¡Èbɽâ±R„+øþE#^?Í Ú­ÀÖߺúYËæãXÏù›\2xŇÄ<ÕŽAeo{5.$‰`ý ]òÀ€Ðµ¨sv_¬T*Ï;ɈidbMÍ“dБûn2,Q+|΃f%Ôz%žzâ¡"}Ü’=)ßâÚl)«ó˜ÉLÌl–èØ Ý{DliTþ\CÁ-¤¢½‡éœwYãZj/úîT⌠qÏ©g+ßSVtf8ÍÙýhãÝ­ŸSí! ºY¸‡Ÿ÷.£za§ßåNíÃõkÚ–:K -ibh -¥Î¹œ×_GÂOB·¦Y(u÷ùJtLgöÀ"©¦LS‰œ="ÅV–JÌ{<ÞÔ Å s¡…s‘Ô>P]âýß©áZ¾UÈýÌÅЯ¿lÝØ}3R„!íkuòSÔ¦Ì0äÏ÷¢MìO†]ó"®uÑ¥úR‡kjÈ6Ã]€ÝîA½uƲÇÿ .mH§g8‰µ\àzÛ•ú•Ü8+li{Æsölt®—¬³i“~#“o~hêl¿Þoâ¢z„Då®èçð/Ù{ícvðöj¶oƒÈÐ(Ó³„3ÍÙj^éaH -C§Ü¶L|/úe‹²¬pØã‚>5‡A‰¥4¨Øú¤þGä²&‚&›}9;‹GEVæàE…´ÚTê‰)ºÓ‚OÍjú -Öɮ߫1Nµ½w-H­ä5&‡7¾’¶ëÑ­ä­›zÒÌÍ(– éðC%ÇT`Ÿ\]ºSr*úÃ]…=‡³?Ý"-ƒY!•ƒšqRsá¡¡í4Í©lb_A.Ñ3é¼9÷ OÈtE9o¯lh'ø ) ¨‰4Ž2èSfÅ–9rÉ@‹M°N F{5µÉ<3™¼üÔî»ðrÀ[°R…7í¶ì™Ä™}L@¡/*”Ù5^ÌökÆÀGúª¢aÆE?.¨Á1 W<Â8Î#©Z×Ê›Œ^~P`}‹ÁPâ”Úùò‘"†Ód´±Á’̯ËÿàÉ…el3žƒj”wòš„¸ÒŽ–p®V_ù:v'©{–1eéÚ³YaHüŒ.áÖèTõצÛh2–hš í o*´™ó˜ø-a¯¹Òl 6Ó,ÇNÖ7𻙶‰Ùlç=Ôb Ñ<~sÒy’ã·­_ÛƒöíÆݲÐßg^ª¿B>ª'äMžjàšGt³mAŠ=÷ü¡VÔoÊܪ²£vLWÓc‚#ŸS%Ò­.–^xÙ5Ï“¨žŸ oÆ `;nbBš‚ºê†Ö‹µ=ZõØŽ27ž,á7Ÿ6ÛRP˲ÌÌjQò‡bž»¢ºá6½0™þœ8¿Jo©§s¥%8 aN>sàú¾$ÎI|T&Ó¤=s>¶ ±MBÒæÍ"Ø¢ìä§ÑO¡æD i\g,ɦ-2û{W‹»ÌäßضæËÇÑÐö á”'û°Uȼ¦’økUxbùm„Æ¡»2PJIL"!)\ŠCXqEë’‹È¢b–üêî†xnñ¿pÀb¤VC‚NG…wB³sF8Ï^H5”è¦ •)&‚ˆ¸$AÅk >¢jÀLÃø›M¼0'³Ê™ð@:.è!îw!qÖ#q*èc?•©ê¡?I¦oÂñw–ýýy"9º7ˆžõ§ÐÝ%ø9’fy³ÆîÜÁ†yUôóØ3}Dý?¹„EkêÕ@ÌøÚB÷”.Ðá© -åLY/#K]¿Ö¥"—¾‡zýH;z.EÕË° j’Xöžù.:Ⱦ#¾mÒ³bÊŽxi£ˆÀðk%®hM¾BîVèõѤ-@ÌûE¯ÛÙ¹õã°­瘲(u8Ø‹$tVŒ]ä1Vœ -bhM¿$,ÐÇ‹´X:ù,2Cæóe³â±|¼»YJ3¬ ô\†¼åÄÖ.9 ýŒðó`†™¢úÝ>.ÐÍ2qˆoò²%8¡8)BY&ózR¡z™¾…R¤òÇ©9×É»öÄFøZ¦ð÷SÅ•)Ì°ÄÉÔÎ/l?%BîýÌ)X£ÛôžòtÆëìËRYû -oøï<ðÎ vàï?ƒX8ñd|”¦G|ºíwÂm’¦ ÍëÛ£”¹Ú(ê|´˜1ú‘uWJDó[¤0T!!¨8‰Æ¹¶)5ó¤-îáDÆÅò·ñˆ¡;~)ŠÀ1w"Ã9ö¯óí­Éº`s+8Òƒq­Ñ>&<%I$ð•/ÓaòE“)ã¤Ѹޭ)¬6béX].Žr8vŒƒ Ð•hˆÍ"ŸX ûEÅIÄ8]û½p´­˜\ÿ‡¾Š")fD-aŸx¢¾äP 4‹ìMœ3G£øh ‰»ë•$Ž³ôóá¸Üî1:øÓÍ(•2iNLÝBù6‡w„½€ôw¬ -J4¯a_¸— Ãx µ®•”_ßO”îTL–ØsJ8€h‰®¬ÅB7þPæLâ›ÈêØÏù¿kÝKv]ôê›VH=öëuKµÐèï´°O›2DþGí©ÜÉ¿*”šˆ“7-’o>g®Â¼ÄÞ°¦…”ÄbÙz,»?UÈûäp¸ PR‹^±¥Ëø–p_*„שjaº»Ò¨jº0`÷zBí*c3Æé³çeã¼Nzܦ±À½U¬6 ,Ò&XUééÕ ÈÈpYßøÚ ,jHÉcº¤¤ýó’ûs¬3$xç@ ‚ôb“˨?T!=]B* ‡Ì9+ÞÐyïªôsû½€Æ'ž>ˆö‹ ýÔÇ@³³*ÅZ »ÞXM•²®éÒ$‹ -¦I,ŘÙ†úmà5 áŽÐ;+`3&¼Ó„’ÀŒ0D!’éBœâQ˜²…#^(0¼Q„$÷lH’®&bqØóy_gC¯ÇÆ(°|»}‰‰ _îK®TSHݦݣTxUßæPœ0ì¿E>Bçúb°(«÷*¬vš8<Û‡O}$’˜vHŠúŸ{~ÅhR^†àA¦ÞÙGùÇë„NR¨K éa”«8" -FWG†RÝRfAbú°Ÿ †écYî#ÇæxÏ«‹©‘½óE v¯}SŒê®ê{ôbçWZ„áS}œ¿ žD¹¤…˜g:ä__·­Û+®Ï“©Ÿ®ZƒEu²¼&¤>Íœ‰²FZ¿Õ34× gþè+¯ÝKŽ; àc6Æ¢O·0|Fú"Ð$«¤èç fCÜûÃä -‡´¬u¼Ë¯i†@Æ úÕzY{=×Ú—Y -$´–‚G…gº%ÃfÜäÜ~j¤ø$ÝW,;|}ìàôŽ3Y;ýs‰‘Áö`ŨƆôä£ò—#‚D3«ŸCB¿0 5ö½åe†W½ytý1…Ò,òì)5œ×T›­bMšb?*½†²šXɸñX_-©¢@g{ƒ®ØGŽšÏ‘3ü§¢lGÏ>ƒ!üÁü³"^0¡4Dv¦¸%~Ô‹·Íðú§ÈÌïÿ†Ö0cV¦E.c˜Ì € ËКšU2†T²ë;¡n™YC÷Kð>¨¿ê-WÀrÎàÒá{æù®ÛJ «´s' -\ªyS|Cþ29‡%y˜ò­ÛÞYH9dM)Á¹ Ÿrñ —™ãøX“âÑÆ­DKý¶Jœ¶îA/TÞu³ì‰®²–ÙñV®9=™h³ˆØdÆ•´;vÈÖT†8JIfJÓ­·Wph…$à´Ó^QÖ@ÇAÅõM6 ÎYRA¼¹ µJ¥pŽOx5‘ÊYS£6{£Â¶:æedÀ5JA‡^:ÜPôFW“AQ0:3¹ôZeÚeü_›D-ä¦>b³é±™IéàC)0ÏÃNôÿ¨çÈõ\£iÝøH/ø%¶Ò90ÍjÔZ0oï¶lÞ‘±×îX„jÀžëY|6>1\ÕiªD€ÛO+¸¹œºÞgÊ7P‡C¯ °/)4E›Ñœ¿q3.=®JÑfL¤ˆÄîíh£KìŒîȃ¥5F¿µ·ÈyÑ$`J¥Ÿ—* *D¹H‚fu –»e½õ¹ -Ø•¦ù´ê ×þ G­ŽxIÅ@Lèf¤Â·mé¾Ðýø>ßÉp‹A’x$Ô÷ L£k¯óuVSmF,^™@nR9áT€ÿ¥zžYøðA¯@®'1¤×Hr›Ãä¢p‚2gܯpXé¸ìMMî0ŠÂœ®Ã¾3iÆ4žm ›¡«'qŠr¹ª™±&…‡öý*i…mãy©p€©ÌA’h` ‰LœeVV}£Zžõ+ñRÙ¡ÃÌÔ[ÓmÍ®tî?ÇŒœ‹µ5Y¿óAb{ªïû™×Žò9ž†* û%Ä.V SäÂ}B4ÎAcƒZÚPþ¡:}Çú¶—¦ðz©_BíFH9­²—½]5Ùáf'Ý®¿ôôé T¹ÒC»€â™žGâÿq¿ÉÕàAhªÏú]‰õ=îá¥sòÕ+ãÖ³¡»Ùê’ãÝÊXzdPÑÍ“‘B¸m! §Ândí ëôš9½K"_z æ…Á  ‚'j»åòØWQ¥¹WZÌOöÀÌî6ü{õe~–ÜCŸ1Þîz¶¾'<ïô¥“S ùrF,ëkiׯv…-Q7ó;8:nÅÍö¦ ‰újíÍš ˆHã6(PŸÃú9Q™úì =…:›Ýßzóòb´3%j˜‚†€CAêeÔA7È'›pÂN¶÷*=εŒÞ¨Ž…g/Y‡‰Ñî ™ñ‘2icEí“Y*læ 5L-Q¢2T¯"R…‹1‹&†êñ0‚_:þv`†•Ïéz³Æ¯Èõ»‚ï+:ÐÜÜÑÜç€U€z:Ž‚(P‡Øgé5´?î)¦ºøùWbý?õ­ÁŠ¤Å¼JÏY<¨>X–´”wv×O²ì^eâ¥@C+—Íi÷g¦zóºà,“§i’ë&æ ±ëëztvØ1ÒÀ8?9=‹ù?ü‰5~6qòÑñכĊw‹¸¤|‹M3Ÿ8u ¿J®Ã‘CØ\Ó,”Ÿ=–éÙ]o7Ò½éáŒJIî{¼‚Øë¦k¥88áìÌ‘bÖŠÀ1œ’5}ú]Úç®KO1ƒ9ƒÚw¯ l1…‘ÀìªcÃÓ¢g@fÛ­œ<þð…£Û$¹Ý–¡5Þ$´ë–ûk8nÆÈ$à¤Ê¶.§l¼ØÛÚÃJÃgh)™e|TÝÄç6Ì¢*Å46¯ôâ™7?­kýöj=Ê< *i’¿¬  ¦k†:jé^X[þš‘ù>íÒ¥€§¤4ʴПg+ÍMö×YÓ ·- -Hì$(¨BÀMPä.Td¥'÷"²¯&¸Ó%Ÿ–Û;¨2±\ßl©Ú xÁ|©©µ\ûH”TŠZFEŸ×V0ã«Ú³±æéb>‡ß«=ʯ ^8^äòãõþs·Ñããì8õ<¬“ÜýCûVˆÓk ¹„"¹JeÇÚœ„bå¡ ,éGÙW¾¯+ýÁ“OoEŒ}µ«ïJØnŸºå>*ÿÝ/÷Á¥Rµø,PÿÎ@¯Ñcss•6K&Á‰#Wqì……èê5©À¬ŽÃ=!º©à¼8ÆÅ>§–eªÁÉËT{3ê³ÎRÿ€LI"gl„d(Ûu¨E‰¾y¦nl)¬åiqÃ"k<½F‡-§ät‹¼‘7žã0¨…ý_Ë©ú [Ü ¢Z&ãßO¿ŸgÈz{a¯ô˜}>v¼Qoþ»~™“[Øxë£l¾Óû1Íu®ÌË{þîß6N¡ª“Þ‹9·|0”¦tá6l-“ÝtðæܦŒKgÑQfŒørÿŸW¯ÆÙC%{¸Êÿ*±9×uã®$Œ7p©ÅÐòÏyCõþ)ØÆ>í·dQp¦¦GŠæ-´½ fÊ_g¹¸;q0s`Ãý_Óç°‰˜q¸êIŠQäpâbªt‡/H쮺۷ïG®¾Ay¸¤›äøþº°:LSCÅû@êÆ?õYÁKR˜.ézr[}fK³RZÿ']à‘l‚ýn‘iX±B6ªó¥ÉSþ\,Z'·–'Åq¶^‹ ÒiV25 "‚?âD˜]Ã(Ìçâ™È—&?…¦AW#7š â&C€ñ5ÿ¼Ëÿ‘›!º£x˜Ÿ¢+˜zdÐÌ1éWW6øŸHáÙßô8ïŽ ^Ó¿:aH÷úpìöºRK%ŒËü‡‘¡›ÜOïºwLys¦ëC¿¹#sœ©?à@¢lt<”^ˆf"™oò…øëMPÿ²Ó¯ÒðGíwÚSlú÷Ž`ŠôÖ†!zËiC(fÔÁB’ú){™ŒV¾¼ê ¸ „Æ“JYÝøϯJ{8kþ£‹éš¤‹Å7ú¹”õÒhÉ+g©ºDó„‹2TŒwqœøy0?¶÷ú£|q毨¤ÙßJ›Îôc#½­Ž[QT3âÓ—„øÎÀ¶“ äÑŽŒÀšOU¼'QÃåõGâ¾Ií{EX¿_ú7áþ‹Š{%=̨Te‚8¿«Y¶_í„£€$k‘ÚŒ–7Œ«ûïÙSW›Ú~öåe€ÓTÞä•Ñ‰ŸôÔQrQL5n Õî-«ÖÿQ){ôØ©ó·O€‚Ý’‚»ÎhuÚíûutIƒÏ\ƒûûQ»ïENé‡ÞÝs€ØPIA;M^45n …’C õŽ¶Ê³­Xà˜™¼^³ûçšé|Û¥6í¸»ø:LíÀ¿Ïqgœà+ˆðt(Ð;gg;L‰žB»¶=Ö{’w;I/£0ëÔYL}$Ðz6ÂP=ÏÆk`i¶œ}yüø$,rãî þhÅâÖmé̺¯oPzýN\¼ú‰ªÄŒ`C²Ã†íX3¥ƒÜ­îÑY±eTÈÁ1ú¤&3ïx¥W·Ã2û*Ò,Dep˜À\½ÊŠh"\,t›6ÞÑýYØ}ñ9cÚëqFS½'#•ÙÎ-¸)‰)]æ;7)–IUs -øb˜w‚ÏÛÃmöPñoê?ñª¦æ×ÉÌŸ‹òVÛO!¦­_îÏ·­BêµëöѤó›ÕOL b'ê]Ý4öHÈD³±6ÿ) Ò< ärÂaÛìÝAEAF)¦€†êœið®@Š#‘c¢s‚ZöË];^%#ýøhÜiêv’Ú¥±5ý Pu^ÂrÞ™¬t,­tg®ìÀI{û¡¸ü‹h¹®?h‘ÀðCðXá_U!¦ïê2åhg¶›®ò ¨K†ékïöõn7 -åq±søBÂõð—\ºÇߘàçPM²"öDXœ%žÝ…¹C½ÏÜÕ z]œ4˜–³à(ÂöK -©Þ1K!0ÐíoZ¯{ö–bÂ0¯q:ÛË&^/†@ck5 4×Jj%—L^ m£ÎÎj—W{?ÎÆuš†õ‚W꣖ùh£¸·M$3[¿åCdìse¸wJ]$$´&ºÛ€{pyü5-žïUwU_Þ_¬:¯×«ÜèŠâã£ò¨¬š$¦Áööó4–¬ëNs™0S7Í™e×Dnf#›Kgð°±eÝ*ç:xÖÃ[ãÓ ^Ø2ÖWw¸/Õ’ -¼×LògÅÎÓ_ˆl•$6û/ŹkM½ãy^#ÆÝÞ1G!|&0ä "—­–”0Ž! m!uPRMwÔ7TÁ˜îåoC9ð¯GÙ*ŒÄ ôBè“ë㉿š[‚KmI“îA^˜˜…V‘žµöt78:éÕ¢ë•Ö³cÖÍŠ_ƒ·µ`ÜS NA Gmõ\ÿ6xËYaøìûeÎç’½Gœ¼aH•f'T%™]äd–ߟœñ1¬¹íAœwY¾N³lzHÊY7ÏÛËsT×ø˜Œ„›à×c).†.-Fo¿£÷"‘(Û-ìȧ¾à2zÒ)ÚÙVºô¿á®…YsR—;]9©R}ü¥O¥A¸Ž6Ù®uÉ—èñÂÞ•ÅBˆ'•ëñ¾áwS±xÞô;¹û3¹ƒ”¬üAñ£iïc½åll|­|îCêšj`ä=N‘Ë…¼ã°Õ{_TqÓÂJ]¦òò>¿ä#eѵxö½­U×Χq¿'’U.—Œè”N˜óT®~Fò¤áüIðu±¯Räæ -qÒ2פíå'f±+y3ô¯ÿŒâL¯e°Ð9ùÞuÌJ¦³îUßÊà\öŸ† €Rkv‹;…KßÄIíÚ´4¿9°5=>ÄKІpCS>F›ÉTC9B÷¾a0æóüñ× aà0wÒ¡¾Z¦áÔ]ô$¾mÙ=„Ž5IÅY@3¾Ö,º¸l¢té³ü$^hˆMÜ瘑dü?¹‚‹ß«•S´É±tºÐ§Ïê©dÔ4QP\.:ìå(zõ8Οèý“Ð ¯Ô‡½eF22L1fuˆ:×#Ê|Cr;ÿ»×_:«Â5òµ–2Œ¿1Q‹Nì-+™‡³ŠÒˆzZ?o‘†±r …̳Ò ã €ËDc_à-~ìk?Š3,­â¡Ð}ã¬3J5%›w™a˜!¬÷>˜ÃdR÷-öÞ¥StÜù íÅáJ§4ߌG[éÆ¥O‰_MÆiG¢C̾õO9€]‰Îç4™†‹TUOëÃcoÈŽ®±wˆCªwqAÇX(V»=]Ívjào<SHWiôŠq,üuxº ò #7´èOg°¬hn—»þfI.DÒ9½f²éÌÈ.o6£üSß¾Û;àÈÞP²®—BÑØMÁÛ†mÚíC'€0 çp¿ Ã>pÀ7©{kôögg¸þyQ±žÐlôAuØvõÅ%Ó‰–»àwjpA`I诬j7[£éÓ¯ô'¯‡~1Ó³ù‹ˆˆ[gп~ñ*cS½õm0¦d3ËB -mº2òX!lN×7ªîeå†wú\ÁÛ¢“S\2²D?0éÓÔAk òpåí·I“ˆpI§uqˆÖÊÿ:)¾)ü }.ÐŽæx—ü€tæòúLºKöi‘(þ²l|žI¦Ò¤”0€'U"j±—Q1ÓúpÌ …Äà H`¡’bšÞrJ!Õ€+´Àr »t·ægßêÚ ß{6öJHƒp“d”¸t žì— þ:ñö°ä^Kãâ|JìÝm ÆCCþBíxäÓ¨ýN$«þ$P–z1äþÎòv=|/â{? -æ#yUm¢|SÅÒW ²Í¹ÂÕ]^&ºØb{|¦H¬Ó* &x³¥/Ê¥v*KÅ unšHÉz)ß&âkYC,6Šp Ê`QŽdõmÉDögd]ÉËvî0|"³±ÓÄd$Ž‹ÿôT]…ºÊl 1ØGLe–ÖóåL§|ÿ]+r=ãŠ)ötëÑ¢˜ªßŠqàËw­z)ÜÚ9ºÑölDÏa>[æ 6ˆÐ´ãµp9ûˆ»ôÍu÷n~ ;“ÓjêZöú­wáÛ^Âêþ ˆÔN§£Ç¸i÷0±¢'Õp™°#=cdÄÙ[—äsñ+Üà‹Y8T5´@hÑi§žâ§ÈIò€Ç¿F|¢*òóÍÌÜ D#`Á䇰sTAH5_€ŠÛàEBÛØŒ®2!ü'¼I5¶''³Â¥`Cgzõ£×«ø À1jËi°0úÔŒ»êA<=LÖAä«f -Ñ<ÝlxËVóE"“³°”mG;á50xX÷»&£5#gfâ‚T0Ù¼cK¯½.ûäL²kô»A~ÑU=/õ%ÒNÕ^-”¥@¢›Œü\¡Øbläv²’þ§&“ 7ÌC‹f Ivš‡o~öš$oSΖ¤h4x˜ê«GÅ".b'þC ‘4hüÇùˆ®î.U÷å/Ø8æbÊݺ2%duR#‡NWK–,nç¹2<ÕuÌI×щb£µ‘ëBbÃjÝ=ÐÁµ‰‘}¶°Š­¯>\~&ÙhûxQüž­tË_AHF¼ñv èêÁÄ6®7rÈ*Àí€ê²•d¥ÚiöÁaÅJösPµ;lL¾ž¼vÏ9 ¦ï•Å‡d4@Ûáf·¯8éu¾<Â`ôùüpÃÕÄ„¾Ü£%AŠ>bѨ…c>ö7ùXÎwÂnëìÈ‚Y@cJÍä`z ‰|Ë˳…Xšò –<=þ?Öj©uw鎎’»ì2¥éBõg0$×+ž âEËí ámòlÓ¢MÒrŸd7R…¬h>팿O˜%PD¦Âö6‡‚‰~e;WÐâ—Iÿíâ# `õuª¨²sí.aCÃq¸W­‡+¿ ¢„ñ€á3xþ¹ài¡­¶@Ìe¼ %L-)a…½Ï²nêùó’dÙ¸1YPéðÝЪ«¨¶¾ÉKÞ¤Ô–zT¼ì¶} ©œ^ÚS²–+Ù+ƒ:‚Ô–8 V¿oʯ ã­#œ0›Lïõ¦"¢—xGµgmØŸØò¸ÿE&Ò4ÜÚ™Yu>Æ»v²ôm)bg¶ò/¨ŽÂJºŠ1ÃØHÒŠ‘ù²âè~™²f4!µzbVÌŒ·øXgZ:Ö澧:óc…TPœBrq5-¢º|Ôàru¤`vZ|„.’‘<&5„—R^LÊ(^¶(½ú/ÇÜÜû5ŽÇÖîNU¿)7—y0€ü¼¶äR…îf’qOw‘4WŒ£‚Ò†ŸÏ¯ŸÆÙ|–Ÿþ„·±»”Gk/V“»càïtt+vΗ$F÷ ÷>DîÃiÆ¢’VžêŠlÀU(æ/ùý4̶Ýg4!t¹Ùæ[MfðWW2&<Ä-á Í; £A·OÉRqÛ8`0ìÅ_0(¼§mîŽp³ß^’tŒLÖ?èZõó&ÓÎÃóšî!ô… ˜bã{Îö†š5½A y›«©¾bšDÚwÚ¶ZwõÎQ2y4¼¶ Óˆ+µO‚æ|€šªBh³1Íhy‘´ÂôØ©9²ö伫üòW;Æ£âuêáªmïqÆÏ1‚5i¦þ¹{ë\½‚ÎÁÉ{XšËØ¿zIÎz€²¤¾¶öÛÿ‚ ñÙ3P…Aõ5 úË_£ÚxêÀ÷Ò׸µÊ¥š‡¡Q…ùÿ³”‹)¢÷épY7æj™Î¬BÈ»¼ÇQTÿìÓvyžì+à_mk—ÁJâ›ÿ¡Ã®wRš€#Rî¹·§£U ŒWäů­ %û -0¥ÓÙŽ÷Å÷N&Ç¡¹4#päF¢è]Ö@/P¦ -}79B8cîÆŠ -V >Ÿ&²V±~ Hà eÔJò³šc9ú"â“‹­¸ïHZVK¼Ä-!0,+\a§¤†´"ç·pÙövšpáW8j|X3$œÄ×D‡_<Â7è‰lÇMxÅ]æ£D/6b5Cvf «Ê£¬|îMÉÒd°ìÐÒ½ÄyóîÖ[ç9d!uRnb Ù죊*›^O:¼³xE¹•{sŠDcÑlÅ«ÈÅú&×Yæ3Û²Ÿ.Œ¡Gösuß!1÷³ùIïrcìüÁ8ïf DÁE]P¹ÒØš©ljüœg¿SCºàcé¨2çÛ‹ä;%âÆ/ìõÛ8 ÁÔEçáHk’¬°ló§ÙýíŽôˆ^§,ÙJó©°X»ÎlÈæ&bnÕóŠW‡‡z¬‡,ÇhX« èÜG-iXwÔ‰­t †Ú°FyãúÎ6ŒŸã—õ-˜³èG)qêâ,C `µöœ :ä1*?¸Ûšh‹MM2ò÷ÀdöY"¿¬éJ˜cˆkÛÑ[*¬mÅá‚dGÎó7ã¹Q˜†É2B®UP†e}P[ºŠG|q¦XÃŽ{ż潴ñíó¹ÃÖU°\uïÉ$Ô§tvÃë¨ïÆéqËv\z°ÒØè ‡c­sõŽH¬e~M)àu~¬RÍç ¸]ÕÍ i°Ólø)âÈûî½oxÁ#§+MõƒÍÀ± kLO€Ãè^ô]±eÜqy7wo Å‰ë¿Ñ¶h5Ó1¨âi D 塤îõÅF4ÍÌ«Ç!dòÖÙ #ÆâXI¦ -qçyUûN¼ e M*%€Ø9LSÎKö¡KŸ1Þ×ñM{á¦qÎØè‚Ÿµ Y€¾@ùª>Šµ?êÿq[mR^|ŸöŸè½)äŠïy»rƒÎxàä’ðh1‚V§õ÷A-šUÁÒÂù…~ëª ôê}f~Oš¿y(糺pÚülHYvp-WÄB4ËÊzŠ1ÉÎ÷øpßP.¿…¨® رo…vLü¬{‚ƒ‡ÌµXv~dh…Ð*DzÚY£jö< ìúkñ~Ñ´öR\ƒ%§µê¹µªD‘P57xH·?Æ‘žr2í°Ìræ®g<0µðWiXEBz͸h-´8û| ×aÔ8+‰ŸV÷Ô‡+*‰Þ完•½°¢cA_v?É+-#ª|Ú‡ï×pÁ¼Ö~ÖêóB¾FÐ\Úšã麘d)$“¯…ŽÚª»G«4QV‚`†Š’5ž¼/"¬[z›°Î‘—XÁ¨Süaïõ#ëØÇÎj˜ñmR=aò.‡ ì`!ªxE»ÃVl‹{ÑôdР借\¯˜„3e¿I öß÷„uå"mŒ£©h²e3jn[E˜ÜÇ–N50ÅE¢Þ»n¬¯’jÎJ×-ÈPﶸc{™‹â)n?oj¥‚5o<Ê(G›sŠààô¼™âDÔõW“®‰Ü+P’J¡†³ ìÄq¾Ð²<RV={t¨ Ú‡(¡ÿº.Æ}·n7ĉ Î ö‡KÑ`* š7²k‰/²È™­¾<ŬÌÞœ20²–u -:öcžž£p¿4©Ü°û ßSò’¬Øj;;)º~„?Á7$SÈÎ2õ`.õÕ5>Þü)ÕîðàÉP›â×Î ßVQ³cÂõŸï· - JEô/Ùâxµf?“ÇÇîÚ8Öª9Ÿb*t9憴Sóƒ±Ą́2‘NîP@=i•Ò¼2$@sƒÉM0xÀÑg‹˜^[2PÔi:Ëø‘…®8EÏzF¬Ç,~T‡Øí±; -}Œ)•¸Ï -Y”ŸÁýUczƒšZ uô#ñÜl‚˜Ohï -†0:ƒäålø=ºB2‘¦&«ÙùéjÙØÙ¼÷ZŽSÓ\iÖ$åÖE£Ñt>¸æP4äO† hP±Wç-Í1Ä]­¼·µÀ{†ÊpÌ檭㽲á0íT -²´Ö—ör±Ó„7ÊP“KNfÞÔõÓcñ:-Þù•#ŒÊí`F øÔ{I‡˜ï-ó+ŒIÜ›“hðlkêåÇþ.þ‘Ħx±ÓÞÖX_WpYþ! —M'űÖCÇÙ3âw:ÿ@9!7¥Ë%0®=mÓ%7âJÍåÔÅ1Û_h(;TYòÏApËw´ß#˱ì?\Hâ9ÙḼu2 -!ÚªXöïl­°ª®s?2>¢~Ȩ¼è•ˆ¬IŸ'÷⤢ϱ‚Äðüeò -]X»%ë&Øûcg›8ò³¯À}¸Ü$ë…‘ÉYÕÆ -endstream -endobj -1891 0 obj +~Æ8“mIÉ–ðé&¡+«Q*¸X]œs´Ÿ/¥¥¦µà×üh!à#ÄšÝ;¹]cö#õo ò™6B%ëÌ·™¿¨ôO1žÅ¾º$˜-—×MȈAµ˜ŠçŒ²i6N‚Õ¤ÝYvBo¾_€â:RÈç_Ñ!Sïu  Ã?»6³Œèa–¨ƒˆ)ªrçÚ³QU1H] ë4ä÷Œ¿é4²–À#‰"mWSñSªÄØRmQ·.ôÓ€"TX“±LÑ(`ù1p=¶›ì©NóW8átýXþuºã6>’•Ûuåå˜,í&¤O1+Ûk›‚ÛRJWþ\0 oArœ@Ê‹þ«›P`ˆÛ<ùœxa:ö T éªÝÎ6yd·±iªg>·‚âuí¿àÄ!º¢ûúÖ(©þEÙŸB:‰û¶î™­΂«OŽ}€ mÆi”Zè†a§Nv‚d ¿Î²i²·D·qBüÊ5šõíÈŒ[$œXÎÕÆËÏc´ý"CäXHY’Û(¾Q^Ûz~|˜x1¾ó"dVêZx\Ý2nXþ%¦I ™ +ˆ‘¾_jøc‡9…bâæã‚ã~Ðîªq‰ÓáÉñ…JŽ?O~Ý¡8RÃÒ{Ûa{ ·ýWA<ª8[¸]u˜/ÿ»S{!VÀh×¾m¯áóÒÆÂb8@÷Bn]'5¦Òþ—¸ï6«½póÌošìïd4¿¶õusñU¬T0mÉþ¤tü]XÂÀ9¦{ƒk2u~Q«½6Λ@Ö0$_b5û3!ËkÇÓSHÂA:¯“³ÀÆ .Í/ßæ€Ä¿éãäðLR&i9±€6^ËÀ”Šç3õz–y°A1¹»ÎQæ +\s†}Ö˹Óz +YÞÈ£Ô_ÛY‹ +XÏá ÑÓª¸Õ&*õl@<éZH~ŠBÏõc¬]67Vw8¤j/¹L‹nƒ{sÞ$Ékü|h`z€ +ïùíëÂe¡^lî‘áö÷/’¶‘W84ó ×*¡Ös_#í+ö: ÑKt3Õ*Œ¡ëÄès‡¬âôÓ Nò ³••eŠ´)îõ5o|ÍäòýógZXЩ= +bnna„ø¡x)JÄƳHÓÞ¤»áÑœIãv?Ú¢F‚Å•W¡ãe{ ¿:Ôùuбãçžs7«—²ŒæAôV˜5ÁWªÞv>Á”å€"è<@ö¿D&ODTè-Ê ·LƒÌ/d ¨o)OWÆôS«­sx¨Ê`‘õê|¦IN–µßÉ¥—‘…©¾ä˨hÓR­Á +VNoÕ2-ì9ÐÝs®?&p'3¤Nµ³³…ÝLîçLöÇ4ZXËÖ+µYŒ’P¢ÂjNæ'.Áó›·à\6Í æB%KróZ”"†Ë Ø” ×.ËUþør¡P‹y±g&Õ½i…âkÕ +>tëZn¸yô§ôøA%…K™ÄèV¸sß–Y›I앇pjSµÕÝBDrn±|úˆ–ãi¢‹´€ ŠtÛq¸ +Ÿ/Ôf¶ˆÄë[35ñ +nudJRÁ8ûг£’†@ù{nfТU{juÕnú;èaË`”ž› s˜E2‚)…}Uex@ ;ÁÿÓÑãÈzȧmY4^ó TÕ¹®2Í@&ôkê÷ƽkÒÄ>Gï*ÿkå~'äp\f±…z]Uf_27a± kò/]†žitÉ71ÝÛú + <Èvt¥’ÙœßÐxaup;5(­fV»eºWôˆ·cŸž‚û³ÕL)ôÁf÷ôã +Î[pÿ6B¨1B*ˆwDrÛÒ|/)t­t”â+qÓÙ‹jõ)Mˆñ"{uTÚð}=ÝÞ@©Öméè‚xÖ±.fN#3¦»’›}$uϱœIá^¼jiVGõ‹±¼•w èJ‡²éA]Äàmíur˜ v…@¨»Ó¯É ¥ÇíèÜ,ÌV4buaLûÂûž]Ï +ÚFýÓmæsa-ÓŒ°Pf$í&²ü~iM Ùî I¯wlwšF¨§»r8X'U%Ž~ýÉÂ1¶úzÃ!ýœ.T[Ô¡Lã5ÝŸ½Yq¹U‚8‡6ýól´"’ù#$NðIïAý8*öîyxÿñ<[ýx™ïðý…“׸=Òlõ-׃1+3˜õ/éyò12Á&ä•êZƒ”*°¤›ò/ém‰Ü+ŠµåmLÞßSbz ¢É +ö+Òû”·Ú%+K«ï,aY3à$rçPu--ï¬f´tö¤¹6†ÃI©pGjŽŽµ‡!ªÂð ô–£ì›`¡€µûÙuk"³:A$â&@'sèØß/«í6¸‘½+5Û·ÎR¯­{åÙͤ¥}U4¶—=x„èsw¤Ò]ìwêp‰”ä1”nfªàk£)±ÑÜè[ŠÜü*GëJåÖý™..fÔDTe*"«ÃêÙ„Î ÜÂyäßXGÍó„WiJ-ôÏïÏcZ0Wn½2ÁV Á=ÀcÙyïFÂ(óÏ}ˆ~}á=ÜßCí2±¢Ê°èÑWö¾Ó1›]Ü1ÈoŽÌ7\˜‘Õ32A§4¢7ë7M4Æ¥*‚VoÉg¬?JºNo[,<0[ð²Õ¿£â¼ü|Ði&>BB)\Ì5ks¹ð'$rÛîKȺ8ä#Ë¥ý‰r–0ðY—üS‡•¢´afOíþGupzqÌ%‡ÈîÀzÊà ¨ÑÙ¦]‡Éw‹?‘À+oâBÅ«È@%’6*„¦:k"¹¬TÚ O‚Xª£U? æô¸LÀbž†cÕ|…_³ç&¾ƒ•FúgRÃÅ<¥·JòA^ÄÉ%«6´ÚþìX£µéO겞ゕa[QF‡Çr'aD ð±èêPkèç\6¯“H\\qξæÞZ-¼~ŽXª@ñ§ñ*ºÇÃN,þõˆ‡]±ô6üÁ[yÛ˜Öéúz¯[îy š¨¼ûIèÂQ…–ÓùåÆI»*0 €º»Â^<àXn/ófœ,Ò…Ós0¶ŒaÖâe™W¡ëjqHòmÇÿ§=kM6KbL*)Üà„µû+öG·œE +ªÐP6 uoäW5é8"pÄuHÍ¿}=žkUÚQ¬I¨å\´ñìWÝñ”¶DsYKÄmʇ`7]*½»nkº°ä +®~€Ì}•.„!Fƒ![¶ižX•2aÖ:A¢¢Ç÷ˆ"ß_Èåü(|­\ V»B‘‹-X¹f„³òºt¬§ÎÞïF‚åb ëýêáÈ„æWg6‰?’»÷?\‡Hô‡z_ÄY‡*HɽAÁ,¤=åõ»áÓ+Ó¡SaµFµ¹,i-ùvÀà1‘$xñk§ªK{gµîvfŒ“¼¤zÚWü2òo“ªU…¿§‡nR¿s7ô©–¬Äb j‰‚‹ +º\É`,ÛGôèÜ +,®-ãL4B¹h{o• Tâ!qgÆ™$ D³Záô”ÂïÚÖ¨Âr¹­\¹m0Ãg`»";°Ëͨ7>oæ¡ÜÿP ++W¥)8÷´`Gx{Æôê«Ò~Úm_`ÚË{á¼û› jªùþæ·HàE'ÙU]Ž‡ô à%Ý™´­]É…ñ ÁϨú:¢Ð¿b ¥°«ø–úâ{*–ìpäûEì¬ôëŠÜniÈaψkšk°†V´ÿ5icj^‡g‹.æÁ>šÚŸù +WQî¡aRD§ÃWG¨×í ÿùg§p‘ý€ý•ùApV=ƒüŸc%Z…¸€;bjvY²¯à¬Í~-ó¯²ÓøöG¿fÀü¯Ü.K.JÚÅzÄèƒN¥á÷tqqeºŒÿ0½ÉqŽ¨áÿí¢OÔ ÿzkkYR-Éß+eÃ;òUÂE+èC4;CÅÉ©ÿâd¼w] + ˆ“À¢gÀ¼.ª…Ëük…¥œÝãÜŽÄä&ö³{Ç}z<Þ=膃,ž•}*:GS !åX‡-CÊך1eÞ Í"gYª-¿ËˆßØ(‡kÕ?¦<̤¬•–'g³wX;¬ËA† ¶†BŒj€Â|b⺽hKþÉÓ1™+$GkpBjÊ°×#G¬ÇoAñÀâ°¨¹8£g T¨¢Üõ´3ôXŠ"‰œ±ç×e…a”3¾ŽKó~?*WøUÈEÓ;&º.™ÌzØFo™Onò"¸ÕÊ Ñ0;™×Ç*¨ƒ|ÅèPO"ŸB‚—ÊëðÌ烃Hj4ÎæÏŸïÿ.RZÅç™F¬îc›‘*]‚{׉¾Y—?#,5fùÍ'®Á@ À‰åRõ3ýærijµ•>,\ϵjŒgÿœÂ]~²5;$÷PQɨ§çß­Sæ +r+‹eŽ@ñRZ +»îñÈYÅþÞÚv’ˆªŽ¯¸âiJdt˜«\`ª¦ž$BÐN„ö–¶„g—FÓâÅèšvŽZ~Þ\PNÈ#pÞ:ÂiMEA81$eiHrÒc|@Ï–™-JžX"ÀÓfÐQ÷D½éçI/ÄGXÛdÞÀ9A¹ý@F™b‡ÜЇ ¹}…óûïî!+èÆ*¿ow?ÿÙŠg;Âàk0¾~$·!yŽ5ç-a)ÎHÀ%@†rKL¥xÂÛ#µK¹»·KÕ9¥c.O;Å:R(óN½¿?Å*ꌘG×’§á©`=ø9Ì:i¾H0^Xê`,„ØPô÷åó£³[îÒK~ã,ed?!rH­2ͯ‹Òvßé…EWÚC<®Þ—¿÷õöꀞ‡ñêS™Dvq›Ö­8åÒƒQ~ð?‚ Z;0Â|ÇPyŸÄz.¨Ëñ±_æ#ˆƒ +Ú{Ïï‚8 (á¾5LÜÛÍ< +ñ¿+ôZ™VÕô»mJã{ÅWxOCدfÓŠíBÒå˜ãúÿHH8ÁêZ·\-ö¾ÑÖö9EÆ`»ÑMùIÄ5°•F÷In8ÓŸƒbGÌa@N äÖ²„Ñó#ºƒ±³Rãõ½ ÃbɘÀ(õÈ¢­ªJ7ÂFÁƒJÅwý“ôOÛw:}žÙ3æi¸y!)¿É.°]¯1.ƒ(¨¦Y˜l«øÄSŸ0Q¼Fo€ÌÅDñÔ ÛWSíÖu“![/#´˜© Â(ÂËrÉI’IºŒS&ªäA¸Só§xOø³Vb‘~Èô[©×BŒUªùÃÆ!j%Ïm3ÇL$8”õ,¼¥}èlðvgõ÷ôã>ýûP––඙Owtp%œÝ!\å´+y$péVñP­V $så g& Þì{€a¶·UÌÁ>û çloÑ5Ó¢[¦E¦½÷» /( ]ìÏjËýP•K:a6﹪C2KÈ­§ *w-äXWhš½@.A´’ŸìÎò»c7”  NÚPÿó„©i(¡)ýÀêä8s-a-?p•ÿu‘ç„Ù£E>,}k’ó·#CO¼rÃÔ´ÁîHgŠ5’8>¢þÝD"ŒÆ?±À“jð¸¡:…fÙyïFÂ(óÏ}ˆ~}á=ÜßD£,Au¶¿²yN®×!¦ÅF!ñk>F¹Âßyð‹æ±¥ê+ù…™/êÑŸB’Ž&?ÂŒ©bº¹ +˜A”&Ûèæ·›Aè饘YlÀ¿Ýw[´¦úmêšíHÁEÑErÀ¬áå64¥C|û?ûžÉ„!ªÈÇ%~ê„àSwÜDSbÇéöLÉrŸ~#zžÃ,„‘Ë¥îä–ï³Óy +%ªrðí®ØQgÞB +ŒÈKÀ<æ‹`hûž{õ¼"F7#Ù°š¸&6t1Jø +äÂ)+¸‹êœið®Ðx·ß –\X3…I#Ö ~3ók ”Ëwf襪¢Îß ¸Få%«%çp¼ü[ƒ&ý½;Ó~¦ +¦Þ¨‰ÊÆ16=Or ™Ïœ´ÁÅ 94w2›†Áy›yó;°œ¶wc~ ê´ ®®îz¬Ö>b3ü>FžgU0à‹c(û‘4`²Ø’4/#Ã×Îuo²:ïm´0B|X•Ptl£²n¨2…Œ2]æËYZ»vX£í<ªwË}ö#bã$Q¶› |¾ûU•ÿÉ;Ë·îÛ´3ók¦pÔî5ãô5*ŠºO깿F¡>ÌYG>Öœ…–Ú' tE͸}pÕûŽßHÂ¥+1Àå ¹š£6,ÅC&±Fcã>¥ßc¨ÑR.“ ¬Œèš:Ç-?àß2ñ§|²Nõýü¨¾8TšfÖQK"É”»þg±sqíaã+j 1Ý«=yD +ÙB½Âã:¾¬E «"´<žvcÎÔM¾@Ú„õðñƒ4»¬ @µÂ™‘ÖwÁ/RCú¿|«Ñh–%µ3ä*ó*ù‡µlßUÿWá[_õyŽem‚JnïŠýƒÿµ`ƹlÌZO(F8µgÕ5Ê€rݽíÑùñä \d—M‘?œ6¥C²pRÄtY†+?’ìÍ8F3ÃX={(’éD«¸÷¼‡Kì="ý )¶†MÀÖRWÒTc;½–ßÇã8Œ•sRHà`†P„¼=¸,íºü÷ëN0á°‰Jz:dV–›ÿ¥ô ˜¼¤·úª¼¹9>Þ–F²Ùù0á@¡ á +%¨Ž^Á•u°ÇK  ÜÒ¶N#F:3ý=¼^ä­¬t¶môñ^b RƒÇÇÆzTkX0ìi6kâfwš“{$~Dñ¶ãkæ\úæ ZRïysã:õÍ¿%ùFòŠGmJ›Ý*ÆÏèi5½kÇ$Di3è‡ÍŒh® ?p|cˆîF3(ú7 ÜþΑdÊ2uâVŒˆñe˜¸5 ¹>¶á=£–HmÌúI¬ e´Ì¸sÝc¡¿0뾯Ÿì,Z÷ÒEŽÕû¹J²®É[: Sí|(¯+ë¤s«E ºèìÕ²£Y24æ—îñV%ëvDá«5ú5õ¾~ÏI}ÑàÓIEb, o·Ë}!ˆ¶ÎÆ»Â,ëÕÃÍ×ÓtEÙÜUЦhu†BÌ”¼žFíŠ'ím…õL)Õ$ö¼ùÓ&•$a p&+ VUpc?Üå’')‡](=õÓí‚Eo_ðÝ©Û—)†Î4hxå±N,®½¾§“凙š¼{ÕÀ'D@‚J•¶ B~lü¼18$oó±¸ê€ÿÖ“aø¯g~lïû8‹mÒ¹¨ö†ÖªI,Úˆà=0cû}›r°°{GR®¬fû6gþB%,èëpiÉæ~–ê‚^»©.Æ! Oa=³ê½.mK!’B¹#`?z¤AL×SDòÜ®iàþ/Ãæ Øwí¶‡ü´Ç$¼þÙnh™éØd¾=ÜŒB0÷è÷|*D†]—î¹ïÔ«|AZå…5(;Ò²#¤œÚé„—ôú|PHÀ†!@ùÎV0ækëÈ7%¿ãHBh5>¶¤¶‰É/ÅHÏ–]õ.Ü£Æ_»¦± ß];1Q}Lrâ¾ß©æãåL’ßþP®Å8âÆòäa˜4þ±âB–pxÂö?µ†¿ß\|ûÍrëÉ™sÕWQÓ]e +G¬Î÷±‚ûÐÐTbj…àG +Rg6¹h›_ÇîüMåäu]ƒ¾Â¦m7‰oâT·¸,UF›’6ª³HŽKá2:Ö_y|=²Ã„[[ÚÙbˆÛ>uÚKŠÍL˜ Ê º×þ‰Ö¨K!&ÓëHwºp¥‹âsIÝ”ÀxÎL©ºj]¬×ÐKúáUïáíßI¬†håƒd½‰Ñ>$çK• ·ŸûÓ È–ûÆÓ£|D¥‹c¯å˜Ãfá(ásºÛ³d8¹MõbCqFµ™m !è˜rýVJË6ËðX¦ÇfÔJ/½ÄíÚÌÁݵ.mÞxÁtl&¢£D«fA˜S,Ëéþx¼äÔ%AÒ0ùušÌÑKfÉÒ ¶1óú(±ÆµPÝTÁßÃä@ø‚Ï€9MÙHm²>6ª[Á2nˆrn-og#R¡])ö™l;nº1Ý=“Õ0’µQè¹ØÓh4èGžhwžZ$»‚°$â^ Ê¤Ã¨8—‰¶ºn‹Ýòµ8¼$µ&ìÜ +ß +AzÍÞüö¶zl öóä‘èŽS yÆBwœMS¸ïóGF}u«Ÿ–—?7Ážä\û¬?›<šS´LÕ]47€€:OóÔc­° x‚àŒpP²à·ì›~$k<äNeðÄ h$ßÀû'Ÿ†®l î ñä5q“V•Úå³®Ó@0ßRi´Mª+ü«ª¨è~÷E’¶û4¿¥ËoT€e[ÂÝcÒ¿-¼éÆ£‡®LpAR›GëbÍož uõô¯#ZýJòb-¶à¿c~F3ÝKú¥öÀõÿjzØ·pÑP}nIx>ŸBºù0«6Án!ÈiY QÇÓ–‚Î"?•è&èÃdúq0QúTƒ#dŒ áì÷p8#Yª1b2 4 sZ† Ž­ÒÉßÔ,¬Zò¼D#òüMíjÓÒ²Ù§B"ÙǸê!S¾žæLÄÒTJcj€M/ÖàÖ8,6ò‡ÇàŠM¬ õ™@æÕç¬NÍB<Õâ¢ûOìá~CìxôA»áÁ=~®ÏB™yüÌ+s|̺ƒ:Ãmný³æ åO ¼Op´½ãàùàš®’•äRLŠè}ù©º­¹°ȼð˜t¦Ô­ +qOÅÜá¯s•ðžnrÇZ'ùD­#Tµ—ê©®kVB3ê&¹Äç´R;;óa«sµëPxVu¿{ÚúbYÞ9L°Œ|–ˆ! }eÞÂnÞÊ7iÃÞøX„ç#wlë4W3˜L6Uª€ÈK¥Ù¼æUZ-î^È¿APW¾¬ÊæQJH”ŸòêPƒIò“a }bþ‡;™^ˆn‘âx³ËúåR¶"¹-.È_µ§†nÞG¦]~0JšËVÌ P{ 4°JÊìÏâñkø/j"í'{£ã¨¶zê(’‘VŒÛ”àkp¯mׂZl½V‚»- Sˆ$Wú#ý×]d¸M>mÍî^NÙ˜ÙVçù3ð·ÅU #t†Ð/«„ÑšQ-”}dºÞRpã}ñø?V­íQBbOHWÖV(bÿÜÅm&Ò|¾¥–_Ùš‚ßL€ævìÐõ$íðBîmŸ”jwú„ød¡ñ{1Ø8xœ²$zA¤8ÈО±Z#9F>†õgûÔ?é05`ZÔ1WâNÚ¨áÚ#?Ìøpó¯A‡1ŒNI„ËÃH‰€¸j)^J‘ÿÔÝÆñû_?¥ôÑŽ]ÈQMK¼3âÂ+8›0AesÆrM{x²ÃÛÔC)u‰$ZK©’RÿÚ¨[ȹ“!·3ùÜþDÕûþÊà_(t¸œ¿5è’UO<. s¢±mÃz6º ZôDšõ + º}¦]ûkÔý>#“ùPó¡Þ¾ƒ2ºu 3vŠBI,ìt䢅BÔQº´vC³±½»0ÎxmQP½D\ÍÿdÒRØò(¶L?‹± GÉå.åðVV›“ +ÉŸG@vð}W©¥¢‘žDºåø–‚lÑß¿/©"fÙ뺨Ì5,ÄùEë™X×{çÂ÷»6ðìÞÝú=1+ØUxáÖ?Ž[ð/™£d·šÁᙟáFd—¤UD¢u_“9±‡KojÆE½§#Ù…¤ÿ6 ð`WÂêÛ¹9×· ‰¤{Ð0ˆú_RT*‰ï ™v.§ÝŸ ¬Ò·•7˜ +§Èèv%.JÉZÍ0ÇÔF= Šu +öÌfcÖw•C!!}«¢LaGxÑ`G +¼xP÷xšòLñUí=?¯S0ã®S&Þ^¯Ú›® õ¦HóЧ›¤\©ëÖMd¤§éx%‰ÿÝ땇îÏŽ•Q´÷E’Å”ªã÷mH âfîÿ»»ô°zm(qþñ6.Xê(ò|ºi3ž“>'›tÆ'EÔ¦3OîƇMÏë‰%·/ð˜íäôä¢.£n†ûýÊút™¦ôűìA &9p†„gµ>‹·l¨F¥Eò¹[äš[šMàÉN5]Œ$ÿ»– b„gí ª†5‘V…½hãÜðî)/Ò8#—Íl;ÓöòçC!Òì5O«Þ“IˆK³<\Åÿ ®SÌYø,nÂÞ¶|lס5!š’Ô· `Ö—7ZåÛk¤Ä;Ñù²'µh=œ^K9|ÙQ¢©ò÷ NÐŒy@®’áx¿LG﮼zÀMØC~rÞ +Ë%5&˜Vð>føXÒŽgÈ_…8eÖ&[©údWgê Â5ŸÜBrP2Ò„ŒÐi ¹yïkOfc¿Îæ­†ù»Ü]t¾3f f„ÓÜ,}UÖÙÇYã. ,Ãâið‘¤úÞ=ÅÕOG=gªNûpùD´FóƒgÜ…`Í2§¸àêK7}æ§R‰XѲÿ!œt#V/›Ÿ!mü‘¬.Â*5nçkSGåt¼q!ò8…èmŒ-kU²º­ ýzªðN½°>Æ7’BTpùWÓâ '\Ü…h˜¼íÅUøÏ>r>Ä$ôc‘÷rš†x¶HsʬÓˆZŠDåD6‹ÓÎpÀž -%ü„YÕ¢‚Íä›í8 Ä“È[¹«Þ}G*Aí³yEf2çýɈ*Î)U´¿“Ggšv@zP¬”ë—Œù£fFÀ¦'ãøh4 {ît¶Vä›sË*ñÄï;Ò1¬ÿÙim©±Ó+þÒ®úåE‚Éß]¿üßĨýŽ£´]g¸_jB¢¿°½u§Ô«±õè™óûû‹;AR¦6Íø@¿ñ: zÒåQ}¯“×!Èæ´™ÈôuB£þ€ysÔæ´Õfdù,¶ðçz[þ’!•½‚ Q|À…Ì¢Õ×D9¢z%ø˜øvŒ§Õô²fbó©cÇ”YFzA«rÈša†0¡Â6NYOïþE–#¶‡ŒËÝnotœ³Ì-W£Úγ>Ñ7Ûy|Îò™(:Úã ÛÕ@a +&~=íÂÓ"M;¾µ vŠëv9Œ4%«*r×AŠìÛƒýkåcgrú›6ÏÓ™‡OlË"ò–ŠpxA‹>ü½9t|^Æù‰Ä~ô´9³ÐTÂy½…•Êj?Ó°¯ñ’¾ì§ûÆ«@€TP 0_ûóVx %äÞu68[âÑ/yá&Û1ÓaÙt?^ xO&ä‡Oµ©t® nú`Õ„˜&Èfûûª$_òÄ¢‘:§> + n'æîÑ>(‚^å?Q-ñ€bwÒÌbo²ú»Ñé¼[˜”º=ñH²ÆvÉÿþ‹ÙžÜ«p ºV¨k°VÖêåþrs*ÛP°ÎfÝ1=)Ö ­ãlz×ê…%‡ ¢A"L£9#Éò8Hï³jÿ3×ñ:5å#:Yfv‚j¹OƒÅ&áÒþìLÔSƒB¬"è¦Aæãz€¾¢Ù¤||<´Ü%àÚûQ¡AÒ?ÂÇ‹WaßË0Ù“šcá¸^LábŽ÷ñ(äÐ.ò ×?¾dHª¤ôøðäÐ&ƒ‘Ÿò ¦å+3ÊòFÚÊ’Éw¶“{vŒîÙ4%t-—Ì¡ƒéÙh)A…xmâc“%$g-Þå"æöOƒüÈq¯çÍE3áËá®É2„åzåeä’ߘ¿ŠL­:’=Œ{̾‚U{ÍK%™\®êIKÞ†Æ/jbìwJeëG‚°›tsëBBÃøõˆŠß5‡P®ß…Æß"x£.ÎÊêûÖu u6P¸r”$š".Wõà$A ;æ+ +k ,ÜïÒf¯B³H¢-ÒàH©!äŠZµ£;ã˜üL}¯g]J^s¼KX-!rSáf +p–îs|¹*Á¡÷#}ñûtu(~ø?¤H‘02yÓÓ¡^àþzW'¹›jC°&fv»#œ ¼äg9YºûfžNåg@Ò~°¾^Û†«Évå0øs k÷ôJiøµ®^öSõA±òÛ0Š ÁŒ(*{’¼“ÒÿYÑÝ¢IŒrðýüKB°!o?¿”ÞX, £Ê6P4 ÿ(óõ}rö§bŽED¿Çrx3§„/§¹Ì´OÙd¨/ŠÃÙÇÙF¡»ÊI¢¼†RGd”Ç f á}åJß)}Ìø)‹Áy˜zô…›ÔÅÉ6ÿO‹o~/ê–ìƒï*tÒŽ¡n¯x ’“õ9Çï°Kš6{Ó)ãe€_ƒ.ÞÀŒ‡^èeÞ)ÖËz„¢]Èñá/ϤŠMàYoÈô9,¢œ—Þìg£×TE눮}qtôºšGÍÛeÅX‚¥Ä_wxÃël ñNUá¾=wÆU¥w*ú5ëqï6¸Ù±]tR4‚6Œ[@[LJ*8Þ*ñèŸÊj‘Òës]p*4P+ã5[šâ +ÏùÖ 9¦}v·5¤$H•à”úúVbàöëìjíFaõý®?ôÖ}÷­ÀÀ„ì#bÉ5ö]ò*ýÿS"^ò6>Øù­ÊÓ}uFÀø¤Àa›d~QÚ¡d™z’‹ €–ü‡—Iø¡¤áo>Šún}ÄŠárËÀ©o 'ª÷‘¦Xdü`C- ¾Ó’duÇ‚TÄ(n¿v8Ímë³ß͘•ƒUÖ67ˆ¨áBYgøåR>ï‰`±l–Ÿþd³õ- +×ýc%ÄÆË£7£ÓÜ×Pä1xÀÞp]ãÒw/F¸N¡ºNæö’0àqûìÃeÂ7¯ê±åëYÁæÍÏË?Ù*KùPÏôp\þ|Âi HÓÕ¨ ãYNUÓ0ŒWsK9àæC†­Úº¾¹pS^Þ"戺•v#†iÙ"ÙʆIúCã¨Á"ŒIPA{QmtÑ_ØZѦnzE¨]½ U5Pd"ó[ùÃ`$k¬=ú̪G­ÖäË¡Zï‡ÿÉþÏfŽþ;ÇÿöíšþFèLx5†÷±†@>ëÊ æ£ÙkhºæVë»7Üèï +óÌ T¦@rrz‚)(å°]Ùß K¸ù+ؘfi©ÛÂDøŠÄÈ:HÕ±ëù÷ÞÞ»¥Â¬í‡Ž²B£WU!|5c}æ¶5‰R¥#Ó_훎ì¢Me¥z4ºQû:àîФÜàåký#ý8œA Pbâ—ÉwÆ™µhfOŽokï‘Z9ëè\©¶¨g„$o­Îà×.ÞϲWO~T5 ¨Gˆ$f’œÜÏ‹”“k½=r“ Ë/3Gq† Þ¡¤s­»ÅíÍŒ§)»vx*ýP*ÏÿÑ=ßÀcÑÀè~ÓáVô¸zíä7dNS×4B®ÅÐæS}©RNaVÁªPÛm„>ÍQ¿i6h%8ÀZÖ½±ïPGQqºÇÈÌIâph,‰1¾-Ãã…Â(†Þ°ñâ±÷ùÁ„çì¬ÉluU<ìh¸õZ๬rÉ ~%>—Ö}Imkõ|Ëë›㢠=ĺ©Y#Z¹dàÂ$Ú¾öÃÑN¹TDŒé”(gÉÃ)†u ßÄÿTÀvû#à&å“,vžq÷¿͹ |ú8@\Nˆ®.ÒUè4”ÄÅߢ?A°QÉ^éÆóp+¥™¡tŒün(ÏÎI`¸#Ǧ¬H˜hÒâØŽÔÁˆ¢0$%,K“µ®¢à3å*Z耘®1̃±²ÝYº9åû^¾5~\Á³*ºê™já¾7£ ó“o]ßÛ-”©N÷–É»5×Úƒ…f1Š!MÔPK§‘¶‚«°™ 7fÍ ƒiEÅ:Èka¸áD‚SœV +ôᦠŽÞÑ«ª¸™BÜZ*p ¨‚ã"Ô»[´V{•»V!3ú6•Ö>ùæ&lÝŸhæΟv¯Çè6ÊG¾ýC™„êÅ&ÕL¾mÖç¼Ψ…™bÈОÉ{³ÓÐíî »6½nS=µîo–;J¶Û§.'ï£R’ËÈõN œ–È¢ßÓâ[÷šƒ‚\Z^ß…Ù„ õ™ +­%–ëXZÜÿçuqË +«ágÕµ~ƒúíÖVÆÂ…–ý»Ve;˜[ÝýÅÁ ÉLMÝ ±ôÐ}ÈfŒŸÏÑ`t‡š—Ÿ7Ë ‰Q˜È4úŠ¶‹VZ#¼ª +’  +ýƒíë.¦Wànd¨E¾³ŠK×™»ÅɶªQÃ\åÜàØŽ¬Ý°y +Æ4à£øÕèß|)’Úñ_ˆZ}ûóÐ> R¸õY +ST‡¼ax5¬ µFõÅzÕ>þa~zë[KÊQ÷ÔHD~YÖ­kÕ‘²‘Ûsø¢ÛŽ•Ø›¿¤´fœþwŒüÒw'Q½üÔ¡þø,À¬0ts‰Ý¼h:ŠSqÏW°Ò=E2Í'jˆþŸAr+…Dé’!}AÑO:YEÀöT:Š€¸¹²c=öóìn O¨}Æí¤C2wp£rA˜CšDêo Ò )Àse4›uNôã™}ü¦ŒÓx 1šåÓ~Xð³§΋[/B™ “BöøR +î~çsÕ¤4(¥ Þ²î‰`Cœé9’¹•ŒôÂ:E"²º–€IY~&4£‰RYæ¶&{2Šý$±GÇrV›ÀÚâ?É›ãµDVàêkÁ—Za¨Æ4ë%›O*tÉÏ”¯Æâ,!«–N²U¹ÿl×*‰ÖÌ[=a§»#*‚³–RZ +Šãø¹ y"]M¡ÐcØÙU…%˜»Žƒ/TJ ͹Æ2„PËH~ëH=yišÍÖ1÷9× x {ááyÚ½ë·Ù"“êÿú²S¦YÐ^{Ķ3þœž÷Pk<ÈZ?\û/ß2Üw)€%!Z?ÑúT0F :ƒ!WˆuùÖ „óá¾7Ìî8 O7±ów#ÀÄ¿¤ßzÂ)¹>×…VŠh&ksLÆÊcjYÇFír­ô|#3 #Õá:^7 X÷p²š¿lTŠï6#¯ð}«1¿½\]Ó=U¼·¸Cîd»øøä_t}ô¥ +åˆ3©•ä!š4wîQ+¯æITÝÁ —ÒŽÝR^n¼ÿ¤bÏάU#mÁ´¯;]§—Ho7Ò½éáŒJI[ͨ!ªNIÕú˜Œ¨÷|Lb–'æÄèDÐð3R@ñ#Ü;ÁÕÁ2¾˜šŸÿ—hÓŒLÎèŸÍ8ýÕ?/ 0|—¸!ó +Æ€{qK—ÿýðêT°%ÏÄÁç4 ~T’è:sãì# þýu B«K²¢Þ@§wÕI5­³=ð¡•öu³Îm9¶.tÑž2•5ÈĬa”Åpˆ5EÕûÚ4D(á€ÁXmè‡ù{>Ìì•o‹:=T¡AŒB[’œin«­áQ¦Õƒ‚:©ûþÀmôÆö¨.¸åê8¢U…E%x~˹>í£ñ:¾Ö@CQBYÑ: +endstream +endobj +2235 0 obj << -/Type /ObjStm -/N 100 -/First 984 -/Length 16452 ->> -stream -1887 0 1893 120 1895 238 561 297 1892 355 1899 449 1896 597 1897 742 1901 889 565 947 -1898 1004 1904 1098 1906 1216 569 1275 1903 1333 1911 1427 1907 1575 1908 1724 1913 1869 573 1927 -1914 1984 1915 2042 1916 2100 1917 2158 1910 2216 1922 2363 1909 2529 1918 2676 1919 2820 1920 2964 -1924 3109 1921 3168 1927 3315 1925 3454 1929 3598 1926 3656 1931 3763 1933 3881 1930 3940 1935 4008 -1937 4126 1938 4184 783 4242 1939 4299 831 4357 830 4414 789 4469 790 4526 805 4583 786 4640 -787 4697 1940 4754 782 4812 1941 4869 1934 4927 1944 5021 1946 5139 946 5198 817 5256 788 5314 -785 5372 781 5430 784 5488 1947 5546 1943 5605 1948 5699 1949 5719 1950 6070 1951 6101 1952 6260 -1953 6283 1954 6638 1955 6767 1956 6890 1957 7536 1959 8007 1960 8638 1961 9109 1963 9684 1965 9909 -1967 10157 1969 10387 1971 10633 1973 10957 1975 11459 1977 11691 1979 12063 1981 12289 1983 12520 1985 12988 -1987 13540 1958 13931 1750 14341 1682 14484 1360 14627 814 14768 813 14907 812 15047 870 15188 915 15328 -% 1887 0 obj +/Length 1007 +>> +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmex10-builtin-0) +%%Title: (TeX-cmex10-builtin-0 TeX cmex10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmex10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmex10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +1 beginbfrange + <03A5> +endbfrange +31 beginbfchar +<30> +<31> +<32> +<33> +<34> +<35> +<36> +<37> +<38> +<39> +<3A> +<3B> +<3C> +<3D> +<3E> +<3F> +<40> +<41> +<42> +<43> +<9F> <221A> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <2126> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2236 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R >> -/ProcSet [ /PDF /Text ] +/Length 1577 >> -% 1893 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmitt10-builtin-0) +%%Title: (TeX-cmitt10-builtin-0 TeX cmitt10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmitt10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmitt10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +6 beginbfrange +<07> <08> <03A5> +<21> <23> <0021> +<25> <26> <0025> +<28> <5F> <0028> +<61> <7E> <0061> + <03A5> +endbfrange +71 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<24> <00A3> +<27> <2019> +<60> <2018> +<7F> <00A8> +<80> <2423> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <2191> + <2193> + <0027> + <00A1> + <00BF> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <2423> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2237 0 obj << -/Type /Page -/Contents 1894 0 R -/Resources 1892 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R +/Length 1535 >> -% 1895 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmmi10-builtin-0) +%%Title: (TeX-cmmi10-builtin-0 TeX cmmi10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmmi10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmmi10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +20 beginbfrange +<07> <08> <03A5> +<0B> <0E> <03B1> +<10> <15> <03B6> +<17> <18> <03BD> +<19> <1A> <03C0> +<1B> <1D> <03C3> +<1F> <21> <03C7> +<28> <29> <21BC> +<2A> <2B> <21C0> +<30> <39> <0030> +<41> <5A> <0041> +<5B> <5D> <266D> +<61> <7A> <0061> + <03A5> + <03B1> + <03B6> + <03BD> + <03C0> + <03C3> + <03C7> +endbfrange +48 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0F> <03F5> +<16> <00B5> +<1E> <03D5> +<22> <03B5> +<23> <03D1> +<24> <03D6> +<25> <03F1> +<26> <03C2> +<27> <03C6> +<2E> <25B7> +<2F> <25C1> +<3A> <002E> +<3B> <002C> +<3C> <003C> +<3D> <002F> +<3E> <003E> +<3F> <22C6> +<40> <2202> +<5E> <2323> +<5F> <2322> +<60> <2113> +<7B> <0131> +<7C> <0237> +<7D> <2118> +<7E> <20D7> +<80> <03C8> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <2126> + <03F5> + <00B5> + <03D5> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2238 0 obj << -/D [1893 0 R /XYZ 149.705 753.953 null] +/Length 1724 >> -% 561 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmr10-builtin-0) +%%Title: (TeX-cmr10-builtin-0 TeX cmr10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmr10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmr10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +7 beginbfrange +<07> <08> <03A5> +<23> <26> <0023> +<28> <3B> <0028> +<3F> <5B> <003F> +<61> <7A> <0061> +<7B> <7C> <2013> + <03A5> +endbfrange +78 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <00660066> +<0C> <00660069> +<0D> <0066006C> +<0E> <006600660069> +<0F> <00660066006C> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<21> <0021> +<22> <201D> +<27> <2019> +<3C> <00A1> +<3D> <003D> +<3E> <00BF> +<5C> <201C> +<5D> <005D> +<5E> <02C6> +<5F> <02D9> +<60> <2018> +<7D> <02DD> +<7E> <02DC> +<7F> <00A8> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <00660066> + <00660069> + <0066006C> + <006600660069> + <00660066006C> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2239 0 obj << -/D [1893 0 R /XYZ 150.705 716.092 null] +/Length 2050 >> -% 1892 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmsy10-builtin-0) +%%Title: (TeX-cmsy10-builtin-0 TeX cmsy10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmsy10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmsy10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +27 beginbfrange +<08> <0C> <2295> +<12> <13> <2286> +<14> <15> <2264> +<16> <17> <2AAF> +<1A> <1B> <2282> +<1C> <1D> <226A> +<1E> <1F> <227A> +<23> <24> <2193> +<25> <26> <2197> +<2B> <2C> <21D3> +<3E> <3F> <22A4> +<41> <5A> <0041> +<5E> <5F> <2227> +<60> <61> <22A2> +<62> <63> <230A> +<64> <65> <2308> +<68> <69> <27E8> +<76> <77> <2291> +<79> <7A> <2020> + <2295> + <2297> + <2286> + <2264> + <2AAF> + <2282> + <226A> + <227A> +endbfrange +81 beginbfchar +<00> <2212> +<01> <00B7> +<02> <00D7> +<03> <2217> +<04> <00F7> +<05> <22C4> +<06> <00B1> +<07> <2213> +<0D> <20DD> +<0E> <25E6> +<0F> <2022> +<10> <224D> +<11> <2261> +<18> <223C> +<19> <2248> +<20> <2190> +<21> <2192> +<22> <2191> +<27> <2243> +<28> <21D0> +<29> <21D2> +<2A> <21D1> +<2D> <2196> +<2E> <2199> +<2F> <221D> +<30> <2032> +<31> <221E> +<32> <2208> +<33> <220B> +<34> <25B3> +<35> <25BD> +<36> <0338> +<38> <2200> +<39> <2203> +<3A> <00AC> +<3B> <2205> +<3C> <211C> +<3D> <2111> +<40> <2135> +<5B> <222A> +<5C> <2229> +<5D> <228E> +<66> <007B> +<67> <007D> +<6A> <007C> +<6B> <2225> +<6C> <2195> +<6D> <21D5> +<6E> <005C> +<6F> <2240> +<70> <221A> +<71> <2A3F> +<72> <2207> +<73> <222B> +<74> <2294> +<75> <2293> +<78> <00A7> +<7B> <00B6> +<7C> <2663> +<7D> <2662> +<7E> <2661> +<7F> <2660> +<80> <2190> + <0020> + <2212> + <00B7> + <00D7> + <2217> + <00F7> + <22C4> + <00B1> + <2213> + <20DD> + <25E6> + <2022> + <224D> + <2261> + <223C> + <2248> + <2190> + <2660> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2240 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Length 1543 >> -% 1899 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt10-builtin-0) +%%Title: (TeX-cmtt10-builtin-0 TeX cmtt10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmtt10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +5 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> + <03A5> +endbfrange +70 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +<80> <2423> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <2191> + <2193> + <0027> + <00A1> + <00BF> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <2423> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2241 0 obj << -/Type /Page -/Contents 1900 0 R -/Resources 1898 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1896 0 R 1897 0 R ] +/Length 1538 >> -% 1896 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt8-builtin-0) +%%Title: (TeX-cmtt8-builtin-0 TeX cmtt8-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt8-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmtt8-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +5 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> + <03A5> +endbfrange +70 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +<80> <2423> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <2191> + <2193> + <0027> + <00A1> + <00BF> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <2423> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2242 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 573.77 387.374 585.83] -/A << /S /GoTo /D (precdata) >> +/Length 1538 >> -% 1897 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt9-builtin-0) +%%Title: (TeX-cmtt9-builtin-0 TeX cmtt9-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt9-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmtt9-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +5 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> + <03A5> +endbfrange +70 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +<80> <2423> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <00AD> + <00A0> + <2126> + <2191> + <2193> + <0027> + <00A1> + <00BF> + <0131> + <0237> + <0060> + <00B4> + <02C7> + <02D8> + <00AF> + <02DA> + <00B8> + <00DF> + <00E6> + <0153> + <00F8> + <00C6> + <0152> + <00D8> + <2423> + <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2243 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 498.054 387.374 510.114] -/A << /S /GoTo /D (precdata) >> +/Length 853 >> -% 1901 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-fplmr-builtin-0) +%%Title: (TeX-fplmr-builtin-0 TeX fplmr-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (fplmr-builtin) +/Supplement 0 +>> def +/CMapName /TeX-fplmr-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +0 beginbfrange +endbfrange +20 beginbfchar +<20> <0020> +<44> <2206> +<46> <03A6> +<47> <0393> +<4C> <039B> +<50> <03A0> +<51> <0398> +<53> <03A3> +<55> <03A5> +<57> <2126> +<58> <039E> +<59> <03A8> + <20AC> + <221E> + <221D> + <2205> + <220F> + <0237> + <2A3F> + <2211> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2244 0 obj << -/D [1899 0 R /XYZ 98.895 753.953 null] +/Length 1113 >> -% 565 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-fplmri-builtin-0) +%%Title: (TeX-fplmri-builtin-0 TeX fplmri-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (fplmri-builtin) +/Supplement 0 +>> def +/CMapName /TeX-fplmri-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +3 beginbfrange +<61> <62> <03B1> +<6B> <6C> <03BA> +<73> <75> <03C3> +endbfrange +37 beginbfchar +<20> <0020> +<23> <03B5> +<24> <03F1> +<44> <2206> +<46> <03A6> +<47> <0393> +<4A> <03D1> +<4C> <039B> +<50> <03A0> +<51> <0398> +<53> <03A3> +<55> <03A5> +<56> <03C2> +<57> <2126> +<58> <039E> +<59> <03A8> +<63> <03C7> +<64> <03B4> +<65> <03F5> +<66> <03D5> +<67> <03B3> +<68> <03B7> +<69> <03B9> +<6A> <03C6> +<6D> <00B5> +<6E> <03BD> +<70> <03C0> +<71> <03B8> +<72> <03C1> +<76> <03D6> +<77> <03C9> +<78> <03BE> +<79> <03C8> +<7A> <03B6> + <20AC> + <2202> + <0237> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2245 0 obj << -/D [1899 0 R /XYZ 99.895 716.092 null] +/Length 1477 >> -% 1898 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplb8r-8r-0) +%%Title: (TeX-pplb8r-8r-0 TeX pplb8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplb8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplb8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> +<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> <222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + <002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2246 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Length 1477 >> -% 1904 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplr8r-8r-0) +%%Title: (TeX-pplr8r-8r-0 TeX pplr8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplr8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplr8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> +<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> <222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + <002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2247 0 obj << -/Type /Page -/Contents 1905 0 R -/Resources 1903 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R +/Length 1482 >> -% 1906 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplri8r-8r-0) +%%Title: (TeX-pplri8r-8r-0 TeX pplri8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplri8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplri8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> +<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> <222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + <002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2168 0 obj << -/D [1904 0 R /XYZ 149.705 753.953 null] +/Type /ObjStm +/N 100 +/First 974 +/Length 16950 >> -% 569 0 obj +stream +606 0 2163 57 2170 151 2172 269 610 328 614 386 618 444 622 502 626 560 630 618 +634 676 638 734 2169 792 2174 886 2176 1004 642 1062 646 1119 650 1176 654 1233 2173 1290 +2178 1384 2180 1502 2177 1561 2182 1629 2184 1747 2185 1805 2186 1863 931 1921 930 1977 890 2034 +891 2091 906 2148 887 2205 888 2262 2187 2319 883 2377 2188 2434 1046 2492 2181 2550 2191 2644 +2193 2762 918 2821 889 2879 886 2937 882 2995 2058 3053 885 3112 2194 3170 884 3229 2042 3287 +2043 3345 2190 3404 2195 3498 2196 3518 2197 3889 2198 3992 2199 4151 2200 4174 2201 4629 2202 4758 +2203 5056 2204 5702 2206 6173 2207 6804 2208 7275 2210 7850 2212 8075 2214 8407 2216 8651 2218 8922 +2220 9270 2222 9782 2224 10014 2226 10460 2228 10686 2230 10917 2232 11396 2234 11972 2205 12401 1851 12842 +1782 13005 1460 13168 915 13329 914 13488 913 13648 970 13809 1016 13969 1254 14130 1127 14295 665 14465 +667 14655 666 14845 668 15035 881 15148 971 15261 1028 15376 1062 15496 1092 15616 1134 15736 1180 15856 +% 606 0 obj << -/D [1904 0 R /XYZ 150.705 716.092 null] +/D [2164 0 R /XYZ 99.895 284.171 null] >> -% 1903 0 obj +% 2163 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1911 0 obj +% 2170 0 obj << /Type /Page -/Contents 1912 0 R -/Resources 1910 0 R +/Contents 2171 0 R +/Resources 2169 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1907 0 R 1908 0 R ] +/Parent 2158 0 R >> -% 1907 0 obj +% 2172 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [329.163 275.278 335.44 288.868] -/A << /S /GoTo /D (Hfootnote.5) >> +/D [2170 0 R /XYZ 149.705 753.953 null] >> -% 1908 0 obj +% 610 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 134.696 369.462 146.755] -/A << /S /GoTo /D (spdata) >> +/D [2170 0 R /XYZ 150.705 720.077 null] >> -% 1913 0 obj +% 614 0 obj << -/D [1911 0 R /XYZ 98.895 753.953 null] +/D [2170 0 R /XYZ 150.705 633.991 null] >> -% 573 0 obj +% 618 0 obj << -/D [1911 0 R /XYZ 99.895 716.092 null] +/D [2170 0 R /XYZ 150.705 559.861 null] >> -% 1914 0 obj +% 622 0 obj << -/D [1911 0 R /XYZ 99.895 444.811 null] +/D [2170 0 R /XYZ 150.705 485.732 null] >> -% 1915 0 obj +% 626 0 obj << -/D [1911 0 R /XYZ 99.895 444.971 null] +/D [2170 0 R /XYZ 150.705 411.602 null] >> -% 1916 0 obj +% 630 0 obj +<< +/D [2170 0 R /XYZ 150.705 325.516 null] +>> +% 634 0 obj << -/D [1911 0 R /XYZ 99.895 433.015 null] +/D [2170 0 R /XYZ 150.705 251.386 null] >> -% 1917 0 obj +% 638 0 obj << -/D [1911 0 R /XYZ 114.242 129.79 null] +/D [2170 0 R /XYZ 150.705 177.256 null] >> -% 1910 0 obj +% 2169 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F61 1360 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1922 0 obj +% 2174 0 obj << /Type /Page -/Contents 1923 0 R -/Resources 1921 0 R +/Contents 2175 0 R +/Resources 2173 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1909 0 R 1918 0 R 1919 0 R 1920 0 R ] +/Parent 2158 0 R >> -% 1909 0 obj +% 2176 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 654.503 409.811 666.562] -/A << /S /GoTo /D (precdata) >> +/D [2174 0 R /XYZ 98.895 753.953 null] >> -% 1918 0 obj +% 642 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 584.479 469.357 596.539] -/A << /S /GoTo /D (vdata) >> +/D [2174 0 R /XYZ 99.895 720.077 null] >> -% 1919 0 obj +% 646 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 514.456 469.357 526.516] -/A << /S /GoTo /D (vdata) >> +/D [2174 0 R /XYZ 99.895 650.062 null] >> -% 1920 0 obj +% 650 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 374.41 409.811 386.47] -/A << /S /GoTo /D (descdata) >> +/D [2174 0 R /XYZ 99.895 580.047 null] >> -% 1924 0 obj +% 654 0 obj << -/D [1922 0 R /XYZ 149.705 753.953 null] +/D [2174 0 R /XYZ 99.895 510.033 null] >> -% 1921 0 obj +% 2173 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F61 1360 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1927 0 obj +% 2178 0 obj << /Type /Page -/Contents 1928 0 R -/Resources 1926 0 R +/Contents 2179 0 R +/Resources 2177 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1925 0 R ] ->> -% 1925 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.493 554.876 418.548 566.936] -/A << /S /GoTo /D (vdata) >> +/Parent 2158 0 R >> -% 1929 0 obj +% 2180 0 obj << -/D [1927 0 R /XYZ 98.895 753.953 null] +/D [2178 0 R /XYZ 149.705 753.953 null] >> -% 1926 0 obj +% 2177 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1931 0 obj +% 2182 0 obj << /Type /Page -/Contents 1932 0 R -/Resources 1930 0 R +/Contents 2183 0 R +/Resources 2181 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R +/Parent 2189 0 R >> -% 1933 0 obj +% 2184 0 obj << -/D [1931 0 R /XYZ 149.705 753.953 null] +/D [2182 0 R /XYZ 98.895 753.953 null] >> -% 1930 0 obj +% 2185 0 obj << -/Font << /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [2182 0 R /XYZ 99.895 723.717 null] >> -% 1935 0 obj +% 2186 0 obj << -/Type /Page -/Contents 1936 0 R -/Resources 1934 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1942 0 R +/D [2182 0 R /XYZ 99.895 698.792 null] >> -% 1937 0 obj +% 931 0 obj << -/D [1935 0 R /XYZ 98.895 753.953 null] +/D [2182 0 R /XYZ 99.895 638.64 null] >> -% 1938 0 obj +% 930 0 obj << -/D [1935 0 R /XYZ 99.895 723.717 null] +/D [2182 0 R /XYZ 99.895 583.689 null] >> -% 783 0 obj +% 890 0 obj << -/D [1935 0 R /XYZ 99.895 698.622 null] +/D [2182 0 R /XYZ 99.895 523.438 null] >> -% 1939 0 obj +% 891 0 obj << -/D [1935 0 R /XYZ 99.895 640.564 null] +/D [2182 0 R /XYZ 99.895 479.068 null] >> -% 831 0 obj +% 906 0 obj << -/D [1935 0 R /XYZ 99.895 585.057 null] +/D [2182 0 R /XYZ 99.895 433.422 null] >> -% 830 0 obj +% 887 0 obj << -/D [1935 0 R /XYZ 99.895 532.2 null] +/D [2182 0 R /XYZ 99.895 386.501 null] >> -% 789 0 obj +% 888 0 obj << -/D [1935 0 R /XYZ 99.895 474.043 null] +/D [2182 0 R /XYZ 99.895 340.855 null] >> -% 790 0 obj +% 2187 0 obj << -/D [1935 0 R /XYZ 99.895 431.766 null] +/D [2182 0 R /XYZ 99.895 295.209 null] >> -% 805 0 obj +% 883 0 obj << -/D [1935 0 R /XYZ 99.895 388.215 null] +/D [2182 0 R /XYZ 99.895 249.563 null] >> -% 786 0 obj +% 2188 0 obj << -/D [1935 0 R /XYZ 99.895 343.387 null] +/D [2182 0 R /XYZ 99.895 204.585 null] >> -% 787 0 obj +% 1046 0 obj << -/D [1935 0 R /XYZ 99.895 299.836 null] +/D [2182 0 R /XYZ 99.895 146.317 null] >> -% 1940 0 obj +% 2181 0 obj << -/D [1935 0 R /XYZ 99.895 256.284 null] +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 782 0 obj +% 2191 0 obj << -/D [1935 0 R /XYZ 99.895 212.732 null] +/Type /Page +/Contents 2192 0 R +/Resources 2190 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2189 0 R >> -% 1941 0 obj +% 2193 0 obj << -/D [1935 0 R /XYZ 99.895 169.848 null] +/D [2191 0 R /XYZ 149.705 753.953 null] >> -% 1934 0 obj +% 918 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R >> -/ProcSet [ /PDF /Text ] +/D [2191 0 R /XYZ 150.705 716.092 null] >> -% 1944 0 obj +% 889 0 obj << -/Type /Page -/Contents 1945 0 R -/Resources 1943 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1942 0 R +/D [2191 0 R /XYZ 150.705 664.064 null] >> -% 1946 0 obj +% 886 0 obj << -/D [1944 0 R /XYZ 149.705 753.953 null] +/D [2191 0 R /XYZ 150.705 622.283 null] >> -% 946 0 obj +% 882 0 obj << -/D [1944 0 R /XYZ 150.705 716.092 null] +/D [2191 0 R /XYZ 150.705 577.073 null] >> -% 817 0 obj +% 2058 0 obj << -/D [1944 0 R /XYZ 150.705 687.379 null] +/D [2191 0 R /XYZ 150.705 543.917 null] >> -% 788 0 obj +% 885 0 obj << -/D [1944 0 R /XYZ 150.705 632.184 null] +/D [2191 0 R /XYZ 150.705 512.037 null] >> -% 785 0 obj +% 2194 0 obj << -/D [1944 0 R /XYZ 150.705 590.403 null] +/D [2191 0 R /XYZ 150.705 480.156 null] >> -% 781 0 obj +% 884 0 obj << -/D [1944 0 R /XYZ 150.705 545.192 null] +/D [2191 0 R /XYZ 150.705 438.971 null] >> -% 784 0 obj +% 2042 0 obj << -/D [1944 0 R /XYZ 150.705 512.037 null] +/D [2191 0 R /XYZ 150.705 380.53 null] >> -% 1947 0 obj +% 2043 0 obj << -/D [1944 0 R /XYZ 150.705 480.156 null] +/D [2191 0 R /XYZ 150.705 336.754 null] >> -% 1943 0 obj +% 2190 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1948 0 obj +% 2195 0 obj [1000] -% 1949 0 obj -[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1950 0 obj -[777.8 500 777.8] -% 1951 0 obj +% 2196 0 obj +[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] +% 2197 0 obj +[277.8 277.8 500 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 777.8 500 777.8] +% 2198 0 obj [853 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 666 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 747 0 0 0 0 0 0 0 0 0 0 0 0 0 0 881 0 0 0 0 0 0 0 0 0 0 0 0 234 0 881 767] -% 1952 0 obj +% 2199 0 obj [528 542] -% 1953 0 obj -[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1954 0 obj +% 2200 0 obj +[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] +% 2201 0 obj [531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3] -% 1955 0 obj -[388.9 388.9 500 777.8 277.8 333.3 277.8 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 277.8 777.8] -% 1956 0 obj +% 2202 0 obj +[388.9 388.9 500 777.8 277.8 333.3 277.8 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 277.8 777.8 472.2 472.2 777.8 750 708.3 722.2 763.9 680.6 652.8 784.7 750 361.1 513.9 777.8 625 916.7 750 777.8 680.6 777.8 736.1 555.6 722.2 750 750 1027.8 750 750 611.1 277.8 500 277.8] +% 2203 0 obj [777.8 277.8 777.8 500 777.8 500 777.8 777.8 777.8 777.8 777.8 777.8 777.8 1000 500 500 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 1000 1000 777.8 777.8 1000 1000 500 500 1000 1000 1000 777.8 1000 1000 611.1 611.1 1000 1000 1000 777.8 275 1000 666.7 666.7 888.9 888.9 0 0 555.6 555.6 666.7 500 722.2 722.2 777.8 777.8 611.1 798.5 656.8 526.5 771.4 527.8 718.7 594.9 844.5 544.5 677.8 761.9 689.7 1200.9 820.5 796.1 695.6 816.7 847.5 605.6 544.6 625.8 612.8 987.8 713.3 668.3 724.7 666.7 666.7 666.7 666.7 666.7 611.1 611.1 444.4 444.4 444.4 444.4 500 500 388.9 388.9 277.8 500 500 611.1 500 277.8 833.3] -% 1957 0 obj +% 2204 0 obj [525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1959 0 obj +% 2206 0 obj [605 608 167 380 611 291 313 333 0 333 606 0 667 500 333 287 0 0 0 0 0 0 0 0 0 0 0 0 333 208 250 278 371 500 500 840 778 278 333 333 389 606 250 333 250 606 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 444 747 778 611 709 774 611 556 763 832 337 333 726 611 946 831 786 604 786 668 525 613 778 722 1000 667 667 667 333 606 333 606 500 278 500 553 444 611 479 333 556 582 291 234 556 291 883 582 546 601 560 395 424 326 603 565 834 516 556 500 333 606 333 606 0 0 0 278 500 500 1000 500 500 333 1144 525 331 998 0 0 0 0 0 0 500 500 606 500 1000 333 979 424 331 827 0 0 667 0 278 500 500 500 500 606 500] -% 1960 0 obj +% 2207 0 obj [528 545 167 333 556 278 333 333 0 333 606 0 667 444 333 278 0 0 0 0 0 0 0 0 0 0 0 0 333 333 250 333 500 500 500 889 778 278 333 333 389 606 250 333 250 296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 500 747 722 611 667 778 611 556 722 778 333 333 667 556 944 778 778 611 778 667 556 611 778 722 944 722 667 667 333 606 333 606 500 278 444 463 407 500 389 278 500 500 278 278 444 278 778 556 444 500 463 389 389 333 556 500 722 500 500 444] -% 1961 0 obj +% 2208 0 obj [611 611 167 333 611 333 333 333 0 333 606 0 667 500 333 333 0 0 0 0 0 0 0 0 0 0 0 0 333 227 250 278 402 500 500 889 833 278 333 333 444 606 250 333 250 296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 444 747 778 667 722 833 611 556 833 833 389 389 778 611 1000 833 833 611 833 722 611 667 778 778 1000 667 667 667 333 606 333 606 500 278 500 611 444 611 500 389 556 611 333 333 611 333 889 611 556 611 611 389 444 333 611 556 833 500 556 500 310 606 310 606 0 0 0 333 500 500 1000 500 500 333 1000 611 389 1000 0 0 0 0 0 0 500 500 606 500 1000] -% 1963 0 obj +% 2210 0 obj << /Type /FontDescriptor /FontName /MNPEHI+CMEX10 @@ -28025,12 +34430,12 @@ stream /StemV 47 /XHeight 431 /CharSet (/radicalbigg) -/FontFile 1962 0 R +/FontFile 2209 0 R >> -% 1965 0 obj +% 2212 0 obj << /Type /FontDescriptor -/FontName /MPVPBL+CMITT10 +/FontName /SFGIZH+CMITT10 /Flags 4 /FontBBox [11 -233 669 696] /Ascent 611 @@ -28039,13 +34444,13 @@ stream /ItalicAngle -14 /StemV 69 /XHeight 431 -/CharSet (/D/a/c/d/e/exclam/n/o/period/s/t) -/FontFile 1964 0 R +/CharSet (/A/C/D/E/H/I/K/L/M/P/T/V/a/c/comma/d/e/exclam/f/g/h/hyphen/i/k/m/n/o/p/parenleft/parenright/period/r/s/slash/t/w/x/y) +/FontFile 2211 0 R >> -% 1967 0 obj +% 2214 0 obj << /Type /FontDescriptor -/FontName /SYFPBV+CMMI10 +/FontName /TPELEW+CMMI10 /Flags 4 /FontBBox [-32 -250 1048 750] /Ascent 694 @@ -28054,13 +34459,13 @@ stream /ItalicAngle -14 /StemV 72 /XHeight 431 -/CharSet (/greater/less) -/FontFile 1966 0 R +/CharSet (/arrowhookleft/greater/less) +/FontFile 2213 0 R >> -% 1969 0 obj +% 2216 0 obj << /Type /FontDescriptor -/FontName /GIGFZE+CMR10 +/FontName /SOSTRQ+CMR10 /Flags 4 /FontBBox [-40 -250 1009 750] /Ascent 694 @@ -28069,13 +34474,13 @@ stream /ItalicAngle 0 /StemV 69 /XHeight 431 -/CharSet (/equal/parenleft/parenright/plus) -/FontFile 1968 0 R +/CharSet (/bracketleft/bracketright/equal/parenleft/parenright/plus) +/FontFile 2215 0 R >> -% 1971 0 obj +% 2218 0 obj << /Type /FontDescriptor -/FontName /DMJGRR+CMSY10 +/FontName /VKSUEJ+CMSY10 /Flags 4 /FontBBox [-29 -960 1116 775] /Ascent 750 @@ -28084,13 +34489,13 @@ stream /ItalicAngle -14 /StemV 40 /XHeight 431 -/CharSet (/B/H/I/arrowleft/bar/bardbl/braceleft/braceright/element/greaterequal/lessequal/minus/negationslash/radical) -/FontFile 1970 0 R +/CharSet (/B/H/I/arrowleft/arrowright/asteriskmath/bar/bardbl/braceleft/braceright/element/greaterequal/lessequal/minus/negationslash/radical) +/FontFile 2217 0 R >> -% 1973 0 obj +% 2220 0 obj << /Type /FontDescriptor -/FontName /UFPYIQ+CMTT10 +/FontName /QGKXNM+CMTT10 /Flags 4 /FontBBox [-4 -233 537 696] /Ascent 611 @@ -28099,10 +34504,10 @@ stream /ItalicAngle 0 /StemV 69 /XHeight 431 -/CharSet (/A/B/C/D/E/F/I/K/L/M/N/O/P/R/S/T/U/W/Y/a/ampersand/asciitilde/asterisk/b/backslash/bracketleft/bracketright/c/colon/comma/d/e/eight/equal/f/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/six/slash/t/three/two/u/underscore/v/w/x/y/z/zero) -/FontFile 1972 0 R +/CharSet (/A/B/C/D/E/F/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/Z/a/ampersand/asciitilde/asterisk/b/backslash/bracketleft/bracketright/c/colon/comma/d/e/eight/equal/f/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/six/slash/t/three/two/u/underscore/v/w/x/y/z/zero) +/FontFile 2219 0 R >> -% 1975 0 obj +% 2222 0 obj << /Type /FontDescriptor /FontName /HZGQIC+CMTT8 @@ -28115,12 +34520,12 @@ stream /StemV 76 /XHeight 431 /CharSet (/b/c/e/i/l/n/p/r/s/t) -/FontFile 1974 0 R +/FontFile 2221 0 R >> -% 1977 0 obj +% 2224 0 obj << /Type /FontDescriptor -/FontName /BQXTWV+CMTT9 +/FontName /RQJPKO+CMTT9 /Flags 4 /FontBBox [-6 -233 542 698] /Ascent 611 @@ -28129,10 +34534,10 @@ stream /ItalicAngle 0 /StemV 74 /XHeight 431 -/CharSet (/D/E/I/K/N/P/T/Y/a/ampersand/b/c/colon/comma/d/e/equal/f/g/greater/h/i/k/l/less/m/n/o/p/parenleft/parenright/period/q/r/s/semicolon/t/two/u/underscore/v/w/x/y/z) -/FontFile 1976 0 R +/CharSet (/D/E/I/K/N/P/S/T/Y/a/ampersand/asterisk/b/c/colon/comma/d/e/equal/f/four/g/greater/h/hyphen/i/j/k/l/less/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/semicolon/six/slash/t/two/u/underscore/v/w/x/y/z/zero) +/FontFile 2223 0 R >> -% 1979 0 obj +% 2226 0 obj << /Type /FontDescriptor /FontName /IKXQUG+PazoMath @@ -28145,9 +34550,9 @@ stream /StemV 95 /XHeight 0 /CharSet (/infinity/summation) -/FontFile 1978 0 R +/FontFile 2225 0 R >> -% 1981 0 obj +% 2228 0 obj << /Type /FontDescriptor /FontName /DUJUUF+PazoMath-Italic @@ -28160,12 +34565,12 @@ stream /StemV 65 /XHeight 0 /CharSet (/alpha/beta) -/FontFile 1980 0 R +/FontFile 2227 0 R >> -% 1983 0 obj +% 2230 0 obj << /Type /FontDescriptor -/FontName /TVMKYN+URWPalladioL-Bold +/FontName /BDDEWM+URWPalladioL-Bold /Flags 4 /FontBBox [-152 -301 1000 935] /Ascent 708 @@ -28174,13 +34579,13 @@ stream /ItalicAngle 0 /StemV 123 /XHeight 471 -/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/Z/a/b/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/quoteright/r/s/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1982 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/Y/Z/a/b/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/question/quoteright/r/s/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2229 0 R >> -% 1985 0 obj +% 2232 0 obj << /Type /FontDescriptor -/FontName /TCRNJT+URWPalladioL-Roma +/FontName /GLTUCO+URWPalladioL-Roma /Flags 4 /FontBBox [-166 -283 1021 943] /Ascent 715 @@ -28189,13 +34594,13 @@ stream /ItalicAngle 0 /StemV 84 /XHeight 469 -/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/a/ampersand/b/bracketleft/bracketright/bullet/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/quotedblleft/quotedblright/quoteright/r/s/section/semicolon/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1984 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/Z/a/ampersand/asterisk/b/bracketleft/bracketright/bullet/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/grave/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/plus/q/quotedblleft/quotedblright/quoteright/r/s/section/semicolon/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2231 0 R >> -% 1987 0 obj +% 2234 0 obj << /Type /FontDescriptor -/FontName /RUEFYH+URWPalladioL-Ital +/FontName /LHHPET+URWPalladioL-Ital /Flags 4 /FontBBox [-170 -305 1010 941] /Ascent 722 @@ -28204,419 +34609,614 @@ stream /ItalicAngle -9 /StemV 78 /XHeight 482 -/CharSet (/A/B/C/D/E/F/G/H/I/L/M/N/O/P/Q/R/S/T/U/V/X/a/b/c/colon/d/e/f/fi/five/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/period/q/quoteright/r/s/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1986 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/K/L/M/N/O/P/Q/R/S/T/U/V/X/a/b/bracketleft/bracketright/c/colon/comma/d/e/f/fi/five/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/period/plus/q/quoteright/r/s/slash/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2233 0 R >> -% 1958 0 obj +% 2205 0 obj << /Type /Encoding -/Differences [2/fi/fl 38/ampersand/quoteright/parenleft/parenright 44/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 61/equal 65/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X 90/Z/bracketleft 93/bracketright 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 147/quotedblleft/quotedblright/bullet/endash/emdash 167/section] +/Differences [2/fi/fl 30/grave 38/ampersand/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 61/equal 63/question 65/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft 93/bracketright 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 147/quotedblleft/quotedblright/bullet/endash/emdash 167/section] >> -% 1750 0 obj +% 1851 0 obj << /Type /Font /Subtype /Type1 /BaseFont /MNPEHI+CMEX10 -/FontDescriptor 1963 0 R +/FontDescriptor 2210 0 R /FirstChar 114 /LastChar 114 -/Widths 1948 0 R +/Widths 2195 0 R +/ToUnicode 2235 0 R >> -% 1682 0 obj +% 1782 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /MPVPBL+CMITT10 -/FontDescriptor 1965 0 R +/BaseFont /SFGIZH+CMITT10 +/FontDescriptor 2212 0 R /FirstChar 33 -/LastChar 116 -/Widths 1949 0 R +/LastChar 121 +/Widths 2196 0 R +/ToUnicode 2236 0 R >> -% 1360 0 obj +% 1460 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /SYFPBV+CMMI10 -/FontDescriptor 1967 0 R -/FirstChar 60 +/BaseFont /TPELEW+CMMI10 +/FontDescriptor 2214 0 R +/FirstChar 44 /LastChar 62 -/Widths 1950 0 R +/Widths 2197 0 R +/ToUnicode 2237 0 R >> -% 814 0 obj +% 915 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /GIGFZE+CMR10 -/FontDescriptor 1969 0 R +/BaseFont /SOSTRQ+CMR10 +/FontDescriptor 2216 0 R /FirstChar 40 -/LastChar 61 -/Widths 1955 0 R +/LastChar 93 +/Widths 2202 0 R +/ToUnicode 2238 0 R >> -% 813 0 obj +% 914 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /DMJGRR+CMSY10 -/FontDescriptor 1971 0 R +/BaseFont /VKSUEJ+CMSY10 +/FontDescriptor 2218 0 R /FirstChar 0 /LastChar 112 -/Widths 1956 0 R +/Widths 2203 0 R +/ToUnicode 2239 0 R >> -% 812 0 obj +% 913 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /UFPYIQ+CMTT10 -/FontDescriptor 1973 0 R +/BaseFont /QGKXNM+CMTT10 +/FontDescriptor 2220 0 R /FirstChar 13 /LastChar 126 -/Widths 1957 0 R +/Widths 2204 0 R +/ToUnicode 2240 0 R >> -% 870 0 obj +% 970 0 obj << /Type /Font /Subtype /Type1 /BaseFont /HZGQIC+CMTT8 -/FontDescriptor 1975 0 R +/FontDescriptor 2222 0 R /FirstChar 98 /LastChar 116 -/Widths 1954 0 R +/Widths 2201 0 R +/ToUnicode 2241 0 R >> -% 915 0 obj +% 1016 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /BQXTWV+CMTT9 -/FontDescriptor 1977 0 R -/FirstChar 38 +/BaseFont /RQJPKO+CMTT9 +/FontDescriptor 2224 0 R +/FirstChar 13 /LastChar 122 -/Widths 1953 0 R +/Widths 2200 0 R +/ToUnicode 2242 0 R >> - -endstream -endobj -1988 0 obj -<< -/Type /ObjStm -/N 100 -/First 925 -/Length 11101 ->> -stream -1154 0 1027 145 584 295 586 465 585 635 587 805 780 918 871 1031 927 1144 962 1257 -991 1370 1034 1487 1079 1607 1121 1727 1180 1847 1232 1967 1281 2087 1330 2207 1371 2327 1409 2447 -1456 2567 1500 2687 1537 2807 1574 2927 1612 3047 1653 3167 1683 3287 1717 3407 1754 3527 1789 3647 -1828 3767 1865 3887 1902 4007 1942 4127 1989 4211 1990 4326 1991 4447 1992 4568 1993 4689 1994 4801 -1995 4897 574 4966 570 5026 566 5137 562 5211 558 5299 554 5387 550 5475 546 5563 542 5637 -538 5762 534 5836 530 5924 526 6012 522 6100 518 6188 514 6262 510 6387 506 6461 502 6549 -498 6637 494 6711 490 6836 486 6910 482 6998 478 7086 474 7174 470 7262 466 7350 462 7438 -458 7526 454 7614 450 7702 446 7790 442 7878 438 7966 434 8054 430 8142 426 8230 422 8304 -418 8430 414 8504 410 8592 406 8680 401 8768 397 8856 393 8944 389 9032 385 9120 381 9208 -377 9296 373 9384 369 9472 365 9560 361 9648 357 9736 353 9824 349 9912 345 10000 341 10088 -% 1154 0 obj +% 1254 0 obj << /Type /Font /Subtype /Type1 /BaseFont /IKXQUG+PazoMath -/FontDescriptor 1979 0 R +/FontDescriptor 2226 0 R /FirstChar 165 /LastChar 229 -/Widths 1951 0 R +/Widths 2198 0 R +/ToUnicode 2243 0 R >> -% 1027 0 obj +% 1127 0 obj << /Type /Font /Subtype /Type1 /BaseFont /DUJUUF+PazoMath-Italic -/FontDescriptor 1981 0 R +/FontDescriptor 2228 0 R /FirstChar 97 /LastChar 98 -/Widths 1952 0 R +/Widths 2199 0 R +/ToUnicode 2244 0 R >> -% 584 0 obj +% 665 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /TVMKYN+URWPalladioL-Bold -/FontDescriptor 1983 0 R +/BaseFont /BDDEWM+URWPalladioL-Bold +/FontDescriptor 2230 0 R /FirstChar 2 /LastChar 151 -/Widths 1961 0 R -/Encoding 1958 0 R +/Widths 2208 0 R +/Encoding 2205 0 R +/ToUnicode 2245 0 R >> -% 586 0 obj +% 667 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /TCRNJT+URWPalladioL-Roma -/FontDescriptor 1985 0 R +/BaseFont /GLTUCO+URWPalladioL-Roma +/FontDescriptor 2232 0 R /FirstChar 2 /LastChar 167 -/Widths 1959 0 R -/Encoding 1958 0 R +/Widths 2206 0 R +/Encoding 2205 0 R +/ToUnicode 2246 0 R >> -% 585 0 obj +% 666 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /RUEFYH+URWPalladioL-Ital -/FontDescriptor 1987 0 R +/BaseFont /LHHPET+URWPalladioL-Ital +/FontDescriptor 2234 0 R /FirstChar 2 /LastChar 122 -/Widths 1960 0 R -/Encoding 1958 0 R +/Widths 2207 0 R +/Encoding 2205 0 R +/ToUnicode 2247 0 R >> -% 587 0 obj +% 668 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [577 0 R 590 0 R 636 0 R 694 0 R 740 0 R 761 0 R] +/Parent 2248 0 R +/Kids [658 0 R 671 0 R 717 0 R 774 0 R 821 0 R 861 0 R] >> -% 780 0 obj +% 881 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [778 0 R 797 0 R 809 0 R 822 0 R 835 0 R 840 0 R] +/Parent 2248 0 R +/Kids [879 0 R 898 0 R 910 0 R 923 0 R 935 0 R 940 0 R] >> -% 871 0 obj +% 971 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [853 0 R 874 0 R 885 0 R 893 0 R 904 0 R 920 0 R] +/Parent 2248 0 R +/Kids [953 0 R 975 0 R 986 0 R 994 0 R 1005 0 R 1021 0 R] >> -% 927 0 obj +% 1028 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [924 0 R 930 0 R 935 0 R 942 0 R 949 0 R 954 0 R] +/Parent 2248 0 R +/Kids [1025 0 R 1030 0 R 1035 0 R 1042 0 R 1049 0 R 1054 0 R] >> -% 962 0 obj +% 1062 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [959 0 R 964 0 R 968 0 R 972 0 R 976 0 R 982 0 R] +/Parent 2248 0 R +/Kids [1059 0 R 1065 0 R 1069 0 R 1073 0 R 1077 0 R 1083 0 R] >> -% 991 0 obj +% 1092 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [988 0 R 995 0 R 1002 0 R 1008 0 R 1013 0 R 1024 0 R] +/Parent 2248 0 R +/Kids [1089 0 R 1096 0 R 1103 0 R 1109 0 R 1113 0 R 1124 0 R] >> -% 1034 0 obj +% 1134 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1031 0 R 1041 0 R 1047 0 R 1058 0 R 1063 0 R 1070 0 R] +/Parent 2249 0 R +/Kids [1131 0 R 1141 0 R 1147 0 R 1158 0 R 1164 0 R 1171 0 R] >> -% 1079 0 obj +% 1180 0 obj +<< +/Type /Pages +/Count 6 +/Parent 2249 0 R +/Kids [1176 0 R 1185 0 R 1193 0 R 1198 0 R 1206 0 R 1211 0 R] +>> + +endstream +endobj +2250 0 obj +<< +/Type /ObjStm +/N 100 +/First 920 +/Length 10604 +>> +stream +1222 0 1281 120 1333 240 1382 360 1431 480 1471 600 1510 720 1557 840 1600 960 1638 1080 +1674 1200 1713 1320 1754 1440 1783 1560 1817 1680 1855 1800 1890 1920 1928 2040 1966 2160 2002 2280 +2057 2400 2158 2520 2189 2640 2248 2724 2249 2842 2251 2963 2252 3084 2253 3205 2254 3326 2255 3401 +2256 3506 655 3575 651 3649 647 3737 643 3825 639 3913 635 4001 631 4089 627 4177 623 4265 +619 4353 615 4441 611 4529 607 4617 602 4705 598 4779 594 4891 590 4965 586 5053 582 5141 +578 5215 574 5340 570 5400 566 5525 562 5599 558 5687 554 5775 550 5863 546 5951 542 6025 +538 6150 534 6224 530 6312 526 6400 522 6488 518 6576 514 6650 510 6775 506 6849 502 6937 +498 7025 494 7099 490 7224 486 7298 482 7386 478 7474 474 7562 470 7650 466 7738 462 7826 +458 7914 454 8002 450 8090 446 8178 442 8266 438 8354 434 8442 430 8530 426 8618 422 8692 +418 8818 414 8892 410 8980 406 9068 401 9156 397 9244 393 9332 389 9420 385 9508 381 9596 +% 1222 0 obj +<< +/Type /Pages +/Count 6 +/Parent 2249 0 R +/Kids [1218 0 R 1224 0 R 1237 0 R 1244 0 R 1251 0 R 1262 0 R] +>> +% 1281 0 obj +<< +/Type /Pages +/Count 6 +/Parent 2249 0 R +/Kids [1278 0 R 1285 0 R 1296 0 R 1302 0 R 1313 0 R 1318 0 R] +>> +% 1333 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1075 0 R 1084 0 R 1092 0 R 1097 0 R 1105 0 R 1110 0 R] +/Parent 2249 0 R +/Kids [1329 0 R 1335 0 R 1344 0 R 1350 0 R 1358 0 R 1365 0 R] >> -% 1121 0 obj +% 1382 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1117 0 R 1124 0 R 1137 0 R 1144 0 R 1151 0 R 1162 0 R] +/Parent 2249 0 R +/Kids [1379 0 R 1387 0 R 1396 0 R 1404 0 R 1408 0 R 1423 0 R] >> -% 1180 0 obj +% 1431 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1177 0 R 1184 0 R 1195 0 R 1201 0 R 1212 0 R 1217 0 R] +/Parent 2251 0 R +/Kids [1428 0 R 1435 0 R 1442 0 R 1446 0 R 1451 0 R 1457 0 R] >> -% 1232 0 obj +% 1471 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1228 0 R 1235 0 R 1244 0 R 1250 0 R 1258 0 R 1265 0 R] +/Parent 2251 0 R +/Kids [1463 0 R 1474 0 R 1479 0 R 1488 0 R 1496 0 R 1501 0 R] >> -% 1281 0 obj +% 1510 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1278 0 R 1286 0 R 1295 0 R 1303 0 R 1307 0 R 1322 0 R] +/Parent 2251 0 R +/Kids [1507 0 R 1512 0 R 1520 0 R 1525 0 R 1533 0 R 1539 0 R] >> -% 1330 0 obj +% 1557 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1327 0 R 1334 0 R 1341 0 R 1345 0 R 1351 0 R 1357 0 R] +/Parent 2251 0 R +/Kids [1548 0 R 1562 0 R 1566 0 R 1579 0 R 1585 0 R 1592 0 R] >> -% 1371 0 obj +% 1600 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1363 0 R 1374 0 R 1379 0 R 1388 0 R 1395 0 R 1400 0 R] +/Parent 2251 0 R +/Kids [1596 0 R 1604 0 R 1609 0 R 1618 0 R 1626 0 R 1630 0 R] >> -% 1409 0 obj +% 1638 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1406 0 R 1411 0 R 1419 0 R 1424 0 R 1432 0 R 1438 0 R] +/Parent 2251 0 R +/Kids [1635 0 R 1640 0 R 1647 0 R 1652 0 R 1658 0 R 1664 0 R] >> -% 1456 0 obj +% 1674 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1447 0 R 1461 0 R 1466 0 R 1479 0 R 1485 0 R 1492 0 R] +/Parent 2252 0 R +/Kids [1670 0 R 1677 0 R 1684 0 R 1691 0 R 1695 0 R 1705 0 R] >> -% 1500 0 obj +% 1713 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1496 0 R 1504 0 R 1508 0 R 1517 0 R 1525 0 R 1529 0 R] +/Parent 2252 0 R +/Kids [1710 0 R 1715 0 R 1728 0 R 1732 0 R 1738 0 R 1744 0 R] >> -% 1537 0 obj +% 1754 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1534 0 R 1539 0 R 1546 0 R 1551 0 R 1557 0 R 1563 0 R] +/Parent 2252 0 R +/Kids [1751 0 R 1756 0 R 1760 0 R 1764 0 R 1768 0 R 1772 0 R] >> -% 1574 0 obj +% 1783 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1570 0 R 1577 0 R 1584 0 R 1591 0 R 1595 0 R 1605 0 R] +/Parent 2252 0 R +/Kids [1776 0 R 1785 0 R 1789 0 R 1796 0 R 1800 0 R 1807 0 R] >> -% 1612 0 obj +% 1817 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1609 0 R 1614 0 R 1627 0 R 1631 0 R 1637 0 R 1643 0 R] +/Parent 2252 0 R +/Kids [1811 0 R 1819 0 R 1824 0 R 1831 0 R 1835 0 R 1842 0 R] >> -% 1653 0 obj +% 1855 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1650 0 R 1655 0 R 1659 0 R 1663 0 R 1667 0 R 1671 0 R] +/Parent 2252 0 R +/Kids [1846 0 R 1857 0 R 1862 0 R 1869 0 R 1875 0 R 1879 0 R] >> -% 1683 0 obj +% 1890 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1676 0 R 1685 0 R 1689 0 R 1696 0 R 1700 0 R 1707 0 R] +/Parent 2253 0 R +/Kids [1885 0 R 1892 0 R 1898 0 R 1904 0 R 1909 0 R 1916 0 R] >> -% 1717 0 obj +% 1928 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1711 0 R 1719 0 R 1723 0 R 1730 0 R 1734 0 R 1741 0 R] +/Parent 2253 0 R +/Kids [1923 0 R 1932 0 R 1939 0 R 1946 0 R 1952 0 R 1956 0 R] >> -% 1754 0 obj +% 1966 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1745 0 R 1756 0 R 1761 0 R 1768 0 R 1774 0 R 1778 0 R] +/Parent 2253 0 R +/Kids [1962 0 R 1972 0 R 1976 0 R 1984 0 R 1989 0 R 1993 0 R] >> -% 1789 0 obj +% 2002 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1784 0 R 1792 0 R 1798 0 R 1804 0 R 1809 0 R 1816 0 R] +/Parent 2253 0 R +/Kids [1999 0 R 2004 0 R 2011 0 R 2022 0 R 2027 0 R 2037 0 R] >> -% 1828 0 obj +% 2057 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1823 0 R 1831 0 R 1838 0 R 1845 0 R 1851 0 R 1855 0 R] +/Parent 2253 0 R +/Kids [2052 0 R 2062 0 R 2073 0 R 2101 0 R 2121 0 R 2139 0 R] >> -% 1865 0 obj +% 2158 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1861 0 R 1871 0 R 1875 0 R 1883 0 R 1888 0 R 1893 0 R] +/Parent 2253 0 R +/Kids [2155 0 R 2160 0 R 2164 0 R 2170 0 R 2174 0 R 2178 0 R] >> -% 1902 0 obj +% 2189 0 obj << /Type /Pages -/Count 6 -/Parent 1993 0 R -/Kids [1899 0 R 1904 0 R 1911 0 R 1922 0 R 1927 0 R 1931 0 R] +/Count 2 +/Parent 2254 0 R +/Kids [2182 0 R 2191 0 R] >> -% 1942 0 obj +% 2248 0 obj << /Type /Pages -/Count 2 -/Parent 1993 0 R -/Kids [1935 0 R 1944 0 R] +/Count 36 +/Parent 2255 0 R +/Kids [668 0 R 881 0 R 971 0 R 1028 0 R 1062 0 R 1092 0 R] >> -% 1989 0 obj +% 2249 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [587 0 R 780 0 R 871 0 R 927 0 R 962 0 R 991 0 R] +/Parent 2255 0 R +/Kids [1134 0 R 1180 0 R 1222 0 R 1281 0 R 1333 0 R 1382 0 R] >> -% 1990 0 obj +% 2251 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1034 0 R 1079 0 R 1121 0 R 1180 0 R 1232 0 R 1281 0 R] +/Parent 2255 0 R +/Kids [1431 0 R 1471 0 R 1510 0 R 1557 0 R 1600 0 R 1638 0 R] >> -% 1991 0 obj +% 2252 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1330 0 R 1371 0 R 1409 0 R 1456 0 R 1500 0 R 1537 0 R] +/Parent 2255 0 R +/Kids [1674 0 R 1713 0 R 1754 0 R 1783 0 R 1817 0 R 1855 0 R] >> -% 1992 0 obj +% 2253 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1574 0 R 1612 0 R 1653 0 R 1683 0 R 1717 0 R 1754 0 R] +/Parent 2255 0 R +/Kids [1890 0 R 1928 0 R 1966 0 R 2002 0 R 2057 0 R 2158 0 R] >> -% 1993 0 obj +% 2254 0 obj << /Type /Pages -/Count 26 -/Parent 1994 0 R -/Kids [1789 0 R 1828 0 R 1865 0 R 1902 0 R 1942 0 R] +/Count 2 +/Parent 2255 0 R +/Kids [2189 0 R] >> -% 1994 0 obj +% 2255 0 obj << /Type /Pages -/Count 170 -/Kids [1989 0 R 1990 0 R 1991 0 R 1992 0 R 1993 0 R] +/Count 182 +/Kids [2248 0 R 2249 0 R 2251 0 R 2252 0 R 2253 0 R 2254 0 R] >> -% 1995 0 obj +% 2256 0 obj << /Type /Outlines /First 4 0 R /Last 4 0 R /Count 1 >> +% 655 0 obj +<< +/Title 656 0 R +/A 653 0 R +/Parent 598 0 R +/Prev 651 0 R +>> +% 651 0 obj +<< +/Title 652 0 R +/A 649 0 R +/Parent 598 0 R +/Prev 647 0 R +/Next 655 0 R +>> +% 647 0 obj +<< +/Title 648 0 R +/A 645 0 R +/Parent 598 0 R +/Prev 643 0 R +/Next 651 0 R +>> +% 643 0 obj +<< +/Title 644 0 R +/A 641 0 R +/Parent 598 0 R +/Prev 639 0 R +/Next 647 0 R +>> +% 639 0 obj +<< +/Title 640 0 R +/A 637 0 R +/Parent 598 0 R +/Prev 635 0 R +/Next 643 0 R +>> +% 635 0 obj +<< +/Title 636 0 R +/A 633 0 R +/Parent 598 0 R +/Prev 631 0 R +/Next 639 0 R +>> +% 631 0 obj +<< +/Title 632 0 R +/A 629 0 R +/Parent 598 0 R +/Prev 627 0 R +/Next 635 0 R +>> +% 627 0 obj +<< +/Title 628 0 R +/A 625 0 R +/Parent 598 0 R +/Prev 623 0 R +/Next 631 0 R +>> +% 623 0 obj +<< +/Title 624 0 R +/A 621 0 R +/Parent 598 0 R +/Prev 619 0 R +/Next 627 0 R +>> +% 619 0 obj +<< +/Title 620 0 R +/A 617 0 R +/Parent 598 0 R +/Prev 615 0 R +/Next 623 0 R +>> +% 615 0 obj +<< +/Title 616 0 R +/A 613 0 R +/Parent 598 0 R +/Prev 611 0 R +/Next 619 0 R +>> +% 611 0 obj +<< +/Title 612 0 R +/A 609 0 R +/Parent 598 0 R +/Prev 607 0 R +/Next 615 0 R +>> +% 607 0 obj +<< +/Title 608 0 R +/A 604 0 R +/Parent 598 0 R +/Prev 602 0 R +/Next 611 0 R +>> +% 602 0 obj +<< +/Title 603 0 R +/A 600 0 R +/Parent 598 0 R +/Next 607 0 R +>> +% 598 0 obj +<< +/Title 599 0 R +/A 596 0 R +/Parent 4 0 R +/Prev 578 0 R +/First 602 0 R +/Last 655 0 R +/Count -14 +>> +% 594 0 obj +<< +/Title 595 0 R +/A 592 0 R +/Parent 578 0 R +/Prev 590 0 R +>> +% 590 0 obj +<< +/Title 591 0 R +/A 588 0 R +/Parent 578 0 R +/Prev 586 0 R +/Next 594 0 R +>> +% 586 0 obj +<< +/Title 587 0 R +/A 584 0 R +/Parent 578 0 R +/Prev 582 0 R +/Next 590 0 R +>> +% 582 0 obj +<< +/Title 583 0 R +/A 580 0 R +/Parent 578 0 R +/Next 586 0 R +>> +% 578 0 obj +<< +/Title 579 0 R +/A 576 0 R +/Parent 4 0 R +/Prev 570 0 R +/Next 598 0 R +/First 582 0 R +/Last 594 0 R +/Count -4 +>> % 574 0 obj << /Title 575 0 R @@ -28629,6 +35229,7 @@ stream /A 568 0 R /Parent 4 0 R /Prev 542 0 R +/Next 578 0 R /First 574 0 R /Last 574 0 R /Count -1 @@ -29012,6 +35613,27 @@ stream /Prev 377 0 R /Next 385 0 R >> + +endstream +endobj +2257 0 obj +<< +/Type /ObjStm +/N 100 +/First 865 +/Length 10172 +>> +stream +377 0 373 88 369 176 365 264 361 352 357 440 353 528 349 616 345 704 341 792 +337 880 333 968 329 1056 325 1144 321 1232 317 1320 313 1408 309 1482 305 1608 301 1682 +297 1770 293 1858 289 1932 285 2057 281 2131 277 2219 273 2307 269 2395 265 2483 261 2571 +257 2659 253 2747 249 2835 245 2923 241 3011 237 3099 233 3187 229 3275 225 3363 221 3437 +217 3562 213 3635 209 3722 205 3796 200 3884 196 3972 192 4060 188 4148 184 4222 180 4348 +176 4422 172 4510 168 4598 164 4686 160 4774 156 4862 152 4950 148 5038 144 5126 140 5214 +136 5302 132 5390 128 5478 124 5566 120 5654 116 5742 112 5830 108 5918 104 5992 100 6118 +96 6189 92 6272 88 6354 84 6436 80 6518 76 6600 72 6682 68 6764 64 6846 60 6928 +56 7010 52 7092 48 7174 44 7256 40 7325 36 7434 32 7554 28 7623 24 7679 20 7798 +16 7880 12 7949 8 8066 4 8131 2258 8224 2259 8420 2260 8593 2261 8773 2262 8950 2263 9127 % 377 0 obj << /Title 378 0 R @@ -29092,27 +35714,6 @@ stream /Prev 337 0 R /Next 345 0 R >> - -endstream -endobj -1996 0 obj -<< -/Type /ObjStm -/N 100 -/First 875 -/Length 11008 ->> -stream -337 0 333 88 329 176 325 264 321 352 317 440 313 528 309 602 305 728 301 802 -297 890 293 978 289 1052 285 1177 281 1251 277 1339 273 1427 269 1515 265 1603 261 1691 -257 1779 253 1867 249 1955 245 2043 241 2131 237 2219 233 2307 229 2395 225 2483 221 2557 -217 2682 213 2755 209 2842 205 2916 200 3004 196 3092 192 3180 188 3268 184 3342 180 3468 -176 3542 172 3630 168 3718 164 3806 160 3894 156 3982 152 4070 148 4158 144 4246 140 4334 -136 4422 132 4510 128 4598 124 4686 120 4774 116 4862 112 4950 108 5038 104 5112 100 5238 -96 5309 92 5392 88 5474 84 5556 80 5638 76 5720 72 5802 68 5884 64 5966 60 6048 -56 6130 52 6212 48 6294 44 6376 40 6445 36 6554 32 6674 28 6743 24 6799 20 6918 -16 7000 12 7069 8 7186 4 7251 1997 7344 1998 7540 1999 7713 2000 7893 2001 8070 2002 8247 -2003 8427 2004 8605 2005 8785 2006 8963 2007 9134 2008 9299 2009 9465 2010 9629 2011 9793 2012 9963 % 337 0 obj << /Title 338 0 R @@ -29790,652 +36391,724 @@ stream << /Title 5 0 R /A 1 0 R -/Parent 1995 0 R +/Parent 2256 0 R /First 8 0 R -/Last 570 0 R -/Count -11 +/Last 598 0 R +/Count -13 >> -% 1997 0 obj +% 2258 0 obj << -/Names [(Doc-Start) 583 0 R (Hfootnote.1) 815 0 R (Hfootnote.2) 816 0 R (Hfootnote.3) 869 0 R (Hfootnote.4) 1864 0 R (Hfootnote.5) 1917 0 R] +/Names [(Doc-Start) 664 0 R (Hfootnote.1) 916 0 R (Hfootnote.2) 917 0 R (Hfootnote.3) 969 0 R (Hfootnote.4) 1965 0 R (Hfootnote.5) 2017 0 R] /Limits [(Doc-Start) (Hfootnote.5)] >> -% 1998 0 obj +% 2259 0 obj << -/Names [(Item.1) 843 0 R (Item.10) 857 0 R (Item.100) 1598 0 R (Item.101) 1599 0 R (Item.102) 1600 0 R (Item.103) 1617 0 R] +/Names [(Item.1) 943 0 R (Item.10) 957 0 R (Item.100) 1698 0 R (Item.101) 1699 0 R (Item.102) 1700 0 R (Item.103) 1718 0 R] /Limits [(Item.1) (Item.103)] >> -% 1999 0 obj +% 2260 0 obj << -/Names [(Item.104) 1618 0 R (Item.105) 1619 0 R (Item.106) 1620 0 R (Item.107) 1621 0 R (Item.108) 1622 0 R (Item.109) 1623 0 R] +/Names [(Item.104) 1719 0 R (Item.105) 1720 0 R (Item.106) 1721 0 R (Item.107) 1722 0 R (Item.108) 1723 0 R (Item.109) 1724 0 R] /Limits [(Item.104) (Item.109)] >> -% 2000 0 obj +% 2261 0 obj << -/Names [(Item.11) 858 0 R (Item.110) 1624 0 R (Item.111) 1625 0 R (Item.112) 1634 0 R (Item.113) 1635 0 R (Item.114) 1640 0 R] +/Names [(Item.11) 958 0 R (Item.110) 1725 0 R (Item.111) 1726 0 R (Item.112) 1735 0 R (Item.113) 1736 0 R (Item.114) 1741 0 R] /Limits [(Item.11) (Item.114)] >> -% 2001 0 obj +% 2262 0 obj << -/Names [(Item.115) 1641 0 R (Item.116) 1646 0 R (Item.117) 1647 0 R (Item.118) 1648 0 R (Item.119) 1679 0 R (Item.12) 859 0 R] +/Names [(Item.115) 1742 0 R (Item.116) 1747 0 R (Item.117) 1748 0 R (Item.118) 1749 0 R (Item.119) 1779 0 R (Item.12) 959 0 R] /Limits [(Item.115) (Item.12)] >> -% 2002 0 obj +% 2263 0 obj << -/Names [(Item.120) 1680 0 R (Item.121) 1681 0 R (Item.122) 1692 0 R (Item.123) 1693 0 R (Item.124) 1694 0 R (Item.125) 1703 0 R] +/Names [(Item.120) 1780 0 R (Item.121) 1781 0 R (Item.122) 1792 0 R (Item.123) 1793 0 R (Item.124) 1794 0 R (Item.125) 1803 0 R] /Limits [(Item.120) (Item.125)] >> -% 2003 0 obj + +endstream +endobj +2265 0 obj << -/Names [(Item.126) 1704 0 R (Item.127) 1705 0 R (Item.128) 1714 0 R (Item.129) 1715 0 R (Item.13) 860 0 R (Item.130) 1716 0 R] +/Type /ObjStm +/N 100 +/First 1037 +/Length 20119 +>> +stream +2264 0 2266 178 2267 358 2268 536 2269 713 2270 878 2271 1044 2272 1210 2273 1382 2274 1552 +2275 1724 2276 1894 2277 2066 2278 2235 2279 2404 2280 2576 2281 2746 2282 2918 2283 3088 2284 3260 +2285 3449 2286 3639 2287 3862 2288 4083 2289 4288 2290 4478 2291 4660 2292 4856 2293 5081 2294 5301 +2295 5532 2296 5768 2297 6000 2298 6211 2299 6388 2300 6566 2301 6746 2302 6925 2303 7105 2304 7284 +2305 7464 2306 7643 2307 7823 2308 8002 2309 8182 2310 8360 2311 8538 2312 8718 2313 8890 2314 9062 +2315 9232 2316 9404 2317 9574 2318 9746 2319 9915 2320 10084 2321 10256 2322 10426 2323 10598 2324 10768 +2325 10940 2326 11110 2327 11282 2328 11452 2329 11648 2330 11845 2331 12041 2332 12228 2333 12410 2334 12604 +2335 12834 2336 13059 2337 13278 2338 13505 2339 13731 2340 13953 2341 14178 2342 14408 2343 14637 2344 14865 +2345 15087 2346 15315 2347 15541 2348 15763 2349 15985 2350 16226 2351 16488 2352 16744 2353 17010 2354 17280 +2355 17542 2356 17804 2357 18014 2358 18194 2359 18370 2360 18539 2361 18635 2362 18749 2363 18861 2364 18972 +% 2264 0 obj +<< +/Names [(Item.126) 1804 0 R (Item.127) 1805 0 R (Item.128) 1814 0 R (Item.129) 1815 0 R (Item.13) 960 0 R (Item.130) 1816 0 R] /Limits [(Item.126) (Item.130)] >> -% 2004 0 obj +% 2266 0 obj << -/Names [(Item.131) 1726 0 R (Item.132) 1727 0 R (Item.133) 1728 0 R (Item.134) 1737 0 R (Item.135) 1738 0 R (Item.136) 1739 0 R] +/Names [(Item.131) 1827 0 R (Item.132) 1828 0 R (Item.133) 1829 0 R (Item.134) 1838 0 R (Item.135) 1839 0 R (Item.136) 1840 0 R] /Limits [(Item.131) (Item.136)] >> -% 2005 0 obj +% 2267 0 obj << -/Names [(Item.137) 1748 0 R (Item.138) 1749 0 R (Item.139) 1751 0 R (Item.14) 861 0 R (Item.140) 1752 0 R (Item.141) 1753 0 R] +/Names [(Item.137) 1849 0 R (Item.138) 1850 0 R (Item.139) 1852 0 R (Item.14) 961 0 R (Item.140) 1853 0 R (Item.141) 1854 0 R] /Limits [(Item.137) (Item.141)] >> -% 2006 0 obj +% 2268 0 obj << -/Names [(Item.142) 1759 0 R (Item.143) 1764 0 R (Item.15) 862 0 R (Item.16) 863 0 R (Item.17) 864 0 R (Item.18) 865 0 R] -/Limits [(Item.142) (Item.18)] +/Names [(Item.142) 1860 0 R (Item.143) 1865 0 R (Item.144) 2066 0 R (Item.145) 2067 0 R (Item.146) 2167 0 R (Item.15) 962 0 R] +/Limits [(Item.142) (Item.15)] >> -% 2007 0 obj +% 2269 0 obj << -/Names [(Item.19) 866 0 R (Item.2) 844 0 R (Item.20) 867 0 R (Item.21) 868 0 R (Item.22) 877 0 R (Item.23) 878 0 R] -/Limits [(Item.19) (Item.23)] +/Names [(Item.16) 963 0 R (Item.17) 964 0 R (Item.18) 965 0 R (Item.19) 966 0 R (Item.2) 944 0 R (Item.20) 967 0 R] +/Limits [(Item.16) (Item.20)] >> -% 2008 0 obj +% 2270 0 obj << -/Names [(Item.24) 879 0 R (Item.25) 880 0 R (Item.26) 881 0 R (Item.27) 882 0 R (Item.28) 896 0 R (Item.29) 897 0 R] -/Limits [(Item.24) (Item.29)] +/Names [(Item.21) 968 0 R (Item.22) 978 0 R (Item.23) 979 0 R (Item.24) 980 0 R (Item.25) 981 0 R (Item.26) 982 0 R] +/Limits [(Item.21) (Item.26)] >> -% 2009 0 obj +% 2271 0 obj << -/Names [(Item.3) 845 0 R (Item.30) 898 0 R (Item.31) 899 0 R (Item.32) 900 0 R (Item.33) 907 0 R (Item.34) 908 0 R] -/Limits [(Item.3) (Item.34)] +/Names [(Item.27) 983 0 R (Item.28) 997 0 R (Item.29) 998 0 R (Item.3) 945 0 R (Item.30) 999 0 R (Item.31) 1000 0 R] +/Limits [(Item.27) (Item.31)] >> -% 2010 0 obj +% 2272 0 obj << -/Names [(Item.35) 909 0 R (Item.36) 910 0 R (Item.37) 911 0 R (Item.38) 912 0 R (Item.39) 913 0 R (Item.4) 846 0 R] -/Limits [(Item.35) (Item.4)] +/Names [(Item.32) 1001 0 R (Item.33) 1008 0 R (Item.34) 1009 0 R (Item.35) 1010 0 R (Item.36) 1011 0 R (Item.37) 1012 0 R] +/Limits [(Item.32) (Item.37)] >> -% 2011 0 obj +% 2273 0 obj << -/Names [(Item.40) 914 0 R (Item.41) 957 0 R (Item.42) 1050 0 R (Item.43) 1078 0 R (Item.44) 1100 0 R (Item.45) 1127 0 R] -/Limits [(Item.40) (Item.45)] +/Names [(Item.38) 1013 0 R (Item.39) 1014 0 R (Item.4) 946 0 R (Item.40) 1015 0 R (Item.41) 1057 0 R (Item.42) 1150 0 R] +/Limits [(Item.38) (Item.42)] >> -% 2012 0 obj +% 2274 0 obj << -/Names [(Item.46) 1298 0 R (Item.47) 1299 0 R (Item.48) 1300 0 R (Item.49) 1354 0 R (Item.5) 847 0 R (Item.50) 1361 0 R] -/Limits [(Item.46) (Item.50)] +/Names [(Item.43) 1179 0 R (Item.44) 1201 0 R (Item.45) 1227 0 R (Item.46) 1399 0 R (Item.47) 1400 0 R (Item.48) 1401 0 R] +/Limits [(Item.43) (Item.48)] >> - -endstream -endobj -2112 0 obj +% 2275 0 obj << - /Title (Parallel Sparse BLAS V. 3.8.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() -/CreationDate (D:20220616091307+02'00') -/ModDate (D:20220616091307+02'00') -/Trapped /False -/PTEX.Fullbanner (This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2) +/Names [(Item.49) 1454 0 R (Item.5) 947 0 R (Item.50) 1461 0 R (Item.51) 1466 0 R (Item.52) 1467 0 R (Item.53) 1468 0 R] +/Limits [(Item.49) (Item.53)] >> -endobj -2014 0 obj +% 2276 0 obj << -/Type /ObjStm -/N 98 -/First 1015 -/Length 18404 ->> -stream -2013 0 2015 172 2016 342 2017 514 2018 684 2019 856 2020 1026 2021 1198 2022 1367 2023 1544 -2024 1742 2025 1968 2026 2184 2027 2373 2028 2555 2029 2767 2030 2987 2031 3212 2032 3445 2033 3673 -2034 3846 2035 4026 2036 4203 2037 4380 2038 4560 2039 4738 2040 4918 2041 5096 2042 5276 2043 5454 -2044 5634 2045 5812 2046 5980 2047 6146 2048 6316 2049 6488 2050 6658 2051 6830 2052 7000 2053 7172 -2054 7341 2055 7510 2056 7682 2057 7852 2058 8024 2059 8194 2060 8366 2061 8534 2062 8719 2063 8899 -2064 9097 2065 9324 2066 9542 2067 9768 2068 9995 2069 10217 2070 10441 2071 10671 2072 10900 2073 11130 -2074 11352 2075 11579 2076 11806 2077 12028 2078 12250 2079 12485 2080 12749 2081 13005 2082 13269 2083 13539 -2084 13803 2085 14065 2086 14284 2087 14464 2088 14640 2089 14809 2090 14903 2091 15017 2092 15129 2093 15239 -2094 15350 2095 15463 2096 15577 2097 15690 2098 15802 2099 15913 2100 16024 2101 16143 2102 16270 2103 16396 -2104 16527 2105 16651 2106 16724 2107 16838 2108 16958 2109 17057 2110 17141 2111 17176 -% 2013 0 obj +/Names [(Item.54) 1469 0 R (Item.55) 1470 0 R (Item.56) 1482 0 R (Item.57) 1483 0 R (Item.58) 1484 0 R (Item.59) 1491 0 R] +/Limits [(Item.54) (Item.59)] +>> +% 2277 0 obj << -/Names [(Item.51) 1366 0 R (Item.52) 1367 0 R (Item.53) 1368 0 R (Item.54) 1369 0 R (Item.55) 1370 0 R (Item.56) 1382 0 R] -/Limits [(Item.51) (Item.56)] +/Names [(Item.6) 948 0 R (Item.60) 1515 0 R (Item.61) 1516 0 R (Item.62) 1523 0 R (Item.63) 1528 0 R (Item.64) 1529 0 R] +/Limits [(Item.6) (Item.64)] >> -% 2015 0 obj +% 2278 0 obj << -/Names [(Item.57) 1383 0 R (Item.58) 1384 0 R (Item.59) 1391 0 R (Item.6) 848 0 R (Item.60) 1414 0 R (Item.61) 1415 0 R] -/Limits [(Item.57) (Item.61)] +/Names [(Item.65) 1530 0 R (Item.66) 1542 0 R (Item.67) 1543 0 R (Item.68) 1544 0 R (Item.69) 1545 0 R (Item.7) 949 0 R] +/Limits [(Item.65) (Item.7)] >> -% 2016 0 obj +% 2279 0 obj << -/Names [(Item.62) 1422 0 R (Item.63) 1427 0 R (Item.64) 1428 0 R (Item.65) 1429 0 R (Item.66) 1441 0 R (Item.67) 1442 0 R] -/Limits [(Item.62) (Item.67)] +/Names [(Item.70) 1546 0 R (Item.71) 1551 0 R (Item.72) 1552 0 R (Item.73) 1553 0 R (Item.74) 1554 0 R (Item.75) 1555 0 R] +/Limits [(Item.70) (Item.75)] >> -% 2017 0 obj +% 2280 0 obj << -/Names [(Item.68) 1443 0 R (Item.69) 1444 0 R (Item.7) 849 0 R (Item.70) 1445 0 R (Item.71) 1450 0 R (Item.72) 1451 0 R] -/Limits [(Item.68) (Item.72)] +/Names [(Item.76) 1556 0 R (Item.77) 1569 0 R (Item.78) 1570 0 R (Item.79) 1571 0 R (Item.8) 950 0 R (Item.80) 1572 0 R] +/Limits [(Item.76) (Item.80)] >> -% 2018 0 obj +% 2281 0 obj << -/Names [(Item.73) 1452 0 R (Item.74) 1453 0 R (Item.75) 1454 0 R (Item.76) 1455 0 R (Item.77) 1469 0 R (Item.78) 1470 0 R] -/Limits [(Item.73) (Item.78)] +/Names [(Item.81) 1573 0 R (Item.82) 1574 0 R (Item.83) 1575 0 R (Item.84) 1588 0 R (Item.85) 1599 0 R (Item.86) 1612 0 R] +/Limits [(Item.81) (Item.86)] >> -% 2019 0 obj +% 2282 0 obj << -/Names [(Item.79) 1471 0 R (Item.8) 850 0 R (Item.80) 1472 0 R (Item.81) 1473 0 R (Item.82) 1474 0 R (Item.83) 1475 0 R] -/Limits [(Item.79) (Item.83)] +/Names [(Item.87) 1613 0 R (Item.88) 1621 0 R (Item.89) 1622 0 R (Item.9) 956 0 R (Item.90) 1643 0 R (Item.91) 1644 0 R] +/Limits [(Item.87) (Item.91)] >> -% 2020 0 obj +% 2283 0 obj << -/Names [(Item.84) 1488 0 R (Item.85) 1499 0 R (Item.86) 1511 0 R (Item.87) 1512 0 R (Item.88) 1520 0 R (Item.89) 1521 0 R] -/Limits [(Item.84) (Item.89)] +/Names [(Item.92) 1655 0 R (Item.93) 1661 0 R (Item.94) 1667 0 R (Item.95) 1673 0 R (Item.96) 1680 0 R (Item.97) 1681 0 R] +/Limits [(Item.92) (Item.97)] >> -% 2021 0 obj +% 2284 0 obj << -/Names [(Item.9) 856 0 R (Item.90) 1542 0 R (Item.91) 1543 0 R (Item.92) 1554 0 R (Item.93) 1560 0 R (Item.94) 1566 0 R] -/Limits [(Item.9) (Item.94)] +/Names [(Item.98) 1687 0 R (Item.99) 1688 0 R (algocf.1) 2071 0 R (algocf.2) 2117 0 R (algocfline.1) 2056 0 R (algocfline.2) 2124 0 R] +/Limits [(Item.98) (algocfline.2)] >> -% 2022 0 obj +% 2285 0 obj << -/Names [(Item.95) 1573 0 R (Item.96) 1580 0 R (Item.97) 1581 0 R (Item.98) 1587 0 R (Item.99) 1588 0 R (cite.2007c) 830 0 R] -/Limits [(Item.95) (cite.2007c)] +/Names [(cite.2007c) 930 0 R (cite.2007d) 931 0 R (cite.BLACS) 906 0 R (cite.BLAS1) 889 0 R (cite.BLAS2) 890 0 R (cite.BLAS3) 891 0 R] +/Limits [(cite.2007c) (cite.BLAS3)] >> -% 2023 0 obj +% 2286 0 obj << -/Names [(cite.2007d) 831 0 R (cite.BLACS) 805 0 R (cite.BLAS1) 788 0 R (cite.BLAS2) 789 0 R (cite.BLAS3) 790 0 R (cite.DesPat:11) 783 0 R] -/Limits [(cite.2007d) (cite.DesPat:11)] +/Names [(cite.CaFiRo:2014) 2042 0 R (cite.DesPat:11) 884 0 R (cite.DesignPatterns) 1046 0 R (cite.KIVA3PSBLAS) 2188 0 R (cite.METIS) 918 0 R (cite.MPI1) 2194 0 R] +/Limits [(cite.CaFiRo:2014) (cite.MPI1)] >> -% 2024 0 obj +% 2287 0 obj << -/Names [(cite.DesignPatterns) 946 0 R (cite.KIVA3PSBLAS) 1941 0 R (cite.METIS) 817 0 R (cite.MPI1) 1947 0 R (cite.PARA04FOREST) 1939 0 R (cite.PSBLAS) 1940 0 R] -/Limits [(cite.DesignPatterns) (cite.PSBLAS)] +/Names [(cite.MRC:11) 2058 0 R (cite.OurTechRep) 2043 0 R (cite.PARA04FOREST) 2186 0 R (cite.PSBLAS) 2187 0 R (cite.RouXiaXu:11) 885 0 R (cite.Sparse03) 883 0 R] +/Limits [(cite.MRC:11) (cite.Sparse03)] >> -% 2025 0 obj +% 2288 0 obj << -/Names [(cite.RouXiaXu:11) 784 0 R (cite.Sparse03) 782 0 R (cite.machiels) 785 0 R (cite.metcalf) 781 0 R (cite.sblas02) 787 0 R (cite.sblas97) 786 0 R] -/Limits [(cite.RouXiaXu:11) (cite.sblas97)] +/Names [(cite.machiels) 886 0 R (cite.metcalf) 882 0 R (cite.sblas02) 888 0 R (cite.sblas97) 887 0 R (descdata) 989 0 R (equation.4.1) 1265 0 R] +/Limits [(cite.machiels) (equation.4.1)] >> -% 2026 0 obj +% 2289 0 obj << -/Names [(descdata) 888 0 R (equation.4.1) 1165 0 R (equation.4.2) 1166 0 R (equation.4.3) 1167 0 R (figure.1) 799 0 R (figure.2) 825 0 R] -/Limits [(descdata) (figure.2)] +/Names [(equation.4.2) 1266 0 R (equation.4.3) 1267 0 R (figure.1) 900 0 R (figure.2) 926 0 R (figure.3) 1368 0 R (figure.4) 1402 0 R] +/Limits [(equation.4.2) (figure.4)] >> -% 2027 0 obj +% 2290 0 obj << -/Names [(figure.3) 1268 0 R (figure.4) 1301 0 R (listing.1) 916 0 R (listing.2) 947 0 R (listing.3) 986 0 R (listing.4) 1006 0 R] -/Limits [(figure.3) (listing.4)] +/Names [(figure.5) 2065 0 R (figure.6) 2060 0 R (figure.7) 2104 0 R (figure.8) 2116 0 R (figure.9) 2142 0 R (listing.1) 1017 0 R] +/Limits [(figure.5) (listing.1)] >> -% 2028 0 obj +% 2291 0 obj << -/Names [(listing.5) 1771 0 R (listing.6) 1772 0 R (lstlisting.-1) 1128 0 R (lstlisting.-10) 1841 0 R (lstlisting.-11) 1848 0 R (lstlisting.-12) 1914 0 R] -/Limits [(listing.5) (lstlisting.-12)] +/Names [(listing.2) 1047 0 R (listing.3) 1087 0 R (listing.4) 1107 0 R (listing.5) 1872 0 R (listing.6) 1873 0 R (lstlisting.-1) 1228 0 R] +/Limits [(listing.2) (lstlisting.-1)] >> -% 2029 0 obj +% 2292 0 obj << -/Names [(lstlisting.-2) 1781 0 R (lstlisting.-3) 1787 0 R (lstlisting.-4) 1795 0 R (lstlisting.-5) 1801 0 R (lstlisting.-6) 1812 0 R (lstlisting.-7) 1819 0 R] -/Limits [(lstlisting.-2) (lstlisting.-7)] +/Names [(lstlisting.-10) 1942 0 R (lstlisting.-11) 1949 0 R (lstlisting.-12) 2014 0 R (lstlisting.-13) 2076 0 R (lstlisting.-2) 1882 0 R (lstlisting.-3) 1888 0 R] +/Limits [(lstlisting.-10) (lstlisting.-3)] >> -% 2030 0 obj +% 2293 0 obj << -/Names [(lstlisting.-8) 1826 0 R (lstlisting.-9) 1834 0 R (lstnumber.-1.1) 1129 0 R (lstnumber.-1.2) 1130 0 R (lstnumber.-1.3) 1131 0 R (lstnumber.-1.4) 1132 0 R] -/Limits [(lstlisting.-8) (lstnumber.-1.4)] +/Names [(lstlisting.-4) 1895 0 R (lstlisting.-5) 1901 0 R (lstlisting.-6) 1912 0 R (lstlisting.-7) 1919 0 R (lstlisting.-8) 1926 0 R (lstlisting.-9) 1935 0 R] +/Limits [(lstlisting.-4) (lstlisting.-9)] >> -% 2031 0 obj +% 2294 0 obj << -/Names [(lstnumber.-10.1) 1842 0 R (lstnumber.-11.1) 1849 0 R (lstnumber.-12.1) 1915 0 R (lstnumber.-12.2) 1916 0 R (lstnumber.-2.1) 1782 0 R (lstnumber.-3.1) 1788 0 R] -/Limits [(lstnumber.-10.1) (lstnumber.-3.1)] +/Names [(lstnumber.-1.1) 1229 0 R (lstnumber.-1.2) 1230 0 R (lstnumber.-1.3) 1231 0 R (lstnumber.-1.4) 1232 0 R (lstnumber.-10.1) 1943 0 R (lstnumber.-11.1) 1950 0 R] +/Limits [(lstnumber.-1.1) (lstnumber.-11.1)] >> -% 2032 0 obj +% 2295 0 obj << -/Names [(lstnumber.-4.1) 1796 0 R (lstnumber.-5.1) 1802 0 R (lstnumber.-6.1) 1813 0 R (lstnumber.-7.1) 1820 0 R (lstnumber.-8.1) 1827 0 R (lstnumber.-9.1) 1835 0 R] -/Limits [(lstnumber.-4.1) (lstnumber.-9.1)] +/Names [(lstnumber.-12.1) 2015 0 R (lstnumber.-12.2) 2016 0 R (lstnumber.-13.1) 2077 0 R (lstnumber.-13.2) 2078 0 R (lstnumber.-13.3) 2079 0 R (lstnumber.-13.4) 2080 0 R] +/Limits [(lstnumber.-12.1) (lstnumber.-13.4)] >> -% 2033 0 obj +% 2296 0 obj << -/Names [(page.1) 582 0 R (page.10) 895 0 R (page.100) 1553 0 R (page.101) 1559 0 R (page.102) 1565 0 R (page.103) 1572 0 R] -/Limits [(page.1) (page.103)] +/Names [(lstnumber.-13.5) 2081 0 R (lstnumber.-13.6) 2082 0 R (lstnumber.-13.7) 2083 0 R (lstnumber.-2.1) 1883 0 R (lstnumber.-3.1) 1889 0 R (lstnumber.-4.1) 1896 0 R] +/Limits [(lstnumber.-13.5) (lstnumber.-4.1)] >> -% 2034 0 obj +% 2297 0 obj << -/Names [(page.104) 1579 0 R (page.105) 1586 0 R (page.106) 1593 0 R (page.107) 1597 0 R (page.108) 1607 0 R (page.109) 1611 0 R] -/Limits [(page.104) (page.109)] +/Names [(lstnumber.-5.1) 1902 0 R (lstnumber.-6.1) 1913 0 R (lstnumber.-7.1) 1920 0 R (lstnumber.-8.1) 1927 0 R (lstnumber.-9.1) 1936 0 R (page.1) 663 0 R] +/Limits [(lstnumber.-5.1) (page.1)] >> -% 2035 0 obj +% 2298 0 obj << -/Names [(page.11) 906 0 R (page.110) 1616 0 R (page.111) 1629 0 R (page.112) 1633 0 R (page.113) 1639 0 R (page.114) 1645 0 R] -/Limits [(page.11) (page.114)] +/Names [(page.10) 996 0 R (page.100) 1654 0 R (page.101) 1660 0 R (page.102) 1666 0 R (page.103) 1672 0 R (page.104) 1679 0 R] +/Limits [(page.10) (page.104)] >> -% 2036 0 obj +% 2299 0 obj << -/Names [(page.115) 1652 0 R (page.116) 1657 0 R (page.117) 1661 0 R (page.118) 1665 0 R (page.119) 1669 0 R (page.12) 922 0 R] -/Limits [(page.115) (page.12)] +/Names [(page.105) 1686 0 R (page.106) 1693 0 R (page.107) 1697 0 R (page.108) 1707 0 R (page.109) 1712 0 R (page.11) 1007 0 R] +/Limits [(page.105) (page.11)] >> -% 2037 0 obj +% 2300 0 obj << -/Names [(page.120) 1673 0 R (page.121) 1678 0 R (page.122) 1687 0 R (page.123) 1691 0 R (page.124) 1698 0 R (page.125) 1702 0 R] -/Limits [(page.120) (page.125)] +/Names [(page.110) 1717 0 R (page.111) 1730 0 R (page.112) 1734 0 R (page.113) 1740 0 R (page.114) 1746 0 R (page.115) 1753 0 R] +/Limits [(page.110) (page.115)] >> -% 2038 0 obj +% 2301 0 obj << -/Names [(page.126) 1709 0 R (page.127) 1713 0 R (page.128) 1721 0 R (page.129) 1725 0 R (page.13) 926 0 R (page.130) 1732 0 R] -/Limits [(page.126) (page.130)] +/Names [(page.116) 1758 0 R (page.117) 1762 0 R (page.118) 1766 0 R (page.119) 1770 0 R (page.12) 1023 0 R (page.120) 1774 0 R] +/Limits [(page.116) (page.120)] >> -% 2039 0 obj +% 2302 0 obj << -/Names [(page.131) 1736 0 R (page.132) 1743 0 R (page.133) 1747 0 R (page.134) 1758 0 R (page.135) 1763 0 R (page.136) 1770 0 R] -/Limits [(page.131) (page.136)] +/Names [(page.121) 1778 0 R (page.122) 1787 0 R (page.123) 1791 0 R (page.124) 1798 0 R (page.125) 1802 0 R (page.126) 1809 0 R] +/Limits [(page.121) (page.126)] >> -% 2040 0 obj +% 2303 0 obj << -/Names [(page.137) 1776 0 R (page.138) 1780 0 R (page.139) 1786 0 R (page.14) 932 0 R (page.140) 1794 0 R (page.141) 1800 0 R] -/Limits [(page.137) (page.141)] +/Names [(page.127) 1813 0 R (page.128) 1821 0 R (page.129) 1826 0 R (page.13) 1027 0 R (page.130) 1833 0 R (page.131) 1837 0 R] +/Limits [(page.127) (page.131)] >> -% 2041 0 obj +% 2304 0 obj << -/Names [(page.142) 1806 0 R (page.143) 1811 0 R (page.144) 1818 0 R (page.145) 1825 0 R (page.146) 1833 0 R (page.147) 1840 0 R] -/Limits [(page.142) (page.147)] +/Names [(page.132) 1844 0 R (page.133) 1848 0 R (page.134) 1859 0 R (page.135) 1864 0 R (page.136) 1871 0 R (page.137) 1877 0 R] +/Limits [(page.132) (page.137)] >> -% 2042 0 obj +% 2305 0 obj << -/Names [(page.148) 1847 0 R (page.149) 1853 0 R (page.15) 937 0 R (page.150) 1857 0 R (page.151) 1863 0 R (page.152) 1873 0 R] -/Limits [(page.148) (page.152)] +/Names [(page.138) 1881 0 R (page.139) 1887 0 R (page.14) 1032 0 R (page.140) 1894 0 R (page.141) 1900 0 R (page.142) 1906 0 R] +/Limits [(page.138) (page.142)] >> -% 2043 0 obj +% 2306 0 obj << -/Names [(page.153) 1877 0 R (page.154) 1885 0 R (page.155) 1890 0 R (page.156) 1895 0 R (page.157) 1901 0 R (page.158) 1906 0 R] -/Limits [(page.153) (page.158)] +/Names [(page.143) 1911 0 R (page.144) 1918 0 R (page.145) 1925 0 R (page.146) 1934 0 R (page.147) 1941 0 R (page.148) 1948 0 R] +/Limits [(page.143) (page.148)] >> -% 2044 0 obj +% 2307 0 obj << -/Names [(page.159) 1913 0 R (page.16) 944 0 R (page.160) 1924 0 R (page.161) 1929 0 R (page.162) 1933 0 R (page.163) 1937 0 R] -/Limits [(page.159) (page.163)] +/Names [(page.149) 1954 0 R (page.15) 1037 0 R (page.150) 1958 0 R (page.151) 1964 0 R (page.152) 1974 0 R (page.153) 1978 0 R] +/Limits [(page.149) (page.153)] >> -% 2045 0 obj +% 2308 0 obj << -/Names [(page.164) 1946 0 R (page.17) 951 0 R (page.18) 956 0 R (page.19) 961 0 R (page.2) 592 0 R (page.20) 966 0 R] -/Limits [(page.164) (page.20)] +/Names [(page.154) 1986 0 R (page.155) 1991 0 R (page.156) 1995 0 R (page.157) 2001 0 R (page.158) 2006 0 R (page.159) 2013 0 R] +/Limits [(page.154) (page.159)] >> -% 2046 0 obj +% 2309 0 obj << -/Names [(page.21) 970 0 R (page.22) 974 0 R (page.23) 978 0 R (page.24) 984 0 R (page.25) 990 0 R (page.26) 997 0 R] -/Limits [(page.21) (page.26)] +/Names [(page.16) 1044 0 R (page.160) 2024 0 R (page.161) 2029 0 R (page.162) 2039 0 R (page.163) 2054 0 R (page.164) 2064 0 R] +/Limits [(page.16) (page.164)] >> -% 2047 0 obj +% 2310 0 obj << -/Names [(page.27) 1004 0 R (page.28) 1010 0 R (page.29) 1015 0 R (page.3) 811 0 R (page.30) 1026 0 R (page.31) 1033 0 R] -/Limits [(page.27) (page.31)] +/Names [(page.165) 2075 0 R (page.166) 2103 0 R (page.167) 2123 0 R (page.168) 2141 0 R (page.169) 2157 0 R (page.17) 1051 0 R] +/Limits [(page.165) (page.17)] >> -% 2048 0 obj +% 2311 0 obj << -/Names [(page.32) 1043 0 R (page.33) 1049 0 R (page.34) 1060 0 R (page.35) 1065 0 R (page.36) 1072 0 R (page.37) 1077 0 R] -/Limits [(page.32) (page.37)] +/Names [(page.170) 2162 0 R (page.171) 2166 0 R (page.172) 2172 0 R (page.173) 2176 0 R (page.174) 2180 0 R (page.175) 2184 0 R] +/Limits [(page.170) (page.175)] >> -% 2049 0 obj +% 2312 0 obj << -/Names [(page.38) 1086 0 R (page.39) 1094 0 R (page.4) 824 0 R (page.40) 1099 0 R (page.41) 1107 0 R (page.42) 1112 0 R] -/Limits [(page.38) (page.42)] +/Names [(page.176) 2193 0 R (page.18) 1056 0 R (page.19) 1061 0 R (page.2) 673 0 R (page.20) 1067 0 R (page.21) 1071 0 R] +/Limits [(page.176) (page.21)] >> -% 2050 0 obj +% 2313 0 obj << -/Names [(page.43) 1119 0 R (page.44) 1126 0 R (page.45) 1139 0 R (page.46) 1146 0 R (page.47) 1153 0 R (page.48) 1164 0 R] -/Limits [(page.43) (page.48)] +/Names [(page.22) 1075 0 R (page.23) 1079 0 R (page.24) 1085 0 R (page.25) 1091 0 R (page.26) 1098 0 R (page.27) 1105 0 R] +/Limits [(page.22) (page.27)] >> -% 2051 0 obj +% 2314 0 obj << -/Names [(page.49) 1179 0 R (page.5) 837 0 R (page.50) 1186 0 R (page.51) 1197 0 R (page.52) 1203 0 R (page.53) 1214 0 R] -/Limits [(page.49) (page.53)] +/Names [(page.28) 1111 0 R (page.29) 1115 0 R (page.3) 912 0 R (page.30) 1126 0 R (page.31) 1133 0 R (page.32) 1143 0 R] +/Limits [(page.28) (page.32)] >> -% 2052 0 obj +% 2315 0 obj << -/Names [(page.54) 1219 0 R (page.55) 1230 0 R (page.56) 1237 0 R (page.57) 1246 0 R (page.58) 1252 0 R (page.59) 1260 0 R] -/Limits [(page.54) (page.59)] +/Names [(page.33) 1149 0 R (page.34) 1160 0 R (page.35) 1166 0 R (page.36) 1173 0 R (page.37) 1178 0 R (page.38) 1187 0 R] +/Limits [(page.33) (page.38)] >> -% 2053 0 obj +% 2316 0 obj << -/Names [(page.6) 842 0 R (page.60) 1267 0 R (page.61) 1280 0 R (page.62) 1288 0 R (page.63) 1297 0 R (page.64) 1305 0 R] -/Limits [(page.6) (page.64)] +/Names [(page.39) 1195 0 R (page.4) 925 0 R (page.40) 1200 0 R (page.41) 1208 0 R (page.42) 1213 0 R (page.43) 1220 0 R] +/Limits [(page.39) (page.43)] >> -% 2054 0 obj +% 2317 0 obj << -/Names [(page.65) 1309 0 R (page.66) 1324 0 R (page.67) 1329 0 R (page.68) 1336 0 R (page.69) 1343 0 R (page.7) 855 0 R] -/Limits [(page.65) (page.7)] +/Names [(page.44) 1226 0 R (page.45) 1239 0 R (page.46) 1246 0 R (page.47) 1253 0 R (page.48) 1264 0 R (page.49) 1280 0 R] +/Limits [(page.44) (page.49)] >> -% 2055 0 obj +% 2318 0 obj << -/Names [(page.70) 1347 0 R (page.71) 1353 0 R (page.72) 1359 0 R (page.73) 1365 0 R (page.74) 1376 0 R (page.75) 1381 0 R] -/Limits [(page.70) (page.75)] +/Names [(page.5) 937 0 R (page.50) 1287 0 R (page.51) 1298 0 R (page.52) 1304 0 R (page.53) 1315 0 R (page.54) 1320 0 R] +/Limits [(page.5) (page.54)] >> -% 2056 0 obj +% 2319 0 obj << -/Names [(page.76) 1390 0 R (page.77) 1397 0 R (page.78) 1402 0 R (page.79) 1408 0 R (page.8) 876 0 R (page.80) 1413 0 R] -/Limits [(page.76) (page.80)] +/Names [(page.55) 1331 0 R (page.56) 1337 0 R (page.57) 1346 0 R (page.58) 1352 0 R (page.59) 1360 0 R (page.6) 942 0 R] +/Limits [(page.55) (page.6)] >> -% 2057 0 obj +% 2320 0 obj << -/Names [(page.81) 1421 0 R (page.82) 1426 0 R (page.83) 1434 0 R (page.84) 1440 0 R (page.85) 1449 0 R (page.86) 1463 0 R] -/Limits [(page.81) (page.86)] +/Names [(page.60) 1367 0 R (page.61) 1381 0 R (page.62) 1389 0 R (page.63) 1398 0 R (page.64) 1406 0 R (page.65) 1410 0 R] +/Limits [(page.60) (page.65)] >> -% 2058 0 obj +% 2321 0 obj << -/Names [(page.87) 1468 0 R (page.88) 1481 0 R (page.89) 1487 0 R (page.9) 887 0 R (page.90) 1494 0 R (page.91) 1498 0 R] -/Limits [(page.87) (page.91)] +/Names [(page.66) 1425 0 R (page.67) 1430 0 R (page.68) 1437 0 R (page.69) 1444 0 R (page.7) 955 0 R (page.70) 1448 0 R] +/Limits [(page.66) (page.70)] >> -% 2059 0 obj +% 2322 0 obj << -/Names [(page.92) 1506 0 R (page.93) 1510 0 R (page.94) 1519 0 R (page.95) 1527 0 R (page.96) 1531 0 R (page.97) 1536 0 R] -/Limits [(page.92) (page.97)] +/Names [(page.71) 1453 0 R (page.72) 1459 0 R (page.73) 1465 0 R (page.74) 1476 0 R (page.75) 1481 0 R (page.76) 1490 0 R] +/Limits [(page.71) (page.76)] >> -% 2060 0 obj +% 2323 0 obj << -/Names [(page.98) 1541 0 R (page.99) 1548 0 R (page.i) 638 0 R (page.ii) 696 0 R (page.iii) 742 0 R (page.iv) 763 0 R] -/Limits [(page.98) (page.iv)] +/Names [(page.77) 1498 0 R (page.78) 1503 0 R (page.79) 1509 0 R (page.8) 977 0 R (page.80) 1514 0 R (page.81) 1522 0 R] +/Limits [(page.77) (page.81)] >> -% 2061 0 obj +% 2324 0 obj << -/Names [(precdata) 1005 0 R (section*.1) 639 0 R (section*.2) 1938 0 R (section.1) 7 0 R (section.10) 541 0 R (section.11) 569 0 R] -/Limits [(precdata) (section.11)] +/Names [(page.82) 1527 0 R (page.83) 1535 0 R (page.84) 1541 0 R (page.85) 1550 0 R (page.86) 1564 0 R (page.87) 1568 0 R] +/Limits [(page.82) (page.87)] >> -% 2062 0 obj +% 2325 0 obj << -/Names [(section.2) 11 0 R (section.3) 35 0 R (section.4) 220 0 R (section.5) 288 0 R (section.6) 308 0 R (section.7) 421 0 R] -/Limits [(section.2) (section.7)] +/Names [(page.88) 1581 0 R (page.89) 1587 0 R (page.9) 988 0 R (page.90) 1594 0 R (page.91) 1598 0 R (page.92) 1606 0 R] +/Limits [(page.88) (page.92)] >> -% 2063 0 obj +% 2326 0 obj << -/Names [(section.8) 493 0 R (section.9) 513 0 R (spbasedata) 952 0 R (spdata) 945 0 R (subsection.10.1) 545 0 R (subsection.10.2) 549 0 R] -/Limits [(section.8) (subsection.10.2)] +/Names [(page.93) 1611 0 R (page.94) 1620 0 R (page.95) 1628 0 R (page.96) 1632 0 R (page.97) 1637 0 R (page.98) 1642 0 R] +/Limits [(page.93) (page.98)] >> -% 2064 0 obj +% 2327 0 obj << -/Names [(subsection.10.3) 553 0 R (subsection.10.4) 557 0 R (subsection.10.5) 561 0 R (subsection.10.6) 565 0 R (subsection.11.1) 573 0 R (subsection.2.1) 15 0 R] -/Limits [(subsection.10.3) (subsection.2.1)] +/Names [(page.99) 1649 0 R (page.i) 719 0 R (page.ii) 776 0 R (page.iii) 823 0 R (page.iv) 863 0 R (precdata) 1106 0 R] +/Limits [(page.99) (precdata)] >> -% 2065 0 obj +% 2328 0 obj << -/Names [(subsection.2.2) 19 0 R (subsection.2.3) 23 0 R (subsection.2.4) 31 0 R (subsection.3.1) 39 0 R (subsection.3.2) 103 0 R (subsection.3.3) 183 0 R] -/Limits [(subsection.2.2) (subsection.3.3)] +/Names [(section*.1) 720 0 R (section*.10) 618 0 R (section*.11) 622 0 R (section*.12) 626 0 R (section*.13) 630 0 R (section*.14) 634 0 R] +/Limits [(section*.1) (section*.14)] >> -% 2066 0 obj +% 2329 0 obj << -/Names [(subsection.3.4) 212 0 R (subsection.3.5) 216 0 R (subsection.4.1) 224 0 R (subsection.4.10) 260 0 R (subsection.4.11) 264 0 R (subsection.4.12) 268 0 R] -/Limits [(subsection.3.4) (subsection.4.12)] +/Names [(section*.15) 638 0 R (section*.16) 642 0 R (section*.17) 646 0 R (section*.18) 650 0 R (section*.19) 654 0 R (section*.2) 2055 0 R] +/Limits [(section*.15) (section*.2)] >> -% 2067 0 obj +% 2330 0 obj << -/Names [(subsection.4.13) 272 0 R (subsection.4.14) 276 0 R (subsection.4.15) 280 0 R (subsection.4.16) 284 0 R (subsection.4.2) 228 0 R (subsection.4.3) 232 0 R] -/Limits [(subsection.4.13) (subsection.4.3)] +/Names [(section*.20) 2185 0 R (section*.3) 2084 0 R (section*.4) 2105 0 R (section*.5) 2125 0 R (section*.6) 601 0 R (section*.7) 606 0 R] +/Limits [(section*.20) (section*.7)] >> -% 2068 0 obj +% 2331 0 obj << -/Names [(subsection.4.4) 236 0 R (subsection.4.5) 240 0 R (subsection.4.6) 244 0 R (subsection.4.7) 248 0 R (subsection.4.8) 252 0 R (subsection.4.9) 256 0 R] -/Limits [(subsection.4.4) (subsection.4.9)] +/Names [(section*.8) 610 0 R (section*.9) 614 0 R (section.1) 7 0 R (section.10) 541 0 R (section.11) 569 0 R (section.12) 577 0 R] +/Limits [(section*.8) (section.12)] >> -% 2069 0 obj +% 2332 0 obj << -/Names [(subsection.5.1) 292 0 R (subsection.5.2) 296 0 R (subsection.5.3) 300 0 R (subsection.5.4) 304 0 R (subsection.6.1) 312 0 R (subsection.6.10) 348 0 R] -/Limits [(subsection.5.1) (subsection.6.10)] +/Names [(section.13) 597 0 R (section.2) 11 0 R (section.3) 35 0 R (section.4) 220 0 R (section.5) 288 0 R (section.6) 308 0 R] +/Limits [(section.13) (section.6)] >> -% 2070 0 obj +% 2333 0 obj << -/Names [(subsection.6.11) 352 0 R (subsection.6.12) 356 0 R (subsection.6.13) 360 0 R (subsection.6.14) 364 0 R (subsection.6.15) 368 0 R (subsection.6.16) 372 0 R] -/Limits [(subsection.6.11) (subsection.6.16)] +/Names [(section.7) 421 0 R (section.8) 493 0 R (section.9) 513 0 R (spbasedata) 1052 0 R (spdata) 1045 0 R (subsection.10.1) 545 0 R] +/Limits [(section.7) (subsection.10.1)] >> -% 2071 0 obj +% 2334 0 obj << -/Names [(subsection.6.17) 376 0 R (subsection.6.18) 380 0 R (subsection.6.19) 384 0 R (subsection.6.2) 316 0 R (subsection.6.20) 388 0 R (subsection.6.21) 392 0 R] -/Limits [(subsection.6.17) (subsection.6.21)] +/Names [(subsection.10.2) 549 0 R (subsection.10.3) 553 0 R (subsection.10.4) 557 0 R (subsection.10.5) 561 0 R (subsection.10.6) 565 0 R (subsection.11.1) 573 0 R] +/Limits [(subsection.10.2) (subsection.11.1)] >> -% 2072 0 obj +% 2335 0 obj << -/Names [(subsection.6.22) 396 0 R (subsection.6.23) 400 0 R (subsection.6.24) 405 0 R (subsection.6.25) 409 0 R (subsection.6.26) 413 0 R (subsection.6.27) 417 0 R] -/Limits [(subsection.6.22) (subsection.6.27)] +/Names [(subsection.12.1) 581 0 R (subsection.12.2) 585 0 R (subsection.12.3) 589 0 R (subsection.12.4) 593 0 R (subsection.2.1) 15 0 R (subsection.2.2) 19 0 R] +/Limits [(subsection.12.1) (subsection.2.2)] >> -% 2073 0 obj +% 2336 0 obj << -/Names [(subsection.6.3) 320 0 R (subsection.6.4) 324 0 R (subsection.6.5) 328 0 R (subsection.6.6) 332 0 R (subsection.6.7) 336 0 R (subsection.6.8) 340 0 R] -/Limits [(subsection.6.3) (subsection.6.8)] +/Names [(subsection.2.3) 23 0 R (subsection.2.4) 31 0 R (subsection.3.1) 39 0 R (subsection.3.2) 103 0 R (subsection.3.3) 183 0 R (subsection.3.4) 212 0 R] +/Limits [(subsection.2.3) (subsection.3.4)] >> -% 2074 0 obj +% 2337 0 obj << -/Names [(subsection.6.9) 344 0 R (subsection.7.1) 425 0 R (subsection.7.10) 461 0 R (subsection.7.11) 465 0 R (subsection.7.12) 469 0 R (subsection.7.13) 473 0 R] -/Limits [(subsection.6.9) (subsection.7.13)] +/Names [(subsection.3.5) 216 0 R (subsection.4.1) 224 0 R (subsection.4.10) 260 0 R (subsection.4.11) 264 0 R (subsection.4.12) 268 0 R (subsection.4.13) 272 0 R] +/Limits [(subsection.3.5) (subsection.4.13)] >> -% 2075 0 obj +% 2338 0 obj << -/Names [(subsection.7.14) 477 0 R (subsection.7.15) 481 0 R (subsection.7.16) 485 0 R (subsection.7.17) 489 0 R (subsection.7.2) 429 0 R (subsection.7.3) 433 0 R] -/Limits [(subsection.7.14) (subsection.7.3)] +/Names [(subsection.4.14) 276 0 R (subsection.4.15) 280 0 R (subsection.4.16) 284 0 R (subsection.4.2) 228 0 R (subsection.4.3) 232 0 R (subsection.4.4) 236 0 R] +/Limits [(subsection.4.14) (subsection.4.4)] >> -% 2076 0 obj +% 2339 0 obj << -/Names [(subsection.7.4) 437 0 R (subsection.7.5) 441 0 R (subsection.7.6) 445 0 R (subsection.7.7) 449 0 R (subsection.7.8) 453 0 R (subsection.7.9) 457 0 R] -/Limits [(subsection.7.4) (subsection.7.9)] +/Names [(subsection.4.5) 240 0 R (subsection.4.6) 244 0 R (subsection.4.7) 248 0 R (subsection.4.8) 252 0 R (subsection.4.9) 256 0 R (subsection.5.1) 292 0 R] +/Limits [(subsection.4.5) (subsection.5.1)] >> -% 2077 0 obj +% 2340 0 obj << -/Names [(subsection.8.1) 497 0 R (subsection.8.2) 501 0 R (subsection.8.3) 505 0 R (subsection.8.4) 509 0 R (subsection.9.1) 517 0 R (subsection.9.2) 521 0 R] -/Limits [(subsection.8.1) (subsection.9.2)] +/Names [(subsection.5.2) 296 0 R (subsection.5.3) 300 0 R (subsection.5.4) 304 0 R (subsection.6.1) 312 0 R (subsection.6.10) 348 0 R (subsection.6.11) 352 0 R] +/Limits [(subsection.5.2) (subsection.6.11)] >> -% 2078 0 obj +% 2341 0 obj << -/Names [(subsection.9.3) 525 0 R (subsection.9.4) 529 0 R (subsection.9.5) 533 0 R (subsection.9.6) 537 0 R (subsubsection.2.3.1) 27 0 R (subsubsection.3.1.1) 43 0 R] -/Limits [(subsection.9.3) (subsubsection.3.1.1)] +/Names [(subsection.6.12) 356 0 R (subsection.6.13) 360 0 R (subsection.6.14) 364 0 R (subsection.6.15) 368 0 R (subsection.6.16) 372 0 R (subsection.6.17) 376 0 R] +/Limits [(subsection.6.12) (subsection.6.17)] >> -% 2079 0 obj +% 2342 0 obj << -/Names [(subsubsection.3.1.10) 79 0 R (subsubsection.3.1.11) 83 0 R (subsubsection.3.1.12) 87 0 R (subsubsection.3.1.13) 91 0 R (subsubsection.3.1.14) 95 0 R (subsubsection.3.1.15) 99 0 R] -/Limits [(subsubsection.3.1.10) (subsubsection.3.1.15)] +/Names [(subsection.6.18) 380 0 R (subsection.6.19) 384 0 R (subsection.6.2) 316 0 R (subsection.6.20) 388 0 R (subsection.6.21) 392 0 R (subsection.6.22) 396 0 R] +/Limits [(subsection.6.18) (subsection.6.22)] >> -% 2080 0 obj +% 2343 0 obj << -/Names [(subsubsection.3.1.2) 47 0 R (subsubsection.3.1.3) 51 0 R (subsubsection.3.1.4) 55 0 R (subsubsection.3.1.5) 59 0 R (subsubsection.3.1.6) 63 0 R (subsubsection.3.1.7) 67 0 R] -/Limits [(subsubsection.3.1.2) (subsubsection.3.1.7)] +/Names [(subsection.6.23) 400 0 R (subsection.6.24) 405 0 R (subsection.6.25) 409 0 R (subsection.6.26) 413 0 R (subsection.6.27) 417 0 R (subsection.6.3) 320 0 R] +/Limits [(subsection.6.23) (subsection.6.3)] >> -% 2081 0 obj +% 2344 0 obj << -/Names [(subsubsection.3.1.8) 71 0 R (subsubsection.3.1.9) 75 0 R (subsubsection.3.2.1) 107 0 R (subsubsection.3.2.10) 143 0 R (subsubsection.3.2.11) 147 0 R (subsubsection.3.2.12) 151 0 R] -/Limits [(subsubsection.3.1.8) (subsubsection.3.2.12)] +/Names [(subsection.6.4) 324 0 R (subsection.6.5) 328 0 R (subsection.6.6) 332 0 R (subsection.6.7) 336 0 R (subsection.6.8) 340 0 R (subsection.6.9) 344 0 R] +/Limits [(subsection.6.4) (subsection.6.9)] >> -% 2082 0 obj +% 2345 0 obj << -/Names [(subsubsection.3.2.13) 155 0 R (subsubsection.3.2.14) 159 0 R (subsubsection.3.2.15) 163 0 R (subsubsection.3.2.16) 167 0 R (subsubsection.3.2.17) 171 0 R (subsubsection.3.2.18) 175 0 R] -/Limits [(subsubsection.3.2.13) (subsubsection.3.2.18)] +/Names [(subsection.7.1) 425 0 R (subsection.7.10) 461 0 R (subsection.7.11) 465 0 R (subsection.7.12) 469 0 R (subsection.7.13) 473 0 R (subsection.7.14) 477 0 R] +/Limits [(subsection.7.1) (subsection.7.14)] >> -% 2083 0 obj +% 2346 0 obj << -/Names [(subsubsection.3.2.19) 179 0 R (subsubsection.3.2.2) 111 0 R (subsubsection.3.2.3) 115 0 R (subsubsection.3.2.4) 119 0 R (subsubsection.3.2.5) 123 0 R (subsubsection.3.2.6) 127 0 R] -/Limits [(subsubsection.3.2.19) (subsubsection.3.2.6)] +/Names [(subsection.7.15) 481 0 R (subsection.7.16) 485 0 R (subsection.7.17) 489 0 R (subsection.7.2) 429 0 R (subsection.7.3) 433 0 R (subsection.7.4) 437 0 R] +/Limits [(subsection.7.15) (subsection.7.4)] >> -% 2084 0 obj +% 2347 0 obj << -/Names [(subsubsection.3.2.7) 131 0 R (subsubsection.3.2.8) 135 0 R (subsubsection.3.2.9) 139 0 R (subsubsection.3.3.1) 187 0 R (subsubsection.3.3.2) 191 0 R (subsubsection.3.3.3) 195 0 R] -/Limits [(subsubsection.3.2.7) (subsubsection.3.3.3)] +/Names [(subsection.7.5) 441 0 R (subsection.7.6) 445 0 R (subsection.7.7) 449 0 R (subsection.7.8) 453 0 R (subsection.7.9) 457 0 R (subsection.8.1) 497 0 R] +/Limits [(subsection.7.5) (subsection.8.1)] >> -% 2085 0 obj +% 2348 0 obj << -/Names [(subsubsection.3.3.4) 199 0 R (subsubsection.3.3.5) 204 0 R (subsubsection.3.3.6) 208 0 R (table.1) 998 0 R (table.10) 1147 0 R (table.11) 1155 0 R] -/Limits [(subsubsection.3.3.4) (table.11)] +/Names [(subsection.8.2) 501 0 R (subsection.8.3) 505 0 R (subsection.8.4) 509 0 R (subsection.9.1) 517 0 R (subsection.9.2) 521 0 R (subsection.9.3) 525 0 R] +/Limits [(subsection.8.2) (subsection.9.3)] >> -% 2086 0 obj +% 2349 0 obj << -/Names [(table.12) 1168 0 R (table.13) 1187 0 R (table.14) 1215 0 R (table.15) 1231 0 R (table.16) 1247 0 R (table.17) 1261 0 R] -/Limits [(table.12) (table.17)] +/Names [(subsection.9.4) 529 0 R (subsection.9.5) 533 0 R (subsection.9.6) 537 0 R (subsubsection.2.3.1) 27 0 R (subsubsection.3.1.1) 43 0 R (subsubsection.3.1.10) 79 0 R] +/Limits [(subsection.9.4) (subsubsection.3.1.10)] >> -% 2087 0 obj +% 2350 0 obj << -/Names [(table.18) 1289 0 R (table.19) 1325 0 R (table.2) 1044 0 R (table.20) 1337 0 R (table.3) 1061 0 R (table.4) 1073 0 R] -/Limits [(table.18) (table.4)] +/Names [(subsubsection.3.1.11) 83 0 R (subsubsection.3.1.12) 87 0 R (subsubsection.3.1.13) 91 0 R (subsubsection.3.1.14) 95 0 R (subsubsection.3.1.15) 99 0 R (subsubsection.3.1.2) 47 0 R] +/Limits [(subsubsection.3.1.11) (subsubsection.3.1.2)] >> -% 2088 0 obj +% 2351 0 obj +<< +/Names [(subsubsection.3.1.3) 51 0 R (subsubsection.3.1.4) 55 0 R (subsubsection.3.1.5) 59 0 R (subsubsection.3.1.6) 63 0 R (subsubsection.3.1.7) 67 0 R (subsubsection.3.1.8) 71 0 R] +/Limits [(subsubsection.3.1.3) (subsubsection.3.1.8)] +>> +% 2352 0 obj +<< +/Names [(subsubsection.3.1.9) 75 0 R (subsubsection.3.2.1) 107 0 R (subsubsection.3.2.10) 143 0 R (subsubsection.3.2.11) 147 0 R (subsubsection.3.2.12) 151 0 R (subsubsection.3.2.13) 155 0 R] +/Limits [(subsubsection.3.1.9) (subsubsection.3.2.13)] +>> +% 2353 0 obj +<< +/Names [(subsubsection.3.2.14) 159 0 R (subsubsection.3.2.15) 163 0 R (subsubsection.3.2.16) 167 0 R (subsubsection.3.2.17) 171 0 R (subsubsection.3.2.18) 175 0 R (subsubsection.3.2.19) 179 0 R] +/Limits [(subsubsection.3.2.14) (subsubsection.3.2.19)] +>> +% 2354 0 obj +<< +/Names [(subsubsection.3.2.2) 111 0 R (subsubsection.3.2.3) 115 0 R (subsubsection.3.2.4) 119 0 R (subsubsection.3.2.5) 123 0 R (subsubsection.3.2.6) 127 0 R (subsubsection.3.2.7) 131 0 R] +/Limits [(subsubsection.3.2.2) (subsubsection.3.2.7)] +>> +% 2355 0 obj +<< +/Names [(subsubsection.3.2.8) 135 0 R (subsubsection.3.2.9) 139 0 R (subsubsection.3.3.1) 187 0 R (subsubsection.3.3.2) 191 0 R (subsubsection.3.3.3) 195 0 R (subsubsection.3.3.4) 199 0 R] +/Limits [(subsubsection.3.2.8) (subsubsection.3.3.4)] +>> +% 2356 0 obj +<< +/Names [(subsubsection.3.3.5) 204 0 R (subsubsection.3.3.6) 208 0 R (table.1) 1099 0 R (table.10) 1247 0 R (table.11) 1255 0 R (table.12) 1268 0 R] +/Limits [(subsubsection.3.3.5) (table.12)] +>> +% 2357 0 obj +<< +/Names [(table.13) 1288 0 R (table.14) 1316 0 R (table.15) 1332 0 R (table.16) 1347 0 R (table.17) 1361 0 R (table.18) 1390 0 R] +/Limits [(table.13) (table.18)] +>> +% 2358 0 obj +<< +/Names [(table.19) 1426 0 R (table.2) 1144 0 R (table.20) 1438 0 R (table.21) 2059 0 R (table.3) 1161 0 R (table.4) 1174 0 R] +/Limits [(table.19) (table.4)] +>> +% 2359 0 obj << -/Names [(table.5) 1087 0 R (table.6) 1095 0 R (table.7) 1108 0 R (table.8) 1120 0 R (table.9) 1140 0 R (title.0) 3 0 R] +/Names [(table.5) 1188 0 R (table.6) 1196 0 R (table.7) 1209 0 R (table.8) 1221 0 R (table.9) 1240 0 R (title.0) 3 0 R] /Limits [(table.5) (title.0)] >> -% 2089 0 obj +% 2360 0 obj << -/Names [(vbasedata) 933 0 R (vdata) 985 0 R] +/Names [(vbasedata) 1033 0 R (vdata) 1086 0 R] /Limits [(vbasedata) (vdata)] >> -% 2090 0 obj +% 2361 0 obj << -/Kids [1997 0 R 1998 0 R 1999 0 R 2000 0 R 2001 0 R 2002 0 R] +/Kids [2258 0 R 2259 0 R 2260 0 R 2261 0 R 2262 0 R 2263 0 R] /Limits [(Doc-Start) (Item.125)] >> -% 2091 0 obj +% 2362 0 obj << -/Kids [2003 0 R 2004 0 R 2005 0 R 2006 0 R 2007 0 R 2008 0 R] -/Limits [(Item.126) (Item.29)] +/Kids [2264 0 R 2266 0 R 2267 0 R 2268 0 R 2269 0 R 2270 0 R] +/Limits [(Item.126) (Item.26)] >> -% 2092 0 obj +% 2363 0 obj << -/Kids [2009 0 R 2010 0 R 2011 0 R 2012 0 R 2013 0 R 2015 0 R] -/Limits [(Item.3) (Item.61)] +/Kids [2271 0 R 2272 0 R 2273 0 R 2274 0 R 2275 0 R 2276 0 R] +/Limits [(Item.27) (Item.59)] >> -% 2093 0 obj +% 2364 0 obj << -/Kids [2016 0 R 2017 0 R 2018 0 R 2019 0 R 2020 0 R 2021 0 R] -/Limits [(Item.62) (Item.94)] +/Kids [2277 0 R 2278 0 R 2279 0 R 2280 0 R 2281 0 R 2282 0 R] +/Limits [(Item.6) (Item.91)] >> -% 2094 0 obj + +endstream +endobj +2385 0 obj << -/Kids [2022 0 R 2023 0 R 2024 0 R 2025 0 R 2026 0 R 2027 0 R] -/Limits [(Item.95) (listing.4)] + /Title (Parallel Sparse BLAS V. 3.9.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() +/CreationDate (D:20240711120826+02'00') +/ModDate (D:20240711120826+02'00') +/Trapped /False +/PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40) kpathsea version 6.3.5) >> -% 2095 0 obj +endobj +2366 0 obj << -/Kids [2028 0 R 2029 0 R 2030 0 R 2031 0 R 2032 0 R 2033 0 R] -/Limits [(listing.5) (page.103)] +/Type /ObjStm +/N 19 +/First 179 +/Length 2402 >> -% 2096 0 obj +stream +2365 0 2367 116 2368 240 2369 360 2370 473 2371 585 2372 696 2373 807 2374 921 2375 1042 +2376 1168 2377 1294 2378 1424 2379 1545 2380 1666 2381 1788 2382 1891 2383 1975 2384 2010 +% 2365 0 obj << -/Kids [2034 0 R 2035 0 R 2036 0 R 2037 0 R 2038 0 R 2039 0 R] -/Limits [(page.104) (page.136)] +/Kids [2283 0 R 2284 0 R 2285 0 R 2286 0 R 2287 0 R 2288 0 R] +/Limits [(Item.92) (equation.4.1)] >> -% 2097 0 obj +% 2367 0 obj << -/Kids [2040 0 R 2041 0 R 2042 0 R 2043 0 R 2044 0 R 2045 0 R] -/Limits [(page.137) (page.20)] +/Kids [2289 0 R 2290 0 R 2291 0 R 2292 0 R 2293 0 R 2294 0 R] +/Limits [(equation.4.2) (lstnumber.-11.1)] >> -% 2098 0 obj +% 2368 0 obj << -/Kids [2046 0 R 2047 0 R 2048 0 R 2049 0 R 2050 0 R 2051 0 R] -/Limits [(page.21) (page.53)] +/Kids [2295 0 R 2296 0 R 2297 0 R 2298 0 R 2299 0 R 2300 0 R] +/Limits [(lstnumber.-12.1) (page.115)] >> -% 2099 0 obj +% 2369 0 obj << -/Kids [2052 0 R 2053 0 R 2054 0 R 2055 0 R 2056 0 R 2057 0 R] -/Limits [(page.54) (page.86)] +/Kids [2301 0 R 2302 0 R 2303 0 R 2304 0 R 2305 0 R 2306 0 R] +/Limits [(page.116) (page.148)] >> -% 2100 0 obj +% 2370 0 obj << -/Kids [2058 0 R 2059 0 R 2060 0 R 2061 0 R 2062 0 R 2063 0 R] -/Limits [(page.87) (subsection.10.2)] +/Kids [2307 0 R 2308 0 R 2309 0 R 2310 0 R 2311 0 R 2312 0 R] +/Limits [(page.149) (page.21)] >> -% 2101 0 obj +% 2371 0 obj << -/Kids [2064 0 R 2065 0 R 2066 0 R 2067 0 R 2068 0 R 2069 0 R] -/Limits [(subsection.10.3) (subsection.6.10)] +/Kids [2313 0 R 2314 0 R 2315 0 R 2316 0 R 2317 0 R 2318 0 R] +/Limits [(page.22) (page.54)] >> -% 2102 0 obj +% 2372 0 obj << -/Kids [2070 0 R 2071 0 R 2072 0 R 2073 0 R 2074 0 R 2075 0 R] -/Limits [(subsection.6.11) (subsection.7.3)] +/Kids [2319 0 R 2320 0 R 2321 0 R 2322 0 R 2323 0 R 2324 0 R] +/Limits [(page.55) (page.87)] >> -% 2103 0 obj +% 2373 0 obj << -/Kids [2076 0 R 2077 0 R 2078 0 R 2079 0 R 2080 0 R 2081 0 R] -/Limits [(subsection.7.4) (subsubsection.3.2.12)] +/Kids [2325 0 R 2326 0 R 2327 0 R 2328 0 R 2329 0 R 2330 0 R] +/Limits [(page.88) (section*.7)] >> -% 2104 0 obj +% 2374 0 obj << -/Kids [2082 0 R 2083 0 R 2084 0 R 2085 0 R 2086 0 R 2087 0 R] -/Limits [(subsubsection.3.2.13) (table.4)] +/Kids [2331 0 R 2332 0 R 2333 0 R 2334 0 R 2335 0 R 2336 0 R] +/Limits [(section*.8) (subsection.3.4)] >> -% 2105 0 obj +% 2375 0 obj << -/Kids [2088 0 R 2089 0 R] -/Limits [(table.5) (vdata)] +/Kids [2337 0 R 2338 0 R 2339 0 R 2340 0 R 2341 0 R 2342 0 R] +/Limits [(subsection.3.5) (subsection.6.22)] >> -% 2106 0 obj +% 2376 0 obj << -/Kids [2090 0 R 2091 0 R 2092 0 R 2093 0 R 2094 0 R 2095 0 R] -/Limits [(Doc-Start) (page.103)] +/Kids [2343 0 R 2344 0 R 2345 0 R 2346 0 R 2347 0 R 2348 0 R] +/Limits [(subsection.6.23) (subsection.9.3)] >> -% 2107 0 obj +% 2377 0 obj << -/Kids [2096 0 R 2097 0 R 2098 0 R 2099 0 R 2100 0 R 2101 0 R] -/Limits [(page.104) (subsection.6.10)] +/Kids [2349 0 R 2350 0 R 2351 0 R 2352 0 R 2353 0 R 2354 0 R] +/Limits [(subsection.9.4) (subsubsection.3.2.7)] >> -% 2108 0 obj +% 2378 0 obj << -/Kids [2102 0 R 2103 0 R 2104 0 R 2105 0 R] -/Limits [(subsection.6.11) (vdata)] +/Kids [2355 0 R 2356 0 R 2357 0 R 2358 0 R 2359 0 R 2360 0 R] +/Limits [(subsubsection.3.2.8) (vdata)] >> -% 2109 0 obj +% 2379 0 obj +<< +/Kids [2361 0 R 2362 0 R 2363 0 R 2364 0 R 2365 0 R 2367 0 R] +/Limits [(Doc-Start) (lstnumber.-11.1)] +>> +% 2380 0 obj << -/Kids [2106 0 R 2107 0 R 2108 0 R] +/Kids [2368 0 R 2369 0 R 2370 0 R 2371 0 R 2372 0 R 2373 0 R] +/Limits [(lstnumber.-12.1) (section*.7)] +>> +% 2381 0 obj +<< +/Kids [2374 0 R 2375 0 R 2376 0 R 2377 0 R 2378 0 R] +/Limits [(section*.8) (vdata)] +>> +% 2382 0 obj +<< +/Kids [2379 0 R 2380 0 R 2381 0 R] /Limits [(Doc-Start) (vdata)] >> -% 2110 0 obj +% 2383 0 obj << -/Dests 2109 0 R +/Dests 2382 0 R >> -% 2111 0 obj +% 2384 0 obj << /Type /Catalog -/Pages 1994 0 R -/Outlines 1995 0 R -/Names 2110 0 R +/Pages 2255 0 R +/Outlines 2256 0 R +/Names 2383 0 R /URI (http://ce.uniroma2.it/psblas) /PageMode/UseOutlines/PageLabels<>2<>6<>]>> -/OpenAction 576 0 R +/OpenAction 657 0 R >> endstream endobj -2113 0 obj +2386 0 obj << /Type /XRef -/Index [0 2114] -/Size 2114 +/Index [0 2387] +/Size 2387 /W [1 3 1] -/Root 2111 0 R -/Info 2112 0 R -/ID [<3E2BE59A045B6F63645E5D8EE5B06F10> <3E2BE59A045B6F63645E5D8EE5B06F10>] -/Length 10570 ->> -stream -ÿ”[ÌSìKÌRìUÌQì]ÌP=ÌO  -=ÌN  =.ÌM =5ÌL=:ÌK=;ÌJ=[ÌI=\ÌH=]ÌG=^ÌF=bÌE=cÌD  ÌC!" ÌB#$ ÌA%& Ì@'( Ì?)*  Ì>+,  Ì=-. Ì</0 Ì;12 Ì:34 Ì956 Ì878 "Ì79: #Ì6;< %Ì5=> )Ì4?@ *Ì3AB +Ì2CD /Ì1EF 0Ì0GH 4Ì/IJ 5Ì.KL 9Ì-MN :Ì,OP ;Ì+QR ?Ì*ST EÌ)UV FÌ(WX GÌ'YZ HÌ&[\ NÌ%]^ OÌ$_` PÌ#ab VÌ"cËR [Ì!ËË \Ì ËË ]ÌËËóÌËËóÌË Ë -óÌË Ë óÌË Ëó.ÌËËó9ÌËËóEÌËËóMÌËËóYÌËËbÌËËbÌËËbÌËËbÌËË b(ÌË!Ë"b<ÌË#Ë$bWÌË%Ë&ÑÌË'Ë(ÑÌ Ë)Ë*ÑÌ Ë+Ë,ÑÌ Ë-Ë.Ñ3Ì -Ë/Ë0ÑQÌ Ë1Ë2ÑZÌË3Ë4DÌË5Ë6DÌË7Ë8DÌË9Ë:D&ÌË;Ë<D-ÌË=Ë>D2ÌË?Ë@D8ÌËAËBDCÌËCËDDNÄcËEËF¸ÄbËGËH¸ÄaËIËJ¸Ä`ËKËL¸"Ä_ËMËN¸,Ä^ËOËP¸8Ä]ËQËR¸@Ä\ËSËT¸DÄ[ËUËV¸IÄZËWËX¸SÄYËYËZ¸XÄXË[Ë\¸^ÄWË]Ë^ÄVË_Ë`ÄUËaËb ÄTËc”3ÄS””ÄR””'ÄQ””+ÄP””;ÄO” ” -?ÄN” ” EÄM” ”KÄL””RÄK””VÄJ””ZÄI””^ÄH””bÄG””ŠÄF””Š ÄE””ŠÄD”” Š ÄC”!”"Š*ÄB”#”$Š4ÄA”%”&Š>Ä@”'”(ŠJÄ?”)”*ŠOÄ>”+”,ŠVÄ=”-”.Š_Ä<”/”0þÄ;”1”2þÄ:”3”4þ Ä9”5”6þÄ8”7”8þÄ7”9”:þÄ6”;”<þ&Ä5”=”>þ-Ä4”?”@þ4Ä3”A”Bþ;Ä2”C”DþDÄ1”E”FþJÄ0”G”HþSÄ/”I”Jþ^Ä.”K”LþcÄ-”M”NcÄ,”O”Pc Ä+”Q”Rc Ä*”S”TcÄ)”U”V”W5‘”X”\2}”Y”ZÄÄÄÄF=”_”]öÛ”^”a”b”c………………………… … -… … … ………………………………………!…"…#…$…%…&…'…(…)…*…/…-”`÷e…+…,…………… ×…0…1…2…3…4…5…6…7…8…9…:…;…<…=…>…?…@…A…B…C…D…E…F…G…H…I…J…K…L…M…N…O…P…Q…R…S…T…U…V…W…X…Y…Z…[…\…]…a…_….QX…^…b…cìììììììììì ì -ì ì ì ìììììììììììììììììì ì!ì"ì#ì%ì&ì(ì*ì/ì-…`¬ì,ìì$ì'ì)ì+¨àì1ì3ì5ì6ì7ì8ì9ì:ì;ì<ì=ì?ì.Aì>ì0ì2ì4ìAìBìCìDìEìFìGìHìIìJìLì@)ÒÄc=c4c*c>c<c1c2c;c.c/ìRìSìT^KìYìWìMIzìVìNìOìPìQcc0ìZì[ì`ìXn¤ì\cac`c_ì^ì_c:==¢6=ìa==ìbìc=ÇÆc-c,== = =äï= -=== øj= =========+=)=`======= =!="=#=$=%=&='=(cbÄ=,=6=*6Ø=-=/=0=1=2=3=4=8===7S=9=<=?=@=A=H=>sP=B=C=D=E=F=G=J=K=V=I‹B=L=M=N=O=P=Q=R=S=Tcc=U=X=Y=_=W¢â=Z =`¶{=aÄ   ë      -þn       ²  c9   2ê   &  Nž ! $ , ']î (Ä  1 -nO . 6 2‚¢ 3 < 7”[ 8 @ =¦ > B C J A¾„ D I M Q K܃ LÄ - S T W RíÆ Uó Y a _ Xü™ Z ^ có `F bVòóóAKóóóó ó -ó ó óóóB&ó Äóóóó^fóÄ óóóóóó óhóóó"ó%ó!…[ó#ó$ó'ó(ó)ó*ó+ó,ó0ó&”‹ó-ó/ó3ó1µó2ó5ó6ó7ó;ó4·žó8ó:ó?ó<Õó=ó>Ä óAóBóCóGó@ßuóDóFóIóJóKóOóHøóLóNóSóP óQóRóUóVóWó[óTuóXóZó^ó\;>ó]ó`óaóbbó_=ÂócbÄ –¸b b†ˆbbbbbb b -b bbbb —§bbbbbb¯†bbbbb!bÄÌbÄb b#b$b%b&b/b-b"Ú$b'b)b*b+b,b0b1b2b3b4b5b6b8b.ö£b7Äb:b@b>b9db;b=bAbBbCbDbEbFbHb?,“bGbJbKbMbIIObLbObPbQbRbSbTbUbYbN[IbVbXb\bZxnb[b^b_b`babbÑÑÑb]yåbcÑÄ©µÑÑÇ¢ÑÑ Ñ -Ñ Ñ Ñ ÑÑÌçÑÑÑÑÑéfÑÑÑÑÑ&ÑÑìÑÑ/Ñ'Ñ*ÑÑ(Ñ)Ñ Ñ!Ñ"Ñ#Ñ$Ñ%ŠRùÑ-Ñ+€‘Ñ,ÄÑ/Ñ0Ñ1Ñ5Ñ.Œ¶Ñ2Ñ4Ñ7ÏpÑ8Ñ9Ñ>Ñ6ªRÑ:Ñ;Ñ<Ñ=ÑJÑAÑ?¿ÖÑ@ÑKÑBÍòÑIÑCÑDÑEÑFÑGÑHÞ}ÑMÑNÑOÑSÑL?ÑPÑRÑVÑT^ïÑUÄÑXÑ^Ñ\ÑWdµÑYÑ[Ñ_Ñ`ÑbÑ]€üÑaDÑcÉDô}DDDÛ“DDD -D D ô‘D c^D DDÏDDDDDDÄDDD&DDD!DA¢DDDD D#D$D(D"MžD%D'D*D+D.D)e,D,D0D3D/qÉD1D5D6D9D4z}D7ÄD>D:‘D;D<D=D@DADED?—DBDDDKDF­‘DGDHDIDJDQDODL²CDMDRDSDZDPÇDTDUDVDWDXDYDcD[â‰D\D]D^D_D`DaDbĸ¸¸¸¸š¸%Š¸¸7\¸¸ ¸ -¸ ¸ ¸ ¸¸¸¸¸¸C㸸¸¸¸O˸¸¸¸ ¸#¸_¸!¸'¸$wm¸%¸&ĸ)¸*¸-¸({G¸+¸2¸.”D¸/¸0¸1¸4¸5¸6¸;¸3–œ¸7¸9¸:¸=¸>¸A¸<®Á¸?¸E¸B»–¸C¸G¸J¸FÈe¸HĸO¸Ká¸L¸M¸N¸Q¸T¸PãݸR¸V¸Z¸Uùl¸W¸Y¸\¸`¸[ -¸]¸_¸b¸a¸cDÑOsÄ - ba  pÝ~œ!”A #$%("¢G&,)²*Ä8-ÈV./01234567<9å:B=åÿ>@AHCûÂDFGOI hJLMNSPùQÄWT'âU[X5(Y_\9è]c`?‘aŠŠjŠ ýŠ ŠìŠŠŠŠc]ÄŠ Š -’ýŠ ŠŠ©ÿŠŠŠŠŠŠ»£ŠŠŠÑŒŠŠŠŠŠ!Šä7ŠŠ'Š"û)Š#Š$Š%Š&ÄŠ+Š( »Š)Š1Š, "èŠ-Š.Š/Š0Š5Š2 5½Š3Š;Š6 KíŠ7Š8Š9Š:Š?Š< ^»Š=ŠGŠ@ uˆŠAŠBŠCc\ŠDŠEŠFÄŠLŠH Œ\ŠIŠKŠQŠM ¡ŠNŠPŠSŠTŠWŠR ¶ªŠUŠZŠ[Š\ŠX ÏHŠYŠbŠ] éwŠ^Š`ŠaþŠc ÷¦þþþÄ"#þ -þ!#7þþþ þþ !)jþ þþþþ!1‡þþþþ!3¿þþþþþ"þ!EÅþþ þ!þ$þ)þ#!YVþ%þ'þ(Äþ+þ0þ*!giþ,þ.þ/þ2þ7þ1!wáþ3þ5þ6þ9þ>þ8!’½þ:þ<þ=þAþ?!­:þ@þEþB!¯ÓþCþGþHþLþF!´eþIþKÄþNþOþPþQþTþM!È:þRþWþU!æ þVþYþZþ[þ\þ_þX!êEþ]þacþ`"«þb%cc"5öcccc -c":cÄ cc "DÌc ccccc"G-cccccccccc"g cc!c#c "ƒkc"c&c$"•1c%c6c'"•¿c(c)c+c3c5Ä!c@c7"¯ºc8c?cAcBcCcDcEcFcGcHcIcJc[cKcLcM"»¶cN"Ø•cO"êcP#•cQ#& cR#L~cS#œcT#¬DcU#äcV#ôdcW$ cX$`ÁcY$ócZ%Q1Ä"Ä#Ä$Ä%Ä&Ä'Ä(%|ëÌTÌUÌVÌWÌXÌYÌZÌ[Ì\Ì]Ì^Ì_Ì`ÌaÌbÌcÞ%ªhÞÞÞÞÞÞÞÞÞ Þ -Þ Þ Þ ÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞ Þ!Þ"Þ#Þ$Þ%Þ&Þ'Þ(Þ)Þ*Þ+Þ,Þ-Þ.Þ/Þ0Þ1Þ2Þ3Þ4Þ5Þ6Þ7Þ8Þ9Þ:Þ;Þ<Þ=Þ>Þ?Þ@ÞAÞBÞCÞDÞEÞFÞGÞHÞIÞJÞKÞLÞMÞNÞOÞPÞQÞRÞSÞTÞUÞVÞWÞXÞYÞZÞ[Þ\Þ]Þ^Þ_Þ`Þa%¨H%ò© +/Root 2384 0 R +/Info 2385 0 R +/ID [ ] +/Length 11935 +>> +stream +ÿ]Ñ]c#Ñ\c-Ñ[c5ÑZcAÑY  +cJÑX  ÌÑW Ì ÑVÌÑUÌÑTÌ3ÑSÌ4ÑRÌ5ÑQÌ6ÑPÌ:ÑOÌ;ÑN Ì<ÑM!"Ì@ÑL#$ÌAÑK%&ÌCÑJ'(ÌDÑI)*ÌHÑH+,ÌIÑG-.ÌJÑF/0ÌQÑE12ÌRÑD34ÌYÑC56ÌZÑB78Ì^ÑA9:Ì_Ñ@;<ÌaÑ?=>'Ñ>?@'Ñ=AB'Ñ<CD'Ñ;EF'Ñ:GH' Ñ9IJ' Ñ8KL'Ñ7MN'Ñ6OP'Ñ5QR'Ñ4ST'Ñ3UV'Ñ2WX'Ñ1YZ' Ñ0[\'&Ñ/]^''Ñ._`'(Ñ-ab'.Ñ,cË?%'3Ñ+ËË'4Ñ*ËË'5Ñ)ËË'<Ñ(ËË'@Ñ'Ë Ë +'JÑ&Ë Ë 'ZÑ%Ë ËŠÑ$ËËŠÑ#ËËŠÑ"ËËŠ%Ñ!ËËŠ1Ñ ËËŠ<ÑËËŠMÑËËŠTÑËËŠ[ÑËË õÑË!Ë"õÑË#Ë$õ/ÑË%Ë&õ<ÑË'Ë(õKÑË)Ë*õQÑË+Ë,õXÑË-Ë.a ÑË/Ë0a)ÑË1Ë2a2ÑË3Ë4a=ÑË5Ë6a>ÑË7Ë8aUÑË9Ë:abÑË;Ë<ÔÑ Ë=Ë>Ô +Ñ Ë?Ë@ÔÑ ËAËBÔÑ +ËCËDÔ&Ñ ËEËFÔAÑËGËHÔQÑËIËJÔWÑËKËLÔ^ÑËMËNGÑËOËPGÑËQËRGÑËSËTGÑËUËVG!ÑËWËXG+ÊcËYËZG0ÊbË[Ë\G6ÊaË]Ë^G<Ê`Ë_Ë`GBÊ_ËaËbGHÊ^Ëc”}ƒGOÊ]””GVÊ\””GcÊ[””¬ÊZ””¬ÊY” ” +¬ÊX” ” ¬ÊW” ”¬#ÊV””¬*ÊU””¬.ÊT””¬2ÊS””¬6ÊR””¬:ÊQ””¬>ÊP””¬HÊO””¬RÊN”” ¬\ÊM”!”"ÊL”#”$ ÊK”%”&ÊJ”'”("ÊI”)”*'ÊH”+”,.ÊG”-”.7ÊF”/”0=ÊE”1”2CÊD”3”4IÊC”5”6OÊB”7”8TÊA”9”:[Ê@”;”<bÊ?”=”>‰Ê>”?”@‰ Ê=”A”B‰Ê<”C”D‰Ê;”E”F‰"Ê:”G”H‰+Ê9”I”J‰6Ê8”K”L‰;Ê7”M”N‰?Ê6”O”P‰EÊ5”Q”R‰IÊ4”S”T‰OÊ3”U”VùÊ2”W”XùÊ1”Y”Zù Ê0”[”\ùÊ/”]”^ù]Ê.”_”`ùaÊ-”a”bùbÊ,”c]üöxÊ+]]xÊ*]]xÊ)]]xÊ(]]xÊ'] ] +xÊ&] ] x Ê%] ]x +Ê$]]x Ê#]]xÊ"]]xÊ!]]xÊ ]]xÊ]]]À£]] ½Ž]]xYx[xZx\ÑO]#]!í]"]%]&]'](])]*]+],]-].]/]0]1]2]3]4]5]6]7]9];]<]=]>]?]@]A]B]C]E]G]I]J]K]L]M]N]O]P]Q]R]W]U]$‚w]S]T]8]:]D]F]H]X]Y]Z][]\]]]^]_]`]a]b]c           +                       ! % #]V¾¢ "°Â & ' ( ) * + , - . / 0 1 2 3 4 5 6 8 9 : ; < = > ? @ A B C D E F G H I J K M N P R W U $5¡ T 7 L O Q S Y [ ] ^ _ ` a b ccccccccccc c +c c c ccccccccc V}c X Z \ cccccccc c!c"c$cñx]x,x#x0x.x+x x!x*xxc*c+c,%Žc1c/c%½c.c&c'c(c)*Dxc2c3c8c05çc4xTxSxRc6c7x)c=c>iycBc9V_c?c@c:c;c<xxcDcEcGcCp#cFcUcScHƒžcIcKcLcMcNcOcPcQcRÌÌcT¤”cVcWcXcYcZc[c\c]c^c_c`cacbccÌxUx^ ÌÌÌò²ÌÌÌÌ Ì +Ì Ì ÌÌÌóÌÌÌÌÌÌ Ì/*ÌÌÌÌÌÌÌ"Ì#Ì.Ì!GÌ$Ì%Ì&Ì'Ì(Ì)Ì*Ì+Ì,xVÌ-Ì0Ì1Ì7Ì/bÞÌ2Ì=Ì8vxÌ9x_ÌEÌ>‡ÄÌ?ÌBÌKÌFš“ÌGÌMÌNÌOÌTÌL°ØÌPÌSx%ÌWÌ[ÌUÏÌVÌXÌbÌ\ì'Ì]Ì`'Ìcûx'x`bž' '1¢''' +HÚ' ''['''mo''''"'†`''!'%')'#¤`'$xa'+','/'*¸e'-'K'1'9'7'0ǽ'2'6';'='8Ùj':'A'>ê1'?'C'D'E'F'G'H'N'L'Bë 'IxX'O'P'R'Mr'Qxb'T'U'V'W'X'\'S'Y'['^'a']0/'_'`'cŠŠŠŠŠŠ'bAŠŠª{Š Š ŒfŠ +Š ŠŠŠŠ ŽéŠŠŠŠ­‡ŠŠxcŠŠŠŠŠ¹§ŠŠŠ!Š"Š#Š'Š ÓŠ$Š&Š+Š(ð-Š)Š*Š-Š.Š/Š3Š,üPŠ0Š2Š6Š4ÛŠ5Š8Š9Š:Š>Š7_Š;Š=ÊŠGŠ?6»Š@ŠAŠBŠCŠDŠEŠFŠIŠJŠKŠOŠHGÚŠLŠNŠQŠRŠVŠP`{ŠSŠUŠXŠYŠ]ŠWuÁŠZxWŠ\Š_Š`ŠaŠbõõŠ^‹Šcõõõõ7¿õõ õ +õ õ õ õõõ×ÅõÊõõõõñ†õõõõõõõõõ õ_õõ"õ#õ%õ!.õ$õ'õ(õ)õ*õ+õ,õ-õ1õ&@õ.õ0õ4õ2]Ðõ3õ6õ7õ8õ9õ:õ@õAõ>õ5_Gõ;õ=ÊõCõ?|âõBõEõFõGõHõIõMõD‚'õJõLõOõRõNŸnõPõTõUõVõbõZõS¢õWõYÈ]õcaõ[¼Aaaõ\õ]õ^õ_õ`õaÖ¸ +'"Óaal`aÊaaa a ax…a +a a¼kaaaa—Maaaaa"aa¬Ñaa#aºía!aaaaaa Ë þxa%a&a'a+a$,a(a*a.a,Mza-Êa0a6a4a/S@a1a3a7a8a:a5pa9a?a;€Na<aAaDa@™:aBaCaFaIaE²8aGxQaHaQaJÙvaKaLaMaNaOaPÊaSaVaRã®aTaXa]aWÿIaYaZa[a\a_a`Ôa^ Eaaac=ŠÔÔÔÔOŒÔÔÔ Ô\)Ô Ô ÔÔÔ dÝÔÊÔÔ{hÔÔÔÔÔÔÔqÔÔÔ#Ô—ñÔÔ Ô!Ô"Ô)Ô'Ô$œ£Ô%Ô*Ô+Ô2Ô(±ßÔ,Ô-Ô.Ô/Ô0Ô1Ô;Ô3ÌéÔ4Ô5Ô6Ô7Ô8Ô9Ô:ÊÔ=Ô>Ô?ÔBÔ<ÞÝÔ@ÔLÔCùŸÔDÔEÔFÔGÔHÔIÔJÔKÔNÔOÔRÔM&ÔPÔTÔUÔYÔSÔVÔXÔ[Ô\Ô_ÔZ!]Ô]ÔcÔ`9°ÔaÔbÊGGGGf(GsG +G%GGG G G GGG }GGGGGGG™¢GGG¦wGGG"G³FG Ê G'G#ËáG$G%G&G)G,G(ξG*G.G2G-äMG/G1G4G8G3ðëG5G7G:G>G9ãG;G=G@GDG?kGAGCÊ +GFGKGE#YGGGIGJGMGRGL1ÕGNGPGQGTGWGS?”GUG]GXU9GYGZG[G\G_G`Ga¬G^c?Gb ÙÚ¬¬œ’¬Ê ¬¬²É¬¬¬¬ ¬ +¬ ¬ ¬ ¬¬¬¬ÏŠ¬¬¬Ðr¬¬¬¬ ¬æ5¬¬¬¬'¬!÷Û¬"¬$¬%¬&¬+¬( l¬)Ê ¬/¬, U¬-¬3¬0 ›¬1¬7¬4 $[¬5¬;¬8 *¬9¬?¬< /D¬=¬E¬@ E¬A¬B¬C¬DxPÊ ¬I¬F Z%¬G¬O¬J q'¬K¬L¬M¬N¬S¬P „ȬQ¬Y¬T š±¬U¬V¬W¬X¬]¬Z ¯Y¬[¬c¬^ ÆK¬_¬`¬a¬bÊ þ0"F­ !]  +!) !?3!Sþ!jËxOÊ$ !ƒt!#)%!˜™&(+,/*!­Â-2340!Æ`1:5!îÛ689@;!ý +<>?ÊFA"{BDELG"®HJKPM"ËNRWQ"SUVY^X"% Z\]`‰_"8šac‰Ê#’Ú‰‰‰"kƉ‰‰‰ +‰‰ "|>‰ ‰ ‰‰‰‰"˜º‰‰‰‰‰"´˜‰‰‰"·d‰‰‰ ‰$‰"»ö‰!‰#ʉ&‰'‰(‰)‰,‰%"Ïˉ*‰/‰-"휉.‰1‰2‰3‰4‰7‰0"ñÖ‰5‰9‰<‰8#<‰:‰@‰=#´‰>‰B‰C‰F‰A#¾‰DʉJ‰G#$Š‰H‰L‰M‰V‰T‰K#&ë‰N‰P‰Q‰R‰S‰W‰X‰Y‰[‰U#G^‰Z‰]‰_‰\#c)‰^‰a‰cùùùùù‰`#tïù‰b%•Žx1x2ù ù +#ôù $!0ù#ùùù#¿Ìù ùùÊx-ùù%ùù#âùùùùùù$7ù.ù0ù$´ù$ù&ù'ù(ù)ù*ù+ù,ù-ù/ùù$+ùù ù!$,‡ù"$-}$.‡$Ma$u£ù8ù9ù:ù>ù1$V“ù;ù<ù=ù2ù3ù4ù5ù6$‚'ù7$ƒH$„>$‘úùHùJ$ÐUùFùLù?$°ÐùGùIùKù@ùAùBùCùD$Û^ùE$ÜZ$Ýj$÷%&žùTùWùM%:ùUùVùNùO%3ÙùPùQùR%5JùS%6F%7f%TqùZùX%a²ùYÊù^ù[%diù\xù_%€ ù`ùc(Þx x%È}xxx %ÝÈxxx%è¦xx&x%é4xxxx"x$Êx3x'&Bx(x/x4x5x6x7x8x9x:x;x<x=xNx>x?x@& xA&0êxB&TÚxC&rçxD&’xE&º xF' ¢xG'×xH'\}xI'lÚxJ'|–xK'ÛLxL(AcxM(‘ (•9(›Ÿ(¡Û(¨Ô(±(·W(½–(ÃÕ(Çg(Ëý(Ñÿ(ØÊÊ) ›ÊÊÊÊÊÊ)JdÑ^Ñ_Ñ`ÑaÑbÑcÙ)r}ÙÙÙÙÙÙÙÙÙ Ù +Ù Ù Ù ÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙ Ù!Ù"Ù#Ù$Ù%Ù&Ù'Ù(Ù)Ù*Ù+Ù,Ù-Ù.Ù/Ù0Ù1Ù2Ù3Ù4Ù5Ù6Ù7Ù8Ù9Ù:Ù;Ù<Ù=Ù>Ù?Ù@ÙAÙBÙCÙDÙEÙFÙGÙHÙIÙJÙKÙLÙMÙNÙOÙPÙQÙRÙSÙTÙUÙVÙWÙXÙYÙZÙ[Ù\Ù]Ù^Ù_Ù`ÙaÙbÙc >)à > > > > > > > > >  > + >  >  >  > > > > >)Ár)Í[ endstream endobj startxref -2486953 +2739547 %%EOF diff --git a/docs/src/Makefile b/docs/src/Makefile index e6ee0c5e..385d9051 100644 --- a/docs/src/Makefile +++ b/docs/src/Makefile @@ -86,7 +86,8 @@ TOPFILE = userguide.tex HTMLFILE = userhtml.tex SECFILE = intro.tex commrout.tex datastruct.tex psbrout.tex toolsrout.tex\ - methods.tex precs.tex penv.tex error.tex util.tex biblio.tex + methods.tex precs.tex penv.tex error.tex util.tex biblio.tex \ + ext-intro.tex cuda.tex FIGDIR = figures XPDFFLAGS = @@ -139,7 +140,7 @@ PDF = $(join $(BASEFILE),.pdf) PS = $(join $(BASEFILE),.ps) GXS = $(join $(BASEFILE),.gxs) GLX = $(join $(BASEFILE),.glx) -TARGETPDF= ../psblas-3.8.pdf +TARGETPDF= ../psblas-3.9.pdf BASEHTML = $(patsubst %.tex,%,$(HTMLFILE)) HTML = $(join $(BASEHTML),.html) HTMLDIR = ../html diff --git a/docs/src/biblio.tex b/docs/src/biblio.tex index 14c6dbdd..5ba3605c 100644 --- a/docs/src/biblio.tex +++ b/docs/src/biblio.tex @@ -1,9 +1,5 @@ \begin{thebibliography}{99} -\bibitem{DesPat:11} - D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson -{\em Design Patterns for Scientific Computations on Sparse Matrices}, - HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011 \bibitem{PARA04FOREST} G.~Bella, S.~Filippone, A.~De Maio and M.~Testa, @@ -154,6 +150,11 @@ Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., {\em Fortran 95/2003 explained.} {Oxford University Press}, 2004. % +\bibitem{MRC:11} +{Metcalf, M., Reid, J. and Cohen, M.} +{\em Modern Fortran explained.} +{Oxford University Press}, 2011. +% %% \bibitem{DD2} %% B.~Smith, P.~Bjorstad and W.~Gropp, %% {\em Domain Decomposition: Parallel Multilevel Methods for Elliptic @@ -169,4 +170,20 @@ M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker and J.~Dongarra, {\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition, MIT Press, 1998. % + +\bibitem{DesPat:11} + D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson +{\em Design Patterns for Scientific Computations on Sparse Matrices}, + HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011 + +\bibitem{CaFiRo:2014} +{ Cardellini, V.}, { Filippone, S.}, { and} { Rouson, D.} 2014, + Design patterns for sparse-matrix computations on hybrid {CPU/GPU} + platforms, +{\em Scientific Programming\/}~{\em 22,\/}~1, 1--19. +\bibitem{OurTechRep} +D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats + for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6, Universit\`a di + Roma Tor Vergata (February 2015). + \end{thebibliography} diff --git a/docs/src/cuda.tex b/docs/src/cuda.tex new file mode 100644 index 00000000..c6ee7dfa --- /dev/null +++ b/docs/src/cuda.tex @@ -0,0 +1,244 @@ + +\subsection{CUDA-class extensions} + +For computing with CUDA we define a dual memorization strategy in +which each variable on the CPU (``host'') side has a GPU (``device'') +side. When a GPU-type variable is initialized, the data contained is +(usually) the same on both sides. Each operator invoked on the +variable may change the data so that only the host side or the device +side are up-to-date. + +Keeping track of the updates to data in the variables is essential: we want +to perform most computations on the GPU, but we cannot afford the time +needed to move data between the host memory and the device memory +because the bandwidth of the interconnection bus would become the main +bottleneck of the computation. Thus, each and every computational +routine in the library is built according to the following principles: +\begin{itemize} +\item If the data type being handled is {GPU}-enabled, make sure that + its device copy is up to date, perform any arithmetic operation on + the {GPU}, and if the data has been altered as a result, mark + the main-memory copy as outdated. +\item The main-memory copy is never updated unless this is requested + by the user either +\begin{description} +\item[explicitly] by invoking a synchronization method; +\item[implicitly] by invoking a method that involves other data items + that are not {GPU}-enabled, e.g., by assignment ov a vector to a + normal array. +\end{description} +\end{itemize} +In this way, data items are put on the {GPU} memory ``on demand'' and +remain there as long as ``normal'' computations are carried out. +As an example, the following call to a matrix-vector product +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + call psb_spmm(alpha,a,x,beta,y,desc_a,info) +\end{minted} +will transparently and automatically be performed on the {GPU} whenever +all three data inputs \fortinline|a|, \fortinline|x| and +\fortinline|y| are {GPU}-enabled. If a program makes many such calls +sequentially, then +\begin{itemize} +\item The first kernel invocation will find the data in main memory, + and will copy it to the {GPU} memory, thus incurring a significant + overhead; the result is however \emph{not} copied back, and + therefore: +\item Subsequent kernel invocations involving the same vector will + find the data on the {GPU} side so that they will run at full + speed. +\end{itemize} +For all invocations after the first the only data that will have to be +transferred to/from the main memory will be the scalars \fortinline|alpha| +and \fortinline|beta|, and the return code \fortinline|info|. + +\begin{description} +\item[Vectors:] The data type \fortinline|psb_T_vect_gpu| provides a + GPU-enabled extension of the inner type \fortinline|psb_T_base_vect_type|, + and must be used together with the other inner matrix type to make + full use of the GPU computational capabilities; +\item[CSR:] The data type \fortinline|psb_T_csrg_sparse_mat| provides an + interface to the GPU version of CSR available in the NVIDIA CuSPARSE + library; +\item[HYB:] The data type \fortinline|psb_T_hybg_sparse_mat| provides an + interface to the HYB GPU storage available in the NVIDIA CuSPARSE + library. The internal structure is opaque, hence the host side is + just CSR; the HYB data format is only available up to CUDA version + 10. +\item[ELL:] The data type \fortinline|psb_T_elg_sparse_mat| provides an + interface to the ELLPACK implementation from SPGPU; + +\item[HLL:] The data type \fortinline|psb_T_hlg_sparse_mat| provides an + interface to the Hacked ELLPACK implementation from SPGPU; +\item[HDIA:] The data type \fortinline|psb_T_hdiag_sparse_mat| provides an + interface to the Hacked DIAgonals implementation from SPGPU; +\end{description} + + +\section{CUDA Environment Routines} +\label{sec:cudaenv} + +\subsection*{psb\_cuda\_init --- Initializes PSBLAS-CUDA + environment} +\addcontentsline{toc}{subsection}{psb\_cuda\_init} + +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_init(ctxt [, device]) +\end{minted} + +This subroutine initializes the PSBLAS-CUDA environment. +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry ] +\item[device] ID of CUDA device to attach to.\\ +Scope: {\bf local}.\\ +Type: {\bf optional}.\\ +Intent: {\bf in}.\\ +Specified as: an integer value. \ +Default: use \fortinline|mod(iam,ngpu)| where \fortinline|iam| is the calling +process index and \fortinline|ngpu| is the total number of CUDA devices +available on the current node. +\end{description} + + +{\par\noindent\large\bfseries Notes} +\begin{enumerate} +\item A call to this routine must precede any other PSBLAS-CUDA call. +\end{enumerate} + +\subsection*{psb\_cuda\_exit --- Exit from PSBLAS-CUDA + environment} +\addcontentsline{toc}{subsection}{psb\_cuda\_exit} + +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_exit(ctxt) +\end{minted} + +This subroutine exits from the PSBLAS CUDA context. +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry ] +\item[ctxt] the communication context identifying the virtual + parallel machine.\\ +Scope: {\bf global}.\\ +Type: {\bf required}.\\ +Intent: {\bf in}.\\ +Specified as: an integer variable. +\end{description} + + + + +\subsection*{psb\_cuda\_DeviceSync --- Synchronize CUDA device} +\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceSync} + +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_DeviceSync() +\end{minted} + +This subroutine ensures that all previosly invoked kernels, i.e. all +invocation of CUDA-side code, have completed. + + +\subsection*{psb\_cuda\_getDeviceCount } +\addcontentsline{toc}{subsection}{psb\_cuda\_getDeviceCount} + +\begin{minted}[breaklines=true]{fortran} +ngpus = psb_cuda_getDeviceCount() +\end{minted} + +Get number of devices available on current computing node. + +\subsection*{psb\_cuda\_getDevice } +\addcontentsline{toc}{subsection}{psb\_cuda\_getDevice} + +\begin{minted}[breaklines=true]{fortran} +ngpus = psb_cuda_getDevice() +\end{minted} + +Get device in use by current process. + +\subsection*{psb\_cuda\_setDevice } +\addcontentsline{toc}{subsection}{psb\_cuda\_setDevice} + +\begin{minted}[breaklines=true]{fortran} +info = psb_cuda_setDevice(dev) +\end{minted} + +Set device to be used by current process. + +\subsection*{psb\_cuda\_DeviceHasUVA } +\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceHasUVA} + +\begin{minted}[breaklines=true]{fortran} +hasUva = psb_cuda_DeviceHasUVA() +\end{minted} + +Returns true if device currently in use supports UVA (Unified Virtual Addressing). + +\subsection*{psb\_cuda\_WarpSize } +\addcontentsline{toc}{subsection}{psb\_cuda\_WarpSize} + +\begin{minted}[breaklines=true]{fortran} +nw = psb_cuda_WarpSize() +\end{minted} + +Returns the warp size. + + +\subsection*{psb\_cuda\_MultiProcessors } +\addcontentsline{toc}{subsection}{psb\_cuda\_MultiProcessors} + +\begin{minted}[breaklines=true]{fortran} +nmp = psb_cuda_MultiProcessors() +\end{minted} + +Returns the number of multiprocessors in the CUDA device. + +\subsection*{psb\_cuda\_MaxThreadsPerMP } +\addcontentsline{toc}{subsection}{psb\_cuda\_MaxThreadsPerMP} + +\begin{minted}[breaklines=true]{fortran} +nt = psb_cuda_MaxThreadsPerMP() +\end{minted} + +Returns the maximum number of threads per multiprocessor. + + +\subsection*{psb\_cuda\_MaxRegistersPerBlock } +\addcontentsline{toc}{subsection}{psb\_cuda\_MaxRegisterPerBlock} + +\begin{minted}[breaklines=true]{fortran} +nr = psb_cuda_MaxRegistersPerBlock() +\end{minted} + +Returns the maximum number of register per thread block. + + +\subsection*{psb\_cuda\_MemoryClockRate } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryClockRate} + +\begin{minted}[breaklines=true]{fortran} +cl = psb_cuda_MemoryClockRate() +\end{minted} + +Returns the memory clock rate in KHz, as an integer. + +\subsection*{psb\_cuda\_MemoryBusWidth } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryBusWidth} + +\begin{minted}[breaklines=true]{fortran} +nb = psb_cuda_MemoryBusWidth() +\end{minted} + +Returns the memory bus width in bits. + +\subsection*{psb\_cuda\_MemoryPeakBandwidth } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryPeakBandwidth} + +\begin{minted}[breaklines=true]{fortran} +bw = psb_cuda_MemoryPeakBandwidth() +\end{minted} +Returns the peak memory bandwidth in MB/s (real double precision). + + + diff --git a/docs/src/ext-intro.tex b/docs/src/ext-intro.tex new file mode 100644 index 00000000..ef9b882d --- /dev/null +++ b/docs/src/ext-intro.tex @@ -0,0 +1,412 @@ +\section{Extensions}\label{sec:ext-intro} + +The EXT, CUDA and RSB subdirectories contains a set of extensions to the base +library. The extensions provide additional storage formats beyond the +ones already contained in the base library, as well as interfaces +to: +\begin{description} +\item[SPGPU] a CUDA library originally published as + \url{https://code.google.com/p/spgpu/} and now included in the + \verb|cuda| subdir, for computations on NVIDIA GPUs; +\item[LIBRSB] \url{http://sourceforge.net/projects/librsb/}, for + computations on multicore parallel machines. +\end{description} +The infrastructure laid out in the base library to allow for these +extensions is detailed in the references~\cite{DesPat:11,CaFiRo:2014,Sparse03}; +the CUDA-specific data formats are described in~\cite{OurTechRep}. + + +\subsection{Using the extensions} +\label{sec:ext-appstruct} +A sample application using the PSBLAS extensions will contain the +following steps: +\begin{itemize} +\item \verb|USE| the appropriat modules (\verb|psb_ext_mod|, + \verb|psb_cuda_mod|); +\item Declare a \emph{mold} variable of the necessary type + (e.g. \verb|psb_d_ell_sparse_mat|, \verb|psb_d_hlg_sparse_mat|, + \verb|psb_d_vect_cuda|); +\item Pass the mold variable to the base library interface where + needed to ensure the appropriate dynamic type. +\end{itemize} +Suppose you want to use the CUDA-enabled ELLPACK data structure; you +would use a piece of code like this (and don't forget, you need +CUDA-side vectors along with the matrices): +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} +program my_cuda_test + use psb_base_mod + use psb_util_mod + use psb_ext_mod + use psb_cuda_mod + type(psb_dspmat_type) :: a, agpu + type(psb_d_vect_type) :: x, xg, bg + + real(psb_dpk_), allocatable :: xtmp(:) + type(psb_d_vect_cuda) :: vmold + type(psb_d_elg_sparse_mat) :: aelg + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + call psb_cuda_init(ctxt, iam) + + + ! My own home-grown matrix generator + call gen_matrix(ctxt,idim,desc_a,a,x,info) + if (info /= 0) goto 9999 + + call a%cscnv(agpu,info,mold=aelg) + if (info /= 0) goto 9999 + xtmp = x%get_vect() + call xg%bld(xtmp,mold=vmold) + call bg%bld(size(xtmp),mold=vmold) + + ! Do sparse MV + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + + +9999 continue + if (info == 0) then + write(*,*) '42' + else + write(*,*) 'Something went wrong ',info + end if + + + call psb_cuda_exit() + call psb_exit(ctxt) + stop +end program my_cuda_test +\end{minted} +A full example of this strategy can be seen in the +\texttt{test/ext/kernel} and \texttt{test/\-cuda/\-kernel} subdirectories, +where we provide sample programs +to test the speed of the sparse matrix-vector product with the various +data structures included in the library. + + +\subsection{Extensions' Data Structures} +\label{sec:ext-datastruct} +%\ifthenelse{\boolean{mtc}}{\minitoc}{} + +Access to the facilities provided by the EXT library is mainly +achieved through the data types that are provided within. +The data classes are derived from the base classes in PSBLAS, through +the Fortran~2003 mechanism of \emph{type extension}~\cite{MRC:11}. + +The data classes are divided between the general purpose CPU +extensions, the GPU interfaces and the RSB interfaces. +In the description we will make use of the notation introduced in +Table~\ref{tab:notation}. + +\begin{table}[ht] +\caption{Notation for parameters describing a sparse matrix} +\begin{center} +{\footnotesize +\begin{tabular}{ll} +\hline +Name & Description \\ +\hline +M & Number of rows in matrix \\ +N & Number of columns in matrix \\ +NZ & Number of nonzeros in matrix \\ +AVGNZR & Average number of nonzeros per row \\ +MAXNZR & Maximum number of nonzeros per row \\ +NDIAG & Numero of nonzero diagonals\\ +AS & Coefficients array \\ +IA & Row indices array \\ +JA & Column indices array \\ +IRP & Row start pointers array \\ +JCP & Column start pointers array \\ +NZR & Number of nonzeros per row array \\ +OFFSET & Offset for diagonals \\ +\hline +\end{tabular} +} +\end{center} +\label{tab:notation} +\end{table} + +\begin{figure}[ht] + \centering +% \includegraphics[width=5.2cm]{figures/mat.eps} + \includegraphics[width=5.2cm]{figures/mat.pdf} + \caption{Example of sparse matrix} + \label{fig:dense} +\end{figure} + +\subsection{CPU-class extensions} + + +\subsubsection*{ELLPACK} + +The ELLPACK/ITPACK format (shown in Figure~\ref{fig:ell}) +comprises two 2-dimensional arrays \verb|AS| and +\verb|JA| with \verb|M| rows and \verb|MAXNZR| columns, where +\verb|MAXNZR| is the maximum +number of nonzeros in any row~\cite{ELLPACK}. +Each row of the arrays \verb|AS| and \verb|JA| contains the +coefficients and column indices; rows shorter than +\verb|MAXNZR| are padded with zero coefficients and appropriate column +indices, e.g. the last valid one found in the same row. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{figures/ell.eps} + \includegraphics[width=8.2cm]{figures/ell.pdf} + \caption{ELLPACK compression of matrix in Figure~\ref{fig:dense}} + \label{fig:ell} +\end{figure} + + +\begin{algorithm} +\lstset{language=Fortran} +\small + \begin{lstlisting} + do i=1,n + t=0 + do j=1,maxnzr + t = t + as(i,j)*x(ja(i,j)) + end do + y(i) = t + end do + \end{lstlisting} + \caption{\label{alg:ell} Matrix-Vector product in ELL format} +\end{algorithm} +The matrix-vector product $y=Ax$ can be computed with the code shown in +Alg.~\ref{alg:ell}; it costs one memory write per outer iteration, +plus three memory reads and two floating-point operations per inner +iteration. + +Unless all rows have exactly the same number of nonzeros, some of the +coefficients in the \verb|AS| array will be zeros; therefore this +data structure will have an overhead both in terms of memory space +and redundant operations (multiplications by zero). The overhead can +be acceptable if: +\begin{enumerate} +\item The maximum number of nonzeros per row is not much larger than + the average; +\item The regularity of the data structure allows for faster code, + e.g. by allowing vectorization, thereby offsetting the additional + storage requirements. +\end{enumerate} +In the extreme case where the input matrix has one full row, the +ELLPACK structure would require more memory than the normal 2D array +storage. The ELLPACK storage format was very popular in the vector +computing days; in modern CPUs it is not quite as popular, but it +is the basis for many GPU formats. + +The relevant data type is \verb|psb_T_ell_sparse_mat|: +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! + + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + .... + end type psb_d_ell_sparse_mat +\end{minted} + + +\subsubsection*{Hacked ELLPACK} + +The \textit{hacked ELLPACK} (\textbf{HLL}) format +alleviates the main problem of the ELLPACK format, that is, +the amount of memory required by padding for sparse matrices in +which the maximum row length is larger than the average. + +The number of elements allocated to padding is $[(m*maxNR) - +(m*avgNR) = m*(maxNR-avgNR)]$ +for both \verb|AS| and \verb|JA| arrays, +where $m$ is equal to the number of rows of the matrix, $maxNR$ is the +maximum number of nonzero elements +in every row and $avgNR$ is the average number of nonzeros. +Therefore a single densely populated row can seriously affect the +total size of the allocation. + +To limit this effect, in the HLL format we break the original matrix +into equally sized groups of rows (called \textit{hacks}), and then store +these groups as independent matrices in ELLPACK format. +The groups can be arranged selecting rows in an arbitrarily manner; +indeed, if the rows are sorted by decreasing number of nonzeros we +obtain essentially the JAgged Diagonals format. +If the rows are not in the original order, then an additional vector +\textit{rIdx} is required, storing the actual row index for each row +in the data structure. + +The multiple ELLPACK-like buffers are stacked together inside a +single, one dimensional array; +an additional vector \textit{hackOffsets} is provided to keep track +of the individual submatrices. +All hacks have the same number of rows \textit{hackSize}; hence, +the \textit{hackOffsets} vector is an array of +$(m/hackSize)+1$ elements, each one pointing to the first index of a +submatrix inside the stacked \textit{cM}/\textit{rP} buffers, plus an +additional element pointing past the end of the last block, where the +next one would begin. +We thus have the property that +the elements of the $k$-th \textit{hack} are stored between \verb|hackOffsets[k]| and +\verb|hackOffsets[k+1]|, similarly to what happens in the CSR format. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{../figures/hll.eps} + \includegraphics[width=.72\textwidth]{../figures/hll.pdf} + \caption{Hacked ELLPACK compression of matrix in Figure~\ref{fig:dense}} + \label{fig:hll} +\end{figure} + +With this data structure a very long row only affects one hack, and +therefore the additional memory is limited to the hack in which the +row appears. + +The relevant data type is \verb|psb_T_hll_sparse_mat|: +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! + integer(psb_ipk_) :: hksz + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + .... + end type +\end{minted} + +\subsubsection*{Diagonal storage} + + +The DIAgonal (DIA) format (shown in Figure~\ref{fig:dia}) +has a 2-dimensional array \verb|AS| containing in each column the +coefficients along a diagonal of the matrix, and an integer array +\verb|OFFSET| that determines where each diagonal starts. The +diagonals in \verb|AS| are padded with zeros as necessary. + +The code to compute the matrix-vector product $y=Ax$ is shown in Alg.~\ref{alg:dia}; +it costs one memory read per outer iteration, +plus three memory reads, one memory write and two floating-point +operations per inner iteration. The accesses to \verb|AS| and +\verb|x| are in strict sequential order, therefore no indirect +addressing is required. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{figures/dia.eps} + \includegraphics[width=.72\textwidth]{figures/dia.pdf} + \caption{DIA compression of matrix in Figure~\ref{fig:dense}} + \label{fig:dia} +\end{figure} + + +\begin{algorithm} +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + do j=1,ndiag + if (offset(j) > 0) then + ir1 = 1; ir2 = m - offset(j); + else + ir1 = 1 - offset(j); ir2 = m; + end if + do i=ir1,ir2 + y(i) = y(i) + alpha*as(i,j)*x(i+offset(j)) + end do + end do +\end{minted} + \caption{\label{alg:dia} Matrix-Vector product in DIA format} +\end{algorithm} + + +The relevant data type is \verb|psb_T_dia_sparse_mat|: +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + end type +\end{minted} + + + +\subsubsection*{Hacked DIA} + +Storage by DIAgonals is an attractive option for matrices whose +coefficients are located on a small set of diagonals, since they do +away with storing explicitly the indices and therefore reduce +significantly memory traffic. However, having a few coefficients +outside of the main set of diagonals may significantly increase the +amount of needed padding; moreover, while the DIA code is easily +vectorized, it does not necessarily make optimal use of the memory +hierarchy. While processing each diagonal we are updating entries in +the output vector \verb|y|, which is then accessed multiple times; if +the vector \verb|y| is too large to remain in the cache memory, the +associated cache miss penalty is paid multiple times. + +The \textit{hacked DIA} (\textbf{HDIA}) format was designed to contain +the amount of padding, by breaking the original matrix +into equally sized groups of rows (\textit{hacks}), and then storing +these groups as independent matrices in DIA format. This approach is +similar to that of HLL, and requires using an offset vector for each +submatrix. Again, similarly to HLL, the various submatrices are +stacked inside a linear array to improve memory management. The fact +that the matrix is accessed in slices helps in reducing cache misses, +especially regarding accesses to the %output +vector \verb|y|. + + +An additional vector \textit{hackOffsets} is provided to complete +the matrix format; given that \textit{hackSize} is the number of rows of each hack, +the \textit{hackOffsets} vector is made by an array of +$(m/hackSize)+1$ elements, pointing to the first diagonal offset of a +submatrix inside the stacked \textit{offsets} buffers, plus an +additional element equal to the number of nonzero diagonals in the whole matrix. +We thus have the property that +the number of diagonals of the $k$-th \textit{hack} is given by +\textit{hackOffsets[k+1] - hackOffsets[k]}. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{../figures/hdia.eps} + \includegraphics[width=.72\textwidth]{../figures/hdia.pdf} + \caption{Hacked DIA compression of matrix in Figure~\ref{fig:dense}} + \label{fig:hdia} +\end{figure} + +The relevant data type is \verb|psb_T_hdia_sparse_mat|: +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type pm + real(psb_dpk_), allocatable :: data(:,:) + end type pm + + type po + integer(psb_ipk_), allocatable :: off(:) + end type po + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format, extended. + ! + + type(pm), allocatable :: hdia(:) + type(po), allocatable :: offset(:) + integer(psb_ipk_) :: nblocks, nzeros + integer(psb_ipk_) :: hack = 64 + integer(psb_long_int_k_) :: dim=0 + + contains + .... + end type +\end{minted} + + diff --git a/docs/src/figures/dia.pdf b/docs/src/figures/dia.pdf new file mode 100644 index 0000000000000000000000000000000000000000..04b1777d6e1eab08287709555d15241aeb9a371d GIT binary patch literal 15952 zcmb`u1z20#(g0dYks<|3u|grZOA_4O-JK%AArM>&w6sv%iWVqVio08JcXuf6F2&jx zdV20X=iK}M@4fH)lMk}@teLfDjb*ZD7PYdtBr}kO9gVtcFCqht1HcA=npmUp^RvoB z>@A!v0i1A%DyyWGtuq7$V3o8rc7}*UOrd5FK|wSpXBfoT4$VDzLATZ((uFf{s5Kvn z;XZNmRMwBGIp`bF!vJHtjC~d1=Y%}NhxK{&iY^P2-L3!xnL;`h(;3eEwhhh3@WdPp_1$ubs#Bi#}ZInAYpA z+xulJb7e&j?Tk$%3!%Hbw7X)WHZ0|eH40z3o0uELK4+}MLjb`VNb)XYb@1-Gd z9#ZhIxTX*>l#@~pKs729dlaUG4P`VaOQ;?wJDqhru1!~|SZT=oWP061IHMw4WW+(! zM!(_uu8Ld2sAz?T)>}(a`2jGQobq<%L-1Gcg5?heHP$J~N3R^;NR9-bhA)PXdKt4z z(pO2wjKiLW!Gd!QRa9<gbbC>q+^0AJ@3}US>`K`2o>4M$fEI@L zN>n)#`zljuYtDN*?nb5>Ik164sz|%Qrg&LbRJj)Tb$TR@Y8Hs*xmK4nI7HG(`mMm` zE0hZk_M+}eqbt=bv$UI7R_B#$uo;<8^pS(f}MLmhY0U)=dA{hcdebrZn#kiM#>#h^7C=;a2=@XOi;hIG}KCz zW$0&28l1=uCO3-W8K$?iMfn_h#lGfXKRww{7r5}i@k%cFX)gyS!r)jKlQ70RR^%r7 zrSOG_O}yA#a0(((irbfl0mdOlx{e9eWVAwlB}RMBvP_#Vo9C*O?>AyKua_uywvguU zG~1GVp2}Iw$ybV8ffknpH|#zSm&mQ=hBp>*HwN@|RDy(mfM%CyStvK^&lgWz_ZExm zE<`i$xW=FhS0;=rT%o=1FE>eMJ6001-N{stzDCrrFA|8#f5ds#nPc?%lu`CeY;S~H zOkXW~=$rz0Nk1D8t9=3Oxs3$Z^#J^Xx|oqor ztX`0POtwa|Q>oBFR@$Zb`t8{c?`$U`^Z6={)LOh42eY#wf)Um^&Ooo|o#Nblsz!i& z$f@gIEgdLqS^M*A8ui&Rd~F<#A_gkpJc6xUkS?x!=~bbc$yT-EV1v!5in7o#+d#rjY2Z3PXTNgkmOLY+$a&HT4q3d;e}BpWy=T;T(;i_`uRCvyiymkPCzE$9=n?YS*3!*R`KxzVrA#*UsJ>1O7kPR@e7$ zH?F?#h-D5Od1n*L9boY8{6Fy>%&lQ(*iso(NMuguu`NTAh!q?b7?*pBybt81FdqKmrFXPu0`T<6h> zpGjZXhO=WQ#jvW?Mn*6lCta5Bgu%5D?iMP-Yr-s!2K)*oQ}(sP_IC#+eIeavmMe;h15rfy!U3qr-(P|- z{6Q)zX&7JY9Lf^Yf&Cd)8$;&vs8L&X_;ma8e z>B8n*c}wRIE_~D5%y#5W%T6tO$h)YrX(T_2?1{R*(e(xN~1xw7n?VRwSe^W zkle4be3TJq+}8>o&XTE{SGmWxY_VJ3031t97|Y4N*@hp#JuRf+Nj_4z>w7jxj2L^c zC<-jBveC_Xx>p%Zb10Zw`mFqOBu z(R;&CoOZp#KcC{Iq-W9aEl621zkQ=r75j6JddS3YkB?WH!wUbyqa*tZ9*{6ipltlu zPCU|r+4c9=QV)d<>BTbvRBztQYipYqW>O##K5JI!0LqH2BdN1<48Wov+&<};2*8LW z09RtqKNmKOdHCEf5)}<*YMgk?J>)4KA7Tg+3{2Q zZCd&TCyZdG9lO&;1TuM8#mB>^NV4J816;=)W5eA)s;+2$2=SC(v3NwQs0`=Sr=^U< zhSEqmHP|(A!Sy^8BEuz+bYpj_P;2**)!+jZKE3OyhKjp-{S}MRvhCbP{YhKKM-`3n z{%=z(^Hi!9NM4&8WXHYJrrU4iH?JKxe_R9W0wXr3$OQQXk(x|yWO?&~7yH_bIvaV- z>Y}S11U>4Yu6Y5QU!zvZzTOlxV+u0i8@{SiqJ=cN&~Ss`<>6hukLfe z`^@kD6U4y=Li-J4|95_4-!cz zoE-QEJv`lH6@%ZyXLJJGr>UxFaL{kI?os|oTj8_8`2zf6%n86MXJzID&_latTNTX! z0H49-e#*a)0{=z&n^Mf!+1M6p@#hr3xmSQV8=JvL06^}4tj+cJ3I4qeG*%^L1prV` z@HdmccJ({CR^>vwr&Im+7JhB>&rxo)U#qaH0@=`j_d@+E;-7kf%khu2{I6O0pJ_Xg zi%U@OU-`K~1n@~407Jq}u>}(lG|q%0S7&CaQQ7za0T=iPn_LWPgeVjEWNC%SclTr1 zmI|zBS6J8CA{QHfFTXcG#Qosc{a~kIoCS&b;&VJ+YPSY%7cVnx7vqZobjVp zc~TyjdXZ=u&ghY^oq&XtsN8p#6B=!{;XRrm6oaMF>0+i+=IsO~tijkfW&3CA9X#KN z-scHOI^-s+m~6ODwj7Qwi@B;iyLx!!tCMlB*1y~0y*PdgN7UE}a&H){qKc{-A{vb9 zR(234MTna!)XvymSxrHm^?pt=4yI0Ho8-5QrUNsjJ z=X-}xhq*xRXa0*~|6(Bji@tdNrmskaC;kBJcbCa;h~G=v$)gh2JC0;qc_Fm7lwd#J z$*4YLvnDWTcudy6L}^oRv$@b)E|olhu4$h_M!|H7k9kBYxv67#)RK4(2sR&m4cS)+ zyrg~5^{9kz>HN_Q`0KDj(^P_oAGIZ-$_-A!Gf^2EJVc>efe9}WXhi=*aLn80ZnH$j zXlz0p#`dHVSxdafI3N4+l*<@!S00zDP_+k0ei9qS?-{HW;L;0S);f_-jV9RqcFAn~ zBsVy7n!u^cWc5FF_{)(0sYDquNewwsMoAb1q6UFkng3IHz6zjgILe4_mi@IR^G<6NM7M(*F7@O^M_-ba+bzqvr{zvkiO=F+NYwi9pryEc~N)>)`PRj3=buoU~T{> z7l*-pXs~bsaB^_4v!StyIGNrDWcUC)mKr-qL#!+;f8XA(c0UFuMq`zMM?fo65qk?; z2;Asd)tn)AngAX+k%El zoJzf)ioD?wuC!=Afqaexdzu5YU=ErJ>r14*9&t2u?58R4V4XP(iYOMZ0Ebkm-_Y0Y^|`6~V(pv!n? zb1Dc_`EmY(0@tv9Uf|iMErYn`&JbZU`7HJ_lzIO%`pe@_sNmb95vIIMuWd*1RXbiO zBR0m5?WCrG%8M_3)t6pFW5*S%DT_(P&Yp}K^A2{FAM?Na0??&rkF$SkOS>O ziH}wh+wah?8Hoc=$iMo#iN}ej276-|F|B3fvVV$G1>0po$8fbL;&G!I=5 ziAIALq4N>b=4$lbpHTMS5qj}y(#yZ1gPO)U_J`@nL!4$FRz>fdVU-D9uz1CPtO+dM z{7O*guFy~{uQ?%&BPpAWF+E}VxUdy%{@^T}SpH=Fg}%PBOu-_p_uLWJW1u1E7ZbD+zqsU;Yq1Vbd~BI*7N9Vr!tE|7Jc_8yzXqh zWh3-nQ;Mkf+EKLf%B{|9VO6^zL~=)-!fg z{dX8#im#*T7uZLfGp)k5jmL1tJjSTYd-Uz>*`T1WkZ|pW=|Z1i53`C0u2TA zrnaG#Mu%X07vg#x9PqJ(NnwrBt*Ai@^1F|XU3Kl&j&f@TCO zPl8LtZpD7^{1lfl3Hg4g9ey&j*VUl&!7+wCqbZk)-Ysu4=zDR_d#L%{EuQ0-8t_T6 zde#BNVxsXD6UKUhd|;6A(tYAi3e#rR+?7ilT!T(PF(l-_ z_HHdJuw=C&9CPf1=S?2h#fx0wpq`#XQf^Pz+1n&%N z@5ZUb7q(++L-$FiR4==^`iVDdvF0C{P~N;InNw}Fs1?%L5GLPG_MKdtl+#L-LbrOd5sbA*vtcP70-pT5 z>@g4N+(v9d$IQR;zc}=$1mW@>`bTjje6#k>%sW!xm&m8YQX!hO7g-zZcFhanC#?8?7Z9(ns(ir18)oLNf~ZrEoI)Kk}7C@%4AvB zn&u0kQM zg}Q|-%tX{XxsB8&q*PK$r=LaxCt1Z_IUlw5d@r)d%m!?+(Nm>~C3(k1NWOrn*-^<` z$7hyPTRedw?9|~qhJj&&Tx1OjoKyPIpzo&av|Ai+b0Apd?1Q;;8D~>zfA>b^lj*n{ zkqhw-DHd;y|DeXYeagFekw%l%yrP#mByH!~nSB*y*;@_zJ3&<$))ERzBf6g2z0T(9 z?y@C0fiWG(U(VPI`E$Nlua6=cxci^?o$p+6w469ceBor(vVer?`YY<@+FueesGBE@ zC_w6F*j5dhEyU&)1ZwXL_^wIDu}?afrsAfS)H6e_b*(4|QR>${d-pe!jktjB-Z}$S zP}wmFt>M~P=8pyjbMu~;w@FZ0fu~cGCKxGRjG_j3Yz>Iy1^Qb7%uSV5k`{W#!_1rH zo9z7cG|Y57=^XX~vWyd&E`^VcjYsR8bsav^Cdc7(lMq}AJ~gl&w3j8)EFpQs1~ zjaM#LEsp!~o$JaN+DA~@a~BAdUPQ@;?thHU&Ie_h=k)2WzsYxmj8fX+kBYc))qRr* z3*b?R&9P!|;ZS_{JXJTq@ETUoWi61$G`>#vaEbh8H*YGvw!R5a@YVGj))Z_Q1X1=2?z=1+J?%U%~tN85jh9%PKpUl4nK@?9sID}-tx`qlZ+ zfwUrO5yPm?$xnd&^EfS*afVISLPwN(QOeRneP;z5ZNc+A+pNw2B-ENBcbr_E=c>?_ z(dRwQpLPm|-{gydkMmlj1kwv6vrcEfJy#;=RPJ8-SQO4g%mIWkb=ZJTd_6wRym`dQ z-EWZ2CkhU9w5#Yk{kk_!K=h#zx`oe z%~sRE057RNgK=DAiD27yc>YP@ZcPpjhXHfTaP0bFNb+=Qk=9;L2bE@N2ET_SRk;P7 zv>b_Zsh#EP%x>)hs9@`I!j_u%t0b9@%fZ)P@{b3Ri|0}zPgbLRj*X#g+Jsw(=LQzT z4ujL8V%Unr8sCdo#C!-*R0s$y+IqGF*8Cl;wxuB_x%E=rl@G6~1J9)S zQ!v@;hl5t0v#s}QmQY)L^4N-CFJPn3GT0BH86cc)?ik7YfDb|H0ou%K13&L({vMpb zcJlAFWOw_`SAOQrihiOY?PJSJ6s7L61ucOi)UL}WNssCbyqD5`K1@Qse)PRY@O{@v zob!_O4URJX&lXf)tl6G4tcc;jW+jwY!!<{B?TD27VHpcsQ=Fpp5;ojz>vTPS^O$rG zP{&;}tBBL*!tfEmBzu$3<2;4YMW!&--r6`#XA;>|)N)Z*mJpLKgbqU`*ElI*A7x%! zj^U*!wDEeF;5u`PSit|0GazN)UFep+e6;tV2H~M2RAQdm2;~4J8Tfq(aS>@Nj``TG z-I1xGPO9MXW=*6Sqt_QCw2)72JaNX;c#XN4=x#m>!NGAssL%OC_f@NVB00|`JEZN^ zptD=XXoY&RCK8l)xPEFB4)`xA(n!+YcRFY`?Y>Qr+huz8-o@qU$w<0$j;shzh!8zm z?yRVEk9(Aq4_&Cs#MR6PWpz1qiw_RswV7xc4^<{qtieU%eD55Byj7Ssf_T#}B(B>w zG9Ck76J=*St}j^7T$qIlm}NY> zW+v97U3KJY6*Xrf>wj0$`}BPM9Mr5}Z4X5>S!ER^lbG}cvi zP-;7+jekTC{|p)QdS!KWw}KaHpt#`kS;yvrbCdm0Ys2ca;S{#9uN7Qr0(De8_MtZF zyWH4ZJJixo>f7yg+}Co|e38qm<5DM^n{QpTlk-%q#JSPfNF-eZ+!#%Y`6U}qu{3_Z zIpQU+OY_YG3xqd&%`C!#X@tWV%2zo4HyAZTem58ch~o(AXP%Pwz~&;H#I-}PSsBgx z?ep5^HLSo8ntsOI)+#-eX%uY28@9DT%>8e(IR3?mScJD8l-CQ$M%J|tw61#;9CAw+ zm^*B0J_IAU_~HA>e(8A1i{88iUk-g4RTxK3CGI`-EzyUz+mdd~=}y{OIv$-L_=FaY zAGqByU@e`gprbbArmp4=|e zT~$iW85$=_T6Ro@_>s!|r`2*T@*pOar>&8P?I#ag--~I|)9FbsTP}CAuPW|=cKjpF znn8{-vF=r`%Mmr>_i0(?p2ph+_i37#dJz?<<-P`EB(nf-&UCzLQ!-LbKdJIani&7o z)*_?zFOi-$dI)-UG4!C8G*F2PDRmF7d#(7cVAdgo(V+hYT5hNSQL@kb&H>QMa;?Q-F@EaNK{m^>ugVUO6qi{ z?=^TXt4XFu;(ES$p#$Jfoa>rV(ZYIDAQoS4T0~jPXfR|M;QSStKF;%#(9rq&s`1LM z5ZCES>PwGp6|jC7gtK!bxoK? z@IyMxRG8*PLXB`Flrt}T)D z^>i*Yq+^U4sf@kG@L=X2oI2fjdF8lBA!9e-=*nkArVqH%($YYfnhO%Grnz=*pvGk?O^*lsREoi!vEFxvaAwCv?WQw_O?9H}BYo!UD5VtM3b z0Vw6ZP9=mUuBL2W7)1VI(Xs-D@JFMKs-G=_3D5)<=>xtGPnitx?I|*QVDcGi+Dseo z*`y>Jzc6cg|D;;D>(cj%J@yA;Z$b5syY;XW80qL61;5CqSE~$P7H5`SFPVIO*Ctb6 zSX=5-TfK_;(KM-E#5&cxAaEY$D>YBoajxru#eqyPamJ3E)Z2Fc`Z`V1$Z*apEwkX{ zx!$FIrN-p38}e?Xf2@oi3d7Iv8r5JdX}#D{&CeAs0EgxhjoP#i8k$pQ+Bp^Y)qMUJ zFdO#8K;^nGRqQh(paplHqD$^WX`ys%K|Rg+P}2(41?0$#hUkW;^sRIB5~U}b@i^;k893sy(7N28wjnmc$WWm{8sA_CY9 z#Sj?JtW;`QlHY39&x%i6H+;8zcQ?0Ex7J=>%+trTU?q!_y8ey+@cxv<8&HHI zk}1N^rhXwvu&WuaO&cx~mu;2*iW2ACtlqj9+b|j-=cdhN?8FB%RZ7K14B9h(*$0+> zkNw^~i2Z^2U7=aZZ|DKq!sFXl%`tw4Jvbf|SX!4btI|nYt`}eE_LvjYxht_Md^*?+ zYMc|0H=n5a%B176>+_S>wTB~eXW}*&KU_q-B|YNuV?dKg2fE)lOios*=0~)=63tY| z&@>`GO>Z@;nbUnG7!ANBVSJ!LR?m4DR9S^9`Wgx0b##ke?7}Q{V_w&KhYrVJzgaeq zK(auWQM2(xzfypV#`SmfA+%u5zFrl{T>Kc%r!d7gVV~(-(d!eF1-s&Z^!=y}AG&j5 zxydDT>7q_MMoQ&YN+v+n6Ec8>{E$P4Wr^0?=HmSMt{`S=8JjAHdwIipb89_@PrWk~ z9p8TMiROT)Ey1q2MMBVdX^>Q|nofnfZhb@l;gQ-8q~&8I+eFrum^Gd5i9Q04Ua`)I z{SP7AB&u598q{#U7tL6u#I2CHw250hpwpo6l7yXTmoM@6-HMgdFaL-u8$T)h@Oe;= z$X<${4f+}vna*6Z z#t&S1&meUnF^#_K>zlj@VJ*H)2?wCX~P z{FW%rZe>yxX9co}p;e;$paxE?ol)cVICeMDF#$_fqMG~ZtR-hyjKY7*>?MX3`d!pYSpH%qcp_a5^FaZ7@vk$z2k z)6rEY0RVY7p4y$ZotlRB3zY|r!e-10O?ZzaUBAC=6>W>}7&exILO7?_Si|RDC9Kp5kM&v$ViYwV@H^piMB=zRq4#=b%FpkS*+yD zYg^LpClEIaEHlO#$LJS&QUl{6e7VGjR(Y3?`o4QY0;gP_#GQ#AzdJ^PR3qhS$Hm1a zb|@r2?o=>8%sCs`CfaWO5NUU*Tb`0jpF6*%vhuteB5U(9s5FPVD=n^L9+PR|;j=?& z>$Gu8z$!7r2sdJ9*jzLl&<8C}h?<8b>SIMea7K-Uz6TeLRw&($;oC_<;m%>l;agGKelNF^B>oMkvYhFX;9_X?cjUeYWEU_-q@83JVVx$~; zr1`WNbMD_#79yn*<3ACwUX##ROP(&)jEYU=QIs`H<05}mXPwNVB2&qx5gNkq*^k+5 zx2myM@z`ye&%4=NfzljehCcmB3lAy&op91}kf?*i^mucq%nmf+ar+r@E%>}tpsTL1 zKk0d9>hh)|*-t8L|MI-2%6YN&5*n$SUq2^AMe;T2yM9Z?xg1*OTXWJs!n>)?ow};( zux~o)v=weBp#+9`V56NTd^$D4gy&I+VsldjOi66Ig}p!K(0duTr`{E`eKokY!ZZxC zxYA#QR$O%#e+)XIWqm}B`7*5$XG`i&U$S>J#M)8i#K}*C9&E}m72_k5 zPCo5iH~FrD>CCv3chhZ{(Ak!h;`q#+k+y1Jn85MKRy>t}rx>e& zZGA(Vidm2Zcqh2<=feZkhgSck2=|YA_dn`w|1LHC*TUSd5)&JMnF|Quf|s|rI6(is z!UP}qdxePyjrCs@CNALrP+y@qE1fI~Pz|G8%%-$tLh_)!@^7dq;-F1FehQ!V4`6#E|Y!Rk{o-N%w zDh0XpcLg7wV3*kYSDtXna;amBB_%{w9i!9_owL6r6Ru2`nPcE|8Q}!dlqwV=iJ@AJ z3RBJ04k`9djqiV*IceN}b6yu^6)AZhv05nGcfMGC?u?so!Cuj~l`4-PSDn@h9w>ge%t~QjzusM1Z*wuq&B=(h1QgI&Qfi}dHaj`c5&evAIdluO|HKbc zyNuW;OKe;nG(EqvePMdB)o*K62I2E#3>94HmhAR8V4`}Bh+y)4zZoio(Mm|-;5=u{ z<#S2Fi(1{55{dF+1^cy)F>%f?I&y)(O&YqrzZe_RjYt51y=}k&E6W(MqD8RVkKoop z!1hhxHyZ~uSqJ2%hvg0g{10%CJj^Zxzdz+i4O`BP%u2KWys(N?x9%SGrjkYeow%{x z$jE{bJBw<@#ap!xRAE9Ro#Mf;o9n?0q?jiJYU8nHY6^V~)kmA% z&|sVj#(t?EDxummF0^RQXYM0Jboh<}Ze>=7=kyFz``kS5>WRNx9E2%1L-=Lt2UIXw z^BZI&I8U~p;l*^`*~WZY1tG7`buszZ@m47lvVJg^jX}mmrpVR_{XkMt9xxryl;ox= z4sz@W$U5euP43K!%1&)xd}C}|CP@qBKqo9qQk9v~EyW;IxS=w~Q z!ls%%AX!stn`ng`1!IF?Std-hYv%F?32EN&KD^z69zEy}2P)gVq;`NTEs>qxg&s8) zM<=>}RJ?<@T&lk@h#w?cB}m)C&dm&7GWp1=I+ z&m%PQ?8mTl#^)~x9)~fvv55-QJ_=EnE5WD%G=1lv8fwJEU!>OQZaGyxpoz?>;CQ_w zJ1jqYaH*b~TGgV)yt^!yjJSY}Tfu5)VW1(3GRh9t+Kwmpo4k8Ux-b5@Lu5UUyWBn~ z!zIM*i*)P3c>bn$3OzJ~Le_zV)>*bHz|$~a)v(No+pQ8jHJCXWAB~r{nM?Nx(O1un zI5MH-!`@S8G|faN^SG%x5Jb!C+Cdl3vkQo$$^ zusp|%YFcCm3J|JIzUJY`&+yer<@;I#%64ULSXx~;lJ(LW%AfTzHZ@)z1!`yn8DY}e zH!?B1s<8W7VTZ3FpUaA@d+#EU1mw6dK@*2^+8WHa*PoAxjon%a!Q0a_j^wvts zgr}jpmf5HEK*;N&sa4f2DbY)+1H0PI!0EO$@e_rls&c8Ml$8bKXWA(le(rg}Cu_hoXEGg&_3eB)xRdf{2b&!&?6 zYMb>9v!#~wA=2RdA0b?OI$A?RMBco_!zg_VogCnBuT{K`g^NTWW2_mrtuo{2tpWjWs7T zf6s@IH^0I6CiE44X_6))KmCHR6x$(~Xz!(IW1h^g(nB+ztjetNz}-Etf5LaomE`3? z!2|tG;gyl9b94gE1f#*TPCaS&nAjt~(-ise+=EhQOdv z0MBMPdJ40MIh*}wh!Vf+k9AtI8={$V4_7hO9m8r1DJ@rPYX=AGUJt`EU0XMI-wWIZ zn*vly?YHtdIUE<1v#PH(bQdt6PnzcE>$+Bh3O~L?)?JXdt(-nKP{av?2nxul4=9VY zZrZ-lO+=G+cyWLkUAWpv68&<1eGSK!tItSnQFCTLtE4TYi$g?EGb#JPa#i%hiy8g6 zhV&4|sZ`(;lm1{=D9M+T0foDIC95zCeY{(lNQ^nhtaTJ=E~9K&_lRB7#2_OWqa!lCh;Yoc()6s9g{E^Q zH2r}6d}ziGLjnvD@|wiFi6YN4BShbqmg85m${!h}nIX@j*3P7EvHV7C<{Cf3GIQa6 z>Y-;P!bB;+!+b{6N_TW?;riyuZDlPFY;N%aFZb%}dMG4Z(ON+XqdKJSV3*L-gnwtq z@aEM7metNFc@`%&X*El#Yo)b z2)^FN*25Ln`YOJ|F#S6P)i(4*JSp@ak7{-Tza}B*P3-E3)?|d>CayJI&;BGJBjDeu zV8QR<>*$cM#hj#YZSfUp))10|9vL2ao5<(fG{3GXaGJ89&sirSCLtE!1%rW3XTt~< z0w>)+n~N_qk6yT29FN_uqG50Ppd%=D`pz9v{=n}QLLzg+VJae6r@!5AD~EANCip6C zELCk|8P?FVeOEe|yM<&oz2xAp9AEYQD1;XO{qWUvy)Pn*x0}a*N&NrPLiwx5?9YzY zfA^(nE6D$uB?G~n9RJFa`Gwpaj7@DI&HxjLg_XSk?P2{_T7Z?A0Ie2>0-J(^7{t;_ z+7kv*^Hfwf^|UeNF{2d{MB^7ShPM$znB9#Xn9YA3A9Di0o6H0##8l-aDee*6dEMH0CziEdnaCZ0oq?AyzuLLFe@$K7mBlu0IjxyGQb%MwY9Nw27p+2Sb*?GH70;L z4Bi#z28G!GfGnJ}@F`(t=De!#p5i|ygx?9!S~@#B@UpVHxw)~pfmxt13sxWx4-YFF zh!q54hGQ^0dDuG}yEEH6QNq{z#fX*mzsT&JSbp)uVhXimwbEtGgbPBRlOHf}Cvh&hJ|GY|-YFtfp5%wTplpc$CWoD0lt`sZ>E ze_``)>%j-$3xeTtfM4;#o2JcNOd&7Y*_qkE%pe|hAea}-!wceKWCQZDvGKG10~4-% zIH?%?_#pf^;r|EHzhM4_6be&^n}ndLu@wvow*VlR1qh(~t?yqz3z)HkrIo4Ez1r#j zVB*(IzX|zS@74Aff&E{L(AYsi;eQvRo!z~a-{bL$Lrq=m;60c!;{Sha=>J;nW~TpQ z*ue#6`^#a>Oj#kekbB;o;F<@*IW{xpHHX6NjN#^P?BD=z7BzN;djTulOlbxA;e*ar z&bAQ2U+ySk>-@)st<}AI@!A^OTL{p)Gn+xoja_V=X$3Xxe_@;bp$%Ir!9Om3(Xsw} zuK%K|Um&=?6j(EZJY^#4a6(x<2Y z*Kr1S7{naj@C@WZyAN!CyzU!~!5jc{z;76c8whXLzJCGif5U(rAa-~&{oi10ATAF0 z*V8{>oNWK3g(Cprk9_`t$HDy{cpz?WxHSKc2L^%RZPx#Q@o>Srl>Y(ay-2I2wz=RSBi{sYDi>6Q+W?oMfxmM#eqq>%>sHXhHp zo_p?pzwdvZyY>dwe&3mSXHBiy>z#p4SzMA8%*KvI*SGUJ2a5v$0@xc{Ve#_=<<0CY zTr2^c@DWv@B+SOe%n1OLv@voq6E`!lH#HL!#Bz3VGBdKp@&KH1Bw!-)}0!QG}+hT<09Z#Q=l zqN|Ja_3JjD*%TG>N`GYUyEJ7qN2_9bIhA%8)niJ@R6{PGO0zX>tZP7=3hnuL!y3E% z$+x(-fX;tPKFSph!_zz~EDr{W5Zi%EAQCo>{+F zpjhKBpGoZHR&3#P>7pBi%7ZoIj&~v=#?H%Fn{zp2ZW@IT&zov`f-RpO&XJO~4AfE6 zg;fLJ0XS5URcm{KCpAQ)-&y-J(;^~BrvJ&UCfdsne@d3hE=n^^QY

zaIf@Y!)|UoGimjB35xT+l_=4)xCR@9c`80 zC98>VVWEy__5zXEu1Oe1fMSOdFv*vvgOLLZM=cFF7k*>z+;Cyk2pyxa(3d2~w{TV$ z+0H_%(_DyZ&$!JwnwyAwVz<2JFr|1X*v+OKc2>rhpLyx4S|8IP+-$<4)`AyvLWFtj zXR@3GjX4=EUy56jlMYtAkSps}1>YMl-K4tp@y_!zjKYVtvb4wvvwK$I4!G>aRgK4Y z>V;!e7oi#nHXuMll35oiK@2a)x@AfpYgJV%+lSFTpdx*x`juG``4f5jTp>wF%XxXs z@C0FIzZ{xybguR&`8~YBaPk)i1(J*{?|2xGtrFEth(%bRUOI@G(C_DztUQc=*pdec zvqlP>ofO^_n96Nt#<0_DtQU7hQ)4Ulrb;qG?qlLutt=Y-u-54aIKB_=F}E6 z+2h)%pR>JMwS)Xdb@(7sQ6p8h#CRljiY}%%w%SZTkTGJ+fJ7p`XB8MhbDBk?lbt9r z&&a#{JRR8oPGhwU)%24SC$zy5vC%TP&KlEcEJv>yhB;n)-hIA)&OL^-fP@LdEJK?u z(+w!meZg~SjNLEs-TxX$->Gkj=pYo>y_-y?El7~ns`77blE z@|Lj}JmePV3SnNejp3T0UdEL=rWay;!A;s{ovNLDbRfd#Z)y+wRz%`d&{!rRo{tOucHaF=q;>pEto5>-bHPzuBEgv1gfWvKmcH9pV}WTM^P;s$}SX33bM36u+|jD z*UWbh_}VJ@@!IMpRwG*_0!SJyNtqw2u%EIN5r^w9N7^6vvNRgv^P;0u_?aIuW-a6f z|5GVK81_6t{Y>UdYB>A&Xs5(edZ%UMtG<{T!z~EX+Rt>wao93EF?EpfnfrXhw$%~D z>6!`HKaAAiBjCTpyQ)7=DxAKOd3hCdVPc*i78Hz$ree~oBP~{tHNb2lt2*w_FC&KE zdTfjs^SbA&ZN*CNxpo`h6*zTQg(O&QEp?sP{wl(b&x0EndIu z?`>YX59pV^wj+BTp6i^<^~j6YKkF=6SbYjp+!n*B(D~AK=Ts+jiPpjE3!pGed+j)B zC_VSoqwUd2edT=aQq^({r6l6hIk}|cE;?VBv+Kl_s z>m9Y*;$XS#6iM%L2x)n`-f-?Z#9N^mD^dwJx%WUuw4n0hBa*+jV#Bv{bQH9WtFA8& z;+15clxbt6 zGgi)S?0n|cTruoz^|hX?mOtijWx{epq%iTMv#<>{#ULbj@!LtVB_qOPTH7Z6R^_KY zY)JU!Ltp}Nfk+T%=m)-bpL^d_K2cjZS5s<;dSdT=qcg{QRav!Xzi;k<#@CgO5cKl!@v7QtIxTGGtU^jFFQKf6nl?vkUs zzYqvF80*&<`@d2sbx#K~pt6yL8BoE@6lNr9?*Y&Q!Kc~Txd0Flhdvg3(#g!u1pvKE zvao=vX3qAmP9|o~@bpXB$=*cG%ta3fPtX8B8Cy6NF#u5A%)koSR05}ELJCb+QexZW@Lj8p)X5?aIV{h>%iC?TKn7J64!e`)@{3koV z2yp$Kz`rgF3#g>5000XL{<+e>xCE+lVcp@W{>H~|B3I?c;{N;W-N!#J5UA#A>~g1) zpVGXOiH*IJnuC#v8BpBJ4Q65{!t-7ZFbE4MZ3eTjbOAshTv$L+n2WQr znUk2kt%JSYozlQqKpD7DogD1%l45|1ldBnA?mrV{pqQA9y1Jy1#~-OO7M%WF$_&@G z?VWDr0IYCi`0}o{c5sb=L4tyRrrYe?f`Y$n?Djt%d^%|5|D0GsLIB*U}glp^9J#qvhvs!qnSLL<)#F`EGr8jQizR>h@s>Y-TikBoN; z4^A>lm~=9EAz#I3@C_iGIzj<3)psuJD-|LBoL`n2wYE)IezbjgqeN2J(eP)9^pTtoYbI?UG0S<%d0)!x>~PFYPs{ii31n>m{}!5m!dodDpU zMhsN9*RX@#=@bC|X>UJ0;3w>z4LCWwh*=sr0ob{L@~2B ze;b24`JdLR4mYej&-~5cf4hMHqAs4lsViE5unT}6{9LgU2WgBr#mLgwBzx^GDWJlX z5$PUvaJ@VA%ix~*&w)NyV$i2Rt$m2?Bc`vUrdDsbFcT~2aa@81JiGL)(ueszy|uV6 z16@$}_M=gjM`%9O|2WF;X=s1`WvsJ;}y%=#FXc8w|@;tv0yI0x3 z819V-8#CA7+Tp;j& zw~IT#Uqboon*L?GKgFSFWNY@{ja?LepVEVe4p#WRg%ipR;N;@azx(2E;SAv9;9!S` zMG?~}|;MWaQb1}2k1n|I-6kyKI zFguGsA}tpeJi6XVA8yKK5CHh+6Nlf7{m+{$JaGQg*s*>EColjE21D5Y3{_m*Ag-0}zt4(77i+4UY^UpA7MfTYetrP}t3O>QzTbuXgb%T(hqh zS`dwVjZjn0W^Y(&E<62$57lF+K#Z3$=y-(%<>)v~o8T5Q*bUq{A^blHnT#5q)eBl5rZUnX(`Lkr1!B!p*wvF=XBhi*z-Y1*<2n( zPm!fQ*J~>@&Q^$c8~PDzs+C%dhQIHu?i~3{hnO9#+l0KAPjF?FtgyaeEW{1CUlzrl`%(YkbK*yN-#LHi zcxU!%;^mn{Sg~Lyc{pRl{2{n!6z$t*5|32Pllva`e}peX1z;H@21H!pn}K+-wrbgt zy#{h~$3?rgHL|`K%3ew4WG`@2d(_hdj~!4?Khr+wz&k)LFjA~b^y%@rR`?ME(jgA) znN($YtkO=%5}>L3Vt!x_Z`>QN!`cDM1a!X|Cb_SAV?WDL5-<+kVZTaV{&*D>u_|Cf zYvMg3q%EZIfaqFNh9yOnH|>M0 zHczqflbOYMw<_q4{wZo^N79kqk{FCss8X-ZwIjYy4+SfBDCa)t~`A-uc*rD%y_&439<`uth$q;c1HlF!Dn3Nw+V@lBQJ`yG6TY%Nq&niD4M^{wYD zk8-~w;i_D+X7mWHXq|J=GMJ~g%=V0H<37nrNvz%~N&J{w6Hu#SW$|ssOE>$*CiTJu)@{QNWnTJ~E_UGJC))ko+Z z@>V5>u&xweAYV_fI`NM@Jf}WAO1LgqfwqL-1lE5Rbt8;x)MxoJWWUyFm~Z0&_$iyr+QTL%nU z7Odg%vZcXC`LgZCPi8sal45o58_~_)=p1TRow2(C&5xEbnpdD~mF=HBy@PUkfsTpM zU3R7t9?>*=bn*czPqBB;LH!;`exZoM{2gZ|IM8vci)75gQShnUUib0=$WpmCtcV5e z+l;5%Lq8Sg5KjI|AD zM=Lb{zWUjt^)A9wdg6(t^GH>J1!PZm~GsnUTUNU59dAv5Zka2i^P>(NmDou~g zF~Q`8JJpdnUK zTy2JRctn9>%2Vy=OfDua3uCuLJ}9jK>!$=7I=+XedEJK|Pdq5c00? zyrq)F++zLj;@`F48orxm$diSdn?5lsdig*>$k@XN*H}Zpbj{gAgQM?LDg?%M+s?ib zUEnQ2#EOgOQB_Tu5c~dY2y1|#qsRFhlb>JkotQsG4jd*|o6i?ptPPk1biqi~-HEhpQ3DWsEo zmcYICF`sn@Apzu@aDykm3HuHAgp)+U7j;)UErI=TeHF48_PB-4P3`Thm`OSI!XhlI z6s%Uc!dP-29O+u`Axi9i3OfifrU9Pz1-MIO%96=idWM((>X4>hv_ z#Gv=R1)RT$nW3{YoQn)Qa-An@#oRMY?0#A(?ZqZnrhX>7-Zv6>R26o(rCNTS!dj`4 zP}(CHc?0ZbrS?4WF7n!2^^!6xcVDRcgk8oQyslQ>-|PH-Yv>x--`xVSEnU|;n9(0< z2u8R|VsHiVyt(}-`(nj2KT+2AEvF!ZVdd5K5r*&tk)p3nJ)~)2$YoraB;?cy6 zU^(#FoZ*+NY|<9`kvur|e%j@-=7qEwcB_f(mfe9|^{b`W9S?_OKZv$Y3Ilpr6T`d7 z%#4^qtm{LfqaSblx^TxNCKV&Af6Sn0996cL*ziL=tr(%nDR12C*dzV!`B6W+Ht|iw zy z6xsxivMwr~jpnY!<_3Ny@Km1b+%Me1Arce=i8^Em`*XHYHDO+2MB!sRqqkw`r+%x! zeGL$jUR#hm`55GkF^6%9fuh}!N&7kIlhGw*|4Z$1HelQ6qAyrS=g;%hGj6}-I! z4+PXTzwR9#5sBZwQ0l#St;Uo1)nx6v$|?iCkGRjl+{OE~jguC+(rF&WE6-4&0XrrI zn#MptlE0cA!-iYzmYXUj2Ce>%X0ZSc{zuv-dhvm$X*Y^8`newiry5#(AzS8<8}q7# zyEndc%~sWLL9`<0vN&&=v<1Ob^@ z-0{CZq4UL_1qVvHQW>)Q))rQ+S6My(bI&I&sf>*iw>xlOsngt~iFnmAt$_eWq`9!)}?vM(!Zy-2IyBc?`)m6iW+{5NsZ$uv<~V`txMe_7Yt6c8{HrNs0OQ& z%a*!vJoHrikUw2T%l<~Ps~K~c&;dkLN3@O~k6%L&wi7leAiwawTi{+T$}1F_PrM~T z54uhrQ@WN*2jkOeEVKrA6n;dvbGO&kgml@_cG@8++EUS6P;2nT^W_}*1WBp;?l$4CjCpq3JYSb;*8>`qC=4IN(2+@YQ&6@Z0HPi z23;C`&kQwZqGDn{#+&@fuE=$4dj71{K6UTRa5?pKn$Qu558u%Dah3^zYI#_^(UuEZ zibG-eo2u-|@7_g4b(8TbRyk88D=0(}DwUF*=Ti4aR2&m2ciDY5*>66=E{i9=$4}Hu z!J4LuCewtU&vfT#*eHVpRPXc4^%PZg+IcP7>6&sWRHn%)G+-CGGpaeph7%D-8VcE$ zfdYS^_~P83B)k+LJ4HZFcs9DvLqzA>D)GJ0cZ5XZmh9av5wj4^0I!4}o&d`ICJKvp z*M+^`1hz)33Jfw2ENrt3{E&34%3ohr4-=eG`%U?XKl*M_NAQMp*&RlQV~z1?7Ib1Ept3BlJ;H_J%Nmxh?35q3R}`X4RNolRiI!SsxWB2sU33;r;r0$Rzh%2B1dd;ms2-kiPxy`g zICboK>U*_Sg3uH->qqydqCJtYBiQ%KK~q>NAWjD{L~0k=)Rbxq!f!4VTifNUt(yOw zE_PgX|I~CoGn!> zYZ0rTO1yl@E@_Ihy+l&%!<%`taE6Z+T(z!TT&Jh;0D_~!bX3h;{uNmnle0^04N-eYa`|*ynb; zJ(yh(`eF03P3qm|^i_tO&A|p!$}?lJTKlFW7XIO>s=C4OwGw@5oRdCBT0K1;rO#`< z;UQ0~qCd>Z;P7LqOrlZis05l)s(-Rl=Wv=>9}Iu7z8*dPgj-F8=FC$wm)e5()FYrT>F=C$Bzg2`TbMpg&}yl=sa(dzc-K+ac;%&d_ii%{q`yM(*<>A=-w z+q=yKBUk+!f80UxudapTBIX#mH!BJbobf%nbA?4|YvhHyr$N++UuPhrLoI7qZ>f)aVM}TWI^SjgBL?H(q4gI1!7)AFMo)tgUaoi}+&COMNQO3sgd6+G2 z@LQh3w{*kZV%0|xk1`9#8J<5OSJsItJ1lQ8sDp>p(+WU?KjYPauI&LMF3U>`e{{748~oG~bwsG~1HH9O zOO(YCp*?Zy9w$^lOr;zO!4*4qSIuOS3JFcy2sf<3I zVa5?0lMj3xobuno-4if{mXB|yT-D-|hk6JZY)z?lKe#-eP2e#fqr@AhjH{4s6SIaA z>(~;Nu~rNOtrD9mhQIna>Qw`+9kJr5TJtih-1GWoJg?9+95&p~fTI*F^vJQ_f$=)t zWAGAt9(@YWhBj(HXukyPLPfCJ9hBTK!)=|TP+U)L6Ve^fu;qBBb497EM`<#3&TW$3 z+r5_jwo=E$B&Sv-fXTnK$^a-uA?Nk|E#86mtR=zP)vAO0LJT(vSNHLN!x`L!2F1^Dz~q*HOd#=x%*AWe!sgsY3ORH-TJVoe;1N2#w*QKejXvElvK%~n&Fm#xWa?L zSEN!Em=Bxn4C?qk-FLry+1ftzrgpy>Hq+vzqQM_-vfNy8kLtPGP+M7TpW*GtsQXVc z1bN+j-dWt|QrZ zZUw{yw1>j75#&x5$9rFnTH#K6Jqd9xXvHqR!bu2*bUs;BEhI|4zzFr zb9G=xIgJrUR3a|;u&cOj5RIN1krU!6={?d4B#f0B()G7-DlE)wei(5F5l^tQbp2cg z=CHANrJj?>N-pMpla)-0lqA`RR2*bo1}J4D$HkMIldumc4y3HQ@Swi>ilwB0Zc4*C9Ll5 zNqynAavzyl^~uplc3pm7Y(=`iz_B%B+s2B{JWg2cV*IkV!rImLWU~AFcwPm?{yY7# zR;7!C==b!x%Lcm3+|t3`3ygRifbHYmiximc1x4KRdjh=~=NUnKPX&rvFGl80d-vUO z$TMI2QJ;v zfpM*u2WT|JmP+d}g&(!m#_nx)4Y?kw0UkqY=zVm-#GfbmD@D=+AFKO&TI$jK@xS`jvGDNXbNul}{?u;Ye@F)9%%i2VFvINl5~x z-@D$f(&lcmjh<4nl4;b@=|-sc^*e7qrjzQ%YI=~Q7N6Q#reM=s^7Vsx@&g%zXqITb z$6FYUCe>-w3<3CW>p`EGh%*7N5ia#Yi>UDc?xU6__oqjt2Vhtu9G*^=&-=vtbaUCU z$j_=kjGj6tknpEHKfAZ!&INA=Ta2PK=y4)Cgj>VvmVd#FF`pG>{(E@#c+t6cL z=C&zSpKFoN;$p6ii;LZr<&lT`$(Ol=`jT>1%a3GNEmr03Lqrs$9~pJ?uOEm-Ii`El zL>lz&4{TM^Lsau+IGtxBQzRqDXeI#D5#+>?Xhay2aarJ0#ze(v+S(?|Qq97(08Nw0 z7mGm!MCXs+WS}jurio8}btQkLNE|8o8D(*c=Z)czS)hUamxeT|&ynZ{`&Bq{lUY`h zz&EP2=5|y4x|6-Zw~S6@CvUH&#Ejyq{3;kXZDdrUdcK^L-#Q0IogbyURsY=+#c zY{Q97w2?z(Q=%t%*($^Bth>mEt?PK>L7GN^4LbEIei4V76CrQ)DCb?=C4;wIeaLpm z-z1wm7tD%htP=`9!5v$h4>>x9xw4an+f@nnefp9^L>Nn)ghLwBwR)`iB-uu6J?7BT z7moGuoll8@_k|Vak9QG zbGOq;&M$k<60QXt<<$nDhFmM~x=u$JM~cc9oFnY6N&E2U4Hn5Oo)nxGM&)4@5n%DP z5X@y!TxMnNxjC}+Xlp5c9wp=Ft7+HBoqQG^ADiCAevhZId5VLY8cfaf!a7IwBb(Se z)?xBKc17xdx|Wd0`wtZxwR4R*^Ag*avUTP=TA_l zhMz3n%+16Iy?XfkygKPt@U*Any}~zKqcKt?mv!FqH$ta*ULn)b`+N7TI}k!&2jWwb zMam}AziXFPrdJH58JSFaVRMvvsbIgtbSl_3A}Jr@A&eX#!|6zmC5k{5yT%joB^`6& z@!$k@&vI@^*q5_GTTcJiRNuI=sD~MGD}nyiBWAQ1BqAi+LB1ly2Bv-pYYhXeE=w6| zyS^fg`wgtQdxRsJjycb*&%X@Sr@5MzGg{n}{rXtVa>BP~iQ>3s?Y+tT)EBmXC1PvU z+l$x(-7fKR>hJsdkIu=K-cKDrqLx2Tm|^zy*jVoHFT;C!L&b22LVix=N4&?~issUF zE4Fx>kcoiGOhSB>4#Yyk++^0ikO{BQyx|Fdq6AR>VV{AJokA`8qUE!o4 z*O#8C_ocQJ*3@YCV+VB`PrJdSWyx=ILSLUBZ!=H}+E$r~i)y4L|qiK>$`RFn|lh3E<*@{O9d?_{870=XvhxkbZ5?bKO-h z{ip4DFzDa+=Q%;}z5Czx=l{L>>bEMc|F%C5N{V#|9;wSXRdLYt8)G+-`=(K>bO*5!Pa;@o3*8xJ8YU=VIs;LTc5NEMzJ`fZI)G{ ztPv+HDi*gw)!V2TT}R#h7J(YkEQ=6vl^Q48xK#ZZA8Q5|>Bm>Y z?!#y2K4%%$0EAIAix;7lSAEW&6>Q6{Hm6D-6^A)0wNL@&3Li5yTdVfTh_4a-fDhBs zQgB$lw7qm@Ky27biBqDq-ku*>+22^3h?DXh(PGPfQO&p`5QVf!oQb+pPP4}^`212{ zt=$$Q&6R=bO{&c-Q3ZiH!^npidu%P`Tf?wVQuG#udOuoUDcJ0d^(od*QJEh*7Zz+< z#f=Fd)L~q@M1PDPTA=GkK*yC{dDGQQ%K09HvfA|dGpZ$`F!L1xk9&joA&@O4c#;GAK9fBN zRWdQp4A@-pJa`c*8WO-S+uX=i#M!71@nRr#a>EyCnW=#H5 za~FHFP=ilZ@gwPBZ(W|17% zwn^5uNT0Zuxpr$VZxHPUtWex*MKU)p{voSy>kU2;`@&e7il1%p+R$5`ZFYL*cTH+k z8Q$8ZZ>3(d*FW>J{QybTwB8<`JIFIovu-j|v`Cvkoa{bN_Q#cn`deQo=CjHXC`1&= zamXXHk&D=$)_PB{)Cx;#CF%CM)hs;x;moxCoVG!e@^U})6gNTtagnjDC)x9vr?jD9*ju#Fw>O(0oh zU1Ch=)MkYXl`Qh_Azz3Htp3Dp?wf-&S+JMY0u~q8j%u6vjFgCCH#>~$ z+aWTfxh!V5tmHvwk=#(C&U8D5PIv4PsgUg3Q0MT^r#KL^epv0Owe4T^HJR)^HQcBJmxM+%T#OUl~ZfE2or6kIM6{;iTBZ5M3NYwYm= zGI1U%8)XB~mBX&Ulx}sTs*+~BWrO_UD$+S$GoNE};&z$8Hd_)*Id2$}{8>>2;_2Sl za`-3~k4Z$xkvHATvADIoUR{1mCX`UTNKA?3`G&W1JoEkMFWHDzLh58)|!O)aGo3!m-W$qN zd5L#qv8VC%g++FTaYhj;bj@z`*|5tz`DA7}`eOfQVPzMJpf667mEsegP!7sro5Y)rw7QP1>(8!vGniZn%2e_RdYKQ}j7YPhU!d&Dg`;b^p z52WR(shHQVu#8$m*$1#0q~kb(6b1C9qea`2_<20OuB?1;I3rM+%nq8PRZDx$iL)rA zk+PhbdFY8sC>zuqWwTG=1?E!SnKxXzQHETk4KjpNOf4rdeYCkbf0?(_e!23)merSS zWvfJ#%bRT5C=u1q2yA?4QpePf>znXwtBGRjS>^jw9pV}gKret|7UINSXDa&x?bDGf zQ!j)*ctKKtzS4*#5>HqvLl)_gQwR)?>%+a%H-wsH{t0rsG&s+!f-njSTID%nnsoBj znl&ad6)|}q@_wN%`PvefFFpn@e(zCldR(1Tv7P?4H{+0hqw$HX)^!5K&2~Y8`_&Gw-vza>*r#8mYiYKrg>)cLcd@SUwwYIj3-y@<-i0cy^75PJz~9_g>zp zJIY_jiNc<@aS}DS7H=XUsI*6O zjsPQZpMEXs<#oyTUx^Q=o@|=Pc5j^JPqOBj$VqJ=<_h^j-btbl0Izr-;63ztvWfSx zL@V&+msx3};B-0*9z6H(D<0!AS1+_m3>{^aYo3)dQa`TXTIj`;Ip^^9j{EY}?P^u0 zL~(7Uw4J7wR;7kc#0=W41k#?D&-daDw|90|9g8n^N;L>&dF1ACK;BkfUf!3JFHNR= z*Yj_w+sp9Yr#U#E2wn7P00wTbkkZ3$K}%BC8kBxi{Oda28T*N-UnZspx#Tb3a8Y5| z{9rh9{b4WfEpgW7RNQCZu*_3-xZE_6>xX>V*2(zKyzgfL{O@_+pQZ8tD)!Y@kpDAD z0z)}D{z{Vgg*@Pe>egm10An)?n4JK_esenm0A?z{pv9p8Qg9G6vxG@|Ihm<>DXN=z zS)1^fG6)G`@e3Kj`xMMrJ&YV!&3{&=I|JYupa6}Ss=Or49e@X~hpmGxyh0z~VQXXO z%ettI$WB~jGaj_O)&{j|exY*m=C zvvD%ONjjOD^Qyv|7XBaz|0KX*>EhzR3k15myR*4N+3cMxfM6aT9v}z;gg{u~5UkFg zb}mL9tai?{@b!K&0@MBr&CZ$aCr@l9_O?KnHW17P0{*$spVh4}dpp40!bZmSt}X%$ zuC6dsUSs$}0T366i`C4W!2ZKwx%O5R?_dqYj4hLV0*0+)qGYUJ!^M_-~MK<-<|M;4QrHmfrsd z)ZZX~L$!BOhns|;i4n}n9&Q0(C>t2S_)FhE2Q8e894uib&Ub2O`h$s|lzt)d1Mk%K zo523pA~bSPQ26gcw6(pn@;f+QaeEV2TX_AljQIZ_8~R_X-PGhSh8nXNZsgzqFSIstfqMZEZl(-^{P0N^n2U{>;7@lHv2pq1 z#0GZfUc5F&b`}B*9;~Kj=0>hIE)0Sic0Zv_|ImgFOz@ABpLoE3(fSX%`Z)x5cevVs ze+3gkEWrQh_}>xwR|JLk$p|pG-^Is&bj);+#LYBUt?fSFgv`X>ThEp2nQ#;E8*Y9pxp54`@fHYKzD6L ze;iY zt^fe4!Mx1idIJDe?d{>MWbo%fs_-s9J9B%$&mI-{!x?da9>2JlC`go31O(;gk>G*r wN0digR9qYkf{O8oNQ#0a1pn_4`gaX`&MrnyER&Sea7RUq_=MHcC&%NqwFD7O;j}8k_7K2tc&(7Z824Qvj9q#oJ zaTtXz7oHw4gEiPFrX?SJ`PE=uNv*SmMcf~^4?9a=cnVag zaU7z_K$);p`ZZ;ExiH}yNf#Txd5zPY zoLYMKL6F=gTZ(pKA_IR2H3}sJHQLNq65d?^9{Qcz+s5>>Y7$*re43y^kXTDOiBr|0 z!8dVJkm5nXW%#LX`$wypBeR4`p$&nqciSqD3Dx>1)+TXAHJqBcr?C<@WUDFrx8du&+#1av7yBTH#u6K4MChG+NYV8Sa4 zTH2g(bq-%L6hckBq%J!Z#nO3HX=~NC8bOEyanjJ}$n~$a2OUDg+NuDiLrqp5G}io6F)DW36Z4^5P=Vm#jFB|D!oN3xWOdK*%FUW;SXy?=*TMpK85 zg@*xI);3d|%>E3|ZA3;#pI4X)Ns)q@`(Da>(oqEwmdMBmCu6>zcoBe7I{Z=MDW~Yz zwCOd!i53x0O10lRH%mUVD)UEA6_(2o=*qsd5X;3fnPE)@k7OdXHspa>zM5lyZ%Fw< zX@WJ@R*4riL%OO*Ut}8ksP?O_Fj@ zLG@MAX(gS>Ez@`UuWlvctZDd#*rUORpPkK)ayiV(3eO}oG-k7!2lu{hSTGcFceN{N zEmP7r9>jl_Hmb8vTC?1hLm8vbpw2S@M`(?WW`BVQP}>OEYL3Av>~;prdWHHC1dDU8 zu|JzI%O3SpHM|1V#nS~S&`2+9_p}pgvgl%FVVf+^L`n4w6~~hr>e0J}bhX0Y&`67? z*zrLV62y*a44br&_gMIDo_!`49$=Fbkl4ave;W94;2T?ni2jL@UM+FA19KZj5gK9c zyr?00%HwQ83n7VB+}I&8_GfoH(4}4-h7eS;R-vp`#tt_3u+^cYSY-r0xgh0N&JGy^ z%l2Yf6Ri8eQ~YVB4%Hj7?+;YlFGC*>=#lnqVSc>zFHw|7{VKG`ct@{P7Yl%=_9Smk zxVM9bv=bE;qqoX_8|Ha*B=y`WS}tNDGSaBhW+Xyh9bvuDg=gH&LOh3;g`$FlY)3k?!NM&rDF!YZXlG&X2+EN1p$vnqt4*gCxU&)u zEn4m1qUc~E45JH9U6WzMbtlxWX3EZzSp}}(1Ojy&!r4&zfW<;?y#Cdn#1mbH(~nnX zV*))|Rdyzvo~dyuy%vupN!y^S7H>s24A>fKIb2|3(!EOKR5kcz*xGTYJ_N*G7vQ8- zCq9_zB=R`5A$-wXke(RtO9H|=>Z(%j=Nw8Wprf2*oL+2!g5-_xU?w!?BRWK}@b;D- zbyzn@qAf1ot?=d+(Ov$rLUU=SUzQH1JBCO54>JpCm#U8CspC(UpTCn)EqHkMdLZiZ zFD_n$WFn~!dG;jA-kf~2ZrHMhzmAZVU_{W(L-?pwrT-C48BFqAb-7!c(EB$0)0RdW z)^(Lyun^yrU4z;9s7K1V3ob-;I;9iBt07dms+FOug*Kk6=G$q^OuKW6+HWy zxGj$+=VE0bu)D;GITf|;%$T7LzkmT|M$S;7$D~=rQ)QnfJd2Veiq~p>z94?eRB2&N zZ(ZBz+|QJ4Vf|d&x^Q@C$}dPjm$B9k4P}XNyybo8#*BK(bnu}T*s3-!NTiobu&@@A zST$#6RJ}fN%+49Z#5rMRW+^0WO7Q+e$EQb;uhDii6(^e}`nt3%3=T}%7iiAGRGP|% zH>Zy6Qa0`S)#C0dRAW3J^PWE+89{Pk_|(z5YwiQF6Qa`ar|PeHY>LW1gj5}hiUsyD zQuGmJLaJiy(z=#rC9z=Y*VD3w&B#8rTu6A(PL`mTDVRGQ7jxZ18J_Wj(YURd;Xl|2 z)k3q>46i&(FcH^n&oI;C7Rx;gG*?pST@Tb8S)+G-79;CYV0YQC8Rk$XGC&K+vQOVEl_75*F z`rCIuaNZ(sx?UV|G~0b#OxK?Q7^j{owgms2O?!KK5cOg3g650A9)5&N;WO{mH+y6Q zf%GTtbE`d(OnX-(=>AW>=Tj+MQKCCoZEPwn8k|46t>?YpYBFfw(Wxu=dUtgYJj?$+ z++k|JIB0Qx9-fdi`_;b7;kal29`mYTfV>E5;TR*nVtCTq&t#thk>HV+XP=A(o=K+{ zM4X@0aTTuBPM&yv{WK_v=50Bb{4 zrAG*ekAx7|2xt9Q$-8S`ab8iiPkw#AtM1wG_HBdLv58iH|7!5ofhoO__aFDLyN7__Aq;rt<)?0I-0;x+t(tdx*6ofb}7qKmjU29BiEIjUW!NKts{q#z-0Bs11aLA^@P26%30A z0H^|Sb%YHQb9IzZc7z2T0Pydj8VZKy5FWlU zUqr6Ng~Ii(y$_FnOc1E-Wa#*yl3&t%kcp*@y|S%=5dkTp0ipY%q+zla;l@FH!;me}T8ZWQhZ4FHwEU=;9!jr>(rf6>NoN&O{w z{)Y%SIsYL7*HqmoFh)B7P0U5U72O9^Areoc%fgmFqvzujN(dJcolyzQ?`@xB1j>KInGgNi0(MQp?Zd@jiy;hrdhbK^(t@BW&ORd2mgjqOg>Vj0S@W z)C%Gt4{=elu`;k$RF+fu<+!2{2P1o^t)q=S0Q}2mfGRes*3btF0)T&+_b-S01^Zxe z_709BW(M{EHg=$_!S8!^_CE~SK^wrz`rtkw6m}47>4E+J@&BJszdvEC%Af9`0yE@? zrQk0f_LnLA-_^zfX8o($qQMw#AT+V_)aRULT6CF8KM?VeFyWY^voXF8v7tbg&Zd|_ zAie@%D-ag=~WJSKsolWkx8YE87!o>QJ7-$j`Jz1RXXJVUw; z&eySXCe~V1dx8o1v-Siz!pKKDlqGW4g zP(NAfG-_K!KaO{k#@RxjUA8RvyJ~;g_rL2_OIT7|MUg?=9s*H@*h5YJrc;&&r}(Q* z|8SUJ+r@)*{8gu{EdSIg7x$k!MPXro(BpskLHRFz5946{oGcIC$-(&m$pJGoc8-UI zl=IgX0{a9#aP{!R$-?#v8wUp`fSrXKzyW&L_kQ<*9=5UnVmS|BzXkM<`Tf(re~Cuk zzzVia{x^pZhHahNutkOmw%u{Ca>00J*L`^FX6gXoU}t9op#X&(j2@O`SO;ufHL(4? zZvT3Gp!U!Q8w^{pVLKGmNXXjM5(1L}P}vb;r3T=J4U&U8I6$pU|5*GvIXVB>LSe=Z zVF7@Dz3%&qJN&ut!Dy93JaWef~+UJel3pNkqND;wKi+@s&!%~eIU%ypdmd|^+1 zWOJY?r`}<7nun4yfTom&lR*aVDFWp((WsP0n+Y5x9hz(#UFPFJJ6jUG6~6<7V0np> zrvjk@X>Qi=TQvd_nJXa!#zA&20ViJvi(;L%P4uAbd8y zThzaL`_%16$~p;e{P;xsXF<7dUv2ct9d+J}Df7=Il=oVc$rJS zBa|_yGrH0cfKyC4%{--W`>X|8*oH*fW_szxZBxO8Nfd(OL5%zge{2-_ytRC+TRlmF zXAE9pdHahov@zA&j-lW7Dc_@w@ z!)!saI-zM7;0O=Sx3R(Y4CzWjTl34~74WYV3H_tuwv<+GJ#MTSoT#h;*(0csp zNrW5XX;V;xHOYd|rT|~~;0o%+bFYxakPam}NC|ymQbMV8RjO51MR1ylO{oK4O!blV zf(R5S*tMAOD)zHassc2-K}6tFgXK3+A6o_Cn_7@Pc-yeO^O?8-o7c&r_&&L(^T}vS zTZ=exGmgDJT!bTU#F4;J>uo$atpXoXbD;W?m1p4_fJic*0KV!`kfm0oVXALAU4N@v zd~rqvGJ&#r7rtfISOotCYQs~TuGvFo{&^XP*{J)-ZaV%z|M-Ot)(hGj(q{c19pQ1D zyqY|{kU+Lk77qXMlFRt_<<8P%mSnT97#w}jOICN7$XnC+f;o>XZv^g0?nv+9@AV9o zs^S@HzXXohZ@$=!KS?=>Jn^@TWB9@0_Vp$;_7g7MBppM-+XSlb+-rV9;4xw?v=8Xs zAcS!FFtz3^H;TY?uiEeQJMmOsNT!Kq2ud4779uC3J8`RgzOK6*Os+v(Kh7TV@2oLg zNR8b@x~; zdj4LwDQ1U1Z9Yu%=GlneML@xh3}2Ls@|Y{0sZ8Duk6K7D-HKj^F=1=vkgtnx5bZ(K z#*+^Jn@4Kpm|9Fg)>RdrF1bDzl{?a~bhMiqY6m=QU-Cb4c`32R`3cGB5%u+k-qF$dG7pb~K=$2g ztK6zK+B2zXHgjIF^-7gh-{Is>uj>%?#F4bk`bMGYlhUm8M+;;cQmi^crYh`dp}@AV z7Jfx>3r;~>W&I~?h~y|bsJ$7?w$Us5K|as2Tbyfsnc{Vo9(PU=9J=vD-I2H`&V78k z|C-O*mEHsTA0X{#3$eo|Qun%b5xFFh65pXCKiy^M;xmrOZI&DrV-F270R0bDj4K*7E10 z-u)a`zB{w8Y~3RpK6W^{vF5kQZfF<6+rcZfT5|@v@8B(bN4ILk`)F@oEXb8B%cM;l zABTP`)EbYToysY`yV0GrnQN*cEB0CtJ}Vh%EkeDQp~23)me4nmmmww5)#B&4dOR%` zDLVJG9;1b-cEhLl?o7EvbKe-sap)`T#-MO6>Y2>|CGdrviexECo^0nyv zXdP)-BI(@~(|MOA56QJr4DmBy!Q!1V6mGvLMAv|zb75iE*=DifDqWfkj+0H1L~p(D zfuZ}SZ8s4Z zZ#){D`9NrgcxMZUr}^#Lsh>F55^2vQg!gYE;O$cQ_oSudotxP$`wDL}-XF?_T;?Qy zNrbq|s}t1bUncH`8H#Q(+CTZ)I4)0>rw)}LMbP3wch}B|A@)RLnHRcN?$U0ux}aK>h>YK}3LSO-M7IFr zjacd}V#>{0>RO%otXK-crz;SwHTG=XJJV8e36%;wH}I7=CXg&neLPR~7=Ln>!JU7# zsz}RL3@R(ITV}}@Yy`(rqi!Gj#U$^C`!T^JD$;FST7r2e%Ao9wIh(kRs3yOHKDrV2 z`x9*$TJx)`l`7TG+}%Ii`hLV8w^n@{b?mOU zIQx3Jw&LclWhoNDW^Di|MU(`8Erj@i4seK6o~5P!}>2V%J5G z_DM$IAmM$aX|^m$FPib%k97K{D-`dN^QgM7m6^5DS#m|SbR%5f^GYl>jM|x~> zHqo&JWoE78Qmg0B_1uKNX2!7DJZ%+r*O&?;P7+?)+brwBPf5P z3U}=9K|>bK(G*9X{aV!J;LYaA>&!z@iWdU>6c-{Y_>PONmtL8|ciVP%o+^K@mS~}`4z_>$&GBt6LO=|&bHVAy ztI%~ET~_fP9c>Q`)i+s8@L;=3Pva3L<0~fP+J}FD7_(!Vw0P^4m8bp6@A6jW1^rN zJ9?)u#E+jh4S~0lx~%tNO!f<)!bT^{!Oy5yNn|Di1tRZ__+ESlOKTNK3%fDgg!>3l zFRS>$`PsvTR$nPOSowP}7lQrp&9_bF8??9He`&Bs6YM$8m_Ys!yUh1w?sGKM!hTgH zDzAOaXhkh=7+DFlPuIfV_y*>+9oWxsS%Jd8~Y%HrFB{ryw?VA!#SGo(w zk{#8c_|fkr_Ife9(jPPFE5jr<72V_KrPB>(igq|a3bqoq3ZQDFS^s0d%B30oX_J^1H4B8LQl)Cil@=rq=J)6Pi;ZZT9zax|<7eJU(*6HsSgaWfiA z+FY6?9-AbehM)r)&4rpdkxv^($8`fV$uG9bP_-Pc6koWrmI85HOf%P+nR_OckOPB* zIrHQOvct1)mFj2qECBCM!K}ji zfsKn(f%9!AWWdv}MgjBuX)dMN^XR+@oaY^wRS~@i-s#LZ+?1y3-?-)OO1_#Y$w{#X z&cE>UL`aXSH(%OsT!Jr!g58SRJt{@UWIpp-n#Ay6;3tvi!3|E>rQln# z-cOk58M7)>Cd>5I5ahYgjoXz);vjSZKUTzRw>ZKz``DX5_1u&FknSrFT5?FlBs#4% z#jM?M*rz#qmVT{Xm2>!ZEQ;f%E75&I4Lk77GNV*{Gii=@x8{jX+3>0xKL4EM1!2vw z)XD4gw*^Dq0bBPe-=B$uz1|QMeI_JyzRZ|*zvSBZ#ABLiZ2PupCt_^-E2Z}SDfyc- z&6T_FiJl}eyq?nqp*u(1osZP7<}|BfK{^n-t znRyP3k`>Y!*w4zJ8wTwg#_7KLAW1(bUPTYe=YSU;MUhg=W4^_{&v4cHA-5oA7@4l279 z$4>1#W92j7rloo-mADL^joDeNqE*qYdqYFdbSmHyx$EUrx+_4N;8|5Pt?6+4RdaFY40u?12h^c1+!Xn&b}DE0x{$8wYqC7!)gEMapB+n@K*Z;z>6t zOJ*F`>Z7YwWzd+>Suz9~v`sLGrR>({q$a{fkLj|_8YeVI10xYdh;jWr<_t zOqu+`_g%s#K`T=$2b%_g32XNVZe69@-N1>{Q6NXpTNhQ`#`D9^NH$I15Mqh*8(2Ha z54f~@XBfUmm177IZ$+)XsH!q-T3XFM3hFhFZsUXdr(Qv+2Wz zb2kbV&Rg81PO!{T+)Ur14)pq|qU*6CQMyG+A`j1X>Xkb6>D>P!OTH@!!>zRjgEcmC z`+lvu;^d8Dm+$&{VntYYtwfQA^wQzp)DZsmF0^#^I#SKHj zWFhT*&RpUA{O-cq!cT#f_w{weA!J@GYbq!f(ols>`RRl*;F@IEbA-tWiqW}irMvfS z_1`|&@_XW~xu@%jZl==}?{DE!C_YI;Fq*He(}A4Q&<{yh>wPA1#}LKLMTkS*LN>lo zXtTlekUgOBMc~A`!)v8NfcI;6aK+%C?-h$eXU1|gbh~@C@Z$#RmD%6hWj4P8)&jeP z*U20ig3HvxYc!2OW zh`9jCdE`mi57R-u9?EY37747C@Gy_qA81@AeoxhxoiMk{yjGhwK3d#<=l_{W5=L*+ z_{z-iBUK;PI<;s91@qM95TaZVMGG@JEPZ?i|MNoGo}_V1* zP^-dLneQ_cDl2pR`Sc5Vjuh0uk8DOX>c)Bs(UWbD>nGsF=xeg=Of0qmqGSszo$LNo zEIM*}^5i7-s@DZwD(vhu3G`^(ES*cTvXau0@=Ma9DV;>A_#^~4-MGQH6L1_fO^8$? z^aNzWwJH_W`hGErHSGC_D}KnEV6jh(WMySl;CL6xXH+a?Pd?w! zE9V5@ea`uklShvf*uPNgRr=Gn|9tUEyw^c{lJCvy8t3mMml7Ar(2Y1PCuNDFkGCOC?1n)s?SWo_t}v2 zlU0F!#w?t$!-Kkql zr3NF;vA!-9N3m}3Yx5-H^`!8)5P+bU<$dQ*jmgt%3FX5AUUBb;UiS+i^N#T|UCWXC z61bl|FHokbiT8$|S86C&zS*%YL@3o_oPAXN2$dW5U*&8nSjO_}#KNkZ?#OuKal);=1M zaa-Vhe6y2XtFJ-n!EgV#*}2wCKBA@y+{R7M$il<(;yc)l=L6%MuxqDV>TZbb8{Y4H zqnZ9_1FoidC-uQzQ%%-dEDk)HBOP)(xBQxC#vhW5oR%0Df$Ab4K0DISEeEee+Z;8U zA+KKygs=>0(}C0+Lcm*4G>c?6?>@LzrGkJXow%&Hl?F;dv+fZxL%SED&$BKuxY zI56ZWW-7z0VPE472&p^poBs}Jqm1CV@W#Lks8Z1wZO_Q)jshl2(|F+`TV(3^`K&tI zE>@ybhw9D(vx2SFv-zW&4nCdwVyuZ_flyn~j4+HDxS*58@@k2BGS|tC$VxZV7c@QN z(0f;LcS-|4T31W5?ULZ}(X0@WQWH~qg^^hLd6vvOf7B-?omwYL);sN^xt}OdBZZ?X z^h*hQJH?NfzL8PNdUA&8kvYjWUBJJqHEpm8V@K_gI4Zs3LO=}iqzrsY*CA{ZN7fZ(kuWTS3fjQd!B9iaBMZtA4f0MR-AwJX8&n=j>kG$ zGNkWhb-*_qjp1T?+IQ3~>E};GiCW8uE;GSB-jvU^_NcKNnqYl)Q$mA?)3+WYKAZ?5H#HwFxcRv)UF6?j^itjpzYwYRCA!(cVUlEMY4Xc}Q+s8> zb_@IC(HP-f$Or1Sg<2Vd{lcc7;>43)G((zUow$#5skXx;h$Y%k9lD<{$3Ly@fePVE zN!jH_ssxWx(wDKaF)@V*>lDY2`t{&{=k^=b27O}_>ah^|z%A9mSMc`6qi04seQ$JI zpNVN7m;Xpij5rffiIg51hw7+SK@Ly5jRlfZ4O}EZ&aq+GYQBsS`{J+d&a+2)du&xy z8(DKvLSmGv*}M08)vCDu=ZwW!|J+t>vs{YjtS*BE{{A%Y;nZM*_Otgnv=EXtLmJop zt(1JI6cJ62(B!;MdBb&S>8Vx5>Ad81yU?A^S#H2GdJjWv{hIX`UTBWv+gJmZ4?8tM zZ;amj&`78eT1cu%JMB7uA<*_LzcRoZWE}}EF#x5PS z)b=8U)Hp7SKJ$AagdO{vCs9LmWh)Xb0`^>H*Jvw_?izz!Ge#RY+d*$e>+*8*$P@B@ zU@lUWmkB)K5v5A$Db~C$>?2NO32|9Ay?Xx9>NCjD3Ky*O=6iWXDV;)2Qj-s^YFnET zN;L^>jWJ3J0L3tiU6@KYvSLY9>Fq`A4~$wWDQn9XaW*crSPddE@K7FG6=NcP-~=_< zWmj#M6}nlS_iW~epH9|uV*)}E3WG%{qvOpk-6=V*V+$w*-#4AB-RB$eZz^ve;pnPK z2zj3mE--!vgWhqy+v-`rt;jK?gcq+UWF8IrL?Y~M;68boto)kGdild=x#3wu>T0k^ zOf7LDR*vo4IIM^Y+Q{iq5L3VK?8TX{Ms(+}pgwqX7n!{jUyCcSn33rnRP$qDQ{gfr zJ%e>$6SW_*MF9VEpzymXvS9*)kdWS+Lpa+EyMfN~RnAAAsEW4Fx7i~LsT`J*`pu|} zX?f(9r9yi`WuH2vjHbRw(3wvef{RrA{srkvrM_WJBchTpa_bkWH?GZl*MxZA(c3<@ z{p^gPP{aJl1HpdzIwn!BFq`gX@53{`vX@ThK$nIxk7EYjl=NC0rp2PV3+apS?^5OX z^UmAvE5z(SjOrX8>|0xqoe4A@A6h&CZ4wTU7By=$H4YpbK=-oF$*+QSU28}dGMCPI z7)-Za-^J547B3=o-JxL9dlBczVJduTX+9gSe8x@JryqaUnR0*cg)KW=O1O(e~W+ z_P*b=q0E(h;7PH5_&uHseTOJU(0j~?+q)A$dW?}sGz|0J>dAG6`V z61pG&6DJs!XXXHKva|f>d@iixU-P-#4@YT!=W{t94&eN!d@dOD?+INF5G>jIkXrwp z(EaxlM1MJn^xqP?>>w`CpZQ!?R?uIMJM^o2+LClTdB1%^+T?rPvi!>L(Vi^=J@z9? zGD#nO$*^oX=?WFi_l!ig#?#bdz4o62oJhydezZelwfs4%63kK~x20BOd8Cem>z`yQ zYfIrsKzY(C)Z!w0?RfKeSG}gpl%i-yor$Bw&UPPvHs&OgyWcjjwppC5rr1CCx^{ky z@JRT8@WA$>DoYGCRpQQtF%p=rt+V@W1mn@s&CzA>i4XP-^-Hg?edY6_j{*cxyk=i3 z)5kRlqox$KUyb&?2m!X6cL|-!4fpR9;OX9oK`y;iT;E?}Ke|WxetVw&c4IlR(H`^N za5;sh-p&9)N9O+AN_&B^TtnE|ePG$$a;F2s{ApvDnBgfEg=g;UJ$Rz&YU#VVQRz9R z4@0Cy-G|C(hPlz`Cm0Bl%Qla^AxQy zcDk22m&-8`1OxdH*B+4x=wwxY{HhFc>CTHTkQ*nPr`@_@h^M1Egrjl0-8TcNf6}>Y ziJ8$Wx5lMQ7sQ#|>|0I|GTlPl`d(ff1m)AZzV7HfMVJ{crnxKn2twZ)7~i;vC*N6`0~dtTCGp{Hi3Mqq+*K!pIiOn>we3E1~<)-f=;;8wNr%>sXN@@Vfu7k zkkb}%O+wqUmeH5e-16_PZJ@2tAS$v=NlRgGQzt`VHrtL@diNIHY5ICVC51gvxyke7 z<9E+C*l)*_Op#3p3|Uvl+(i~AliskAxVJlIvraELA~6jMW`AO3kZln#FwM?8tudfJ zIYnyoNyE2g@2<08B`ud+7@Mq*`{2IYG8`Zz)j07g7{4zlj}xDG{rUFLW}5)w%QDcn z8tYK*mX}A@q-G^iwHGaoobGLPFWnq754rqzCzMUvMNZo*dpcyMu=~HZq9rG%(LB+^ zG$2ZiC88)OJ(BF#X&e7g{`3aZ0Y4_4Tgl=%&N<^CN2vUP$SGu|e59H^F(oHgOs9m7 z>vK2R@pQi(7fr*}UZ+L=0n%v0&!^len3twWSZ7r2-0|EYrNs-2ooeT(#>Wov9^@E` z`^zbE-*BAXeiVxjlbqPVD&$jBcz#4{{z~^s@B7d7+&7D}j%n-mkxQfJdPw&<92pF6 zONZ5WQOGAY8W-sFvuH>0=ID@{cTgU4th=fsXJVGr2_pyUzmWj2&FPBZRxYz7mS0{T z)4ik<@3(qwa{O|jK{p}*$$qekE|X5IsuQ`KAk=MM50Y{aLDi8YzGgVdPK<%AJ%ztY zXMV|4DxbxT|Cqp-}z|+LbF62E8*^UV_G%|G$EgyG&_pYEKYfp$~oH$-^Q8EGt3~ymOrUHOYtsQ>aX;8 zp4yG!pGhXhPhq93SHUplk~lh9oH_9(+Bjb|PeIGy_TE-d?a5j;=X2EpRpNx)9F(dF zU=PQBftt(Sb2ZtBpiqf}q^!|BX*Wy9mOf;2+)BS>A|?j49M~BitrxMBwdg&7uGaEo z*@-a8hDkiT&YGP)VQ0fvaV`s0p+4I_Py4ALxNJUdyv4bOOT*Y5ji6(vyAN5IQqHw^dDlckC=GXL$ zT*8pqrKF6Y(3fIHDdypFarB0{+NWS>{VhaERxWxS|yPXkKCs3f@-cI|r3Bhz0u>q^y^^ryZ!OKLUW{N6jb zs~{mvY9G-#+U1JW;-hH>_s$G^m5#uwHMsCQE`8K2x>4vgeTdL;=WJEzqCP1Zd`1{n zT%2X^L7zl*oRYM>`)hFtS5};L-i(y>Z+YFo=7S%~-Zv{vO>?3&wcCo6cp>Zisj2w{ znalh9$6pv0z0YJ8X}Zt94y<)9&v)C9ykugMlV@hK>SlO%xE1^}gGgVKdm|w5s?Y=% z9zyG3l{2Emvn(yD$%vf-oRqGIe?3P7Rj=QaXD#}0Vc%czL3K16C}CD5Nwr<&1D4a2 zqiiEa5S$T3P>VnSeF_sp#2TC@Wd~C_A){v&Q8pSqCl!}|R^nRkFFB<+t~SLSKh4Z0 zAsN{q*p-sik1VBB_B{dHP4{>=sm@H+vO!aB6Hk8mt?-%=L1yc_I1z!}2>Ro0<=Y!{ zu!sHND@o7()SWpdr4P-3dmk{VuY!n!kDVrP+6~RH!tC1CB?#*|g#-0Vu<1LomxV@6 z73{V1#F)$e8=)^|e1znJX3ykr%;9U?%cX^`s1^uz$B%bfq`q4Sy*B)Sx8pGhc*!34 z1H&2l(wNwI&v2;Qda8ixv%>*klb04FjNG9l-Y_yw!c<{_|E;^1YtJG>Zt2I1qaWWo z=B#=vkGqRTTYV%A>6z*)a#+q??F=gH8{7{*%C6g(yM$xxPNh@u^50sRvPfSfJR@J6 zUz0$;1S4>;d4_i;MWjkI3{eKOX^gy+R-2a2ZbWJS!WqOUg?@=^EQO-U5^p$@!N>w& z@5s+XgSW1cY~SVp$i%9#_6^)8gUSNF;~NR%=f#w2-IAG~ymLm&%W%$(F&bNf$8Hb7 zkgDp|E}F3ie#bUc3+)nkMTpLK&nu!LaU)~oy&^(e<{4;luQFWicI3)-(kDBS1HJRz zG{vB8|1x|3%8tnB2k&Ga3)kJcqlpE@d=MA(!X`HlOZ{v+vZmOJ3nro5pATR`Y## zFUkwYVC012^x5w1srohEe0!ZG@xU4tezgHvn;S>1p97JkmlM^I_#BdArq?Y=(>%1m zOdt9UV0Cyw|4oxm@8aOV?OQioIVg zfQlsLsT+gmP=y!->pp(XvU;NG_+mE^yd3H}nrR&>Zp#~zga%beAjUo|{Xpczn^=OI zBhK%)N$nBm?VYJUwY-xG>F6WlF@OEBKHns6u#r8o=hk$!=f_1g#pUT$PyBa&G7cNE z>r|oQQ&H}~aC<#s;x?ug6hPqUbD>Ih(A=4J-N4hb^?OzdGNl3i@tguFa z1q5be0d04r5ShyHKU>;cU0Px=+VakUM6@h*K0_^)3{s%+<0{NGrHufqo zlMpa6fZE%@EC9^P3qN-Mwz>vV)VT0#VVxucM!;~zJc&gX6#i{@^-vqC5|N2V!O62D7qq z|8hB65Qr8A26N5-2gGl0p!*jW()qvokPbclKT7>v?I9+xY7-VP%EQ9;$MsO;!O9LW z0sL-bW#xcXU_4v^>)&l)P7W|^rvK6gg1w@Hz1aJ=HZbeo;6R{<0=9pL`w#foSYZ`2 z{|3kYcYJJMPFN+)zrwL`!|GQ4y^VtnR%7!oa9|d8&VO$Mv9fXgjb~PN(BIp*xc)|i z9nAjkJcC&{x&FoslKY4bvH-J^h*u%aV2LM!txW{!+f&Vvz?n4c#gQJ1H<1b?Yb8>QVqR`NYDTt%|AMkX-l>h($ literal 0 HcmV?d00001 diff --git a/docs/src/figures/hll.pdf b/docs/src/figures/hll.pdf new file mode 100644 index 0000000000000000000000000000000000000000..47267c4fd472586c74e0d207a985d9310e967dcc GIT binary patch literal 18339 zcmb`v1z20#(l?Gav37v|hG%JXW6OFF_Fg6#B3&0L=G_gSw6a*@m zJ6J-j0NhZDDp1-IuqVJ#No@n>UtM1g@oc|;hgazQc&>4D`pK} z{^f1uu*aJ#cagR?#k>9f6gik4p(Q^lyVAViI%Pb5-8TH_|B>Ugw;AuE$j+STbwfUS zr{2So;u%O>o-F3B^_jMrrRze!f^zar5G;GW=J*y1A4-CJU^sxm1+}_eUz0?pV@>@X z;yq5rS=__)H(Z(v7W^RU&H@{iNPR9OTNsCDd+JzR2FrycdC4r>yeb&@b+0>Siy1ST z#eaaYl9_(Q@6{436d~^n_84&!^aAw6*m@f&+F$bdM5g@sBrJT%67u2VtV$m=nkw8^ za%r`{nb-c-xeRLwCB;?t2kx60OI>t}Php)|(V)33Mx;ADx}nUz(pcqfX8W1`Zsh_r z*LUOq3Oy<;5!SKWYkjty^WmUv9V$~$stVR{Qbea6Gn$2=C|fW41!~dR^b|7QmG6ut zaLj-rpZijWBHwR-h9z0iZ7T51D=gKlR$e`O{O4o%N}a1excJY6uQ~GSK0qK}|CcrV}KprKhA`=5C zN4f+roH~})s%Koi$de(bTcVdVm3Z8}@&xa5K(6@6GhGfjD}62o#UQCc@2z5_;UIou z$uyLpmmwX3EIoC&_58dUxJ!yP{BvBoW2zK(%&c^%`>=A5d3EGFRaUW*Am3+Ns7mrZ zIuxd&Eegm%d}Jn00wDBb2FYZ;2px(XX*?~gH!pBW;nOiM?{z4!u3qMQ2fq?uA3`$h#hh!M{iyBIt8O+cfjNt!^u?&qMiLLCa)(Bu&u{51J#P2{EvH|QId#-z!F zx+N*P!4xdmQk=aZGN4H-Tu_Y{RltM&2cHKu2)wIAyMMS8(Q{O)M0;GvPd&P?5Y*WX z5tW=rk=zOfxXJ*^lREad42t+cZC)PP2hw5{bKr0cG9b5 ztF@Sz8v12;hg1(-Lia!|GIaLO$K8kxAyfI?_)cQB)ElJ{r0CL+Yop<7OF05qa5;(w zCEsv5q>-W|N;eU+!8S{3uae;1lr4fd`Lk^5cX_if(O%UzzR%SsLNaS{;)F+u3KZ3` z2-c&pvrk{Nom|q*SeRXyYMcFZS-@Qftiy4hk;Xs*@%vy~-h*o*-==8=GoRF1JCrH@ zkhkx6(9PJHUEpk=l9nusHn2p32)dfVu}4LU>KNF9UEe14WjzWnC3yqKJ=;ZaJsvQ* z5qa!$Pt)8B9Q(uac1EP}`}$@hV`JH61RTUWUUGugi$pa$1-|f?N!Dl=yuIVI?M?4R zO9`aQwJO!KC&x8|R>dY=TFK4z_R1t$ z(zMisjI&{Q+}03(hQ5C!lwgMb8u}gtJGWp;U&h&(hJs@!QfLAibOxLBn&2;4^q>W* zXJRYutrYf+c~;3h*Dr-hZ_(K~qgsLuor4b{6_-fLVGMfS;Y&M?<6%W>k9G>g9Q%XpWo zL$4CMXoeqMC%5@?Ks&;sYS8?NTB;kwDqp8rs`E)C0a>BjubkvuwV%~cZvd}EHd|Zg zyvOD}gJ!azh;ESwEdSR%BlrXTAORGO@%INe=R(-%n;iI+;tih?gr^hXcwi`X)^rgV zpB*8Ie%ggiatw3y)sB=J+SC#Z9|k!ETcI80O-Zm84N>pQyo?v27&iL11SxYwBULkhbESCti_Uu4CT@7V6Y>^dk5aBggY0#So`c zUW-KvK~g(dk)oe-yfebL>13SlPDW;pJK^T}tk2}_o{M8gS31AmR)~=C`qI|DxO_fB zu*Njn#1tzs%QW?nI7F9S?xJ#AEXli}>DBg!hY#F^;$%zBHmbXm6_j5c2LmwAC2a9^ z%)*!8jMbtRP@27rbWGMJIc$ws(gt=t1F1TMv8aIpw!Ne*XW+>2{K9{ z1`CLNoJV!|>M&#~*w{Y1ROBLN@Ny%H$*UU4+l%LAq^Kjo86v%){zeH-ylDYAV)183 zI!{_}fD)^YcMj3lu{Q_TbW^r29vY{=UNy!)6nLfyjE}k86SPq4-qV`5yNFy)IEGbzY&Ka} zcJC$2;c+#*cYKIxnjO+OORIHn89Fh2)0twP)JDLo!B(l^G$-VQpk76rJ+)!NXe8Z9 z4||s>xGQ({HcLXEI%Hy5Fnf?Nb9#K_lr?RDtbQ>?umWZrB+x}e?9)UaAW6uW-nIMS zuy9v}(X#^^d&$MY*H~NqV#{6@yjOgTOq#pCiQ;)`tu;Ow87JSr0v-dacUxjOhM~uI zBS&|JVd*JIi#zOm?DMtZog#C%>GL3mLjuj9CP`hCpkcn~Mw}y5y|pf?f3``1naDSp zLJ?f?30U&NcfNsl^a}(dH67H)19(2{W)?fB%`GxFF{ACM*3FWWOCg`dpPZyj^TQ4G zd3QscS5Ak6kB*Z@H?5hn9vU6$SGP{Kcex}1d%`3Vp3#J{)xnM+>*k;S0YN9T(m)wE zTbjtG>><<~(ayz*wASFkFN-)BA$?%@%Z`DWRQLe~!KR+ad)MZic5k7!H=TK|sUa2P zHT~N^Pz__WwAI&QLPXZDF8uEAPs4UsAJ6V5Jyv}^eF7g$E4T!G?z~3*MFexkfWWa> zg7zflU+aagcUSi)%5$yWkEgJ#VHr#J@X>p%iD5aEkLS1DXGaHTme-GZVWsVU_a@?i zP_rKqhyo;+^xE{s4QTU%*Zs5p{X)1Le3KL;-_JKCQ#&VTL9)!#Uo167PI&4G&M zX4b~gK>>Q~&}L3PP5=jp^EDc@(Z$>W0suel-O+%m=B|!zE~e(L&|STXi=(NUIYbW# z-S-24>gJvhD7%y=L`Dq)-Pr>`9Dj&NJ3_@lf5<~O`alWjXK0VEfTsXK6%9)IQ>`bK zKLQ45Z&1AezZG)@0OhUCTmgD$Pim{8y#_#gaC_?VFQTA-ivB4nVGJ?0bF}f&)+-v*J+>ul~oi0AYtJ@mHd5He}*-xJZMjHs{cBL->3PHR$jE< zqX1Pw>}a5;N&PG1KkNdX$3Ft1zs5uV2$-N25ElMtWb`_&6Elze4)1Onx8T zL&c7Gcay`ZCyy6De`|FKZ9EOQhN<-oKW#Qn$5WJ*^nsbpv-|X9`@aMBY0mzfL2+YO z^C$EG6(!W96;xQHUChnZ%w4Q4fKOCOb5~OrYbS`K3xMxwa)IiO8V=S^;Uoa`N2K{1 z+)u1%&^dN-g-BQ#y8yr(Kn3GJk9qlkTGnO|D_1=<4z4E){SQyH|Do+k4%*Jc@q|Ne zo+nOjsKvRspCHQvdU^-*ashbQpA>z1@^Ene?vI-k;r#zsXd`e7*f9MUzd< zjZ;CAQJT}q$)v$g_I9a72Cd2CIO&$SYjrl$?Q3A;9Fd@(Ci;Kk;kW1fJ0Qg5R3*eD zS=6oV&0UquJyad-jU80f6xIJ77@#ME{0$ku9rpJk@tgOr$l&Dt7c#hb{)r4OcBm!) z?e%1-ztH|idp+6wPyPQ1$bVt?w-!pq_Rt0RzkysFx*|gtWL6#!6g}Jk9xjg8PYZ>m zD}bAei<2Djh32}kSA_o!e8qU3j}z6HsBS1Y5asc~ss-VE@dE9H9oE`YLP@gtf?zidU7CE=k^m z=N$pas@F%|jwF4?n8zNFur*@8-B-E+u^s&%&`8YyupS*^Dn`VRIgExb7$0J#PU=hq zRQ2zncWXF|UFbi%P%Ar-;g&G&)?}|)8(}aSQQBcG*Edg@c+uKJ2mIjtD1@IqM^PD2 zQ^6eX?B?=Xyx~W5^?2vyQWxAQ+qvi+Nibo2JZM4K8Dqht^+nH#%?H`3E z{8{|^;P2|KqOad@PqRnM*!t&q)@64%*Io3q&*FU&TW%uF5;FAQ-*?j=%Q{JKZx$7L zBOt-Hc6=LhbAy1W-f?yls(@yqqhU z>>0o9J9_lIY%AVx@%262=(S~dc)0;ANIz~31&)Kyz2d!Ge{j8CwH##=ESmk95OqX1 z*T_~6b|~Zd=^H(dSo4ub!BQZBRjJU4_O>4%Vv~l-E92yzt{qJxEpBKJKGe2IU_tP&q0Cf!W817kZt<@BM` zV5ckSYWX{A#^;vdh#~y`w<}d)9trr{l~1-ILwtdTtKKAVIV-_2huup4bu^wjg(qHx z$O5ftF>BJDKcZr=&1_@bgR#-XN@yS-vG=W?1*1`rQ5O*m>W}VNj^!17AOg?fWY16y*3TE(xF3=9JCG3NqTra`4hhC8b@3;b(J%w zEWaLzxv)MT^~RLGfV@bTi$A;_8xXN4r&c_+?YHp8!hfdb!Qv%-j72Eaf2mXB3wl(N zoe_41JP1gu%hTt9vhGH%e!Zt6(btL}FfNr%0p0Dc9gHhbSF69MQ#BX>h#K=5>uA=* z%9IsO4c6B=tYS{pi^^ZRZQB_EcB7`W>rA^y5lcwG*fYi2SX~Q_T+ze{=twRzxu6M6 z*%;g*OjO&ExWQX->dQA}pGZyKGk@Hw*XG4;#i!0JU(gZ7e`rOQ>J?qOl|k14e8!Kk z`3&Nar4Rq;P$!dr^h-+?#3h|;%H#{#Tgll#Q=E#?IaHvd?9SFqi#kpA)F_L)S zU*e98eWtH*ZhVOj>}%T!Cn(4_B9|BRaC4QBBboE`7~uI~IUn$mO4<)+l1F;g=jAJW zYR0N1Un;F_SgCdC(YT!Q$2W8r;9Mj0k&ztNSZ70tp;1wAK=S-&+i0Yy+LG`ky*%M4 z9dYAnLQ`9H{38G^AE4{?fKyHzGpK$wDqmX7K8NvZ$~7BT0Hhw?FKpciSz{`&s5dqF zM7%7l*Q8OuI+w0Q_1iFaiW<3Gx@cVQPEz5g(OZo6{aNOA@13)*PJz(TocCebf|tYc zW+y*Z!S8t3{4cs*=d^J(y--##bUz1mU!%EH!C{F`8li;Al_`*|e~7a{!8*5JLauij z%bVfbp4VtbM%AuO7t)c=r%Xn4(F8Ac=a&Y2d)qt8ItNdZujW(e%1H4kI3k()o$yRP z*`5aNilKNRt@Egze<)|TWn=BZqAghSu@B3W4;%A(*>xIu^LDPUedYa3|I0DS1>t-v zxGRz^)vI!?m8>D`cYt-zhFQ8*9xu5EO}e8lVb}L_G)T4uEER2)A{56PakekAx%zHj%1?Zs zV34@}d}SELM?gJ+obRS*J0;ueHr8-uHi4{!)`NmMKQR9O^(Nkhxa9SytKpa1<=yt5 z_AWk;^Jy)-jd#~a@CCiSXJ@^=N1o;&gVs}O&Y%}8VVRXG?bGj!J{$)7k670xx;NtD z5Jm;>jMaTRHbPLAU8tLZ6sFB6-Zxn+jHFvn=Fo8J<*#cj27wfH6qQrmJmGlh4~<8h zu;D*zPtlkxkG#UeQ_UC$ERcQ^VD2)`hfqZ|2m}WoeO^El2;mS2*}?uAkJYE`T1%q* zKC#rLKyqdLUVxsB)RfFoL(MBh?fSU+nKL7qR!uyk!1u_x5b1dmm3~Mx2^vF`1x`6+ZE-IoF zuA=pPuTWANL!@hW(&lu~l?8{tTC=OCQU9;|fx)ec%t6L3TPpsx=I&V1b5j^Qt6P#N z5E);zW~CxZNEL02+{~QZC+CoL+*oRPRd_!{o%iC&==rJcfGG5r7?l>x-EYZWs2Wcr znaO^$K`Z^Gj5>hJtoII8CtteH=AyoMf@R@Ve(1s{>6lkRSaRqz{D64_!#bNG=R~@m zQrw6Yabk>?M*#_SnR^yf|Bdb+^VwauB3eJ~vjuVXyd0PhW@|-Pw_L*##P?!Wwe_+xMpASvhj#?aCVON0ZA#WfRnhZ!6ou?0W1vEdx| zd}Ir|tbc2+F;0$eCMk!ft)}J_3Rv)itn8vUG&`3L9c?);_E!rUXtIA4kSv1SFR@k7 zoFtu;4uY_F4kKizO}B@)`jqM2j(?8FJNo^wG7eIQxMRL!+L4EO&+JGO{FTMT5U)F)mhS=pQnLcRi8N2W7 z$m}owGv`k5=w$Edo5HrH2<0rHR~D4Z_-`}(c<)V9?>Y}k(L7I_-EJ3fPy(>v7B?3M zaN+`7cf6eFhqLtzgunP4Pd$vHZS6e7dTI_VI|I8lTAV(FL$bUz>lH&LyxrgUz# z#lG2mJ=Gq1WIQDsw?TO33)i*fjN;TDbR;6ye3NlFcIex7Td(+ddfvKz_PEy8)!sWb zJG}Nx@8pWD{IP6T@z<3@?PWWxO{p@x?6`gbG>;sUTj?{RU1 zd7&XD*MAR1p$&fzMWG-68H$3y|Cdmd@82U)9v+^*MWVm&7XIZf$Y0V6{-;P31Ojnz z{v#CS;bjN^H5B!Find`ae0|yq4Zi~-rHSeNK5CzOG z>*~1q^7u|8KxmD zRZTAfCkv;I2tXY=SX_4S#Q}4#8ByHXb}^Yp@n?!0qs8mVk)!lF3sQN63#?1BjF~!x zcP9?ws@{5*vyu<8kuAQy@^=+lU&a#rgx-E-UXjWB3`^EY+x83>HZTV}lkVEYN9i=s z6aM9ZxZ2!BYcU~M&Oep7u#)_N=hEAlXwR&DNg#cqg@QYG+;J}2>+Qln7m`;nvS>Q z*OK$=c)=h(Uw?8gw@FBr$4aPEwQ6?e8b zVCFgzwYmkZsP|Y;VWl_m*^nm#%{yhnyQ#LILb(oWMzTnk4fgukx}m0uB_|=poDu(USiv|F*&4lPV9PB>3!H!NNm@3iGHOL+MvrUf z-Pd|H#=NT8_m|=}-U4sFV!_Ite0xDNp;VI^^TP?YyvxrA8~z&BuY+^AlYdTYseg5i z3RO)Cq^-c(E5>ob(ur$DhqWX()0VSyhu?z603v*ystEQ<+4mnRHz7#OHY=R#zgJoT z>SLdfQc*l^USD+zkEzh-aZEDh5c49MS5uXLR3 zG+zU6DuOS%#MUL8>*r`Gr?{h6d4IWA@_QoB!-+|3$_2e(d~1y_!orm9@ZqdZO&2S} znB`}uFFx?98-_gpP0rhdQ@v}7`v^gp?;k)FpH^mHO)W=Uz4Ai9T9?0O&hAk@)uXGz zm)o;rW-;C_jVv?qP+jO%s_~d7^1(G6_4Wi7EZ;Leio%Ft2bxe8=xn&3z1_jE|Kgrp zuXD8%QWcd(M8@VPh`EWnIU6Iw`|I2FZf9?2G^ZbfS+^PK9yjSW4SvuaqmV?rC%!=t z&$^IXsUeL-JoQQ>u9O7fn=%B;4LdnkXW;@{M%K=P#PmuMJ*EpI;7Vx2CWR+Z_`|0# zYlpj2YeBiaH{_pw>Inw|xWI;?u4)&#bBQ=>CA7+W9V z?ZPQcPtL85x8_?SJWge<$8Lu1dF}-QP<6y()^G@)l>{$_sC6xN-F02%Dwg8w^)7D= zrAsOX?2wL-j&!GYrvo{OYDw+0>UElR?#S#_cJr6_i8xPaPB(!vu4n?i#OoIBp)(;# z4N;4yq}8dRtQ)pBzW1{ma)SLw*UvA{rtd@75?Uf3f*RIM+~L$36<_SI7$YN`0q%mc zEOVY!HHP~}>~*!T|0s=40Y`s}xQ@YQSz_~p?N|visYjHVE4g>~^FCeusJ#?z*|U6T zU->NW1?lMc=aoLnT)B3Wl6m(cBD9_pW3KrJ>obe$D|B}ri}O{K=5;VzRr@-hPn>&n zgHuf0V7aA~XPo;|R4TgZ9PH6G%z!7ke;BN&Pb5_bDWTHJIfzCn&-9S>xMTH{-Abh| zoQ*Kb?U1}VwEY9$$TJ-4lxr?K@t7ujfs{7q#(2#Rwq$8cjE_nNU(Vmb0d2@BOcwkT0fd$cW}i8 zAuX`7a@JGw12?qMjzXfR4n(XT{j=O3Z0^BJ!S1>`G;wT;Yg3xCNPj`lW$nJ6F6-y((Q_=BI<8e}bd4ZE>mC7vNRwkc) z-4NDEmbcJ9YWm!0bz55;f_U>r&Bc}8(y|E*{x+#Us?OAF+(5^mD5ntKpu1s`n@2tJ z3B|PWxkAOHjbs2gxKyrq(DtgSMYxl0Bt5Atv4Gx$^C5iwVm?B{tsrLXSY!H7%VsdY z>HP6qqD1<+xgzJH(AwA0YAreURXu|DLT}}FXVreKpcQu>ld1i1oxWG%a?Ee(79%)J znwdds#%jHkGn@{cX_IqA#D9XVjmXOf#3EaEcV4_1*?p(%bTA8;Irb9 zw&mlPnSIYxLpb25JT$q>PNzpIDoS5McuA{ke6-v2Z7u`Ucfd^=1dLkEXV!zhx^-lC zHs;)w%fhK#-x_F)LMhIohb)hC|fa3516})yz;uPI;=od8nt7$9rQ~ z;8ypV>GHz9<_>9R_ECFZ#YqCbZrya0+0l@|ttCi7yLDX2w@QF)-`A7D%}Q(M<1iCv zGhwmny65biuUD~x&5_3F?Fp=kC!i3}@!E)ZKxn5t_S!5GQ)0ycp`b1X) zqb1I&g?`D&y3c&z<1m})M46PNU%K8@7&1>wSDQ}KzX%+1mok_9W`~BH!(vKWF+zBKrl6c@=Evk`H8tD_smeAyixF`*A7bnlr&KZ}`aF2? zuN<%j#uX3^PwsD3<}pVi{Nh}fFobtBXlYVtt7aMBugJe2nr_AIJ#+og^1{TIe|FoS z-c=uBZd--5`Rkdp{7ZvYmCxDjwJQemyIGa?7r88Q&u@MXRY#DABA7YltkKGkBs2ab zd{L%F{8JgCDVtQ%T4Pl_LF_xReIXdn73}gNzC`iWy@5xQd4UR5c`Y3;m(6_zij^CF z2Q%`KhH#CCCJIk5ANk{mT)-M%L9(1*5jTjzu5su1=Wn`Wz(wV~l0 zK{fM2>Fw=jT5pYdY-O*DZs zAOlqS;Zxt)YdF`c{ZcA_h3)U_w*0;eqWRTnE1&`hkLQqCK?AJ-A^Ca^x`yV?SJ>R+ zugn*PmRrSe{$2Sk{<}Gq5)hm^oXvM4Y_F1h4uLEg0}5G`gXrcKyID*an^fI~D8!Gc zIUgmY%D57R`A+nUyICvels*^1Luh$@Xbii0xVhb4+7CempP2dfZlkHCje zhZaV)g>0R9SJW2agegWQCK3P>bq(%m7wf!dY751bCR-K5n$*r+yir!a1?Qql<^Y6+ zsICo|Ss-JQH?MLd?+fwnWs1mUey*e5VxQqf2p0)^+TRC6oFlE2%^akEfNz=1u>+^j zCoCz&$#*a{M7cB%K$?pXg5%lTZKt!5B6e_I6Xlwfv-*YQ-&zgD(Fr@gRrnI3&>r%l z0s4oCfNS5DU&EK5Fv62CYVmDefj{Fbb5l#g?E!5VtT8ws>utiaz}I86RTD zFZ!1J+_$mIovpo54I8=BcKR?eeL3J6Jwt`7UaesM`I^c8g_*u zlpCiP@QT5ca%%6bI7g&qP#8XkT{LCp&UC|p_r+3a!-4BXb(<2fej=eL$CYesovtOB z#|4BBVqu|)q5j=0gsep+~c$WxaFitpG!WV5dyJcghnAKNeJ^Rzf-3E#C zHY+_#cZaK%%nM`5u9D0Jqx$10V4;>*#f!mFlaSRIEGE#Y1Q%ep^wLM5J^Mp^%f!o1 zjoH_t#6@QGyoyi*Q70+nTT%<*IV<*X;Siy)yyu4lirodPyXv?U$K?7e#bf8ka|Slz zm@U5Q+D#U#nE}j8hzJEFeY!tMw)i0iP=v_|=7B%K$E#S)_1Z|NU3F z*Y08ta%5jc4UsU-)<)42h3E?2xS)M3Dk_pLo|sQvX;ijKSje>Bv%d@0EuPWIFlbh0 zjF-!#+oQV+CTHqTOUSNKu9W32WL>UT{$9bS@0z=#9zCu-sX3|ET^N55dVjnmnDmL4 zz^jKUCyr-C-HS^=VbYqmC2S2H78%c&Obk;DRg5fP0JCJtGUD!qb54kEpfp^(jZldl zDA*p4Bcxp>iHT2Yv^>r={rt=3~>7`>3#hh2M@hP9u*0ULA z6LEz*;|2W$DR(t70U5qRtUBV@t15Z!xpx)w@8s?Jthr5#aXe@)FwL2jG@Ww;&klAK z2Wa5BbQ2nVz(v0mDv<1Z$WD`FyA^7VxSu?^i8w;M$DGc?Xul}Tu1o6M zc$3M>xZSQe-o9by#Ww%_OfZAefqMyy5`z1Xwpybx;s1XBF>z2?G*W3|RUS($p;XvY7X-&^VgvbFU_0?U?4DQ~QKoV^EnKy@hL3|^c zYFchHd8Pu1?1*G+P0@Cpq-7|x)@@CqE`b~}?29XTweWnVJRexPBDLn48#=f5vVdYm z1vps>2sN(;X#l<3sr1WN#iKy`0!_{_zOjX0$9&}qfSw7HoMwG&I)6|P`RL4R{DC;Ojbmq4~xQ-vQ?aVd@tlCe*l%9B?Oa~Y{M)tHs0qEC~1ocv<_kPlI)2r}jkE{;(Xsna>v2#C*YR)+D1r*eM1{TMT` za3=M`MzggN-S^eBN^8+D&&X?vaU`PaPt7I1U%uELAmg0aes~|dOd+bRt_=IR@>oHg z(xV>+v^(mJNATrl)SHa7a)GCs{By=pnMLbksG@fN>sh z&HmS=$5j3^w5vhW&klUYXt}P>6BXEtKQ8d7y&&u9}DDDhc0`m?lgx~dK+^*yet7l|c{9KJBHM6N{KzCe{| zN;6CRWi*|{=LJn>dCe(CO7ZA4za?$n53=^d_?R}P3HSwKg$e4_nURBQoD59p{Ds9E zK_cOFmR7tNYwCB-CP52W8(92%&nY4UQ7JyrP+UEZRTLche|RUMNL8j?E;!n%YW^%> zw!v4TWO{bdt^uq3+G=C^_?#Q)s9~iRbwX2I(A6ORxe|+;^^s^}JF-#sGjZ^CQq(?j z$`C8TFZ!lpRh!RN#0#rx2EI~;SNg{#ExU%CGkKhv(v2gp+hx33=jCV3<4aT$1c>G!7cKaye_m@YkdN> zN_?IE`g6FI8*?to_HGJl{KZTOc|`a1RPcbp;mcz5C8IL^*UmSFmX?+&aLdaMU9Xol zlV+u&W~wR|vy`!#}=;E^t;Z{xO?zm3+zh{ z7UR#PeoOwA@GYmBVcRKE8b@EtU;Xk>_C_A<E&V{Q3?od z@E2E)=Ai?5H6y-duXu;=^Vo_2oGrW;wAkw+DD4ubOr5JJ2#r~%$|%{i#Hzf48J3fi zt>SxCon?0Xn%O*wdZ|1@qP5oM;cWV(xk*37r_|Cd>GpwNu~@h!5Xp^GPY)2NaDAjs zmEq!d&jjnvj#Z|(A>xH(*4LsTY=~}On#UDM)Q6X+_uPZvJu`u3Xj+_?cHFFq-p&+#RnwsO9pUxt7&`@`@^}mG8JErWBove0OUl z-3W)ju2ZnAjMyWI^m@&^$^Oot;bPuJT{2iU=EX4AMXGx`l0qQU3dgYO*^B5@q~_tH zG+RmmTTN;Gjfi*5T?5qD7KdC+oM!4aR8im9oTC=4j<_d+m zl22qG^rd-t>8H-5t-ZkWtGc?gKPY4X7begt&i4o}Bv~uhGr5+cW^dp1kg|8w^nZWg zJGZe0%05_;scKYQAV94#rB9eNcK-QtZP}OlF4;`mVy7 z^s8(g#$S|NlAP^YFUUG%Vlauvv8mWFYk=Q{&YH-TEXx4T+`8TZ(14X9qP+Bz_R?ol z)m1fvTf5nX*0D_^rpLHr+|JeBdOgWnpSDc;Z_O*S#hDgxqHy_Dp|>~|D(1X5v)z4+ ze7t3jbMI-Z`Yy(ZLbUK0yfOBvxB*f3o^ki7?=;TsbxS@Dm}EiR0yOCHD*$RP`ud?W zd0Az?x+U>PvuN^ev%1N;j^l1p?ovV)o`M_k0}`PUI1IfB$%eWjl|Dukb^Z)KmJtO4 zDcC^zzP|omz4xWMt-VHEv;;k4@7nvnInL^ClGM=lg!S9L6U>evEumxMmL_8*hM!|T zxNuSMl&zq=fXxPmW%LIB@?666=gscL`r(5X;~f?k!hn2gFprYHIxEd1-5_-(`D_Aq zYH)%>ks0=|x^;H6^%7Q~rk|?+#LAcWRkA6m4%vb6!y$q|?`UN^J4I9j#-{J*HyT6P z7w<;Ez=P{#yB51)!B2BH8`YS~UwbjH<_8$%<4J8^Nl6Wpug_Z$HniY6e?1>cltOpd ziV=U?1&78Rt(nfxC8C^`Eh|dT<1)IP zL32JcZ134gmUbwXNUNK_M}O6NenNv3xfvPHgk; zg;)-uWi2gl^k}%vQa}UQ$vaO{>heI7d;#)NCC47y$?_pXcv~kfy=~ounRNeW4oZu0 zb$soH--*2ITE8q(J4Wrt?!|g*d#W_BoOc$8S+t?IrI)8W0$r)b|I2v333;Z>505jaklM|MpbT!kNN{!;*RU0 z>sB9C7+Xc*V!!}AMHlUIsrtiOCwZSv_Fp9JEn8hxKTIQ$`$w}GuA-CPF*!)cPb4m# zN4sPTF!0fFD2?~>Q{+yebK9$eEqcwkySIv#EEI4KM^Z~^wqk}HLNi~Ae^`;6dT?j) zt*sFas~*)a?+T3b;9B6N^W##LOYd@3$|7~~w^PWZ)6I*!7c?noI?Zk!TJgEHY}fZ5 z*HZtnz9w%=$5HaaG>?4|tJMg}V~b6W$3b33@-VZm#Cltti3zAzl5&PTsB*1Hq~&GB{lX?H zka=1wRDqXeB`?+ zNd#uC*SL|uE`buW9YSFQ1R(rGuJA2uC`2R9#6%rJGVjeil*Dj_nnQ>C-u>L1Lj!%V z%oz;j*Y&U~{Bmq{fqByf5_~HVk+YmJ;%jmIufBA%C2;hcj#P|WY#ES5kDE{<5&Y?k`~Fs@nO3Uo zcs?(!%;)YsY|dP8*ESlB;||LFmKNSNd1$Q#`lRwqT!VdIzQuX@Ja~OYp|^d1zKW9H zicEvB;tQy^Sbi=Z_q;I#GfAFO1Cv0O&k#S%Rxb+q#M4D~{xo^z@~;2UbL^a4#z%Z> zgwS$lFf1P-F zh-KB=34VnV7FfkGiA26Z%HBMU? z?K4{P(DIm?(0;^k1=nH%mlMQ?oYD%M_NR)hYmWy&lk}OrwWO#?h{U=9p9^PJxNqJ2 zHiaG>%UibV;CYrG&uqO7&2CG@r?_}@+XBDX(2T=$jP z&Q&e$aQV9Y=s5Op(9JBkHO4Hf+5L)T@iN5NKS61^Qxc^4T-sFp?PWfrFI=M;QNnA3 z*|)Y+bdA&m?vgaMVpu=BVV0*ivbyn*y#}=R-+*~)wTh*SJJp%E=#s37UCcb7uw@rhLf;YKi8VDk_1e9}$u6=pe6=%Y5snpQ|I7GlQ2jaB z)fOVAKA;^BQdq9}l%~O=m_b@B&52kYIx;^|H+46+BXis|lBDN*%<(KjO-qyM!}WnSDkwa}VkkUl=}t7u@bq6_>ArraSu*J? zR)RVkFcRLkc}=|RfYB(!9a%JSz)70L7##Mje{W=D&#y_9`}+lbjQ6$is3*)Ln_iV< zD|YI*e8a#iJRWN^UHCH`5&jDSL7PUe?*oO0RKtTpjrolIhaWjD+8S5zbnuJ&4(q=< z7)RX@(AjdNea~+4BVjwxj;&J7$KPY@@u41wrC9bjGoSI@Y&%mI^k2PJb~XpLA9Z2Y zUzwPGKY2}g&9`ty#o1L6l4p{2y}sQZ0BfJ8-qxtObYDXE)!*_!g1F^CAG35pm)>x0Z$J&m1MEq+(zy8@v3;zHCCstVH7 zPYj;?p7u`m(25y=r@ftnE5D}@!*3CO=1w0kb{F2%E9XT#sOmN$?D)r10Chk3siQs6S{n#rV+a0Y zpnv4pTRS=co(48Hadd+SF}S%|oAI0Q@Nly8u=B8*TX31Mf0iQMMpDhi#L+cf-GSUb`U2kJD8P&PaOp22lMfB@UpOj z_}SS7f&az{H9k~S0{V*#&|hfy{~-Do&cBE{x~M}TA#7@F?cxYU00_(m0x(bUZzT23J= z`Tqw)|J!CaGySK+PHrxCza7TR6liW|{-m2L)bb#x#%8Ac7LG3V#!$E$J2^q?K8zty zF91Se${;KVZG>1u?97FKyQ7#LnHc(w=;II6k_mXH8Zy`cC&*p2x~a}<~IAo z8g|yge;oXl1OBV8|6;4(B&fSX%?AAEVj_$N_+NVb?{)OA6%<;HCdAdVM%@6xG8V4Wuf6%}j-2XWSHy5W{KvRpK2B(1>EC2Q z+@Qbe0&y{h)={}Ye**^qRI~PiRwnWRfU1s;(CXEv!ZZL-*1^IN@Moc{tRz5BP@GRf z4Eh3daB=f+@QQQra*Km`q`)8^E_M!HFqfF{|C{6WQ(>$t#MlM$8w4O|)*BZZ9i5c2 HG}`|Gq6}>+ literal 0 HcmV?d00001 diff --git a/docs/src/figures/mat.pdf b/docs/src/figures/mat.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2c47520d668489fec85925f6aceb3d93a0ef77f8 GIT binary patch literal 6413 zcmd5=dpwleyLXEkNjsNvDKDi##>|)*%w$G#37Ha-TNpF%FwCu)A%k2>DkUU|P%4U$ z>qaTMDz_v;MYY{3r4XTu^N!Ts{r2yi-#MSp{^P8FX3evn+j^eoyWaIIZ3jzh1lj-# z)2@G;a2AFGPyoj_0A^~6v+eBEk3q9!O!q zLNeZYZZ6WVl*|027a&>mYBo*gL)RoATg82QiC5k~ zagT4QWM$j&?e#)GQ;FFNk!HP@&BC81XndQTd98Fb;wb8ksokoV_sU}WhTbQbt&GgR z91#-V^=dfe{INE-C!@jSkPZPKovxX8~!&`Xc~p%Ru+ z--L}0AT;1-PL3WF(%-xuV@_n4Dz=_?zb7R5XdXa4K(|}8;4%(9l{9B3Z(FtDq|6?L zLGdO1#hLa^ikYZ{i16!L!nkD5k0)EQ5>~7qC-M+4)nA`%FNq26cUC>HFSaa;)gn0W zcK@mA!tjQVkxW3jS=Pse!hWugfPDREtv_}{*q3kko0a(1 z`yHfB>@t7Pp^kL_)5w)F_P6((9Z?W!Khi&^pYTNR+UdNG?Mbq?*{nbD@TBF@$3QS?il5m0@@oJX)J9ik(T=~F)3OQ%hp^iKYG4oG%=-JVu&A8;BfCg zRx4M1vZK$MWm6TgUv=QlEUKaub9Gd8{O&q=dF8b)hFB6~GlEl5<>5VR&**b&L5ZV# zBu+f1NEF>BKN$|qPQ!SYUBA4nZECZDs&`Lkw4-X$o7$SiUk)ptc1a;iw)B0Sxb#@F zq-nKu#QEpBw)*gh{_A1!C6#*zpTx!WYVKndo~f?LXK-@TJuYpVbjh#&-L7PgrbnHl zmGnWJdi5)pV=G<2`UTB}+n?_;=hx>@yP%Ag4Ck$+dWJ+f7Hyq}hpGsB(UH@I}ULN7qmQ+yV6|IJ>)NqWH_wsJ?Dg%w8f zdQLnk=?;@OppRckK!SCn;&_hU{x`b4DaH*~j@NXyS-;lMDl{_PR82(i3vn)yRpK4t z*F%qOY*gP9cWz7G)gy;bd##JSw>q`#n1)ul-G&rnAB|Hz*}?C>?HAJ@b+0hT;1EC+AY5&4`-OV7LmU#l~Td<;S}|* z4MgU@6ITv4x9(vxJL^u|-mkn{Yv8!P-GEg_Q6Bn{GbPQ1V>mq4UvW}{5C`OV=#X}n zggU&`oG{fNAuT`F3_~=YI=8h`(a!Ztu|z=Hp|n$!ltrqZm!!9}*A4Q;ZBydpvfNG6 z!0j<-gH&wtm&!KtB8+HfoGv-qnCD&tu+;66r;)?wRCl*&`=4^MDoqpC2JfI%$rw1x zZALmb$<@PDf_G*gtuW)V4E9`3(ecix!Ig_u)9>KPt7d(7k(2wJkh_fHWB0u{mk`jD zYwiE3!Q;@Z@|tC5n_E6td_G&R`61v%x_U^$+XNZQuq@|j+qe<2->W@05D-bunnPVm zGViK|vcBoo+au3a)$6-x#uj-|HYlCOwtDyGt}!G%y3kDsNq=q3>AZ;WlJ@28P&?s$ z`*WCUO2=5s@VmG*zuhiAk6%+4AoT~*_v@I^=oz+mjkibVgXPxZsv4C8VH))bjh<7- zTn5D7l)tMvx=+{Ta(k|#R&ly}(#<9rt&ZvzvDULL)wL+uwYM%a1TJ+Fx#Hnd6g z9=uex&${?F`%r^9qTj@YahU`-$y-qnQOlH#R;gDE@vd4MOz6Q6v>YA0VY)deI3DH` z@$ANy0onC|CB>%}+~45Hp!QEf!t`p}z4U_4jG6YPrnH=gY}28!xUt zQT#S`=~lz!OV4EUY$Alockg}t+^)Uy=lL(AYAC1;3jF~Pf4*yM9{O;HlY^4873#k)ZM{>80{k+{BrzDAM8h7M0<6VRmipk$T&^KMRD0g27L!4+0OL3&H%u%ix z?ySqr-2HJZig+Z6e(Ub1CCj(8MmZe{Z>MGO8be_RH@S~IKo(2~=Z8fqH6s-3_K(2S zCgW3PIxj~rxn}$&eWAu(+b!mH&s@{2w>^9k3Z{Haf9{sk&M8<{d*wv|knW+Qm>T3= z+)Q%4Q5zvAcy;Y%szPG@F83rDuVQE-nc+}LzkW~8t8+}rP$M^Fpz(5bOh3Z$?Op|C zAZ#E|wJ@9_o`MW_{Yukmy~(|_KfXS$%cLtUGrh!QcbvS zyx;NZ*eg4%kYU-a=}JA(YD+TGw2Gyp3T_+0mX*WX+Mc?R!fTvs9Q^O7PQ5jBGjDve z>RL#;({m5+xat<`hfgh!;~e~ZsMm4~8rWXo^_=aq{@EWK-uAw{tlH;x#KbPe7lHJf z+%}zZ?UPlq?1S8!^6RGKYV?Qnwy+%C*~`bu z8rEt^+sAn8D4q=3@nW##35S3Be%L^9mnK0hAk~eoTv_9KjhginK`$En@~cw7rDKVb5BdHoiW$rOby{A0E9n2bb-m ziA(4F!Jm$5lB{xYm9<(VyI#J>-0b+N-k4G8r;mt=>pX@M!LXyWtieRN8o$rj>54E8 zYA}D-jz|TB(p9>4;Kxo+_7eT4YEm65{^*EYeJ0gMzOTHuLB;Vv$dv=qi`Ea_+`o8C zxW!3t<<==Y=;mH%mEo!9Hme=FF?OSZ(UsMwZ$-nyt$gIfRHThy!}~L2+OFJf94BIJ zIs=ru)gy?{CDSTg>k4O=5tS^mt5tiF8ei5WUlSo{v83Amz?xN7oB!xIXDT=`nBA&Q zekw1dC*9&Fznjr+9BNPtCASa(4+=j3o8Eb2YIitsA474JWv1v2o(6K99et zEjapE!R__V@nCLWdPV%!>ig?8y9VH-=ZnAcmLxhRf3hpEva3v1OD)-I96!?Eym#bd z^MREMR=~=|>|DD-US*D3R8M>vZtpI3|z+Y_}|El26jlhsw9moKhM4GoG+Shj{ zk&ebNk(lFO+z{>Z$Epd;o?*Q^FI4mxQEy@oXhO~uZ%HwiGT|Xe;1%a^n zmjI-Ng{`x*H6`TxTL28wFM1Pz63Y^$aWjB`gdy`mEH+<0Y+B4m= z|9{8SY@L+Y22m;eTK;oMo)7q3t^Y_r4EZDeu4Mb4`^6aHes0**ECk9%ASeKQK|cnY z2!DUI77j3IMEDLI8Aaw=fc^}dP#)+My2Y6q8c4;{;ASM4sTl>r9f$~_a1r#mhZsHp z@~K2E3rBlvEfGTqA%w+cK~)HZu$XKw=2!%QM7S&20T6IF%s_?! zFf_m$pb=<{K0xPDSm16BFAzW*7{MWxJQ|(g2!ZgsLg*h6?k^B<2}tDb-MbBTV+=Sv zKO`ED$0JdONJB#e#DU-o*#b%kg3VtI*_(5O;rdgI%{Q3y$$-jXAsMbnv;hkF!;mGk z9Ug-#U~t%g$S}p16C@zQgMt_|g0Be*ii3$U0;J=75ok0BB2dskpz$~g9fPHy4AHb7 z=D2@x^A~&20%Q;a#Q~ZTpi_nxLmOPk9C9=HEvF zO6YtTOd%cunL}gHg_aQ3NQNjh7JyovhW_ca)2Kfk<_7VYbAX{yksuQk z`NoGbkA^&^Q3-Sok41sXox=i}c_Xv06hh(DmN^Y4(Z3w@(Rz<~lTf{ubm70gu99g1ot_nG+)vgkk_79XTB6?jgG4KsP`(odbx{3EdNIEddWxtf9F% z+QQ1*#K^*ABMybNGBL5np>cQL3t!8+e7FA_MQBrV3;Lzg|G3-f5x& zKYpl|5y1a1ofY&TAP`pXzZW=(8H*Hnlh{pG*GB(m6Xb-V4bGBl0 za3{wKVj_Gcz zVg9Z#Zjclzb%+;Iz6U6W0@j2qkt!vKBM~IUiV-B|yo4QV^wclL`g`q6c|7mq`BTLh#)zx zeg``g>X2_U+4+Zk0&sqW8<`_Av3;@rxpcd5s{g&8CjkEp;sU)b%;=L(Lfjo7Dp3^% z23DK6+eCGSH7SYN#43cSTj+r*g=&j=mwy;40Cxe!W}oSCxMW~<{)ny;pT+{7gmpX~ z`S~UR35UlrYB@-MWN0#0?}$#fQSF9~&VW~-L71!o!c21L-aV`9@VC;nCcE3LR_mpS zMP0gSU3wySCueYsfm!`Dnu$d{sVSc2+8T#mA91l*c2g8sPyt_vKg2 z(x}u@Q;FLK2vmi8U3L&~eU{(5PyzDbaqUQDBuLUQzbkA&Df)j`(QZJo5uJkz>&2d5 z=8?xnq#rW+livQlm!U;oOVUXH0>qhJA>+v3S1&r?-hX|aD>u;bH{4jrJRTDH5y=cS zDNxrs5@>Lx25k!&mAVBx2s(AcH`i_ios!jasT@6W9QlC>ktu2of>5yj8~y_+uhmNd zxDO~wrcN5)2+N=g%!SCO{p@dbGV(#1tn|0~yuv@Ah-l){%z*Jzb zLG6E5W}dI_yAKx6GYI0k>|AeH0YQtOqhHMFhe8Y-Jt0r}U;ekaMia)U# z{;JJlKYO)}IX~xeZk2bw3B3sTmw$vP?1qo)UIztE2IFH6RJClm<8OSMSungID<1XE zNuU6gT>*)l!EV^GTaXfS4NdI`4(xu%s8{S3Riu@Y-W)4dz#RSCMs6%qiOXwKLPN~p z%KSP!_m*$Qx4{ppy;gxbrCn7EWs2J=0&wtupNR#0rVDxycRIyZ<>o*uaji}@_T5=; zk0N^OrQBw@NeIq{$5i5C1cUWBt(qJARoBWTyV^rt?XhhIvg6?7uVWYK*98%=NfZ3I?hgj)*XCeo*)0a{#+>m>r~j zOQts?x?b@iL0rDbj~xDB_;31;D0#Q*y|IFN6hH9pNI5VY|CA!zaJ(O0kvD#%$UA3( zHDv+8^X)1otvX5oE;`&xfL0C?C-kq8H z=8>7_A+%#yT#Fj(ED@mxe8_&U`fn?($o_kP`)!VjWS%%k0Bul(Y~6G?{f5^OUqgU# zB^!xRa2OR+HamiQJnB>!jRN6Ia`d2aZKSAWP;ecB6{ftZ^{`aY9!IH%Mv1t5#mlhd zJ)_}|)mqH{=?7C5>=davK1}VXBrq^TNkDWITjC8=baId!X5`p7K5jTD-7yZJg?VkoCB*b z(9>v9d~^B@=W?!F%1*9#DLtyp{Y~cYZ|LAF1aMOFdmK7X6E>!KaI{(Uk>go9FsdMx zBaqlGH7ZGUOnTMNwLQKLZ+MVcWA=d|@Np!geFf-3J8`{#UMgT@1fe(omJN6mJ7n_4 z3``DO+VO+!@=`}&zL#_`MFSoIDdkEx>~i=7xQ(qQ-lr}QPQQ;^Lk=(h*iBGr3*6KW<-51Ya((Z$gcZ}br;a**eyvCx zRuX_`{M%x+?7%ujKr&xQ+<;sy)(wfu@Y&iblJ{IRPI!rkT&xT>ZbODdwW&DWS{FDM z4uPF&RzDl5m;n}H7Q~|dMgG%fn6-f{s7H0>l?L5aa{K0ay77z!?OmwR)J7INFM0pj z!lyzmc(*BQc#Hu~VT??riB0c|UsPgVFQgtf3}KtmG3n$xCidCE9W%EPdx0Drh710J z#=iy7JsdA&WEhKoBh9H_BCs9O8J<6YVYkB3RUKCFHz7A-cEhZlg5ubnv^fjStpjG8 z1SuGVrcIkI7H?YQec;lJ}#fOL5VTYm@Chuw4d*3_o~VR<&! z#Rpp(fx{a{C%GE*6gPB=oii3L%V`H0>uLSGb2OMY{lBc#9+y^)2fa`QL1gRNtWIjZ z)Ue`fa?(v|fx4H=!1J(uZu4NE)M3?-)TJNm7knZ5OWLhUy?XFeV5`L7sn zY!)&KU-OWBC@-+BoHmE7OQo#xbc=(H(r@c^fu{zt9ORaJ%JQT2Tc04l_v)|4NS$vfD>bCI8Tei&T#_2pGSx*y*T`y zo5-PDnesy0;A?XBu%DFX1fni25P!YlY8u*CBB1aj5hhaC%9^btzBU)7%p&Vq9}nl` zf{=rjT>C#b1>zbuf{ctYWCSg^;&Abwn(aXlv45E|;v^@Z1qdS2PTrh_y9aush0lAd zZytRG%lPN))Et~2z3cSd(m;mu7s?+~FMfCcY3iNg+c97RK`{q?;~^_g$#xK2_);8% z;^!%4#&?Bqxr@Z~H7GY|glbOmKxWzTmv+G9&KRn#wM(4P$PmG&4_@|JP6ll(a1=ca z!%UsB>0R8y7=cGBi`^~4@Q`nuM0moO zoF#ujT8xb-OK}i2TE$DbO;*=<)X>c*(3jVCKXJ0~YC(x|2`Eyxv#(=kPGqh*f>I1_ zNu(Aq?S^9=UvT8_Nn*ujol|HnL}zO~@F>E2Opn+XP5dS~O)MC+&_>bst5uDC9 zYDXvMW6Lw|(@3}ayziGD$NE`s&U$Gan<~$=I|$f5dXA0K9=uo+tdqvcA+Q@RNC$VC z$zn$>N0^{lw;V~ccmz-SWUK7Ka!;xur0oI$>5Dr?zRcgtNLrLuHHE_}S~4JF>PI*s zg`d;T8KD;fpcfQDFKcy);WXLL*goc~Hz?!Lx8>1a zv?zb!1)EonDc_qms|vPPQD+S1xp;HohYbB-N;6MHQYb%q~2EVcOX#h=MTp&(L@X8^B=T#ys-wX zv4oZm6PZ1^7Y|IUXF4rQ`ot@MMrs-4hM&NccOq`kj>eBq(tdM0(wJ$5sZ69k1U z1@w??QFxxf>j*F44&A-){Bg(*Ia-)WIz<>tOXU@OWyTzyLOHFGPYVlYxGgb|WsQ%L z#~3-S_!f0Hg2!Z5RS286FZ^VK%bMpy^~@3U>&0uc=up@1UKEH=Cg-WAf5qC3z*lup3l)jX$%Ng#VSWSVIsV3{*lH<*CGU)9A++5{;CreP`?vtD~&r$qNQ|jy2*C z_ljxM@xDgj{+}}z)0WMOopeXoH19mpP%$A-4}ui|AEIh_;R(m=5d@f@=N)=3!vmZb z;k7yy9o3`*v1o9({kXtxNnO-8oQ>~=j8UQ(o$Jg7sqLh6Zn#)~9P5B?0Vt53cicY% zjGT3zO>ovv9!O1m6Aa)FGXc+)Q3kbkb90S*DhB`pO-!%WPf~t&@&__5erRrXv~9I$ z%B361d;ZK~O=>-+Zn*b}<>Na3gruqlfoZfABF^yuKbxaa4#HieVwrihmW4%E&F?2e z0K-rytb~qNVb}S*Q#MIdqiub6Ca%8|o__C_Cy=T<@W7cjptMWOHM4hbsa*ycxdCBL zZcI>&gx2jd;Tan?xBbo@TrZXk$o5qBKm3hKisDkTlpBy zY|6HJZcXq7c4kgYE1-2~o*B%De{ok!1rizT;QV#9eSoy%vdWY$nrZd4M2{PQBB_Tv z*nQlEP}c8`Kf&_y0od=H<4EWe60QF5V>jr@%W_Tfmrg+3QgcROLvr^Ayy&@3OAOSG z>Um*wGO|ULa%ZI#Ts6-(Y|h!0SG%=YOXtMoL);&pzPN1qc8PbcPfcNVY5-a6*K!V= zN5vUdfq2?fcx5(&KUnJVR8~$4VB^MuXSzPTmKM<(@z>gHnS2oGjRF;v8gHM=zv2M0 z30b`NITm#I57V|Bc}hRl{Y>TL!2J0IQ!dsrd1FbfwmOB``nMqAq_qHxI5x-{WW&*~ z&-ajtLn+fxkzpEL20l$MKD(bM>lYd^#III%l2bps>8Hc4RJ4a6E=0-xj{p_O<5l53FVK&5N^y`1)Mc8Xg=ab>DzlsK8$Y%|o^0UA}p z$ZU=0A4kYMbNiXKhApj6oA(>RO8K!0}?g+YF!pYqh&9OQf3i7*wWCnVer%VD=hS(1e&ghP=L?>Eg0Xbe}Sb$xd`cp67E+HWqM$mVpiHI2Ij zJq_KUIFz!=O@A{%Yn31CI{03HZ)R1c#)_xzWha35zU0U0p=?nxU8THDz;h>qB7XS& zKFCXxjhv?$sE>Ry^c#+D#XYw1l^?(lnCpr1CEnIb=z#*J$d2#7qD z8nLY2h$Q}G@(itgua$>8lJZcl*`5RuQvu+n$Y-F*C$d=lS*h3e%Ml&-*8z(l?pesz z>Co=vo}byL+k`TXbX)c6_mu4|*n36YwC{;YbnbE%HG2zyZJtgHvP+}Dn3COiC?6}t z32*$Kd<(RB@X)%1@9thlc}4)il^(NM^y8qbPpJ{$?oxoDXe8v_<};1PmtNPK8Qk?9 zr~O#@kEVcqZNgRq`-j%aQ=Bf%B=k151#YdkRA?j(M+w5!{;j17K$bWnfx;h+c;81c ziW(#2%1-*JXY(Pz-Mtlcl|*%;%hKcCsmMHidYVjEHDF#M6nTYKLxXyTG{w0T1PK6x z);DHN$T~0slLF}kacxV6mQyU;4yCaM1mQ&d8}4M{+CQa=XVB5Ek;1dgdrB`H{d~{DvU^Uv>!rISw4yXPyg+s$$m=ck zZrI9WbGCg0>WlDNzi{c2zOdo1LYkUWxvjOxp-4##gUs@h#z?~>c8Qsr#r+>D`YIMr zWKY_=C6k1~(N@vkdUjIK-26j2=!Fc(7DNX7(m}}HbLV$frn=$pxOah>&_fT9|N4gt z$8{KCJ|_#`n{CW7U>&;{7^2*`pFhVEzB)GL()*TT5SAMr_07EzM=v}|&7|OoAPEjl zj~HujQY-S~wm8HCdNo^SfGFz~%)&NRncM{W!x7saH9J?Y%lEDC&3C<)mIHGu>s|Nw zSd415T;Et3HHU3B8w{*RQ@@-LPlsezW+SYY&&MG8PW!h%9n;U6PtZQf^!Wu5;<5iJLNvv!$q|DwJc-bg z>O>}~@E1y@qAQt{AwZRJ=Oa2dW)YO1ls7(7s?FZ8afjZUL9muQ;rvW*U3s>P;{_mz zUYy@!VV2eAT~KAk?F&J;&futYLAqWA#<`dm5c-c#P#y~6Cxwk+jRq#9gL9}%1V|LWzd6kb^H8P>~;KZ$e6Gti0gAMHRR8 zpclr#7M{=-sza=WQ2un8O@pKnupK>mI%P@6f@)Z?BzqCqyoy z9#YjK6?J+?3c`uums28JSq$>F?=bfd-gA(Qu0d%S<|rj4pHP9_T2M*Vd%YWd{8DW< z6DO3-sK@_&@%Ae`a9~>pTf)Kca#L{iyb1qW8>)#?@Nqe{dt&+8k%EdT#D&j0zblG#-eh{Sh(# zVoKO?x3GPaQPVpckg8G6z#l()AgI@Xij7# z`anVwf5V--eaC8Cg*fCSU~IYBvtQ5R&TJ{fG9;8=Eckin!suqN<5rDYd^W+J-$ zpKt^mb~lxXum98@q8dc67ePIGENsqHu;^x`fI=(=I;xj6Imb9N?e|QWp-3xrCw0cF zcTv3JgvOJ`T3RF-pWeFS!Tnl+1i%AGJTlQ$86Yu&X}0LBI8&oUKnWVl6HR`3qL})d zPvq+K1F(Hz4)yhSyo`t~zEqJXcQ=oSHwQsILShn|60sNOVgi)9@dve5!hlHt~s%iJ+)tHmPZs1s`mTH6Y%QZ83O$+|y3KRk+>T$xN^F9}=!N}Rx1f`sm^SbMjnRTOXeu9tm~ZIB&)9))>uoqhg`NAnufA0sC2R$~w%lC#j*_Z>l!4silhujm2&QEg^=N&w@$e1Oyt*KpOQ{KRJ85KWhGcHG=Sakls7hvX7%CYaR!|0E| z_CiY(j<1~gjm~`3T!3nc3xBXjomnrdf%bl#`phv;Mj?tvXgKlc9UYT#>>^yJRBofIKd(l{|W#MDYVjY<~YpdL8NlzSZ;O-X# zMCNaSx%s7Bu-lrCOTM@N^fTr?8Z`$upswn#=dYNUO7w1H4U}mMBfn*nHY(E#)hxTd zfsVxUP#NFBNieMsSHH&?En?Gfmw{R{mJW;`*QXn|s^6;Zy$yQ$FA-=;BDPIkiv(>d zW80--0a8IZAll~24>J6cd63ujJ(yk=oBRe|yNIhVIgjDxP>8F-OE{Z59O3jFawp&4p;)D=`zi&87zmwcPHO}XY0U$2A2wvUKQk5O&U^`N01%Jb`J_#q zbfB`8xE?&+QIz)6SL$`XC?9bRp>c5e@*BK`%d%qR2nbBdhaWLjllqp*EfyG+flT|ME zd~}Ul&)BF5kKVOJhL)V9fc4cIa$c(wPRyX5x5FI%BCtI!jZlI1kq>wg7cIZ>XIx{o z)=I|k3FA+F{-stOozmc;x2=DflqG^FkA1yfu|1ACYe8ydrGxP=GFJ*U#Q^Q94#)%` z!0dr+amWyqB2x$1u+D)yExrgsp-4q|otQhLcSt+P>r~ChAnSLr z(n11huePlKpNXUyeGzu`Q*h68xBS>vVu^2Cuvvzai-2Q|3x=`}?zUBTo4dcULyB{B z#pE=cF2~LvPL4q@swU!37CAdX@ z>Lxy?Exz5bG2ou~=T9k@)43#?#mFTHGokjcLb~^durOpCEkVMAK zLxGjgF7L+0DnKjm;%?l08u(aA#c0M@s@Rk`O<(?$_q zGMnYWB0-8q*O)#vhtuad2<4YKBWESQ2+xv|zO;r|f9h;;I?@sjn?*f(9r47t%<<6~ z8-pZat$uMieTK5P;N<@JHkH$K*~xP~y31%Pc4OoT0q8(x+Gnhj06jcq1Mss5U`RB& zirN0y=+?)eq#~PAQHeYmC#I3V?g@!R#CcVG$x!1>?>fwJ9XYm~`o3LS-iE5W=ETU$ z=LZUrMMVjDx6b&kJ$oNf$&1t41wp^%YFdNo*tNgs9}@q=1v9nSRsKq0BPV$CdbZGb zr_`r8UAw{Ks&9}Ly(64ES?Lc~*%m)FQ7>k%Abu-1lWQhvEolR zh;?;!OuxIQAz zuBIoS!*%{+El+II>(+kDS0*u}m;~zzA6qhV>*Yk2M=WbQKfdlTkqrfV2oGTLV@rOd zALz2LfoH{Gic_ljIk`43kkA0NFWYmqQM*Gs_pHr+uzXo)ec~CF6x|g9DWVp3Ih^C{ z9T=eDnKBCtMebCBUT{WpuC}=6y!kKMVW83=Q>vA$dvxb#qt!2v*e|Vi&8+wA2LXe< zo(E140Sa=w=)Jg@o(I8nO@-}rrjY{>q4pRLe_;z<1M|w$Dal8kSPNrT0`yoi!5pbK zt6hTf`uZd=v3mzK@7F;FAU_8FB&UnEk%icGIS^Lhh#-pf*g{b264O>zcG%L?iuie@ zy?lA4$E-h1eBvt_LAAwD547Ie0XA9lEZw3kQnELhZ1Cu5bLrqX-fcAtJPxl^FEmYU zZGutjRYMHf{OW41*Mx-3NhWL1@S)Mo1E3@j!@Q|TP~HjL?2Jg?*a!Xu++N$q=^d|E zNM`qVQX6&=@L$zoT3|?rU3VXb&aRyardX`4QBXX;Z!-?hj4bOwWneG)+l1byv+|0a zz?_5V+xPD@{L$D-N=h0e&IIUd#6vEABtWsaU5n}LRbo?0fDZa6Aop7If7jrEH`#OELEpx)W_fq@*-&vPtnSa^;9mOuA`VBt&zB>oT+3o_aNB*?M!A(RlE5!D;Qsm1ALWjT*;ERpmrR z;dl9J>9O#u@aSbb_#ipiT+flVk4FRxapO#7H)1C6MtQISYIBp_*2r2VU$ ziEpGvy6O^JF>7WAvAa9Z+ArSvM-Q?8E6d`QOZ^p%&?9*pE*w{<@ z&T`w&j<`fbK(nhNy1FA{WAg62%ki#SE-svkii$XRG-R5$Z{U8S7Ny1>Y$kv&%9kb7 zRHyKA>if-kO)yjMnZM`CCDK)%=po0l>`6r*^cKDeOK(?Y`~gQoy3Y-C)w%rO(aLZZ z!b*yLJ4#j2Al@wJ#ouXNGya-^UqMMpT2nLeBh1GWR3m}d0nJ&QH3sxtq@YvqEU?d> zu8+cMVM7~2e&CFa9l!IA{{(9E0flcqwMgGo>6&4Y&-#3~L zh5or0W-C+Bhev(z==^vZGr;WBN*_>a%k={qLqS)Tn3RO{{oTz8jIxsp2zoSLtG}Y4 zY?H}g?I`lG-;Y|M0!LBHSfiCu_-Zg78;=FekRT?rJz%8=3WgLb{1T-GTms zSdbg{^#gT+uQXe$XKW`yJ7QR?*6%$YHn|CpkG;o zW}%zb6nfla+E=>x18Ye>pxl7jhtAB7KVvQ44n8Tn$`GCJ=??zW;zV{w>n^+cWb`Lj zr*@>oB{KE5_Akoeo+r`yxrTcN5|Z9~J>QxTW8Ar_XE4W(SMdl8=D|l4*{BvrA?n;u zY{QT=W)=M@o@fB@=O#x(%E6T=ou~PA8H=vYzZ2uO!}m=+$WoceLJf?px*NGoq`iN5 z0S{epG5LlrAJ4ocghq%DqsnDHBJwC??HlL?%Dwc-2})ZB*5}X9&oO-t_1&tgK|pmZ zW;#$a)C}F|Rs>qbuE-^>xz-z)q|!t4rtx}m20;{3I=@p`--dyK@*Pt@QGYaX@Tv3q zat04bn#iKva~ci?_=-*FD$?3f{=kt(tP;!@CH(0sCJ)VDPhF9R@6$8;3zN*#A(X(4 zGx?ceY}uvbR&<}uTy{sC8FdW}TYI^o?(S|5k3-#cBXrOtkwuB6&AA-UAEh@iZ`2^9 zoCP@Lvn-}w!sw;CgCV=I%2}8}nCYOippwR`G5ytB1{e38=*c%FUAmkq&{PNC4 zVBWQ(A}e3hstrGYb|)@P{o9C@2tR32s|>h5XRM-T#oO^a_C~pcOMixHF*2Qz3{La-?7eo=5;KW2PWI9qrP*AY2tPDSi z2V^YDOH|*p|EI%CPXJB>MwN%M`0eNr#qc0#vI69H-Km$R8;A9%ki09&gm#q+bt8~k zx)&nx=o8bx@ez<%>2NH@q;P37liOH;2_hSzCfa*~wcnO5|r^^O1f740=XK4TIg1h!oky=esi2_pD|5Xgc&j{i96 z{igDi=QXPdT1xcP9*>vfo@{rKM(=&JNDE3>*1WRr$nda~v~)0_O#A?nNM1&hIcPK~ zz2EoGUVrdEOYk1Zhri7KS(${Q@xY+S`^%R{mwQpT&*9^d2UhUUvtApnG&L?0qG0SXw?+>H&_BNS=4{bzC{n@xNRS6}c|%x`jGqW&uG$Ly&|}pY zHBWSZv4Q>W`h=Zn9)w{LO`b^h&OJ7lIzsq&P&PGxYN@Ws3ICqi!$U2RiwEUi7CMiZ zJkwx_gtZSC>*+^u^pxM6!wMw9Z@r1N@%GBB+m>gHO@y+;aqvr*#Mo2YG-d+^E}tc6 zaBwg?>@W|gIk=RRacgU9`LwJgDE=8eL~s7m*`swaF}?>1EMH@s5*sC;g`6z@6f9%0X)pCRj0R6xUT!%MT~ZXD<@k+-(fqk?80@g%)=#>=Me?qjG*J!)5czo=yvwD zokQXemqPE`&0p+@28#MzMym^BId%lZIO8FUw-KNxzgMTmb)f^`1^+5r`@M#$YS8eo zB0zsL11TYig0mOV3*h&)fO84a_&6dJ;;xMt_F}gNp#m%H^-2Pn#?F{w7$-$`v0u>; z#k_1fD*4RJo!+;qX4ss|9L8K%e9?w`k<`pP42~u~M?^ICn{uqpRaq!*%L~FEEqB!L z0P$fHYJw(llVo?S(b2iTX|V2{kd;NCVPp8^Cyh^0ngtAxRMp7p)eHg4kGF=bUk6hX zU&U0xBc992ggr3ia@AblOb`H_jAEd*4~X0vh4DZAX7;CxpfXB2pQYJ>bsw{&>sLSr zX~R~Qx_;sAWc4BIF8I_-ssi=F(=Oa3bC$XFZ`X9U;1(t(<{%7~Oc_xd`P*7rPA=>} zS>E!8_Y?rC+KvHgvp;LvZXl;AT`OWU{o&EQ(Pkg%&;C?8S*6)B?yrK(*E9LQ$)awK zu`F&PWWKY1kUSB~2(23(eXm=#B21sjz|6uTxNz?ks)b3m%7Qx00>pm;J%hpty^et!RaW?L`H zztaDyY@W0;H(^sj9(!QP1`8DKTh2R8*Lu^0?X9yo4Fmbc!ZG~7`Okf4qp!5i-OvPnhW~5BRmL+Z?H@Z3P9K_#4c~Gf;ZhW^<3JFCqJFU!|yAJmmL!nH+!eh%w2w5|2lRj$7BV7OD z;?!mLv&*;o#fv@X=UTOHeE{YT4-W&HRHpOIshQi+0{+tz*2NeixD~Xfe_OPby*PVZ zg%Q-Vnz;m3^gD%#;cpa{{Axs74Xi_{u?Yy9>VPa>8q>)Cb=P2;)h+W75!|#fD>Hd! zSPGa$9f}mH*I8*L3kZm{z!ZSPkYD0bTGZ)hDBHG#K~@|IF|+fRcVAL~NeG<#FYehZI3ppscq;;+)C-cEV-_IeZ|g=~vM!Wsl8r^5! zgW~jw04volQpHR%;wym@57p#ygCDJ*HZjS59Q}@$Bb@d8dF5%>j-v}dt=#+C*Nt>60VUn)CC7BV3om<~y28tA?2*tvYt5^X*?wm;DpZfVVZT z#h>4{4>n)*cMmtq({Q9Gp-{hC)dK2yYpckA^_=jBw5z&ow{C4>EWM7cz^!2PY57Lgtc& zv9G}maGEX{+|=}mv_YcPz}h;0LRNxE6Ey_*nbW-4odwTZED?05RUlJfW~T7G^)Uf%8tCh1K@lYQ1_xb;+`Q4MNPkLdJ9 zj{ZO70Hp4YZ#ZWG4XkF4&TRl0yqV2Du!$T0+3PYAvV~$dklZdp{8=*A6M*KxFj3CO zve}V(KuffovGRb^fEISE3gVBli-La&>sm9y>^10hk8!@OgK59g=;`YCdV24)@mtoh z4e~-AxgR+HBqlo$vbKMK{L&C8Wjb+V0LvI zr)Fu6bGUgxm;VXiolG;g1SlkxoA3qTI$)fYEWe#d6T{4?#T8o22zhgpGq>bq2DVaH z3zBP&sq`wpp_A@~A2#lQLRr-EBdmf{q?TDO^N1t%70CH=SB-~CiwtsL8~${G992ZMgkxTKWR1}!@RtWlb}H%1UCZNH1~f5!MY?}5oH%+t zYT);C<5OT7g6tD3D=X&s`1qfXWbFq1U@wvwbdg)KyZj{#m^W9DuS}g)R6|aEz6FLr zWCVufdxzAGK-_q0$4`Tx(;gjPKl7fuVkW#5fBT9`4_H%Vs@pqpKrGNutu%IF9_{Xy z($xGz&UpTefs6<+MA!O(A&^XfA!-2}L@TZ!tP(;?a>kwl%BLbgB&4R{rS$c@hrz@F zIU?e#>se4O(%1(n(`-)`{Qci6cWX?$4`d(Sd9*CguQdNc6#jn-NHj<*VY3nNFVG9X z@xX>G#lL;+XiBy9A+vbJB^X}!b;{jZHLdf}D1omjP6#N&?kR)}TuD|Z8UkO_L1ztg zg*^^aCn8haocYo*iHV8-vm`2z5zJ6P#pGOMIKjXu?bg3A{~lGMeIY9=%fQO2#njO&h@J4GpzLFI zO0_RYp!Ce9#^s;V$<%B6Y*&n_d;9PK2$vj>*D1>ZkMOzltI8flO)yxf)XyJH>~ZZ< zo&x+?2q>4RtIQ%IH~~Hb?pJmCVG*cj-gF!a1&t-z1ss7KlJLnrp6@%7n1^(rWk*i9v-)sd=OiXYDJa~<-o&e`HgR#r5WR>-R zGl6JpdTK}Cv9YMQrO%3^8ySEVDKS}&C{{c(Cu`4XY{be6Jm+_8?Q=2vStEblIiG7} z;#iwoR94m(iC4>jZ3}Q5H>Z7V%~VESo;fMyPEi^e zIQ0M=S&-V?H^MSDt|GU}YkyRR0Y%g0Qh~GH;eB7`vbuO^5lJ>pGF- zgw*HHwfwxcKeo0k$Nj88gUF|8TLPZ|AUA#iBBMCZ7%b~vbUIX{jyOuo5NkGI$nE`@u4eJ)Q{SLb)Gegy+eKCSE- zf)~2Qq3;jxCBTl~!O?#Rz1Rbh(LpnLi&35++aSN^m<_4jqlNm_3AJxJB6y4|oc zM}NQuv;xW5c5(rVgjZ!NoxBkl|81m#Bq%cy&^7_Yd^VjX4RFC>El!VUdjpiAS*v>* zokSKy0?t$($4f=(TpML>LgYYCSfLpZ$60S;P2u^oPr8=^dF}KskUdqrcGVVQ;}{HD zH(sruO$GPe>?k#3mh`GR^`G}`{J@NlkKbLW1I|kKF8)1UCIy4L1)tnOI|{Lzd{c=? zL>BX^y1x(Fi)o{YR*MNe8DNeI@7{e@k>|fs$PEet zy`>WpI~LgX?$?bU6*-ns)|;0CH7R4??bYd4S|*^mMDU}d&1{z6%c0rZcZ_u&XVjv~ zDJ=!gHgs=-!QkD=g19Z7Uf{qfy>yAY;SrXESP(vttbilbNYR{#udg=A0*&9WH5OI? zLr!nIg5hJ0&7hhAUo`N)?*)(scbeCmoqbu=RM<-Qa2)9Zw1<$o-d>rIkdS}nQ(0Fx zwPt|$160WKp2cohg`gz0$eg$@n5q7*m z;OoM2Ac;XnM8;61#yz70e&dv{W0_(HeE?PxC}qEWae{jGkzR_@oUWqS3X37M-ky z5@BFKZS%cOhK3cE=~Nh_Vqp;^je;^0$ObHc1A8EgQshntwNK~UL#Z6`Dmdihfs`Vo z8M(dpBx8X-G^GRs@SI=UeC1^v4LV>NBL-n>}T;}g$WIGwQSlh9ttg$dXp3O z^mxzwTZ0{3DdS2Om!mQv70oFOun>ZNk9)J?Xr4=u{0CJT-Vu`@Z^-EXTpd+~2dW!7tqfffO{fUma;u=6#sYW_Xb%2!0UN0+ zL{NMXgOmdy?cfyXhRH)R@Lk3zgP1wlL4#WD-ns#?Ccgr2MoLQRPl*mYKn$>Oa&p3s z!$wV`BO3%S%m5d1B&Rl(X=b6vZ_Hqq8HT<^mjA2itHY{%p0^JnptN*%cS?hBC~1+D zZjkQgD0S#mKuQTgIwYhUY3VL$1nI7K`Fwvb*Tuyj=VABR*`1ktX71UOM?vi4iH$)v zs-o*>J1dx+JfWyM!Q+2p$r?+E>qIeEZY+6u6E#iF)Dp$BX}{1W(ldhs!U{lO11w=7 zv2C*wmwc|$PmTpOI^`_^KMyY>7Fe53dW`yPMLxYPe=w*Y`Jq~n`_gz{U0NCuR8Qta5ebToPv3z`00AcE{y){vI(_0s z5jD%!&)yv!9Sv*y(n`n`j0>kDo}rktt5{K{ZL=8yCnNd9S!R4q>!VyW#StkhTNm5& z?+k#QSGT-2=XM}Kbs*)VSDKD@g%yMH>sJ6d4UKR3+Gj?q++8^d&JlYrlReKIlbn=+saAYh~hJ>T8Y|eMZ(YIV3;|N&vN%(t7 z9;?22`c6?%@%LYf11p^6$HO}jABW&A>q@=@lxXo8f8bk*80~?>g|R3pP3C}QvIFBM zrNc+rk$o23C+M}mu%XVqn8wejh%T?Lz8bd%9wXt_!QpVJ6i!2MZ3RycDM5wwHm+nG`rqq73;^^F84|J{3+SbDHl50DQ_yO@i>IYEi8jDdu_og38imXMaQ#mX<&zgrdPXr#zzIP<>SFg!*| zc(^*CSO#WPnA|$rbVaIx_*?+&G#WOMW9U27LubDkwx0}uT|VLt!Ajri=?XEK%Qe@6 z%goswZ?Gurab#Ild*544Oh%*x(z_Dx4(T>{7grGC;=XrW?22l_Imn%~8*L2=%0HHm zt_mJX;1(=rgtFsgl-7 z&Y~}KVjG>VUU@Gp1B8Hx-=&0%jLbZwhyvRpG4}y)0-D>&czU8CR*`mVjHey4DXdFb zG)DB)3gM3kSodw~Id`ptFei|y)x(ni-ja{Bu9C7cQ~_;&X>%wsG4Ttid*vfS2IzY= z0MF==9E>SduYP^YKt#*^TMaBqzr4lc$t}R-&;NgI4VowHwRrlZL_556*!Fiy%HN6a zN!8X5YCJu=mYYW?6UpM{A`qqV;Ke_kk69hoE*g$WkQ+(j#2s-9a5^(07uBZ$wDkxO zQfRlHhgT315QNs(3q7V(^YZeT_dnh;!Qw!@_Zb)o#`6imNIho%lp58wqq&^6iT^ocKtw{y#AKvR1RlzI-XU;^ z_{PwR(g^@bP;{w*WZ_S8{se^y)jYhAI-mW0wvgWH>i(luzZ-@EnzEVmc^C5AY6acD zi;XWne0XG5(~A3@L!F%~3N6Aqe5!LnCPj}50mtZw!#nW#>Akz?(A$NZHa&POZWcXv z&@iJe2gR}KXSIgA9)S*)clxD-EG#Twfe)KoTU+);a|6+p>}fttAPI*pQp3JL2O*zA z+?y1A?zQ9x{7Tn*)q?glH8q^6M+or^lj53u)lHRfdUYK89)If3pFcQwcyz+JS9QQi zb&jYBq30t$Yy9Uy4}=`ebIt^t%$Ymw9RW_Ia2=ZWeWRF>j{DaUy=k4>V0N`)C^WA$ zE8{1YfWRNj&_^-<5THJ)h>ypir>AHBxW(Nga(jw$ozBub4(SJt6i-Ljc6_$JO$A-t6VJI&UACJsN?T{k9~&si*JS+uOG%m6}e7j@X$rzwmxN+{n)bH?6dP zFK)Gl*07b6glL%Kmy<_?gMtVvpmQ)qT>J8UVzKv>^6&IAE?9N8`d%?r=VT020(9L} zf>2D@&z(D%XS1SKMjoKJtQQD-4!-d`CaYsnU&cqNLhXaYJ-b1=f@sP}uTsV(7wM0v zV`CtukQe!B{`gLYN*|4553yUV&aqX`%m~OS@$DifW_(6_K!b$yL0`M_J2rU~-l|h9 zq#qcwh#9@nJqg-h^fAbooJ2ibPWy-u7VuPQ#_N}r+4H{5L}>Bk564M;hZ_CAuVY9Z z_%*8Qy9|r9iYIj(_N^x8rfR=AmMZa+swhLAA}jnt(SucVa$np9+gNTcp<1Q-SfLDG zob*vipju(I52pPbY2I4dVG;xtx0CRm%+kw&fsbRlfY0)0&Br&Ll9!j)q)zE#1jFRe zBY~n@R!<&!N`qgz_G#G{@z0+tjC`ZvoAx)#gYjP4jcc`azZZwtY|XpJ*oHdoKtW`Aw}|#-=*-MrIrUf<>!exBizCtK;dWfy zd;jCJ%xcs7b!VZ<-A`v@sf&)&bC`qv)LdNH+QnLgcBmElTkanyi7Yd5HSl1f7M19M zLYSHb1-luem(^B_W;6T$xSmU62tG?#_+UZvuX{_fcQ}mDwxps0hcHTZE4l|Yedt@w z_?;^4pEXR*dS=v~PDt%fBYtAEcmIqqj8smt;R@)1-hCLhdo?<&6-T))0&3#xx}}UE z^18T`re~MSeGEjmiq~NS_JX;Eg}+`rt)hvHiqZ;}+C>n{aTB3w$KOWop`&$0qxppB z{8yRhZpJ+$V9BEC^o9#Des<_JjAC!U5!?8V94%_KwTFWv0NU=e00nJ{CBn+YV zx16uBHf3A7(}c+=%RdfE5T3ye8$G|))Jzsgc6t+%kws0s+2~&$i71pG@t1x=v;m>X zsSCj=hd9q8HknN-UQYj=w9#>=H(O$1e%18>ZzEVL@a(eJW)7m%(G*+_ACX3 z=#DGNjPUQ{)nSVv1cX*{cKYnE$IUbZH-e^bZcx>A=0kMuD-Z-GAIQ{?#-ohoEIWC1 zOJ$6&;b@kYmX|BR*m?4WsayQ984i#`YYNS$EKU9tx=IM{4Mxh!k!@!$hOnhX%;z;n zvKb7I#k{}Z#Yx_N9m(dhmBt{<;w34b$_yVU7bX#WyS-SKQEjq5(BMWGeZO!JJ?o?E zwtST;Nay#aF=xn%?pc&(f}qQGLO~WK>c3{qC45FkMwFeM9V1Dmn`}@R?dtCClSXW- z%5_6XMFs+-&5?s#^qHj?1hPlIs&{EAXBT0hx_WZ_e)nSOigRxMu8#F3@*wL-@3EG3 z2O*_M=&TK|*``ast%Rf1$E{AGNJb&e*uIVp9WFWDRCnF?BVt$ED0|D(oG}Ch1mD9L zDsXv;G4wCCjm3QVlUbZKu$uh1>vv4ilKNl*UR+pU(c?0$Vlut~erGRlkGeS;*1383 z5VrhV4N|s_L%;W0(2PkOYbT#GlQF6s{qxq)O(l`M?u*D}-OZ{$mA=~jK4~{e%Jxf+ z8YHv@(^Uv}Q7T?23IYZkPEy~Rq>OjvQ49GXgH(bvY-d#Lyqa0B2l8% z%%Evj(dRr4rlyPrHE-^BPKRFD9cI`46$>1!VKb7xi~UA3O}j6+zRvhZbZb~nJNCLZ zxLqn$j063kna=5sn0-$}P&sS&s$hv_oEO&?TSmUon(SS{NWW%J2+rk&WNX{}-?|*1 z)|-Dexqa{-r$X!N47v8NP+l>rkdHpUWRSeXtKF|husZooCu)3Vd%v9_mhl3u@duNd4lBvP#rzLIS}i`7GT(wc~`ud&ln6MRft^Vify}kD62R z_UsuJ|4#vXGFk?KDeIBHT-96n%qfYXFxT*^QH!n;S4$bk2HMbnfPGJ@>dHJw6_moSdBS zfP(?xO{$QF#LAO44yZd6XgvYZ2K7s5jXL?a*5l8v7F$KeKT-|mx@ro^niUaNHLaa#s(`YWnfx&E(|=MZj(>_8fFVIBXi;31R2#Z@1)tiI)*^ zXwx~qZRCdRgwz_hI@g}c{!w^~P1$fE-YV|om+mx#~3+Kytufy^k6@Na{m_J>QCb=hdaGBm#1qK#Kh`O6~`7yx=w1@R<~Wc<$%+Oq`q7(LN7=YNCxRuk~x$b3XM&LaviyXA;L&L`?D{VUqI7My`z9?((Hip>PL==x(JKmiOpE=vTp5C{a3>LB)&XjB0RG9 ziHamMwU(E7+#KPezZWLL^44uUjFR}&MTC)%u$XrLejYuX%%)J8sjKstDCNPX`q$x< zb%!C@Fc_e#=<3P~;*8%53t~lE#F*gin_3-s)AH(U;R-c)q;k2qD8aXUX+=Jf?5MP3 zBGY$5xDGl`(#XMihC=5&1S2xt~%B#dYIOu9M zDyKYi%lI~$qOT-l$d~1Jov9(&Drw7Mh%JOtVY?Pv{@^#>Ls7roi*CZ-7bSaj+ks19 zY2wT`?KMKt(px)fA!XE#yiTBed980Pk8M|UVLJcG@Nzv@{|D6$<)~YO{?$p)>Y&>I z4#EBsw`G4ai;a`l0pd5Eml!X@5lWuAKmQ7kay@JyX!{8X^QW(#9n`J7h8kR%Uu`C< zTi3nC&9yn@G5ugDvf5|HLrY`oe6?3{pd#&Ee}kSr&QjsK(HfAxhvwmRv6@}(&_P!2 zBo`EP_|NNUT!q;j#>U=T`^7PMnUnqNzq@`FAt_ujrPb+)UYs!s z*Q$H=P1GdvY*9(ORR2i{3KC0-t30258yVN#J-Yqk^HiVRVFaceGg=&6lCUYl{hNaK z{3rGPLz%;5lJ3S43zw^&mKwFX%Beh}PSBXw4I+L=1E>!XmTaG>#X4^OSo4!`n|o5m zrlm!-HXlE|zdmN7M~DGJ~~pB*lbV1W;j}HRKk#EuPE}Ds^RW z;ENF_Z)kW|zZL6+bbgDQV5;lgZ!xH+9#TN{Hf|`L6!B9OVJW6jKmeBg()v^mMf!_L zU=qJ()QMTY9ECTg2uui=@As!1CKVIWh{9;r0s+g<#PAM+_ue@4Z{O(dZSMODzDeH3qLFb=#bJLNc+ASdkl!kE)bKh25 zO^JKC*gH?t`i%x1OSJLI8XSiUv%Sdu!r}L%+w{^TL9ek~p{JENW?T@;)7|`IA3X)d z8uMkNW&F)v%1czw0}=l9&$ISWgAWuwPw*-ZlUe$4W8{($Qf^P_OPW@@zcjQONR+F) zO<(qs4oOSgSD^4&4UGK8!<0CG7qlew#~=hEacv^{`1|^`Ik&AH<10o>>T;70I!f~e z**<&mn0ES{jb@CzyyjslxUi-zrr*KswhFxRkY9m2SdgLz;YMoPR=?!kZ$4D?ZgX+F zZ7UiKI;xPO#aw+!O|q~+8?=Al{?WjqT;uVi*-?nrHF${8Mq0c%TRb}WBz&4ab!OUp zI;~r(zTvI~kQ4jH8^NIG$FWcNbd)~wi*Y%`?ce4W+HX3 zII`-j@M0Q`X2n^^|6Cji3CUR`ylhWiPL5W1_AYRJ{X2`Uza7HVL+3*;V|x1B;Ye#h z*a>Gw@$F}Tm_K}DCc-bErQvx+LL|L1m)=r(MJ%10X$j)YXOXWp3~Q$UlB1qL&i~oA zFNt$_KPz!nkZ@|nqL+S`0mF`u8^m5#8m@9`(v2Z+3KuB-BnS2C~NL0{0TZCwb;TP-h@nCX6zVwz|z`;|98q62xKR1cWKNw zy-})>!OA;?fFUV)JV-9jKX9}#s)~&Uxu{maY;m_)*QZxFHTAorlU?1ri)l_~<+dv# zRjp7b23kjqHurY2<`2m@9attG*=yalRQX@L$So}$+E=r-W@>0?Sh#oh^2%*(HNLhk z!M89u#FVqlzpd3)F2SrTOL#$eRMR4HMfRl}Du>dWrc*qbCp<8tV>LLe`R_Mn>HYnC zQymKBNOM2qpLfHuMdja-4@;)|cs6MO?0Cljds;?BfrV;kH<9j0ToLpTXsOgSP?iyT z8M1mH6RX|Nt(h3cHs_lr!8h%6m)F{7Rc+Fc!z6^edKqfToz{HtcPTJWo^NTi?BB7+ zpByprphVa3a}Ov!c`LghC&1>?(r|mo^e;030f8~gcY<$ZWkcPcwAt`0hzedk$6d`c zWhA4pJzGv@RU0I++L=pWWkif)E_~*bG&S&}g{O8@2wvUD& z=~K4JVy1GuQQx>EQ+xc&ELZ0SRM@dl8i4a+Dx+(DAb~OLmDTlCK`RekQ)u@cGU-`$cS298}|bAlZ!)qY6~nKBTHx z&s|w)xY-12_q_bjy*4AOD&!@o~Lhaai2Bl>Q=fS`hlpse}=~^ z_{?8TxZVI_@{L_xqupt)xzc)sl3c`Hfwa+cmK-{_fys5d@w?qqj0$;;%&dC6BltICVHb6|cJq#Pf7A3T>o2R!wU%o?!Nnd{Nlp;J zu1KSqA~}b%*ZN&J{-`Z#xUO8apRZN%^_;gC{1ah4GR6^iJ;w+llKu)6_ET=__vZ!c zagv8EeUbK`C*rrV%11V|F1ana;yU zv3y;79MjJGnC)RV0os6rBTa8!!Q41oX^z^^IN7K+`|FL44rtIJARPF;VMn|2eQTs+ zk0mZ3z~Q1y=Cf#^*7Y^{_TPPZd3i1npPG&Q>nyx-<(d-{yI*&I#y`b2nn6;YltWh|HyVfA043z%+!BH$)q@T8$dxa9coC@>U?o811*nTk+&Fh zI?d!C$qT_5fz)@M`Kqa5FZpaySjO3IrU8nm@@INUN&g#;zej(Wjf<`J-Z1R^U_u4`N*G@qj87E9@tGy%fp&1Y>+|h!T;cOl? zuvgqPUHFi65TdTk0opYw8QGbdT{JE2U>YAP56?fc3|~JA=Dr3haq)#F+mdJKe5=iB zi&CtiykMTyD7RoeV7V*9OT*3TTzcZPI!rRaLJW%u{1jm6W26hlXvHBI9*%%H8CYvGHE- zUG9v;YKBhZ;&LfgegTRp6@p_0x#%~$xj5);5B66cGX`cQ&n+hcZRB=y0b}@4%zT71%>|BPTzT!K&y`PPi(NAyb=p3rw`l{(M48Q8M zwZ=E~^a*2aN``xPprf-IBzrK1Fe5vMvTFnCWJ`-gCLd`l7NuAr`LWBtivntgoC_Ho z-IX*Bxds3Eh@!*nh+ikR^wC!&kLgv^HV4wbD>@Je?$P6S1oSG(%ZFDH(4S-jVz#Xt zVetFIJ*SbUQgVYjf&N#OzlLLyLa!kUAb}YjgGOXY@svgt;@rTlt)m-G1N?QLdV%Jf-6nTJ;U5J)E&n!0~g(?m-_1qMkf1Hm{5RVD2gskP1AkjeSVFFdU=H_&dt(b0d zgIb!Jc%RcZlF~S@`Y`AjWp^`hPp0<0I6m~T2iO)P9{w2K;oijM?7yxjfzAIm3h!*M zP7GE6tj7%13p`#BJ*%}kdqQ~4vOe=kqBC(0YKE2r#;{Pw2T~$SNpKZ7k}hl1u8xR>#nFq*1pP%Y+RULuHWRjBLIR=KU3xVxu4zJOkSz3} zz)LmSCs>l@CIaUJw>NTsDRteCA0`J&&DTubau(pPko$H#zvs}I8!iFCH#a$Zq3H!~ zBE{n&?|pUqDhqaE7mf4m1B6sI^h5l@l{$xcL0dkeq*<7;xR(`>4J%0+PyJ~DpVq1< z+@=+*QIOOi`A!NA7iGm!&weCJT*&{jVwi+St){Ln_v_b}r0GG@SXP6treDx}b064e z>L9gB4BbVzl?DqXBLL!Yp@6YZN2wOz83$hF-_LTF19Qyg)hl^~5u}fWG9IVWTi2?Y z^K2Ok75CljoUs9~$3p{s4$B@e^YHNK&unJDw8^AXQ;7me(UB=(x^oq1n}ZiSvZ6eL zIT9@8YzSUCx|Xx&=LK<2znY$(AE-{0Q3q#6|LxoS%8j}#ScUUW70YQxPsR1TD9 z#hgYsqQ8dx#x%))JeYi7J}BCe17wUc2;9_ss->d?VD0k02<{=M89brRJ!0cwLDVsD z%V71V!rdR^qw4XGh^oRyqfv%T^_i)ts0cZ)z<}tD|L*U11Cl^~<=%o&M$qb;N4xP@ zvf1-2W}iUr9(3&i$$ZP@dPJ%3k7`Hto>YdElr*_iV|vMrSDEpCYu$;#vQKF1mZj)$ z2s%+_EfIZ1+8P<1h(n9g=J220o14E9Fe|~4Hkq$=7)u?p>Isg^EOV1v8*z9?)M1mH z^?nXx#V2#zba1PlIc}*G^4x&y{Nf__+c!qY4tJ0UI3ra~S7}?HXhfM*K~W|yNsXhe zX{}mol`P^{0I{GsO<2@1Ga8L6#DF1ca$@33dAZtFbSfpV5c$3HaJ2s^D(Q`~6_R$D zQQ@{LjBO@n;?{5h{OiZd-X8NYFuBt|@Kwz7Wv zkNKO``H1cKM?2zVkStCgQh}gL7q!3COy=O=0Gt!z_naJQ-{KAvYO(J07CDDA!zj~Q zV#}{~5Vc+4N&|akXWa%6l--*P1vcYi48afRnhEFk9E|4AsNmKCVvhx}y{`MK7S^U{ z_`*#VFX5khW=KMN8{IL!P`?s*)lI z41-IIgDpPolJ!}q3|l`aE-h7=^-PH20vk7moB3$IB_2)GGZh_UNpLw$LX|oQd>-dA zbkU$hpy`^0Yz~-#GKm;l8Ukc)eqO`am@8%|EhRLqn$LZh`7|$2ZncWds;I*YcT__B z#8eqbJ)9<_!sO7XDm~!leD!d(6Nvr8EO^P$`FXY&@uQL-2;?_mAK$5d7_#~l>WSnH z1!irWf@lLYKT?mJ<&5vF>-7!Bbgrs9UVn}^>#Z5s+|AI~2K6IMrD1n6rTdR$5ehBc znUy?HkqN z*w~(hpKdR^9=hZdA*nJ@GQ>Be zU0+(QJteZNxk0Ee#jCBWJBi{TO90QEUbr#+(%5b{dHc4f2Y5&Dbsk9&eRbayA&o!4khU91gQ)6Reqv$Xka;{Kz0om5}AHjuG8I@r{_9G8= z5UC~kF$VOv94jX%I8I05upOqDxno6wq z>#0c^Fzr3k08)kKE(SX$c0b3%dn87=$+6=T$H?L1;}>>};EghZ;}#uC$k|T{-&TVx zed>Otq~wE#?Ut0Dj-Ecmi0<*8iJYe=tX<2}TQb{gIWn) zj@*A^P;&NnQs=yav?g~SA0OjT4tM^Y{Q}6xlRwwRA(Rus6g|3VAQ!<~hvb~TX2(ye zVPHTGbJwkih>ev49SN*W8Spo~(ZuF^?+lXVVenXpE=^Q^ULG|MkH%KCZUqbAq+@1Z zlD8eW1H~fD0oHxNjhLKfOK$B+S_~2(mW=Vl$Quq|@)DcVb<6sM#16YQKMS^22?z8} zsHbP`y<#Pdj~IiQmGyO~F>TJp=^z+p4wxMnTmtDZnq6C69a5|%fu9v)2&4|!U-6;W z)H`Us#OC_oAxB}DOc{|1Bc9VWsWm$xI#}YH)$oWy?Gb-s^RM6phr_qIVOmwXzP=4p z&9Z_$K)`MGf&yS??5u)-Km<=uGMM8% z90in2mFjzPa&rMC_-oK>WgP|vhN2h7T9B+MAix>ocG+1a=qUnhdB6;iLn?IHfmt|B z1XZ-WJbJB5Us~5~U>NQ9R&vBdUYIyv-r`tQ>%Du8gKi4V6KFX3KXBO71NJ*t3VQQ( z=66zBTEzCY4FJ3MpAr%f8W%YM7}PEL?~Q>c+!javGIvIc`xO*t9{ndznzB9{wHofI z;Nzahh=Z+)fM0V9Esd(!+m}qmBNCb&pPXo6n$01U{O^ssWM@&~+shE6r0z!r;!Oi^ z^-AB{yIz}ZvIaVl2V8t*B|sAg21s89&T;^G3)~t5EP2Zt0|SiUm)9ftlx^UyxNsgp zIpD)^Y3H7-1nRLfF`)tpfyMhX2ns&3ii!?ZpcBRa4HgwuLE)04=y0FJP8cx`27fsGRx*pwJ(nso;@Gg#Hv`_?741-5z4|r9U})=Fn4K zh%HhCXl4QG0_3yAOB$nJofxD9&HkT|0k)%>M%wmjwyB69jbFRyP#8XXVs6pc)PK zf(oGFzb_#7Z0O0KJ{5h|sDO;8erzIXLkH0gF`0Zl>*eU^8m|(M&`TR|xY0^$W7wsD z2w)*0$Xb_jw62c`(%v8VRBPy|Fs#6`612brqWn$*!?J8Dk=~@4}P4rmoqZT zV!b8Cb(jVYb>^~5R!>q5A>I=P;FYEWyO{_JGc&B)EFo0?|Fp&>GO9{7))7ZPvOTL$ zW@g{h!pdql8y^V*1u1DZHYI|VOgPp~DwerDIS9bFoGdFr1J5Lrz@~L~cX!dp z$p1qn|2@%4hVTP$ZqkRC+1UX)UVH2BF9zzb0R6##m4*wOM_-};AHk=lN*DCd+9^|? zAkmK5`CJR)Hb25qo*r3YZc2oT$w#FCGz$>BtKl)JLfzeeu=7J!L;&)P=HcvrF9pS7 zHaayoH?O(ozF?njEf2tJ4GF>_?D+Ist+zAsZ*BwhFVEL}E$`@RF?PHAuSo zPJ+=qf}jFt!trE9(B+q1O%CA3f{CynEp9;X0b@?U<2INZujY_TK_}3EHOpv?F0-{DbFNukIv zOZhEhz8>AS#{}Cn;ZpwO;)(RNYD2i^kBad&IVf$AnO{?VlX-5O^VC zTT+#7V4#G(VEQmg=r_=jL(j-q)bZl^C=3APo*8FQODUg+kUKZq@RQcWo>%0;75*OV zI6;3HJ^BWFdhj_5tsO#zG$FXxNXFm42@LZIW69SAOX%-+ify#)IA#9^dr72*e0VN(1rlpm`bH_WYzD8|yd- zqfQ&zP1I|z-iT+_34@?0&W$@wIjc$779m~@8ZLK@8O`&JiL;+Z3hjs;7KN1AOYAI; zD~=Cvj=6GZlq(uYO(GSd;81KENb6$4cxiXh-D$zUbX+9MKyU>R>cxNigw&_kQ}gwF zpa4ZWTxm2sm psb_d_cp_rsb_from_coo + implicit none + + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k ,bs + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ +#ifdef HAVE_RSB + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + bs = 1!RSB_DEFAULT_BLOCKING + + info = Rsb_from_coo(a%rsbMat,b%val,b%ia,b%ja,nza,nr,nc,bs,bs) + + call tmp%free() +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_rsb_from_coo diff --git a/rsb/impl/psb_d_cp_rsb_to_coo.f90 b/rsb/impl/psb_d_cp_rsb_to_coo.f90 new file mode 100644 index 00000000..3747100f --- /dev/null +++ b/rsb/impl/psb_d_cp_rsb_to_coo.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_rsb_to_coo(a,b,info) + + use psb_base_mod + use rsb + use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_to_coo + implicit none + + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + real(psb_dpk_), pointer :: val_point(:) + type(c_ptr) :: t_p,s_p + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc + + info = psb_success_ + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + allocate(val_point(nza)) + + t_p = c_loc(val_point(1)) + + info = rsb_mtx_get_coo(a%rsbMat, t_p, b%ia, b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE) + + !info = rsb_mtx_switch_to_coo(a%rsbMat,t_p,b%ia,b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE) + + k = rsb_perror(s_p,info) + + do i=1,nza + b%val(i)=val_point(i) + enddo + + deallocate(val_point) + + call b%set_nzeros(nza) + call b%fix(info) + +end subroutine psb_d_cp_rsb_to_coo diff --git a/rsb/impl/psb_d_mv_rsb_from_coo.f90 b/rsb/impl/psb_d_mv_rsb_from_coo.f90 new file mode 100644 index 00000000..9de74ec7 --- /dev/null +++ b/rsb/impl/psb_d_mv_rsb_from_coo.f90 @@ -0,0 +1,114 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_rsb_from_coo(a,b,info) + + use psb_base_mod + use psb_d_rsb_mat_mod, psb_protect_name => psb_d_mv_rsb_from_coo + implicit none + + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + call b%fix(info) + if (info /= psb_success_) return + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! if (b%is_sorted()) then + ! ! If it is sorted then we can lessen memory impact + ! a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + + ! ! First compute the number of nonzeros in each row. + ! call psb_realloc(nr,a%irn,info) + ! if (info /= 0) goto 9999 + ! a%irn = 0 + ! do i=1, nza + ! a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + ! end do + ! nzm = 0 + ! do i=1, nr + ! nzm = max(nzm,a%irn(i)) + ! a%irn(i) = 0 + ! end do + ! ! Second: copy the column indices. + ! call psb_realloc(nr,a%idiag,info) + ! if (info == 0) call psb_realloc(nr,nzm,a%ja,info) + ! if (info /= 0) goto 9999 + ! do i=1, nza + ! ir = b%ia(i) + ! ic = b%ja(i) + ! j = a%irn(ir) + 1 + ! a%ja(ir,j) = ic + ! a%irn(ir) = j + ! end do + ! ! Third copy the other stuff + ! deallocate(b%ia,b%ja,stat=info) + ! if (info == 0) call psb_realloc(nr,a%idiag,info) + ! if (info == 0) call psb_realloc(nr,nzm,a%val,info) + ! if (info /= 0) goto 9999 + ! k = 0 + ! do i=1, nr + ! a%idiag(i) = 0 + ! do j=1, a%irn(i) + ! k = k + 1 + ! a%val(i,j) = b%val(k) + ! if (i==a%ja(i,j)) a%idiag(i)=j + ! end do + ! do j=a%irn(i)+1, nzm + ! a%ja(i,j) = i + ! a%val(i,j) = dzero + ! end do + ! end do + + ! else + ! If b is not sorted, the only way is to copy. + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + ! end if + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_rsb_from_coo diff --git a/rsb/impl/psb_d_rsb_csmv.F90 b/rsb/impl/psb_d_rsb_csmv.F90 new file mode 100644 index 00000000..f5dbba97 --- /dev/null +++ b/rsb/impl/psb_d_rsb_csmv.F90 @@ -0,0 +1,138 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use rsb_mod + use psb_d_rsb_mat_mod, psb_protect_name => psb_d_rsb_csmv + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_rsb_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_RSB + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) d_rsb_get_size + procedure, pass(a) :: get_nzeros => d_rsb_get_nzeros + procedure, nopass :: get_fmt => d_rsb_get_fmt + procedure, pass(a) :: sizeof => d_rsb_sizeof + ! procedure, pass(a) :: csmm => psb_d_rsb_csmm + procedure, pass(a) :: csmv => psb_d_rsb_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_rsb_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_rsb_cssv + ! procedure, pass(a) :: scals => psb_d_rsb_scals + ! procedure, pass(a) :: scalv => psb_d_rsb_scal + ! procedure, pass(a) :: maxval => psb_d_rsb_maxval + ! procedure, pass(a) :: csnmi => psb_d_rsb_csnmi + ! procedure, pass(a) :: csnm1 => psb_d_rsb_csnm1 + ! procedure, pass(a) :: rowsum => psb_d_rsb_rowsum + ! procedure, pass(a) :: arwsum => psb_d_rsb_arwsum + ! procedure, pass(a) :: colsum => psb_d_rsb_colsum + ! procedure, pass(a) :: aclsum => psb_d_rsb_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_d_rsb_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_rsb_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_rsb_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_rsb_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_d_cp_rsb_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_rsb_from_fmt +! procedure, pass(a) :: mv_to_coo => psb_d_mv_rsb_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_rsb_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_rsb_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_rsb_from_fmt + ! procedure, pass(a) :: csput => psb_d_rsb_csput + ! procedure, pass(a) :: get_diag => psb_d_rsb_get_diag + ! procedure, pass(a) :: csgetptn => psb_d_rsb_csgetptn + ! procedure, pass(a) :: csgetrow => psb_d_rsb_csgetrow + ! procedure, pass(a) :: get_nz_row => d_rsb_get_nz_row + ! procedure, pass(a) :: reinit => psb_d_rsb_reinit + ! procedure, pass(a) :: trim => psb_d_rsb_trim + ! procedure, pass(a) :: print => psb_d_rsb_print + procedure, pass(a) :: free => d_rsb_free + ! procedure, pass(a) :: mold => psb_d_rsb_mold + + end type psb_d_rsb_sparse_mat + + private :: d_rsb_get_nzeros, d_rsb_free, d_rsb_get_fmt, & + & d_rsb_get_size, d_rsb_sizeof, d_rsb_get_nz_row + + interface + subroutine psb_d_rsb_reallocate_nz(nz,a) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_rsb_sparse_mat), intent(inout) :: a + end subroutine psb_d_rsb_reallocate_nz + end interface + + interface + subroutine psb_d_rsb_reinit(a,clear) + import :: psb_d_rsb_sparse_mat + class(psb_d_rsb_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_rsb_reinit + end interface + + interface + subroutine psb_d_rsb_trim(a) + import :: psb_d_rsb_sparse_mat + class(psb_d_rsb_sparse_mat), intent(inout) :: a + end subroutine psb_d_rsb_trim + end interface + + interface + subroutine psb_d_rsb_mold(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_mold + end interface + + interface + subroutine psb_d_rsb_allocate_mnnz(m,n,a,nz) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_rsb_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_rsb_allocate_mnnz + end interface + + interface + subroutine psb_d_rsb_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_ipk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_rsb_print + end interface + + interface + subroutine psb_d_cp_rsb_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_to_coo + end interface + + interface + subroutine psb_d_cp_rsb_from_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_from_coo + end interface + + interface + subroutine psb_d_cp_rsb_to_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_to_fmt + end interface + + interface + subroutine psb_d_cp_rsb_from_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_from_fmt + end interface + + interface + subroutine psb_d_mv_rsb_to_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_to_coo + end interface + + interface + subroutine psb_d_mv_rsb_from_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_from_coo + end interface + + interface + subroutine psb_d_mv_rsb_to_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_to_fmt + end interface + + interface + subroutine psb_d_mv_rsb_from_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_from_fmt + end interface + + interface + subroutine psb_d_rsb_csput(nz,ia,ja,val,a,imin,imax,jmin,jmax,info,gtl) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: gtl(:) + end subroutine psb_d_rsb_csput + end interface + + interface + subroutine psb_d_rsb_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetptn + end interface + + interface + subroutine psb_d_rsb_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetrow + end interface + + interface + subroutine psb_d_rsb_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetblk + end interface + + interface + subroutine psb_d_rsb_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_cssv + subroutine psb_d_rsb_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_cssm + end interface + + interface + subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_csmv + subroutine psb_d_rsb_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_csmm + end interface + + + interface + function psb_d_rsb_maxval(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_maxval + end interface + + interface + function psb_d_rsb_csnmi(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_csnmi + end interface + + interface + function psb_d_rsb_csnm1(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_csnm1 + end interface + + interface + subroutine psb_d_rsb_rowsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_rowsum + end interface + + interface + subroutine psb_d_rsb_arwsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_arwsum + end interface + + interface + subroutine psb_d_rsb_colsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_colsum + end interface + + interface + subroutine psb_d_rsb_aclsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_aclsum + end interface + + interface + subroutine psb_d_rsb_get_diag(a,d,info) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_get_diag + end interface + + interface + subroutine psb_d_rsb_scal(d,a,info,side) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_rsb_scal + end interface + + interface + subroutine psb_d_rsb_scals(d,a,info) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_scals + end interface + + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_rsb_sizeof(a) result(res) + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + + + end function d_rsb_sizeof + + function d_rsb_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'RSB' + end function d_rsb_get_fmt + + function d_rsb_get_nzeros(a) result(res) + use rsb_mod + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = Rsb_get_nzeros(a%rsbMat) + + end function d_rsb_get_nzeros + + function d_rsb_get_size(a) result(res) + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + end function d_rsb_get_size + + + function d_rsb_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + + end function d_rsb_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_rsb_free(a) + use rsb_mod + implicit none + + class(psb_d_rsb_sparse_mat), intent(inout) :: a + + call freeRsbMat(a%rsbMat) + + call a%set_null() + call a%set_nrows(0) + call a%set_ncols(0) + + return + + end subroutine d_rsb_free + + +end module psb_d_rsb_mat_mod diff --git a/rsb/psb_rsb_mod.F90 b/rsb/psb_rsb_mod.F90 new file mode 100644 index 00000000..73db825a --- /dev/null +++ b/rsb/psb_rsb_mod.F90 @@ -0,0 +1,50 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_rsb_mod + use psb_const_mod + use rsb_mod + use psb_rsb_penv_mod + ! use psb_d_ell_mat_mod + ! use psb_s_ell_mat_mod + ! use psb_z_ell_mat_mod + ! use psb_c_ell_mat_mod + + ! use psb_s_hll_mat_mod + ! use psb_d_hll_mat_mod + ! use psb_c_hll_mat_mod + ! use psb_z_hll_mat_mod + + ! use psb_d_dia_mat_mod + ! use psb_d_hdia_mat_mod + use psb_d_rsb_mat_mod +end module psb_rsb_mod diff --git a/rsb/psb_rsb_penv_mod.F90 b/rsb/psb_rsb_penv_mod.F90 new file mode 100644 index 00000000..12a56d83 --- /dev/null +++ b/rsb/psb_rsb_penv_mod.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_rsb_penv_mod + use psb_const_mod + use psb_penv_mod + !use psi_comm_buffers_mod, only : psb_buffer_queue + use iso_c_binding + +! interface psb_rsb_init +! module procedure psb_rsb_init +! end interface +#if defined(HAVE_RSB) + interface + function psb_C_rsb_init() & + & result(res) bind(c,name='rsbInit') + use iso_c_binding + integer(c_int) :: res + end function psb_C_rsb_init + end interface + + interface + function psb_C_rsb_exit() & + & result(res) bind(c,name='rsbExit') + use iso_c_binding + integer(c_int) :: res + end function psb_C_rsb_exit + end interface + +#endif + +contains + ! !!!!!!!!!!!!!!!!!!!!!! + ! + ! Environment handling + ! + ! !!!!!!!!!!!!!!!!!!!!!! + + + subroutine psb_rsb_init() + use psb_penv_mod + use psb_const_mod + use psb_error_mod + ! type(psb_ctxt_type), intent(in) :: ctxt + ! integer, intent(in), optional :: dev + + integer :: info + +#if defined (HAVE_RSB) + info = psb_C_rsb_init() + if (info/=0) write(*,*) 'error during rsb_init' +#endif + end subroutine psb_rsb_init + + subroutine psb_rsb_exit() + use psb_penv_mod + use psb_const_mod + use psb_error_mod + ! type(psb_ctxt_type), intent(in) :: ctxt + ! integer, intent(in), optional :: dev + + integer :: info + +#if defined (HAVE_RSB) + info = psb_C_rsb_exit() + if (info/=0) write(*,*) 'error during rsb_exit' +#endif + end subroutine psb_rsb_exit + +end module psb_rsb_penv_mod diff --git a/rsb/rsb_int.c b/rsb/rsb_int.c new file mode 100644 index 00000000..dc4a8026 --- /dev/null +++ b/rsb/rsb_int.c @@ -0,0 +1,110 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#if defined(HAVE_RSB) +#include "rsb.h" +#include "rsb_int.h" + +int rsbInit() +{ + rsb_err_t errval = RSB_ERR_NO_ERROR; + + if((errval = rsb_lib_init(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR) + { + printf("Error initializing the library!\n"); + return 1; + } + + return 0; +} + +int rsbExit() +{ + rsb_err_t errval = RSB_ERR_NO_ERROR; + + if((errval = rsb_lib_exit(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR) + { + printf("Error finalizing the library!\n"); + return 1; + } + + return 0; +} + +int Rsb_double_from_coo(void **rsbMat, double *va, int *ia,int *ja,int nnz,int nr, + int nc, int br, int bc) +{ + int i=0; + rsb_err_t errval = RSB_ERR_NO_ERROR; + + *rsbMat = rsb_mtx_alloc_from_coo_const(va,ia,ja,nnz,RSB_NUMERICAL_TYPE_DOUBLE,nr,nc,br,bc,RSB_FLAG_FORTRAN_INDICES_INTERFACE,&errval); + + if((!*rsbMat) || (errval != RSB_ERR_NO_ERROR)) + { + printf("Error while allocating the matrix!\n"); + return 1; + } + return 0; +} + +//X is the input and y is the output +int Rsb_double_spmv(void *rsbMat, double *x, double alfa, double *y, double beta,char trans) +{ + rsb_err_t errval = RSB_ERR_NO_ERROR; + + if(trans=='N') + errval = rsb_spmv(RSB_TRANSPOSITION_N,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1); + else + errval = rsb_spmv(RSB_TRANSPOSITION_T,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1); + + if(errval != RSB_ERR_NO_ERROR) + { + printf("Error performing a multiplication!\n"); + return 1; + } + + return 0; +} + +//Should it return a long instead of integer? +int Rsb_getNZeros(void *rsbMat) +{ + int res = 0; + rsb_mtx_get_info((struct rsb_mtx_t *)rsbMat,RSB_MIF_MATRIX_NNZ__TO__RSB_NNZ_INDEX_T,(void *)&res); + return res; +} + +void freeRsbMat(void *rsbMat) +{ + rsb_mtx_free(rsbMat); +} + +#endif diff --git a/rsb/rsb_int.h b/rsb/rsb_int.h new file mode 100644 index 00000000..c08f114d --- /dev/null +++ b/rsb/rsb_int.h @@ -0,0 +1,2 @@ +int Rsb_double_from_coo(void **rsbMat,double *va, int *ia,int *ja,int nnz,int nr, + int nc, int br, int bc); diff --git a/rsb/rsb_mod.F90 b/rsb/rsb_mod.F90 new file mode 100644 index 00000000..e5252654 --- /dev/null +++ b/rsb/rsb_mod.F90 @@ -0,0 +1,235 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module rsb_mod + use rsb + use iso_c_binding + +#ifdef HAVE_RSB + + interface Rsb_from_coo + function Rsb_double_from_coo(rsbMat,va,ia,ja,nnz,nr,nc,br,bc) & + & result(res) bind(c,name='Rsb_double_from_coo') + use iso_c_binding + integer(c_int) :: res + type(c_ptr) :: rsbMat + real(c_double) :: va(*) + integer(c_int) :: ia(*),ja(*) + integer(c_int),value :: nnz,nr,nc,br,bc + end function Rsb_double_from_coo + end interface Rsb_from_coo + + interface + function Rsb_get_nzeros(rsbMat) & + & result(res) bind(c,name='Rsb_getNZeros') + use iso_c_binding + integer(c_int) :: res + type(c_ptr),value :: rsbMat + end function Rsb_get_nzeros + end interface + + interface Rsb_spmv + function Rsb_double_spmv(rsbMat,x,alfa,y,beta,trans) & + & result(res) bind(c,name='Rsb_double_spmv') + use iso_c_binding + integer(c_int) :: res + type(c_ptr),value :: rsbMat + real(c_double) :: x(*),y(*) + real(c_double),value :: alfa,beta + character(c_char),value :: trans + end function Rsb_double_spmv + end interface Rsb_spmv + + interface + subroutine freeRsbMat(rsbMat) & + & bind(c,name='freeRsbMat') + use iso_c_binding + type(c_ptr), value :: rsbMat + end subroutine freeRsbMat + end interface + + ! interface writeEllDevice + + ! function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_float) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceFloat + + ! function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_double) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceDouble + + ! function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_float_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceFloatComplex + + ! function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_double_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceDoubleComplex + + ! end interface writeEllDevice + + ! interface readEllDevice + + ! function readEllDeviceFloat(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_float) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceFloat + + ! function readEllDeviceDouble(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_double) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceDouble + + ! function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_float_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceFloatComplex + + ! function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_double_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceDoubleComplex + + ! end interface readEllDevice + + ! interface + ! subroutine resetEllTimer() bind(c,name='resetEllTimer') + ! use iso_c_binding + ! end subroutine resetEllTimer + ! end interface + ! interface + ! function getEllTimer() & + ! & bind(c,name='getEllTimer') result(res) + ! use iso_c_binding + ! real(c_double) :: res + ! end function getEllTimer + ! end interface + + + ! interface + ! function getEllDevicePitch(deviceMat) & + ! & bind(c,name='getEllDevicePitch') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getEllDevicePitch + ! end interface + + ! interface + ! function getEllDeviceMaxRowSize(deviceMat) & + ! & bind(c,name='getEllDeviceMaxRowSize') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getEllDeviceMaxRowSize + ! end interface + + + ! interface spmvEllDevice + ! function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! real(c_float),value :: alpha, beta + ! end function spmvEllDeviceFloat + ! function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! real(c_double),value :: alpha, beta + ! end function spmvEllDeviceDouble + ! function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! complex(c_float_complex),value :: alpha, beta + ! end function spmvEllDeviceFloatComplex + ! function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! complex(c_double_complex),value :: alpha, beta + ! end function spmvEllDeviceDoubleComplex + ! end interface spmvEllDevice + +#endif + + +end module rsb_mod From e9147c089e32bf6364a59c95d4ec5c9242aad42f Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 11 Jul 2024 12:11:07 +0200 Subject: [PATCH 112/116] Update docs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index afab1646..e51fe70b 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Harwell-Boeing and MatrixMarket file formats. DOCUMENTATION ------------- -See docs/psblas-3.8.pdf; an HTML version of the same document is +See docs/psblas-3.9.pdf; an HTML version of the same document is available in docs/html. Please consult the sample programs, especially test/pargen/psb_[sd]_pde[23]d.f90 From 1911fec97b96f75d19ec6807c7f238d179a2d7ee Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 11 Jul 2024 13:12:17 +0200 Subject: [PATCH 113/116] Update docs --- docs/html/cmsy10-42.png | Bin 334 -> 328 bytes docs/html/cmsy10-48.png | Bin 382 -> 366 bytes docs/html/cmsy10-49.png | Bin 296 -> 289 bytes docs/html/dia.png | Bin 0 -> 50686 bytes docs/html/ell.png | Bin 0 -> 52842 bytes docs/html/hdia.png | Bin 0 -> 59145 bytes docs/html/hll.png | Bin 0 -> 62128 bytes docs/html/index.html | 242 +-- docs/html/mat.png | Bin 0 -> 92468 bytes docs/html/userhtml.css | 23 +- docs/html/userhtml.html | 242 +-- docs/html/userhtml0x.png | Bin 1688 -> 1499 bytes docs/html/userhtml10x.png | Bin 2141 -> 1863 bytes docs/html/userhtml11x.png | Bin 1331 -> 1159 bytes docs/html/userhtml12x.png | Bin 2039 -> 1754 bytes docs/html/userhtml13x.png | Bin 1289 -> 1188 bytes docs/html/userhtml14x.png | Bin 1317 -> 1217 bytes docs/html/userhtml15x.png | Bin 1599 -> 1361 bytes docs/html/userhtml16.html | 19 + docs/html/userhtml16x.png | Bin 1105 -> 968 bytes docs/html/userhtml17x.png | Bin 1244 -> 1099 bytes docs/html/userhtml18.html | 20 + docs/html/userhtml18x.png | Bin 1304 -> 1176 bytes docs/html/userhtml19x.png | Bin 1475 -> 1311 bytes docs/html/userhtml1x.png | Bin 1124 -> 1025 bytes docs/html/userhtml20x.png | Bin 1465 -> 1308 bytes docs/html/userhtml21x.png | Bin 10552 -> 7727 bytes docs/html/userhtml22x.png | Bin 1443 -> 1296 bytes docs/html/userhtml23x.png | Bin 1220 -> 1075 bytes docs/html/userhtml24x.png | Bin 1060 -> 970 bytes docs/html/userhtml25x.png | Bin 439 -> 420 bytes docs/html/userhtml26x.png | Bin 763 -> 710 bytes docs/html/userhtml27x.png | Bin 1933 -> 1734 bytes docs/html/userhtml28x.png | Bin 2047 -> 1809 bytes docs/html/userhtml29x.png | Bin 2410 -> 2031 bytes docs/html/userhtml2x.png | Bin 1129 -> 1016 bytes docs/html/userhtml30x.png | Bin 2636 -> 2164 bytes docs/html/userhtml31x.png | Bin 1761 -> 1491 bytes docs/html/userhtml32x.png | Bin 1807 -> 1503 bytes docs/html/userhtml3x.png | Bin 1167 -> 1028 bytes docs/html/userhtml4x.png | Bin 1817 -> 1630 bytes docs/html/userhtml5.html | 3 +- docs/html/userhtml5x.png | Bin 1502 -> 1306 bytes docs/html/userhtml6.html | 24 + docs/html/userhtml6x.png | Bin 2543 -> 2191 bytes docs/html/userhtml7.html | 19 +- docs/html/userhtml7x.png | Bin 2141 -> 1863 bytes docs/html/userhtml8x.png | Bin 1192 -> 1070 bytes docs/html/userhtml9x.png | Bin 2020 -> 1761 bytes docs/html/userhtmlli1.html | 606 +++--- docs/html/userhtmlli2.html | 105 +- docs/html/userhtmlse1.html | 18 +- docs/html/userhtmlse10.html | 697 +++++- docs/html/userhtmlse11.html | 452 +++- docs/html/userhtmlse12.html | 921 ++++++++ docs/html/userhtmlse13.html | 299 +++ docs/html/userhtmlse2.html | 616 +++++- docs/html/userhtmlse3.html | 2725 ++++++++++++++++++++++- docs/html/userhtmlse4.html | 3683 ++++++++++++++++++++++++++++++- docs/html/userhtmlse5.html | 2301 +++++++++++++++++++- docs/html/userhtmlse6.html | 4081 ++++++++++++++++++++++++++++++++++- docs/html/userhtmlse7.html | 2183 ++++++++++++++++++- docs/html/userhtmlse8.html | 599 +++-- docs/html/userhtmlse9.html | 723 ++++++- docs/psblas-3.9.pdf | 2560 +++++++++++----------- docs/src/cuda.tex | 153 +- docs/src/ext-intro.tex | 198 +- docs/src/figures/dia.png | Bin 0 -> 50686 bytes docs/src/figures/ell.png | Bin 0 -> 52842 bytes docs/src/figures/hdia.png | Bin 0 -> 59145 bytes docs/src/figures/hll.png | Bin 0 -> 62128 bytes docs/src/figures/mat.png | Bin 0 -> 92468 bytes docs/src/userhtml.tex | 2 - 73 files changed, 20936 insertions(+), 2578 deletions(-) create mode 100644 docs/html/dia.png create mode 100644 docs/html/ell.png create mode 100644 docs/html/hdia.png create mode 100644 docs/html/hll.png create mode 100644 docs/html/mat.png create mode 100644 docs/html/userhtml16.html create mode 100644 docs/html/userhtml18.html create mode 100644 docs/html/userhtml6.html create mode 100644 docs/html/userhtmlse12.html create mode 100644 docs/html/userhtmlse13.html create mode 100644 docs/src/figures/dia.png create mode 100644 docs/src/figures/ell.png create mode 100644 docs/src/figures/hdia.png create mode 100644 docs/src/figures/hll.png create mode 100644 docs/src/figures/mat.png diff --git a/docs/html/cmsy10-42.png b/docs/html/cmsy10-42.png index bd31f1f87b9ae692f8b2d5444b37c7ac60ab0e09..3ebb79e6c4fef55dbb65db92b95ebf27c74726f4 100644 GIT binary patch delta 281 zcmV+!0p|YB0>}c8IDY{8Nkl3>*0c`^@5Clh60lW$!We^O35DbA34uQ}zhytOZ z5D3N4GH^BSo@=G~ldSjOd3VoyrnN>bU<14X$0>INZa@RRfP=M1AP*dXE%1@Ogc4YD zEd$tDV4uP%DoZJ+ZoUJb?QaU14cT8?Yv1{=u>RijcZ`=XUVr9%)FeAq{vP-l;3kK? zr|o&yo?8Rje^-mUMPy!QxBq~^E^7NKMsW?}0)GI#EfNYcw5&tu00000NkvXXu0mjf-9mj$ delta 287 zcmV+)0pR|~0?q=EIDY{ENkl3>*4GqFT423TQ0Sgj_gK0p*G)%%gOaeF{lfWz> z0S(~bcwf^V$6%Lqz4f)Pf0uI3DI}%TO3sq8vW@z`OS0rC8NmJp;V?)#$<6N&LI8#| zCD2$>>K&M*$C`KHwL#uNY?mFn`Xqox2WXQ`py~J_O>` zx-)%J9=)1vE3jmfBhw8c?DX9I1j=}l;Lr!?SsU46D6GQgBXor_ zLc0Rppj`nr2zny%53J`klO{ul-7N z9n&EN{B*D*a4)6&342Dc2YSIWHTW}#*Rhb9nZUElc{ejDlTH1p{sjT>;y0@Y?@BdA zZ+-t={hcOBqFeTR1Lq-Yu^xe@a3O01xO|J@s-WKx`uF4;T-wFJWfjy!Nz_w70{#M!Ie#!oL_t(I5v7yiZNe}Rg&ouZqK?1_LEV6EfW!u{K~_MFzzSsq zSb=UpS0EdN_c=c6V$83e^kUyV--~@`hjAQ(A%w6N&ca<7g`IKbDDEWmKKT*@AuKg+ z1O(1yBEGCk{4xs2xe2{roE98?%;w^b!aa$Fot%@ADabC~9)CgHLx?}jM<@iS1p=Lq zC-J;}{Dl_Pj75n5R>-YlgSbDqcLLwyYRTkK;2$Q~$23gerfDGe0!J9+)0xt!Pma$7 zM0^VT8G(Z^2;Nr7x#l2+@DM?Z9zJGs#p5M2pK+EP1Jf;>wbc?3M#TIV0g=}KTKih@ zw03sk$}UhuT1Qmt0OD9|9}^XMT?){F+#(h^Gl4H9wgQi-%QcY07AaEe&uDYS5ugSf ggSWGRYxw-fo@aGJs~6lKsQ>@~07*qoM6N<$f>GX>^8f$< diff --git a/docs/html/cmsy10-49.png b/docs/html/cmsy10-49.png index cb292c093caedaaf723a39756549b0c0f445011b..37ce5dc48e90200d0bb4f865f412bbe951e7668e 100644 GIT binary patch delta 240 zcmVbM((n9FaPPo%m@WK;0io}4`4rZD1!!I1P(y&szi5J-Md^w7k5&HJ`9d4 zi5wQVY-#?aq-+8vogpbDbs+p0SeCLvefMgY8rT2IIhY(F&3@EBdnZB=d5b5;K)&F<*YS(!Mq0Np058vyeKIlR?|umo}&4Ij|h)hM?H3 q_!p*mU!r3UROUZ~78S+U1?|4&Px)X*=QLNc^Y zBs)P66n3~Ue?lM{*(d(KLI_lV{p_~<$8it%2)7!4)eRkBL6OJA<9Knib_Xp)q`C!lA>9zm>002ovPDHLkV1h2ea4!G= diff --git a/docs/html/dia.png b/docs/html/dia.png new file mode 100644 index 0000000000000000000000000000000000000000..de7db9197139e8b592815942792d983f23f3f93e GIT binary patch literal 50686 zcmc%wWmr`27e0y)As`Y0(v2`mcejYbNW;)wDlqgAl7fmf(t?B{HFOP~3L_x{DBU@9 zcb<)(@A?1Vob%%M>TIsLnAp$W&$HrQYu)SF;aVEci3#ZlK_C#Zs>)Lx5Qu;o1j2D4 zzyrR?%AaEa{$Y9OJb!{U-pvyPe897Q@%$<1=JqeUIX?mT=B}%Xkp~FG_we?G<&!J# z1$@cksch(}>tgTeZRKtUdak9$^8A&Hho^^)`)gNE7C`|%0UjgALSD)hQ5Hy_9BVG@VXoVhNaWXjKdA)HqaFuM=RWG8o%4fb}Z z7&D(?J_MS5rrsj^cbRPSynQSiD#fd+#G#jvHIxim{z5J7SkN^Ml#Iy$ z^8iYk`OUqsc}xDB6iTjTsIPBRj=qVdpG=ac-Iz- zms#{pUfjB}Wyr&6rYau4kGP-mNCHjiTN0{&2ate3mRy99kO(MVR^%cEZ>PeTeK#I+ zwm2U{1p<-00G3{D#lMe{LG$}bd{49+|C5%~6YHB9G`-)fAKI^eQDQ+N>Dm~{YCZ}m z+u3^f1Z;9ADChglw|T;o3$tF#D8mnSbP=)At5_2-i>JHhugYS?VtI8lHV9-xZL=4t z!{R$#x57bQV?k&Vqweq)w4rQb^i$L9$&H54+1yso|G%WQS-F8FmlT@ zc4h%7QA#`=$l8n1iymscaJw=$8R-(-(($hOjl|0NFdwtXN}k4#juqV+?Xx~RjYC}F zTqWK=^WT_X&_Vp77JVFc8T0taB}b+5kYmhiM{1z;GYw$Hj+{YJfF!#PM6+> z{*EiTj64Eo^Jn&JM*W0Q5PsXBAyK%>!J0y zo`ed1&Rj)~7{G@qjFK5Umu}FXEE`=G6eHZu%^+(So?fPJ3_QHziOJvCLh(C8dwCz)0Gj~4FE&s{FE_8*}gp#M6qyUAeuf`H-X@0{POTr)~J?|4j2?AIF9 z2THCX)}s4Avv*DAW94#Shx(szK_IU|*X45?E9v)`j#gPNr(V|)7B^qOe_*ejEup#d zdF?*Aj_LaKvHNwA4qUo{UGPA@nxv{%kEn?!y7`et-BKev`pco)z|u<+ZI9A0XR)$O zXouc$v!aiHb0S=~DP*quAE4Lb&R-?C@wZO04LU9h=X#nH8~@pN%jk52LEf3lIwtED zyungSU&4827YQ53cQjrH6(WW{#GAm|&_+ z{qj2CMY8Cm7SX~RERcdOnWxW~ma400#cW-K73)}aVc9jX6z8I>7Uhr=2fO7BzPAyl zEint2ru!;om&FZ|5ucb(8enHkixVTrl(DW-)%doN@NJt5XTXsiXf92DD>peCGMb8O zm^+`=PU?N3f$b~eiUs`#9e!rA&B~KkJbE;zro8VydPs9ucURSAs)yW^?J-sxE87D0 ziENp)O|~O7(KnVjb%UL142hxXf?4DI+X|cp1yB19E^8SXhVrm_o#JtG5+vZH|gyf2rypf;JOb0^M9yDNg^PTt1qG>V~N zX)N}R?Y(0S+odaZB~i_o6dClT(Dx`y++T3GD}5Wk5A%-ZxZoo!*QXrXy3TX zi3k0qZ*;b-x%k+AE7GgWczaH2U8hrie==`OuCiPABfJR+5C2EdJ3vom-Wifq?`0M@s$4# zyIL^pK?#re))l4MFn6$Jh1N{JN>L9viY(9{NaoDcHNhJMFERy=3*5COQ>6&g%S8EA zoZ{1P0})QOVzDBMq$6)g?`4H`E&p>~yqkzXNeqdUtQpNDeCtVq#tx0}HpqezU7cE~`qV*7TWC8Ygx>D^lZ%Cgv?01a@AZL+O zG1^(11W-^B^jR<42dys_mr+ZV z$VC4$e=}0VJYxSVF@po~RnxN(rZRdWffG8KP6f2rJrD>8=P}|(VOga0%wFs)PINDL zx>`ysqxOzTI=o?#i{`c2OSvV%TE&)p_%qL?N2_eo4M_ysMl1z8<~psz!HQGbvyo{Z zH^!GT<r#p+k&Omly8<+OfWigOQImLvZQdZr4bVA`9mGNmD@#CW0xjJ6PG zLQrN+bF2QB13f8brIyk@^s5OQB7yYcxbc^kdM(~Zl{;V%$cY=gzkw86Wve(w+V{^n zW7|xECpU<-6Bf-=a0k4yF9Ro=XYrjU#RQi?H*v8g;duvGp?5Wybuuxr#DSyFj?AKV zzu=97AJSHI`(MmJpK(tmQYjTm5k_S$;o$M>b3efMUyccyR+t;`Ga2^UoiP6$hdBe_ zv+7qLLoS)AfxT_imY`5}I?*#E6HTXVk>U2%a$Hdd2$XH-Ey~R;ty&!YeO^7_I5gR9 z`z-nK`YsHZ-(?+5`d>%I;j#`lEG3lZ?1%U(z1wUDcUvE1pTRF9szvgjs`zX|J z)_rg(O7+|}fVo$Uj_IUfDRd;%XHY|5u}m8ccNzzG3DgRX;HEDGxI$&!bS>e0k^X6> z=!)qCl)FZ#bAk!a{%enm_xqQV&4`?~2P$%w18fBPo2kVcpuZ*T61H=#Yvx|0M#)Li z52{4sHzxAI6tp&*A4&F`zynO_?08PX0&gCxdyT&&0R;;d-qV*k7O1cgcd;zBG>sui z6xe^!*<8qnED-q6cD+vHXCc{=yTH&X#HHqhSofjoHqjkjx|T<7xpo1QaY?9{R!peB zZmzZXD51F5V%>PhQV2-9JeMn#HkhtsML);m-y`h$uKcHcNIlt3Q<&~)sqpE>L=_lk zLXc>n_rn|W9Oqb=bL;)kh6VPtEg+YqiC#sI+~GQ>@ett*mmS{Q(qm+^`cCtX-Xz=4 zwLrn!p^ zPbi?rf94k56XY_718PhCdxVnDX$r_^Ycm!f`TU~$KycjicYh_1BW!l(5CvM83*CeL zI=J*w1A?yf%uw@giUa>%YkK_GQnts0Fnaa$sI%5GV)F*{@x5gX%H7GM8I%qvuAJ?o z!KUe7_4|LXuG){05&i9AiVZo2?=MAaHLg-i9W|ZlY#+u};EuAO9j&)2RLTG5y&Y6v zYozJqdH@2c1eq!urteM-&g&J@zxZNjBz82sZKF#~U|gLMocW*U7{W+RA}>_-%yn8$ z{!T}-KN&;_BD=+kqLMp3atw1-hqW!;0Rh=|s)kNb4iXD-@RLZ!p${(I!sgZxgdNjS zNNbv5_?FI>=LuJ;&qePF?jEG6`9;}GzP8kJi%x5y27!o_6Q0h=p>))b9eI*Hoy?8r9o4)7U?l=E#<$YaDmy%Sj5T&OS~<@-7F zT~n0bkJcp2^A|?6^EefAb`>+p?_WFJ(XjOhs8_P_GW_p%cfh&V_#LiW4BmfV)`qf7 zE!L&8M}1dOc#NN5Z&eJlMzEC|eMvB^EwKD!9lGzQrk?um)QkLHIXf6t=F$OVa#XF%vT?4Q64;P28^Jx!}Y`>WWNkym@St zTyh-lj~Gv$32kIxfN|uv#k|MXf-pNz==Vi72XhfczX&OYct>U@1rTz7m?8vg$DI51gi>$fuUq;NHr7 zpkybGxpE^4$G4Fx;5p0D*J`4&(n!GW>`Ot+z3oZb7?=t8SYc!Ad04&cdmp z07VX$tw@giuKk(ik~S)lZY^D!dJmz~$bpHwRpl)Bp`vBVZLrxmG#eiDin(v;0~puS z$PUO_L(KD}<@-V$IhSWCvS0~k9f&-!|L_knu$x&rP&df*Rz{xIhug;I19b%R+p|Wz z1V|a#mm8_XpP_d%M!!$mOa-lTNdjUHcOjz$(ysfjK5YC8g@!IhzUZ;QhAZ}4ZblAty$LA-RkP_evytlw4t*3;WgbQ zh$#I!&H!qp2?(sdkENtXG$HFuPY^lkLfF6vPvZQHIeU?bvG_*V!H3mcrJ|79G4nQ| zzLZ|Z?v@Ejey(-G8Myr|!FSM7A#OpY6{!6e6?$eh_nKA)m}-r_COFlCKy4pIX=904 zyOFBdRs~oaHQZA8&O3`SB_~MYnGytx8UkV0tsXi$kYPPg<(2Dqd6nlI9eJ1cBZM-E zxkx7FwYe=WM&z?tyZ-bVY_Qpd_XxH&P&}IU81gLq&NQ9JT?zj;~bN>qfA-yk&E%4 z1M1RVe$D#xVA8g4XQ%S2!+VnVHf&Cl1~7vX6NTBl(-;6`6z#3V5aWEU;_2mU=(~*4 z=p0>TklQRzo9BwTaDT^`*lj*a#d9FT{Cm#+o|0VIcr`!`F$2YmlUCZc*Six+5Xg|$ zp(ePpl!B$yn#7vAXp8Xt)EVD*3P_g$9e5E0G6EjRMs|(FNJa*&Rk_*X+$kKW`Gypt-=&cGLscwi!R;UWbp;iWJA`X?>Qu+y6p_{&-$M6n5^Y-@4Vm1RWgv60-;}=Cdz$@# zd2U-g6bN|0IHaDy6v@+TzArNiKMJ4*dzcG5#&VAm$CEyj`~*-pRu8)IZ>xdbH#t1L z{l`p~dO&y} zM@?MhAwv?taX!}nx(6n-qP#;dgTu67bQ|rGD`n6Za(F6hq>KrT9XMyA=mTaGA9R)@ zm^E-y*iT%_rpRd+7JonUK&Dk@?rV)bWE4V(HjTXfY!|h>>vw(2;$Op zh@Bi;rUL??p~&h^-q%ClNtuxkJ8%NO->OkZb&=L;m*vkXgGUpGMvRO4|185oQOsZo=SD?%&z;OUqB_(`pa6L1z%gjY=HXm zr{2U8xZ+4{`bk!$>TDz^;+FkTIJe3FmKrcU(y|NxoYcJ2&2)hmI9_?Vpp2`H4=Ww5 zc?>2`vt5+yRIqu_>~eshe!)i+dOpaYs;-Y^fzY0;C>t82jn;U=f7ft}Bx-k|5^WyxctNb+4@$2-J^D zAsL0~P>f?%79K}jDI3dpks)P3y&Ww=IdPh<(#T>oY3xmQY4M15;u2k-+bHe4@9S8= z>%W+;_-4W`DDJWI*))wl=V%;c@Xo}XaON_fs5rL*{HaNpO_y2gB(tA0g&ts?2)%H( z(GvcR6|b(Qf(4G2lrqlTSpZM~H6p2U4(-Y@A}M0DcgaVIEv{b!>UIX;^+5ofric5h z9|iP&Y3*th!_oAhgXQrkoP*2suU%bceqsyUQ>BDv{J$B|95$Tz6t4&MA6$y*pI96R zsHq>qrv$SOMCDvkcuz{L?;Khti~!{;Z$Pa@*8tc)+wl0xcN8XLt^GHzKI7Jby?l2v zf$I4N;Q!#67?ZL#<^li!oF;79{Y6yI>lo+Y*bk|AZ!B#$O?R3|1moCvo3Xpb*q_6GnQhF4ipyGuuSfjeXlH zLExD)o!&PQ91#;_?QGlFXgMJE&6Kw{I&PSMV{T{UMa4aIu}gkenG+^XeRSVLI~1}` zs4JacU(-D-y0Ni{g$FRG%@I+*tWmbK+&83qk_PnV?ltNHLPCS+oQLo@5)GmSK8F}- z(NxQ|DZ?^H>V26Yz$Z3;M(Q9;-YsztT1B@bte zFZ!s#i%lqKhra31B-u`>sU-=||33|zBq(Dk75ER>GKehkvg460O(?pc81L!x)iQ*U zct!puw1H9BctacVZ`%ty7rGQn>J>x&1SdllB3~k^rHbeq7noehSHFE)@-{Q&m;eO& zGr|s%dgpd9#?2h{O~~TCd8{;`bs2OSJ$NJk)gzliumGSyuYj7^{qpR)8lK0$wf8+q2)??Hh!dr~MOiG4F zLNr z1ppk^T1AEFYTuT8<-T`^S6GbpFZo`7wi_QM=x~v`J^(mg>)fAn#GC(_NF6{e0kO5@ zUZAa!*UZ87m^wMj2h+1)*=66BzDgd^F|;hLbmg;(Zwr{W$mFT&h^t$-aUvF=G3it0 zt>p2eo!B#=9h7{X0J@A=!g)hR`>At_kmsJdklQ&Z{s`lHk8>57{r*x38ljTS6|3l10Zuw&rZ8 z#3Hw_(7KQ^{HaP8+QnfYk$dOBNTj0oC1Z8@zWjLc^>;BVQBNaGR4dk>N*ib+J#+TQZbt8Pr5 zIlw(zfT}BEw&HD8g4o?QZ+7N^`d@lHUU=pWTz)iJy5gy`dR>JDU$BB|7p-7_En{01 z)il&r8;;p3L()xq!zzekbYlOzb<}c^Ls2?g71USoB`B-04J}mZ|cSL>sw`Of6v*?&P?harMLf ziO7_is9s*L(7{WGL7vH}w@K0zxLt%Qelve2ZBfA&LxxFtrZ<^6^CK*gto%M>fEAL# z;JY8(Z@~(v^0S4>;`mAHF6dr~((a0^WtaG68@|)toUlAH?1w=qIeXGmuHSN!seH9tjS8W zlp!-#oAt#`0Ey$zo|8|dT)BX`YikbM(oI_Pt(WH3PleL|x^>lJjPrD zeuSN6ilOqS*C9$g`fl=to(O}s>!!go_bKQ-b?0N1bBmXMPB-d{l77M>-IkTsVoBru z1>%pZt5WZA);5;*OqPk<<=HuE9PETo0XD(2Vt)ekLmWQK1Ca{O-4v{)?)`MZBRF&-c!UTVL}&9<%1`T-Ym- zBI+dR8|?rcO6Gbphjtgql9QP~Oh0JZalp6Y8?7lhF*iGid@|?nNcKv8a-l8)quVE! zIpO;|SJvz7j5D6PCkgD;(hmX+Kk|Ev@4RD+RzsBfK#PfBdkEM&9hPuiAZ9!OVwg*S+AhUuOG*jq!`F+p9lz8@)h#~yGh{K4m4i&9}dr7F4I*9t>aKY6H_|vo|G9XmiN2u5?*_HA>K54sdTHTzk1u_B+K$ zKWRrR`giGRbLfCb%CR&a*H=Y!HtAjNQyWyl#mM4DpxqTXDIV?RUOUf_b@J+=dP{Aj z7=<*Uhgl>s7E;NXd9Ygc$XM0$IW(LhfLG0d+|u0E=(ujt)INVy5PdL7@O>1otO{`E zK(7dlNWR$kU&GATpp42b?8C=W@5^us9e|n+lt5{-8%F##xTIDl%5opd&RBe}CGc6z zPqEOJR+l;lWD*Q}LMR!aQ?}Oi$>d#hb6I*kTp%(&;rml2XfkE#%ldim#;9P-%Y+?? z89#X6^Y?c|sfi5^nU;*5-TJe%0|P`nT<8%h!1j)7I|Y>PG2N(N(N4pBz`XEGV^Y%b zU|^ubzz6xo`f@1zOJT;JsR`R3HWI*QmzMSyCe8gI8b1vV2u@XQ3DI zWS+~FjJp}l2J&MH$4{TCtxvM{zy3jGG7#{rrXD31T4p1?&5LbP_}4tJFv^HvB1?QM z#sjk@LB}q7xF(D~m_=UYRWx`1rouGAt+koc^tIO9{YF$K-b`3W26w57NKaAgQ`(!< z^E>%E#}0lF!;8!J((rIhFk$%d4ZK#qPU9S=zf3cy#r|(Ii~u(oi}7D})=eAQ_S9wr zsb`f>1>>5FFa+zZ&&n^div&<_rq*JyuJTXOP}kiWE4FqqFTx0vcOxW(c0X`I`VsU& z=ULtplL?GB^59tS5%%XICd>>7&Nw! zCG&jJ*9c0Ix+?iQPu=yX+K?fE4PIHmXTmL{@E+FPg^Z}BzEmh^; zK+r4IP1S2I);pl!`cD9t_TRC&EDjJ~|2qVn<4OPbHn{nJqkzxHZ2!#V_Ls59C7l1A zVafl0bh7+mz`ub}qwe)G5YCLzrcJvFLw_$%6{276^-Tr7-5Bc%pS4w2nz&_m-((%B z$-}fx>%FOIvO#L}r!&`*@uyi>bJ*))D`)B`Hu!RE)(leU%&;*orI|4u#KA1G_~Cg5IFutJ@2s%Vdn!NCSz3zf zeEv*OzEsi^uz452Wu|&;8yd1+C5q`eia7ET*9~Va#9Oi|sWS@+AAU{I@)FVCemD zd~y8iDEX~9VVZgw^o`prs%`Ev^wg?FMt%h)n%~m`Tae6U|IdO~hZ$~X8T@&yCjM!% zT~Jx6%vXrM-dKM8uzQ7J6VhMiR&34x+(WHfBZ)n2GqNC_lJdCF_UMaF{J#~befUqa zJ?2QCEf-x#Q0t254PWlzg=}oN+k~MgAlw_E4?OGXEkX1vt8>INUB>(WZ(x#oS@Rwz zO4vw_{~*}Hh=;0H-Gbg^NM^Wc)4sxxfUSSPy?)Bj{4x5V75Fh_nvqP2%!C?NY@7@0 z3&s5MuN!gth6erlhvBL5W{(Ps+rIX6C5dz9-bm*j25p_r=+6SH79UQY5oVT2^AY$1l+C#I<@=Y+{0GqDk%owbmXvSsZG79+%U&r5tE>%S-;*nSeJHD!-Ew>Dj|mv1tsh>QAh!3$0^V@`2?;%&wg{T}sn4C=COB~f$=CAP8s z@!v2KrRD$cyx{)@wf+Z0L_fOs){G#pj}BsWAVH10qc?)Nhh;$V&Y7EX5?hOAA_a;& zQ1IJ-wjiMN2$hfF%#A#i1fWa&^yN*!{4CIwdI4Dz&OQ91^Tz+i$4_=u@piEShW{MF zo{gNObOBTRzSfk3$Mk#RwnmJm1z|V=KW~%#t!y5$o`Hh(DSJfAa?}pgo(69e9HPqf zJFz`CmQD}3;&Dp3wtHdHv03Q}i+=#d+6J+&dEoKB75u z9Z!6LYXz?XcCBEeJv_%1(re(?VpgGcz{PT9_;|QjszafZaN>Xp;zgbCp!Az*EwmEk z7lAqYL8Xdcws(0Ib#p_fYIOtbjMXG8yC4M&5RvHPLzSg#pgqr)dw2jySSZ(%|L$$T zw0v_rWo;{$)yTXg;^Xp(&~sLO6*&`Pc_Lsokx$--DkX37mBwF1J$-kpfMqOs3QKaf zwK;HZ^{d_eln#$$yWbZ9ColKZ%sIFpSomp=O<3t#Pq5`yOGwH!$Fk#ge*kU+qPUeS z!~{=(#ESA~T_iNV7W=_;l8%bR-LJM01RIC087z^tU33755*YXIV*Di-D&$)#lGPDwT`;g zC!GeaM2F;eu9g)cN6wKB)LCSIxk@w5I0%ceO^a|pSkvwvJXzaT@7IB7YLhLRlVi6r z|Ma-{YA^j=V<$sfb)IARQ3a!NP0j(Z?hU>epwkKp{s9=C0wjpYd`iSn^fTMQB~5!2 z2HXvLFZ3IlUG8XrcjiZ4!la<`0k+^n+UdiG8$a%^UiE!UIvF4kO%ALhr0 zixtNyZa+W!u9y4|@6J1`Gux11+&SCt1NX3O0#p(Q2%(J4r-E~BPL!O)d`5nTY4UuN zK!|7YC7h?|={v-tQAvDLcnOWMbh2b5dz=e#JV}J=qmraezLI!ZZXmgl&V-VO@Ja?7 zMc7ujIA$hgxw$Z?eVe&JP3{Ax?56Z>(HCfI0Jd62Ofdo67DPh>h*a3y-Tz#xs`E*H z5%}TfIl){{v#OvYpaEi{r&zbMFqX%qpDs7do+Xo?SUNfLunFSd6#!XlQRMib%+5-n zk5xqm#bp|~JhDuvTu61+1T?u=uUk4+K4Fb9c4rmvdjD=>!ONrQ`q-hjau^9mFT*yn z3rq`luNHYXE>g$LzwR&k&Of3=3iwC&gy1uK3(Ewe^VM5HsT8-9qYO<$gx@3q`=Iv$ zuR|^>U33EL;Zks;kcV!6!;$}`&^w@fC%loEe+}*zdOC$kV}A!NchD;G$0GR(e!bg* z#(3%OHC{c$m(eGtV{ZHmlB%hOBeB7`AYmpxyKqQc&6zHx{re4Djj&`d+`V`AuM`y=+F1 zE+r)?0$d`m3$X~>oto(amIsHdz4>h+|5INLlye_C85nbYln43&HgLPSxB~1yBXG+< zgCQ$ZIerFBkH8Xg72Iy&Hbv#}S;s7LV*gQ#n>~2_hd)>&RhR~7r+|bhD9$t;rCQKM z-$TDb$$S8WjArRfd91AOm8o+sNzOesceryoEgy|F&_FV=r4>)ob8CFF@-u;IoF(_l zd4ZNYN)lV`nO<<=gu}dOeCM-zb;aU^`Y7CpN_;fjauu*#G@$sP2xtWC&K>>S+MtLF zd0}m`DBJIZk#Y!x1mDAoYm_1DnGL&bGHQ<6jd_3y{B!55dW~8ke@Y1-vX9jk^)&p- z#ZjuYmq3E_jY~TqrW67ly*pj8*6`pM6kUE0X6O2KCgvtko!$r7`RIG&)3`G~fFwuN z@fpGsF=wqQ!5}qdHS?gN)1s6*7Xyyv=x6MWX8P>ZrAW*24t$#LG~P}VJosqQbP!ugqbCY zR3L%{sy9(HUl_8OmVK{V%T#4lICF!9yFmI32RGF#WjFeO?D7_suwPBqMkEPmapsoU zG`YBt-zBi%zFzNGq7z`JM|Mblf1ecd#|G5<2E+l`p3{6-yYmilmL`4j*6P4d zjvo+LBrEbFrLh0c=0e!5tNs{|^C>qLlnf8}2pG0GV6FT%nw{T<=Ym8`APi4%9oMnN zTElVQ9Rf}~%E&ljP8g04?@qi8&isPDI0A&9x-Dudx4hO*ssOXRkhs~e+StBDR0m4t z;6Wn;oipGejHsdYq?5wa1I%fCbW*&yNpf@m5Do*p_!&wXpD5>`2~q)xNb`r}Z7=58ftrv+DScd3~FfT25?pK}7Qld;|&=9oSh+ z4H6|y00jqjQ4v>FSsiuWe9}2bzX%db!+n?bGZf(=EQu4P4qa%!Maq^io3*K1D3SRQ zag~~LYHoj~hJQ}2_;w#sgszi!32=#U;74meYkpl*3XQ9BNT>U-BZ#dNOWFOdw&M4| z{YQVtH|>8<^}Z~AHPa1TX9K_;o6dljaOS$Hqh`s+tr+327NF_K*+%$qK-Rw2_!1P*GIIp0c@)y4c zYAp*?WloC{hl0#jElWi>L-7R&1XKCse^|Vv@w_pvC>iQVhA71Lgj*GrY#ukrKerxF z;f3&7bXJ9Yx&?ut8=s4Y(ul#^wRTp;VsRJil~#In9#ADaUSC?r95C3-o%rh?-plrb zqFd~MM~7|!7d0W|&0FX#k z-Yjxxh3gmms;w$vQ5Os41^`VU;5p4jPx=-)at}G;4@pYx8}YmGIe;V@43}u_nT`^k+Tt3FOjGE| z(>wxHkaw$sv+c-XHlW|hxDiikFKbZ+w8Vn%hWERiDW1}0{FO{|EL_PrCSKI55r4?w8q~H{3$3$U)e@pn} zo(7k1U*~AzG6Famc(^N_A|_*D0i-E?+T(z68HV~7hxPS&smS*JVB4P7uynt)ys{s?lL--FF%obx*Up!MNRqD^86oE z+*6M4WJ|w|_MqG}dd?tb-6@KSDcbWX@5yLBiu)ucr3P@w{$73VdDPY`NUGzR>&4HMogN zzyorrK5<+v2|0kz-z89(?Lp@lp!XbveKK(_zqKDrgJr+^!LMSJuFXe|NTXp^9l)GD z5KNoEmch1PoWo5?`zP+AUv+-ttIMh2Lljme_E<{JG}Hz_0oZWQgsx` z-m#@J1IlyIWrij(teVd*c7Z))bFvmeBSQcdjk#Nq0>06uS-A4y8+n`eTTCWNzB&}+ zI^b2ViTwl}j{jO+%Y|?Ylp3Ryypa*Iv%N@cz=j?5JI^oOjoGtorw3pC{A0Y3RRbVB z97klmdByS*oZBm++uWsfryiX*wm+v{&hk=Kpix**d~@RigdU=9?<7(^fr;?z`vS@v zAw=j*N&&-w6l*c$`!!%v&c!8fTtC8M@^3Zqk#l$|f|I0AdIJ}h5G?TSXDD&r7)h|3 zP+Um`IJ_94i7Q|X;L3CQZP*DTGs!N#O+hs~D+O5PfAU)3Bfc6q!2%6PDMtQo>BW_k zl#^xl{opF;Iw7?MBa)kV{q4}82>0D^5Gm*0+@J460$MhKOjrY%aPHE~QFbe= zKcn8zJK6F8n7sw^vLa%SE~NNtm1Ue@H(i9UAGDj+hV;ov;3b^$z7FK(Aefv~N7>bd zh}#gT-r2YHFW?PR{=$4o!&MD(ofE)GzA8?gtPA-+m9u-uW zumd3VJmoMCaU13@DG86>ym&Gm7pyhny`XsJZMO*nG+6QICsuhYBEVjLYmEq0^&JQV zhPBHbmV@@I9r33xo(+aSYkG{Dezv+2hQ$ z?GdYQg4MOYqQz1fpWbYxb&6J(Sycbc9!WRb~`l?UV> zt=qf15j}5ZyYbuN2ufX!rybs%#EK`8O4I=lL=y&l1ZXOtldNAW+grJrH_+918U@tP z0h3wn&LhRnmuj45C!Q)G12UzI7zZLwql(1jLz}0d?Ae?}L{3V`}>b`b%O1u!-p*pmNNeo+qS^CinIer2Q09Y&|tBevAzd8xf@Chr8hT{vs`y8Cls zSqT@Zl4gVV=E@fGosxnFP*gc9@)`C$%OhqSHHF7%*3zJ^ z#YXm-g~QGNdoO@cQWOFx)6Ai2P3c5)1M4BOu*;T_=I7K|tOdijl%+c~0qFgF_-!h{7tObo zQZhLKFz!T_X+$e&XPzGz2p*TLMn$O3C5yMB%?en>LNQpzv|F8iDG ziWWF={HuHpgq=t}@%q%*cXV34zFB~vX0ZzpTjX7*ErLUZ);Sg?arWaNq= zg#!85UJ;>8QJ_}Iy5DEW^Kbr4_?)BD&lcJt%y$XMesR}^7FJ^cww;MVnxEDnc7Diy z(Eys;qPd5nAty!Mt;~~OR(lUFrH&7__Jy|>g^P!_#()G!eXRuvQ12$A{WCO)2_SI{Bm`74G0}-xi)9pVVuZ0P+}E=c zv_9dw4z#FJXSLf44jN6_+ninRc}*{}V*M6T#oMTHPvT{^^gp~u;=&cMsRGeo&X#p7 zVgfu)b!9TN=Qa!59y8?}zI1eih-$vpG8ZMAF}@R}#LiUgX^N$|k0oFWMSBcKUGFj& zkGTU>4o!+b_vI%LPSo>+V01fD!D)8}n?pbdh=N7%Zv2&q6QIFmN$1q8|%e;2gLQtn&yJc#i-Z zh4_ z%VnH^hctJApgG;(OBKmrd<$&YY!@w;mZpIJBtKez$r&QHta$!FnjDJpG}wC??fKcK z+<~(7g>n;O`xHwE%HQ0eK~V zqJx~7o}R4Fj2MkP+X-b5ayW240L)a(kd6HWiVYd|z9WF^H`|-f!u5Yq_Z~n|HPO20 zAR>xNP?Dk`ppqo%FF8jEk|bv_AX&0x6wpybkc?y%2}2Z+90gI*CJ`3k^+qU4kJY=}kNA!g>>XPzs*|c;!cO(cfcwtp z>$99j)uko1%K#|Dy~2@NB;Av9(I(-7g0lUHlZFjhznm=O$M))<0btdkdr0PDN+g^0 zhXnsaFO3J1n{~+BErMDT#4Nty|J}g+sWw~X0VUt##{m28H?ll) zjiL}!F#_JrT!v-YI4R8G+@qv7UoZ1eY{y(XDzX$Ac=bJ;spwOb_oMs)*zNMM2E0!s z_!A~v_x#X*bop;NrYMTTs^+z8$IbZ!?8=rqRti+3k<6|vce5}n z92emRQ`OeY5^iIDK=qAAr;8%1IyCag8_1C$U_LxE8H-FHdoj=R{vepbT!t&^GGck0 z$s?Di{rM@){jGw3H+(=fGSm9oDoZ@lJwx_Xr3Q6GzAJznN*`iluRor`WgOyTbHB#h zRmo$%J{Z^-cA7#L;%BsvfDXtyhcE-9aYE;< z!Ts??`yFXHuF)!i^*xwN3d}{w!qj^Z=hEq%NQiGfG=JQQ*R57px@Z`QT?a$j-^uK1 zfAdr`&`m}w6klLVK0*wt+Ww2{TBz!wfQe(S6IE4n(p$ILah z$@t*#Fv3)a+W1D!ZLiVePRtb7W>4sD(CN?Eb=MWj+$(8x6%48^__z><- z$wiqJ{soHZV#LqWO5G99v#ku~|9&y)7~heVovMOu>0qW3D+bc2RC9lraF}h`cPb?F zbgj6bHeQl>n6sIs;4LR+gI`$>t9;j1H*up4YCN1eONEJZ{;429ouScs?UQGCWSR5zw@h`iHg^HKO&!H5a-5<_ z;yg00tGb_hUR$Rw1j1!|7S#9H>lmNPg96(P;(ivcp$PGeP&R~x zF%Ue=z4*2#AIUK5ge~Z96c)?wRbR2WXukZ6lin`^(A@6by$slI1a5@z^bHW=1(TN! z91=upFM<&>0RvZ^+k1Z+L7%UrxrmNidZij_cI`JNJMK0!9txm6rj1c*ok(4*;vW)z zk&G*K*A4L*5M*P~E$^wCeDD>!eWpBw`+YMH!q~Xyj8ogCay}lp)fM0f$hr7737@eh zhtr_$jZ?1VNREC2#^iqIY}mdms?M>8uvxAz?}fY@Th33_E^KW?!d*}JAuoto_F?r$ z&C*DI-)i+F_TZNCj2V44@hu$qVu&kru(k54F2fYu{!GA|9Y7U->&cgO}%r=`{895cQ0jP~J~CrFy4#o)x&wYCp9_qRDuX=Ys(I&$gNO8WIU z%qa?g1GLoM#8V<1XWepU-o?OQerr=19r^a*t`OMZ8($Ece`QhH$V?L-=wIXb$|rpD z%bftG!nU6aZyLT=*nR2c7raH&YOD}WJT|rAJr1Y-91BrCXWZPatHUFRL~%!3^NzO< zuboNBfaszejZ)~kunX4C>VoMK95p``j=d7iq4IgLLptuV*-t19Cfr{?(x7SZRN~lk z2l2{)Kiq*(uR@i>&U4>{I-JrTlqo*DsIM1nhkx4A9=!f z$>FA=YM7vbbOrrvsZaZ;dqLLKF3q(@fg>F6D2j8-hpQwy|Jf90lH5B)ALY8C`J9w= zjgSRzz(I&E&~xefL_()YyDWU@P*sfNtGe#Nr)qmD^B%u;3SP<2D{WoslPWunZ%G;e z$KMh=KJEIu!#sJJI#I#P>`)=FI&KUb^YJGm;^ zMz#)Vuq`^M6Z81ljk+|j_`A~H84Nyodi);79Dk?ivEJCoZDiVFP~VVEZ^M)pS>0&d za>r2}e?z(SSN`|WzS-6lK+w721Tpd_SMsc2quWT4NbmzpAnADBR94D9yf`3TX1UBd zYFAqR*?#VQb9{oYh1?k(UVnpN%XL?= zin*lZDa@4L#fuV6<9L-3znaq<6S!C@@nA1p7j1+?)jAi-?VoYfXC!Q?vR0f10LNh_ zNB6nk4Cr)>By3lUp{Jjl#o@0L=IWL!=D1<&EKik;s zk9j~Gulv1PVf8sVs}$O6^j<)-G?(Q`m70Ae<(Ump#%^;?P9j^Y45FuZU0F{UeQws) zNxD-lOG))qu?F?!&R}8BQZqq_vacxzXNg|KpTu0$Q#zW=Pw8uK!&GqepS+I;f{Tyo z8Cm^rcimNad+JN*b)6&aEa+m+p1pI&IC50catxIZ^gr$t=6iN?Qu>oQ`t576PgL9t zdLmFsyM6pJ_Oa&2kCC$9b0^GxeyekhSdY~6{~SXriJ*RIX>)tNe7>aPntJ)U zX~m-utf%$_3iX@9Vz<1g=^xZN(~GN0w6o6vLh`*wlLG0siQFj}6RFL&#@v9mbFm#g zXZQN92L5am`Q1xyDPe)%i^Rq?yKKn3sHz)m@F$%HRbUWxW~vc^@VZ>G=jIgy>3T|* zSlwBq`ll5shvbC(m>URtCe{q^V%gE!@&$%ZEA;pFa!yQ3r;3edovP2BD0japqD1xG z^W}SDB5r?L`Fl^yblBLn*+sw;K!sb4$>!Watk!nVYwFF}YADD@Nxg=iyI;^uZg8aS zfTw#`zaOYay8Trr#iJOTO0!6uuPlR>3k;Si4)zHr6Y${3PSN?ukRt1d^)s<^{kgFM zeW3Ul0kV7WzEtw|MHcFS?>mv5W+ZRSY%zS^kPTol+}AB!Z5@}EeZ;=(NEj(06<8?L z7$775Ir1#^*4XLDYTiz7>Vq2nw+GHT9}LPSt;^~Tbq_nJyX3{<%6oQkQ0V)kdxf9F?~^gl(ze7=&lQR})0BU! z9OtciBt9c$nT)?5i4-<^92f*IH3qu49+*JMB=nu!*WWzKif?$)vpXgLg-P3; z@G{6@(mHc1hHJkGHLYHK#n|Npt+TGH(k)n7Z1up7uk zP`_IKYh`L~I8A?FVR%=IMZu=+Rv7XTPf$2OvBMKNl``%&^5C=Yx}J$rL4I?2*yVr|cwmv8w^QxKOTtXThAym!tovDa^X2cUyoCmKIa)>-gh$(k_4Ig}W z6Zy<;9V|gKN16{?z%0IY)7^C^g5S;{dJl(G^|sk$N5&`1=0@H*M_9rsy*F!AZYG&# z%VkqFgd_5+e>{SMHn`D7dBzp4cI1=IZs=CD_9M0BULjFVxI1a*2!+JAy5EPU5C%lu zBGgnNW5Yu)_`mn4O(1XsJpP(HsvP2ik51yWdjw2HM97~`6-)1!Gm0rrelEU$+UM!s z)+_z=!ks*@WAM0r0*%sAj)B_j&qexIicWHJ(+1yDw(8w*{4V()h48SqpO)^J$Cg(f z^tt_O0RAtl^ZgH!t9F7zxGHcB`>biF8oSNhN}8@`J}FLNW4Kty8LLAqYGAuD{+R5e zzva-yU$JrNZCb?r>hY~wR-2CIXvT4?LA8eY$ws&PAGS`t6g4n!eAaMMbw8ynm}Zj0 z;B=>g&Iem}!Ar##U%b6BS@-O!$DHnKi`SJKx;_&A-v>)xR4Ureu1+T`ZSA3sun;P! zk!hZggV#;{dFav0|4x99a^zntXnvvoe_B4NK*CDG$Ne88Z@c~T5L5lR*8t!BHywYS zc}t4YR(Us+@B(c^#h>VVGtKXQj$DJa+!e(J>xxc8PV>^*W7a?8dpfS&zHG9MWf>K` z>agQZwNi60iK2z%1RY@{A(?toDYFnyXAu_r_qBB)L=xEc8XkoPEx9Y7M02`W8Q33= zo?V|*B|AGXO(oNeQ;^%OYMpvct#7D`J#rgf_6Sb~{RnZ2ib$0=s~xAQ=%~6M>t1<( zhG2{v36<`vO^I46l_+&LQt}V4mOIrI8;s>zug|u1C8lVBkYe>-gf37`ux9AZt`DOF zTuJ7PKie-)CMr~wB?vIkY0LRLDbbMMC(ZO-eet1V@BS^y+Z^HyMGl08^$$NK;ZIe< z<-+In^;T<3rn^`78rM_$+%z-i5Ko*N3?bB@@6Q@3L+Z?N(S`m%U2b=Q*m%y;I^9A! zE80mVjoN=KB5CiIl_@@1u3^H2R}zi(iD}2iF`rFoerRFseowqVCeKwXG&~-YmW(wOeM58Q%o+zR!K1YQSjn@PzLRm&d z<2LqjZt3f6KdS0n!?L)pQLRM4{3xk#GDlGXO4yM^0VC3rZ}#YBEam>0ZM)&zFtKJ+ zFlfciG?#+Rlbw5nm|mL0h0onTq&h0Gw(hnps)9DfEa|1q&v-%d@7H`qSccTu5~mWg z1IA)FQwh>?>=}yL9yRYto@+#xCdyCh@)W7*upg`{s@6H z4*hon#~f>w*;Qt+i=&Ra2CB?Yx6rZbSWMytnW1ZQ_gZ2Vsyteb&`CMWoPnXpv=wiQ z1U1}eSf8BDy?}`}L^fa6zH*uf#l~7Jxv7-=yUx0AZO5siv)o7Pl5>AzH)u)uc`7rt z)MuvmSw~I#!VbUEcsEpen#z6WFhw-$s3-h>Xt9V^jpgvq#UkC#s$9i}9PQ_qu-aMS z9bC1C`931s_ikuc$@5FH-0}q*&bE<7f!Q`M4j1G-)3srU{)DMj49*o6lu}YTq{f=3qhyX&8D23~*&Eu4xoM)N z4-Y$Bxefg_Ba;@~H>?V1sHo0NX{k4KkFw{9R`8o}nQ*nc96uC?a}Kj)&Wt$+cG5l4haFKC6ZLKAQ$& zCSKt^Y9lw&+zgL%+~}J#sJfG@=`eH?r-w|FQ*{&dv@=nH?agqX4rj?JEsfC}#;PI9 z#MpoUrTCSqL!vm?bX}S!q@r3hfu=NCZikL;zaW<767TWrz_itEZpqi9euU(bLqgtjXJ+lZ0~hNfEM6 zkqI!5=Mob?>G~6Oho1jdRwfEG5~iU{c=(hNj@uwQ_M6Ad{jR?JwFqBc51MmDmPAh# zJU>hY-5jfXN4oO+M?eJAPcCIUXM6mE_|qKPvt`J=01bYywuiSPZ<#G>zNqYQBcc4r zmXLmH>-GhubVZz6*pR)up$f@~8U3`=#UAkn&T1Wpbb2sV-QC@~yQ784F`xRRc`N!R z$91Mw2ar`j{1QERRdeQWmM132wBy)RyfCkcvlh8;rn-D&kXw4!T#?m8T3dFd(~$ew z_4{~(V;aY-SgSFkbzi@RgsKL4z#BWQj_u^j!|T6%i%ndX!135$r&mCVP4k zTVh{3W}eS7fQp5Mg%@2YrkbO_J>8XbsYF{?DlYBhX0GKnq84j)Yz1XoZ(|>yfBlc; zo>S-)Ufd3;yFg`0Z=$kz9h_IX`J!)OBh*^B$1Cp!ynfxy8e0^u7DHa8y*_cjU8&C? z{lQ~SC5EMqj|;^D0sv933If$k+B2ta$z@Qea&1 zn;pZ;@0WOEUA**-3dXJ64Y_8v!qToE*SKU-7m$;avuTS4Wk#0TiHmS|M}w!HRKw0A zedpPyP%mDq)V?FE7k!>61(Tf2>8X8O1DuTo#~6P zP;jmXL%o3hsyjb(os^*XhXW~6-rO&&@?*7NUKeCC@DZSQp#J9~`xA4m>VeZmBrl20 z%`%-jlV)$m$McSYGB%#fqAy{wxbAGnqZ5RzV9H&ji;wX$7y&x7=6>^^o{wLD&k)Qz zc6ma@S43j1Acx<|e>+>3S0tJ;~Jh2DrykTm19*SPc@oT^h6Q|r+fco`uoGI*It`$pe|7W=mt&#>2Oo|beOK@{|E z9>Zhxq=TzTaY+~`7?lD3v90~EWw#v|vxQMo`B2)9b z@6B(1S}QWwa_F53uq4(fRjBG+zjAin?YQtq$^H+;rZ6Ci9b866bZ%RtTY9nKcYS5< z#LaeJyDMtfvRAV_k20-wuNQ;Q$YMgt&v}xI8<}z4zc`WRi5MWM=f{|&ith?yqqLWJ zch=)LADnwQP%dEVSw#0cW;H1h=J*4zEursM%?Pf$G2}y_R;kM+XM^>#(AYnmQo{em zgP-vp40n1P_3+L1d?II>GBl{tLW?}2(yo$e#A^H8!y2o%1SNeODYwboL+&Y|NKQ-m z&wlNvKP@qhJ#yupj83FpjE`@YH;Ouw>p~e`wIzsg)%Q!LFY;N9LvS|Fd$56Z-<~r} z=Xi%jw)!K?DfywIuCq{6Z)Dx!@M7j|Kvbpn+bJaLVOP^@@kfki+zHR$iDUP*D+jHn?5CXlqjxG%^!#m3D-jp8e z&0(bS!%Rz~?%LT`vwI#*>sc&{W&j(%s%k1vNL$oifoe51^&b`=@VNQRqCeJQ=iI-uh`G^qVGyZxPzzqIaELs)#qAfAaTS3J2s5Za(X#{-pPSasB z=-S~*f31wfTpDr0RD`IZPulo9dG|$d!}X*mFu6+$3RQ7?`%`+W^=AOyNaK@O%zLmi z^TRV6BYy@NgRP~RR$Il*d0s(!jJ?X`qk(}d7^VC#xYUx$n_MFoQ`k-7Cr?{mesM;r z_}C0$0jME1%DPk`BtDoS7^R1eDOut4uGO#xmnss1 zk-HfgN=O>CmfK_EZlZmAObL6HeyhfBxw|zrmRcl~Zb}R6A5*!pM^gAa;wL$oDz6fe zLlgf~aF)oFf;)RV%hUv?TAp6D<+(Z^7(sq{$!2s5Mcp>Jh57dME3oSDuoDqX+~wY4 zm++YPusn7=lXk83eBX8$>C|SUeI&9uyG};dCQW@G@BM~oxXFSu*;zsYUICtbU1+ef zrt+j7Tl^Vo#C+$i%SeCbNc;IzriCQsB|eZsEm7orv0)BaCjHQ`OiE<=q>dpW09fC& zgpSI3Ql&oQR$uYx&M#L^RM-kxWkI9{8hTZWhJ+oHd*Nq8Tp{>|-9S}>JyeEe)Ao=B})9#IA9j2M%DeHxzE%aBx6?LpOdS-19zi+7QH zMS~m`^QK4qJ(GCrcb8$NiL0N=zTUVI&)XZZ+nL2Aw|4Y0H}yZX{$gH}T*ZdmAiN3K z0wj1G7i-bUM?_#h1`C$*)3ff#os!D7MhLu%JI40-UfNF?S47kcvu3LESylUwpw>Pn zRPlM}t_PgpCV&OXhz7JA8RJ3Vg7nJWtTuUto1BA*;y)y>k1o(1j_~hLu`|aPQN#*e z(;V<`=ID32-CKxUz)leZNBGv3&+Xfe5AQTlE4umkVCyRjAtkM=xB= ztnYD|{1{CL1TH}W+I2r*MQ_iq@KTpvU!tz5>rr~o8R`f4ExxRg_oBi*Fy6;m>#B$Q z13_RaSgh>h8UEomC!C-)q2}Bt1nGWV;w;)jVEUV)A$Tb zY2rz+@TZqh{!Wlmk2n>oKVMHhdI5B~l%>&UEADhc__E%TD?NPVo$EsZm< zerlmkvn|b;vVJ~&g{_d$hh_~tVUC)prlaz(yKfH}W#gfzBz$4odnt2eKAaIdqYTv9-YiY7?S@bnmEL1Mi;w9m9JgyQ z3r2@#=`&vn*?O1IS@2rfS(N#>wr<`O9%Ox|i=nxeaVNUZW}HBe|lgPgB7~g32Z7zco zF4v&$nef2$6}sdw#qTaKPZ0FEiX-u;uK{yfD^Z&E$qBX4lUX;??-W}QJ(bB?bh##) z&hQB(^Co6!?uV&LEW$da9C~^ujqGcbz~pdqQnZR&bN2Afy$%&{JngWVUgESVtWiwW zNuJEa&%&|}t!0OWCsCwfo@RZXbhLw)gR;KdqS^ zhDy#-2^RMt-X!A)=&@4;Ee5A1tY~ey9C0c4P~+`tT#a-Y@bjyVHT4J!`qKAN)(OiZD?mHRd;edA>oM+8!| zzfJBjV)60yACPtHVg^j>LX<%b4HeF3NI*~f=xx1Y>&fr=lr2l!PPE!1 zsKN)h;-W$RNk!2EJBGPcqoen2McZJyJjit^@ps*ex(K2{uq|g zPa*O`jNo|EG#yOU{r0`=4zIm7`3o3xOIw6#2wcuZO zN}f9{Q~KI>u6IILa2;EKYF6 z3cqqA+HpGxGW~XJ+7HiF4M%A_3%`-BmX}U@)SX;EXZ!*-=wLNW)=(LHBuWDn4!-hD z-{inIjJ`9he5v<}ld+Jy1Omta7^Cj}a!0$rkAHZv$UgaZJ~D>T5r~Zi0`n`@wv_a5 zvJJW^mmEJY_56L?eu-9pWTM-ZJ%O-S9jm#~yvh%W$8R{N8-a5z*1PxkR=Qdf!!3s| zfIwtR`qA3V*%TBMINIJ(t}Bis-fh)a>asmzJ&5eFlrZoHylIVdL1_IO6ONXq1*ALD zIRNH=Y`mZGv}mg);fZo*#gn6?ndoKCXg3n_B3+bF3QWY|I>s=E7J`KK`{=%J8J?kCJTG`HH!8Lo%{CEqDY-UF=oA9T1}ESon$icR*M>+ zY-5^6t+!lj`h|K`PsVvrufFJe9O6v6ZZb*&sVH&>z3Z4DYN=`K#{99LnVpVcj?w5+ zm)l$drnUgG;LvD7Qq^x%#uPIdSTJYcw?NbH*mfLAL{KN#Mu>k)Y0a$Xn$BfXdkxKe zqDhawy_qICb-Z^j)3ez@v7si}W9A?PeK7dpbnS`~b`sGa%bJI^U`16E;?SU7Iy4BK%aY)GS&kT6O2{{_#3+iNayIA>9V)Mu}kG^$k+Ci%r=dflS#d{ul zT<4t?2%lv`7#_ZcjxPsJ*5C3wFOPi5brV*!veNr46-78~R0Ix)j|GU2Nr1T5aw+S7 z=K3-IwB9!6nn@1sn}znMmplj$y;NFAe*es$m8=4f~qgzJcd!7JB(tcX#Q<_me}De5Q^nE{hjj6&0=E`#nWi z5&Ey+pHXNnn=i&Lt(2(!X4D?QV+X<>NA{BRUjSl;v`Nu; zM~S4L3JtfplwO8}nDh<}4K0at8oa)K;}RyQx3@Qnl@ZBCrEViTf-U2pi^~%5S=c^oKnD){tEPeU7|c0LRCt8cn|Cn5Wnj5(n47Y*yTbGtPv`e!p5XU`U{Td{v#cN9>DjfX^m;-Db{?R@Tgxs$^W~6lZXp|(0UU7( z!s+%FUl8|cNcNp_9tNzc1*~_AqBkI7zQQH(YPfQ}V-bvBK=KcMH4s?Pxj#vlNk_Ko zvpRuAeY<9C7?~uJ+P@gNnFzrQ*UbBY%3Drw$*=!t5ReCT&tJ;Tv zK~cphFSVzc<=wThAsFSjzBqaY1}~4j<|y9Am|WxM6n?uS81M3!;4zTAZuB2i?Z~N}!)nFySk@+Dq+Wr;vn}@3#XD%KR>TCrX+)oC7r1`qbQt ztq(T{i{BeyU|^u)$jI-{yp?`ub^h*8wH^!jQlriv#c4WtL;4m@421J%Mp#fJb^dU2 zf5vyI|JN_{`V`i}&Szo50*5JgnbE4;YwxGxh~zt9!>x>jN)qJpUVcY0Iu`2JHz%8Z zEAMw1y0i#Pm78z0##uIx~Qb3{K4~oNaR4w^rdQrQD#wq*Yo3dX zY1Dg?8aj*7D=UhQ`^7jUg!W|)^n!>ANc1<7_&P*CA7(r0`TLImhz}=6vFZsa(hJad zE5TS_s?cO_U)#EXY>Zysmx^Ie!Tr#K9o{j9;a$&~lJ@tFGf8#FpkM?}T{>u9m1$i- z;?^Kma}VxsyLQ|yHMf6^DU!}^c%9=h zDW_iYog3V2Ihf-e+c@~W;aRm{NOqw$VSXAiuS+CXm(u+2#{fw{mOQ)wf$tliIeCRmGsElwB#H31%7mM4*=MO+CF5#sKAgwqw}rBbH}6@FwxEtd|gkVW-y zhy#ABH-hOrN@_)m5@%{Ej{$sgvzJXW7`l#xQ*Z~_k4I}4Buw_+>qX_*P_4XHCIn7h z7hwo%y)#3(H@6czErItCD!`NF%`UhPy^{1gP#+s~sOq2Nc8{7OK`GoMGaqWfJXi-= zyth-PzW5TTOEYhLFr0;?ja3)2H$y^OuaTYL1FuCdlWA;ce`W}wae!V5TH6zU>EMl< zbKLOTTpH_0Q}N)%LSDR~?yC)eAFtilsi(Y5;=bC=)}&Ropn>Ta@u~N>I38pjLd=&` z5UlsZ@_x*TCD;`MTU%S|Ygw|}%}d^w_H9?x$G{FuRCz-dI3&EKNrXyBD54ss5kr=@ z>sfLms4v~Me4+*Iv1Wc6s-<)dZe)RJ0EdyiY}eo(mFZKCzv#Qti{)}Y%QH9Ig<0V} z=&E@Mx-wZM7Nbl_j5?86M0DW$sjGf+GMK=Y;catD>&`z$$ezwF_0`GD&dfIi_eSXT zqFpWaUY?}ysz=`Gdr?j^9ch~}2eW=5!v_JwEnWszrYLj$K^2A-UgWG)n^&Q{x#MyRxlJh8h zIEJ#_Yu4?J%U09oCwjGM4xv)4%Vi?Lq@3{4!DPdtReCH$hH*e!DPiDP(p*R`3LSSs*i3|nUzQ_2} zY+YwDTfx3}RVfwCG-3osL8*FMt|6bIac1*MZ91}h1DKE>m~H8vo@3C`-vUM^NsBNo z?43)NJkxgCQFLgp+b_a>Yw=s?Vbt%gwD03dzwd^&ZM&UM_O6Q@T3M2ki z>N%jr*B1;P4EFV%?UkrCAgnTvO%)Ub)FlZIK~($o)6Zs4&^^=dx`dcMMGnTa(DLZD zsmKG0iUDbb*mz`jy?X2afnXzZqCTlv7xKc!lNo_dPw0D!Op)~=uwO~Jw|3PU7S^MN z_muG++5l2UAK0V(wWi;U<%pk__?6R?pcll2tO-I|&Gr`7mKPV_yDc)tRnti$akLBG z@Dx60T^81P-~4@!cuNi*(j<5^P67DL4DO$^f^Sb%c!))yirzIRL9RF3Pj zU5L%@ZK3@@IyE)@IYPAjUFcL^l%40rC_5iBVwi8)fGRNhpT5I)7`8fpXs1_ulglXVwoWUWg3 z#qgN%cpgPPocC;yvwxYv*EQe>r^OeX=Ja z7Mt8KtU{Llu`BghEaJVWZFf;iXG;BS{Para~<`eQUOM9S;)g zypcnUQnqLF4l4;OIYm|SYV>k{zkYj|=)bi9{Uu%lch&tUi7ahZKTdXyTy@?{O<fi5Rusxazz4`M^qj)BMhuhnVMX#&>>sIe<{U9%tJ1;D1NtKXy;T z%mBDSfQ_IE<;9tsBriilrc4;){1JLe%3vb550&;)=jrBWCO`ew8=P;OzSTuVFdc)J zV?Zl$fgUC6;%g>72%*!0P<<%1Fdgm;^7JxOO;hZ+TRLKCA{B-{cnnR7)LfMi&|M?Y zuc3udnO>Iy~n|c(?CV2{( zAs{&^hUK)nlAFO4z^8tjxbiFSlPLG`PL$Nld*SA!ZlZe9t;<-qAc|;UoR#-0ki#LQ zgxFfE+pgHrSyIVU5?{|UF=35j7ezFWqBK~%WWpmX*>HWwUluVgu(uK}dMMXh(`sXr zf@VS`_pQ7^z+i@($JT{79^5{QVlT|aYh*FaUxzgqx)(*jl9Xj20!Xh|e=V4ui~rv1tpg_ zNpSa%ta3lH`7cA(P$@vnYBky__tUy~{@;<18W1aN{o3Bn#u(}F!EMKP9ItVCiP*W& zdAV~a4H6UQ=;KrEbl1Yeg+ZbJr+`SaOqpR8uXSsQ&k;aXXY0Z`=RxJ-ndhs$!c8>S3z&} z)&pRZS&jC7|Ha?u@97YNF(jno72`{lDR^7+}ykR(GJ9--Y@E5wr3C=a2uzvava z1yAwPTnj%G5*pgH)#HR=bn)>)MrBWexTfD*WbmMaqWa+T=H^&3t>D{32zm)1c5e&p zhDsN_0>`N%b6=co2bh(-{E6@7kM`gGsiLKUT$QF9vt7ruKPGuD4u3pdE%8;RO5Rk} z#YYflz{XC3xE_yNQeE~3NVo~5XPgSKz_ z)@^|0NO{!@+?rR^`RYN)o^cS{WQ0>+t%}NVaz!YQvDNs-lk>5NuoK5`dDOj_K$jY{ zW*{pm#15&^shNxVdR9XY2P-9nb9!Pnwo<@@2axWtjgCtA3*WVTN~z7cAsc{z$M8<- z9?f_UI*oK&m1Q`nER7IhD1`8Sssh~ry6{EM!JAuRcjZ}Q$n4Q70gv1snVq*B z?RVJX9vtU|`P1E35?knaH?khkQ1K@}K?)*K+JNPr$kcwb;VI{0*Xp-9MlpsFtfoZ* zB8wyLQh7I%=6Vj?g;}bzN%>HG)*WAV7b<&rl4|H3RIAQFk%^kGIVqpxeU|FYNjl_! zd+_EdGytC+wc~~8Sjqsx{-%X&m2@cVF>Wjkhaz%%uTS^;BzLg~x4m~lYt93J zCHA8LT(k&}S>-u1Z(Q&mZJHkI=!dJ7C{#_jz&yajT6r6Y-ufR_$xuzPQT7lv*_*bA*<% zuLkIC4oIAXUM!ptpv6P`W()yaG);S3?<6-MP=o9wy#%DtZ;^ryxhnwul@$qzcg0x!!PK58+uWC?FK=~4Prs(kFsoB0JAdWeD8Q3^ z9j5n^=rZMHA)XtdO9n|bJ zLeLsT4>ej+>Y1nNEV5qYpO!&W--s14pBYN~sdoF)o-;qUH!(^h%mkV&cmRNTTFsmW_}KU7VqY^_?@OY2)^Y2)O!KfjePB84djCv z7ec63x{-0mK`d{r>*kld$s=vOz-bGH8 z31??&23!|HaKSB+J9?i=N;bT(4m?+`B>akHuYr7%E9pEdmOxi+p@qMm8#!9drfX?w z8AG~z4p}V}@tN&qX}MN~h5`QJu_c9usyo%Bmp%=DknzO2tKH?({(=x+tfLtD&;Xe~ zL=O``X&mdhx(&GFWz4jURO|r7dY2kmky^Bn?WN~1>ag~=7kbytFJUHt5$a-PMB>>0 z%zJSfIMNgAek@Poj3G1+T7lbWe;>r){r&HMx$}mUaKaE&<-V~LXBilF+92y$n#U#i zl~+~mIgpEA&6+l?p#utG-xjhcvvaG?yiBWONKgNX3sZTvRSGPY&u}53(au)phrMcE zST;$#I5O(83b1(kXtXq4z;=?`b2L^G5>E~IaK^FR}S$XE9a6i5LL_sK5ag> z>hovk{g;Wp>%r%M!$m`x%TuSMxYnr`MI)8eRKf7svMEI^>uSJ&kmnb$*werYIL!Cy zCyBuALGHab2xmoAKu@KjRuf}JG)&h5jaJZqZ;{vXgQ;WjEi3TmW|{2xvZ@rPwvhlW zFjk*Es=mgX_O^H5O%32-A&RpLEOP{M}PBW(Z7 z6mp%d4UNnE=P7`@!pQ>hdS)A(zW&DUq`i-lRN~tp^qRvzc4(;eHv2R2hx8!V6Ob#><&AgN z@D2h_1K^_X^$D*W-)sOH%PTqjt|{~_^fumbS2aPH0m@K&p507yJ0aL%wGku>7I(#m zsP+Imw0F7qwY}F{(^FsR1NeXCa*AAdt6u7LDBVOh|Jm>IshyD8Yo&O~W^rP!QQw*p zvA|VI0Za81!5@N$STd?wNP_CYMcW_I;vV^|v z{Y4XL3=?w1%9k# z*e_VUN%0OBAB6}E^7&op3YMa#apXsnrrCy-+%7jqq_fI4UVe7Y`rpN*fpnSJ1~Zoz z{+v{HY^`E&v4mwQee9hDZEW_~@8#bgUB#ZG%_8jGoj(3myi#sx;s{L~SVyOdn%aX~ zNS~3Oo_kQE>X*K^GBs9|o_%o)f2)Qq_x{i7O&9OG{|?1nD9?+c4=wImkIHu3MnvA< zWmT(Ape`)s5)u`?*qNkGf${+g+ERM z+z?G9wb?=R%Xc*?oih#lO;yt_B8^uT-$-!}O@KRRUG3!w7Z=6l-9 zwbg&r=K@6&kJR#ySxnxvzfo)Su~f z!3IBL=b7d$L;4Qx*}3{mmp&IUGBWYAHf!MhHuq;XwXuNyl3XO&FgudxE|RG1@+o~Q z|G+CDF_GNVREeyOt1;WU{Nm1NPu<-0K{|(pze~6RpotUq6kX*<>dVmG~gO=C+E7 z(mZWyT8sAO# z{da>bV1taGz9OfTxZ$2Kv}M-yj_&D;smo07TmEBd$H6E}tiC@GgdKXCcoO?!cfI zLEDp0n5uB(cBXf3U44}4)WnmWlS@IuIUoE(tNu(>|6k`8aT!MErLclzGaz_Qj z8!p2hQnuNj!tP8K;^)P~T#Bq!e!cN}dBya2)9Z(+W;y@%i&N-Ca6`rR;x4r>QJ$yg zWnOiCqlCeIS88AETYu>wBCR){Xs^Eu@JvqH7(Vo=(rxN0+jr_V`p;1gLy1aT`E&D1 zvf-|)GF@4c3r9X!*`Nbn?IvY({Sko#3^8)MuO;_%a>L)x+1^Sn^u>DF@62vL|0MEl z@rDBuQ1xZ4A69y~Jl*AKFC6V4E}qw9y;B>NVg0<|s* zuf5u*$_+n!mW7i`r@wN7J-wPK)^pyNrCI?nOTUyR z&ru}>iZBH&@49-3^L|{xpZeM>Y;)mSTKj`EU$&m9NgFa}UQ*qqPOc#HCC$8ctr$<; zhJ zn3B02)v zZ9f>#^sPB`TogS{d_8sp?b*#G%yt-7Gxyo7b>ERu=C1c9v7?9Y?oX_E(r;ryFsZD> zHOyUCs+U;S>jR4_D`jqmU*E1P6)8v65E*me8UGbuBF^z-<;!&|Mi*rQLL5^3M? zO6$Rw`343m$Ab_x?eydQR&EK5CpLP0oaxi1>Gd%D>*7xeIK}bQuLHs-t`<4V*7&{S2kj!jCKDi z)#7RKwEqwlDRhIwn7-EQ-`kXPhhfkd4PlAi7&K1pOR(lOD;v;6_a|~vt97beX==Hr z^Tu344PFfr+GF<+W+XXGhtg0{Yn*N1NgsUfB9_d#8aWPO^qI*-FY4Oq35JDpKF*%}k#%g$`#djCTrIsxii%YFe zeW$a!NSgjgT098RDoEj6EA>_tJNrg&6fb}U4Y)u&3qJ{JRWbRsBhz22p{3l}zMBTe zm#a+ePl-aji`H?mXubU>T8@FI1mFp@nzE(+M`D)e_v&BK(d}7Ion)1&IgsI^GMAzK zpAi?pZg^`x!Eb9(k~Z`%!m{sD{|p-iApIi<+yt{-F|iX9E3+z z5HugePA!+riMietnI<);f<0OOT<$cE52y?|a4}E^nB>TMBd2z??m$Vpl`|E1dr+@6Z z>*;T%7Sl5hL;}QSj>->|tLiOy9OWIb(ns)B*JQ$PK1QgyFSg#1aB;NSriJKDbi znBfJ;sQm8?t@&RuBNtc+^Y*AzoxeSw9{lH4eYlki)+C~)Um3p5psmrrp%Id^hwYQX zCvCe2Ml7`cMEI(Z_n+bC9*JRRhj8<9=&iZ1J6q^sDZgc{577;Ju^x3J?!>$jf>aUN zr_*vntEYkLDhy58Lx@{6`p(jee$XQ>L_GYbNgx&rce2)}P9QKkktr|JkwUHJl(+O1#zhY{VIcPAvZoi>`hIq-mX(~=^Op+ec?Ym+RrJ^2j88mMP#CZEASGt4a`$_Y@VP}$ z2Ou*tl0k4Op`GN#5QgqGN=Zu2mZ8Hhm2{U)(oYlZ>4-1)JZmz)NuaNelhwNj-lR6T zc0jL^e!`teD-K(tbit2xZbXgd)>wDrGNbdUhR+cUPj}Ch3-$fj*6WAO`=xC=rar10 z%u^LQrWTtIVkRe`s;j}ooDCk>Kn8{OkMOaCz1tUlH?+0>6dqXs6LlMsHrzL`vi|~u zA^x}FYj2@P*iXS8ZoaoU*Ti|(3j0UWV@1x_3?ic~?jPwX>90jPuf>lb!ii^~-XOkr zXb|3?2b;Y^(+M3ve-_>=e7xJbpp&2(KTEfH@SrsQ8`L?YP-({8mCPp6#R;UqNtb&P z2YB2GJ-v>Yo%K^h*ezl0#}e;7=WP9EQ-BS^8Ai%#r<*p6hJX6^`3>Ree>=I6Zh&Np*3U6O zx&(EG$S>sjpZxLbUVww%-hAjxyRQ6BxvF{SdvkdDLjhu0Fb-Lq6^b~|*dUICcR+o% z2hT_Mu;{zz!JgYn?*&(Ea6wZb9nyt}GD->Ei>C!kCPOG^o5t8A%OI0ceI21bgZ?9jrR|LaF~c8W857( zddKeVg~OM_=;qM4Z5;Dej#U3hV7dcmdU`KaZX>U3I-zulp*wzvuNBkoR90lAj7CXn zcpV~b{yX<$aT%Q2)am!KT0N-c=Oxx}1^dr^o=w4DTq`}l7I#}l;5f^{ha661qV_Zm zmRJ`LCG|CG)dTJ$HF>QbMPZ^em^q(xmBuBz`WFRMIa6sax*}WgUa> zGi#2$pI7foFFTxZ%5Jw(&1x+srH_$=uw3kT?W%nVf+Y}uX6F$7wo&AfwvkQhD7y*- zdb{dbn%`N}IaLEI-Ds>!QuFfeZgVM_I4Jl}Znaj)YYHej?{xwFmq1>8>s+ z(rMH+*p%muBdSgi2`8QG%$;P{VH4Z=52VekBXKiW%!8WlqUFM7>uUIQh5I`gS{qmt zh4p0t9zQ{AwP>7jlCk%6I0OkaUeD>U(_zyWOU2B9xj=kX-4$0NrXYoFcX}FFwyW~s z(Wjm{2hPc`MLX%7mnx$)l9%VuF%5XiT31{V1X)t!JrFiDhI-H!z<7TJGT3fZ`A-mR zi|JLnHH#K28en07gAFVVEop@9%$Zo7!^#BVeg-d>p#5J$bo*X@sB~B6=CxORTSpVr zoCyicI`RpJ&@%0P0y1$r0BxL;U0Hg_TZ21NSun^1%Esa>7%Hv6R;6yo z!8*RD_Y<@m(MY>GY33W9$_UTQO)>>AO^H{17dqNxWmopmR|XSuvHvJo7guV$ zXBLV3QI3XT^IWH33OY}%_9!F-h4i;}j2C+_oY)ONL6ru}oTsp|rPGZBSE8*J#(6j&0xCY1gdGG+ni)A#D?TdY$)_hErk6*bA^<0G z>{;Uvso`6n5wLWM(wjEh(@UKTes@+5W+zt|nj3i&iUu>&ItAZM18lf4eTSBH)rxZ* z^DzeCSGt?sHf%uX*gSWESc_QCm=i16ohKFMS4IB?KvKQ*#8zyJ`$aXTY3TQ9rm?-$ z=G`*;oK~0JTEtEYv(NY^OEF&I2Z*|^Xms2(&mtz_tqA1`7%4!k3KRW?HRQVpMEdHi z1AS?3CL?K0Pnu2rPQVcM)*(7NRDBmwwd$LbH`pGcx6~pE1tbm#=0tyjZ}58C;HGr< zs9%u(&`kTZ_SvyD{d=3^7+u{>|Qg znykMsCVvJSipXh}YiMsD%c$Gw7lK{3_=idWkv^x*9x4#4-$1N8E?#b4#B%fPK|K@j zMw+EPp?&YAj$AW2vuPU<+@C`9>UuspuDOIttPHKBc2-OrN9(+osn*Pd=3u-joJS3L z2O?!$%WBcx0YBRg*4@`Vc%8EL=y#$JX|sgj1=e+ETK*Ikay`8t zQ8b8h+rf#v|R&*vQq!!3P?(&rS-qS8PKK?*#OX^O3^gWt`E-Ss1Hp9}TC zS~UW)eU%6Q(n1+^^sG0-pv&E0BIy~9N6`W{dGQBqGmy0qUA8COzKuBv zopn*rtT9Kc$85U7U+47vjR)&xirLzZv*2DOH3Lt;$w+&G=3B-i~0^M;jLPVCL zxjiw~mDpB+6XZF_$@XT4^D>!7Pr=DaWN^fE_#%VtL^T6ClXweWE%lc_3FIpzXQ|Kt z=5Nryd~ZcfPsbXSI^IpDsRj)2yxw+YYaIc5)X08zm`@mY?K&3Ib-8oIIo6Yr6r8>S zAYQAuo>Pv=?<1ikN;0YEAtetAt9uthz1L+IQ5jZe~;Ook_a!xm714O5J+T_rR_n zU3Qs-W)RhCAgcbK!N%Hqz4W}H4?={Y9-b9*`NQ0M^AZJhM>ZZ%fixu}XlGpGCsmGk zG)G=i?mE>hNTb;mal@_@(Sr<`|i^Xra~Mlkz*N9wr= zAsqUmT5`J-mxXSxd};A+AM-g3iECm#1l;<2|po_Q5tmm}+mp`3N1^UxImq z${byd3==&LmobOpuDb|7%`Y0qYc_YnAvSaOs}(a7Tz+q1A-MO(ze8r(A=DsT)Z?i8XwT*L zi4QKTk46_V>Z=&Aly@e8>$*EHZfyzoG0CS9g)rCN5X2Qw|GCh4i%~YFhdL=@@oYAJ1Fxd8N$+x23F{kgU0b#TbS#7LK z<_<45nZCSKf+R#J7O@z+n+-n9?Pttm-fGD)UJ%6!T^V+LgL*VO>}W46_?R^I?aCJY zeFPCtyPa{(qW2=d6taoDgHSEMFd|&BUhazTy2{F1fR#Rxn=k&QgkUb_FZe>|ZpLrZ znjp%BPt@{xf2}x@E{eb20B3Gu#&xbJE!<9Z|K@djyoNZcu4k+vg4NC|dJ?hrH+fDd zp>sA}+r&Y_XC|rTtdD0WjK|@ikYap%9A{o==%wv<0l_!Cxl$no^peQ1yr9WZcdJI$ zxPVuSc>qz+)Q>WD6*1~{ll2f#C?9NLR}>I74b&>wl{edRi(wY%HqXAUq#6%YwF?t9LG=}6od zqedGwxV}vU>F?0LMGweF-J{v<22Gk#?hTgXJYF7QXqF*|n~jnO*EC$C4Zbw0A&+?5 z&x*F9I76=(rC(N6yzMwET%>QJBAWQX($bPylpt!!FXhrNXJ|x>^=2Sy zx}$|H`T4Ij)iH1O9X(&2ox&%yt7SiHY|0j)(J&ClUDuKaZy4B}cc%Ud_7QVN(VL`4 zTm9bNj*wgi%(did7@Or?OFt8Jb=dawKU;WSEd@aT{VI~Y1JkC~7s}@Q zp8&W3-4-@y|C zd{)0r7+&RK%>=OeP{m8$W3ktM@Xr4x*bb@toEveHQOfO;)STYNGpy z@dEswzad2{2^d^#-ocq9E+E%ptZH8t&cv}tGzW4813|lWfByjt>}|i&(T1gkn~*g$ z3owPhAB{zj{*0)%yW~y|pKaIlTe7eisOCSTbj||9I0zP%7K)Ag3tU-c9=I8kZi&#jIVD?)pZ zJTFV9ZuhT#cc)$=rlF-RiI@BLMK#fL4ue_M9+^~P%Z4)VA#ok|MU@u7RN)mWfGHbL zu$+D*F21lsA{++5OK2-x^7{sv)04=~>UY0>?Bs0b;+p0HGUSZ7c+8^Ar2fo?Cx!Dv zP@F&@(aifF8P&8rkpeK-r23uJq&b^;Au(j-KuZk{RUcD_|FsC2bOAG&2C3YK1kX^h zg=DjeK{&1wt~4btVRjS}8r)SLapd|uZ{4*lt9OGSMb!PWOT%o_34sLD{QRXOQ?oG#HZnKgOf=`F z#1)SC`#sCCPV?v``IrXFQx{L{@22bTOx&R@f8E&hZ8t#yFxGXNLL(@h(YqU3;|usE z)Y%YyHvsAj3stm@^4bHQBV2@DI%z~o{W)FAe>>%VD+|^Za}@AuHrT>~;(s`+4!1E= zBK=KTo;CmwtFb~*;P}dDvd;wx*P3E^s*Cf+0TAag%ZKc zCT#fhkeDUWY{7sqS-r#(kTYT2D+eGAi{w?<7pG6CK%gcm^Wn?VuNp zD&$mWcNjgiG%7pP-1EftX(?;k-u|<}Jea%vf0rg>T=p?kQO%&FIBD%9Z(4ibvF&Y(BT?jFQum zc1Z)81aF`DG{~dM&GHSFJxoqB%^mzs?;Y*RbJ%VoNb@VQa6k5fDUID;b7!)SkTU0G zm9!bFUMZc~pM1&Zc(20ipk0;AhduLW-C7Py3^OL;4GjkPa0Ww9$GL?qy5+U68sH8! zS;$L{Bria--4@(SqG2K!n}SA{t$md$*p!+Lk*ZqAW^rqg?Aau{bKcB?m)6-+FXU8af)BeXJPxGcZ%5 zN54-S5UHXFTintnqhNU<-9tycY3-4Lm`v!da$o_2$%ipKn*N%%ck%*t#pgClvoIsS ze0n#ai};u?KkN}X;qu{8BBj>C($ZzLho^$TkmzaOvD5rp7MATvy7Pec1u_gN_8CXz z(n}uZU=A>O5N%IR&UeZ~#aQUX1cm&Veb3)3AHuwjIE8!yb1Q*ID0N;$d%#@!FqBTJ zmkDKUwQE)X(@B%M{3jHtwIs$=4hRHxpci460MfAy7E+FnA)ZfgQWAy2s24W)gK?xt z9PXv>=$!Y3Ea&5|1AjToj8Z~xIgX35cLk+H(VGFk%DclO`^9}4VbFoWuJ)xiO}QjH zcD#UW55PrxcbC18phWXX0kR0oB>2GOvz^5xi9ibbOb7e+g5xalr8+*xN0?Id8Z753 z30Mr2jiHk|C%mZpLl*6`+wm0lEa#ChT@fOf0%c!MTb1y{XXdK&)sKE;Q4TU>k&%dCSYf5;spQV>)*um z3ZOG^6CE zB|O2L{C;HBqoh0j=j$j;pz0uTrTvP>+G++5`B8)EFnL?SKgSn=e3Ep;Fj6vaQz0IC z94-*&cJ>#328u%3kfwfGsQ3a;=*a8;MgrI^ta25WgYY-&aAWYwOnlo{AhhhZdklCf z%Qh}cGvw@$2^2PLz<3d(6KW-KfNm4Y*%sj#+WHF)Am}+Iiky*dW{xD$#bSNXmAJQB`)0aoSY|9J>`rkyN2Um1V*7FN-R0JAIOIhy#9n zm%Nplau0ULf%d80>_C&^9WXmJ-ZfdRCV*TOL>>j{6s_#Pn|^4xk0A-i3PcjkIy8BY z|HAEk=_I8vy@)xuM3HjXH(WKvpYcw8<05JMNzPjY5mw|J}ve?FE?8}&l#r+XqI zf$KArg>GE`-9Dx^O)U7CGd8$E4|Q(|{5PN<*9`=fhE8t!iNmF{&+zd|9vjfcw{;4; zn;~-4dajCxx92?ND`D6m;DzGr3Y{WgABsUpB;WzIb@D3|QRZ20I9{MGvaAUj7Q?~GUU?f*tImpHIf zg#~p2pB)>F$X0hV%$T8tdDcd7V$e z=hi&2u;^#bC?;rr^B>c`1{1$D5EuCPiwFBmY1_oZz4<2*$)q8>U4n+ocYS!$=f?WPHU9tO3QA6UvRzHgg|W~`fv z;NH!f?xPZi4g}Wps%k0z9A9v!KdjW;gtEDw+~mdy3}vs4igHQYgoQ7%Qwx;=$4#Rg zKz=~Kwc(8RyIG0yV&I_~v32$9MM3$+=Tl?ut75zz>U}pvrFmBVr1u958w~O!Ic%EU3C=VnK4dF@wFm2GyS4CgR`Ni`pj^dN@n@ zP)E|i&j|5iNN#Szych{4#_ZXUACPsDyyiP(=jF$6dLa<4S;OLm!yP{sKO0tl6cII2 z-=fH0s=;zt`OMb-hfJ{H-KH_4?*ddtuw*0B_7;TbM^U7%<%uCYmDFTqYLE~1*=DgP z;G;l3VmE8_#p%SqZXUsH8hy7#;>cs;@;!U4nYLdl-(*y-#J?jl)Gjf#Zg=98N4kJx zyNM`v4Yeu;{j=6?ci{k^vn&(zwRr6-L-E4EZ7607b_S6p1(>|)t*TYdH(WkzH6Rv< zQm*@oCNgt5^Ut+u8L$#wec5D0`s z_0dCJ2!ucd0>RTK#0T$$Ex(op|GV@;S4H8{L@#eJ_y*rvUF9L<0{2gLOJO2-=bGCi zqZbeeKRfQfOTKVfZ}29or;?%PljruHK2{!f5EbpmtSXMrUwFQ-@p$Iu$tonsFDT3} z)M@ekE%+!z^`ZPz-;Zn4T8_LEnRoYZ#=Sav(_TDbn>ewn=+m}gMLo>-lAxJZkb<`P zenP;YIc?|_b6=t4D2@w$Ace_dXzh_#TjqxEe#9P~?blKN{cwno;ENiS zYTl;4YFadTrJs-pen}Vi5l+TTR%)srSg+9ty!4oYv1t9Z1bMabdmblV39%Q zbTsSaxJ5$P(A!e82cqA!U&p=yvXX`hr7*JxEgD2d$PINAOO0-~E%RYuryAq{-Ft`2lR=_Zw0#R2`INUFbNfq;6IWp0q86 z5j_fkLWc{o$Z!JNTnSYIZlS9Ysc#$IGO$PM%wk+{mOg9{*L%!aL=R!wz^!c6XKgPP zX$G8ME8OMF>@3J6%JI$9MX%DX1VM+(CvB4L?rvQis%~BEkomsN!b)K(WGgl9hI#y4 zCEfLn#h`vH>|DZ$8Yh`^)Uq|&ztizLEJX?E?z zOGK|rm~2%m$@+{P&~@U3L2TF<*I)WRx~QEuNb$>@UAIc4emOjL$7&enVgj{%XE&jA z#lQ>EBSXR$xHRvlA^UNv5FfiB4fkZY_j`s0aoYw(ng?a&K$lFcz?Q>-U#5jhz@aGIlX=nJ;K5@q>4&O6 z^hvx>#x-O6w2O{o*Cgv@46VCDqMN*qAN-YvR1}|d7n*_^Il5Ea;qu|O572+#7O2Wq z)-q$BiQTNy{OR65SyH{~QDidGwPQSFTh?FO@rjU1>xo(+$;<_}Jw+X~=wy$L$tiA( z1fw&VO}sgoTfx&*U(ShJUoQBXN>hca*??D{cHN>=Z`N#(Er(wJkfa$!{Dvg2iwO3@+y2sp7r8C zjiEm(;+Bj9Oos@bHez;81J$S8s24kFX7Zpee3k^b9b2CCGp<(>5~h{bHlmHf(W5MZ zt~^U`EsLA+mo4pMy?2{8Olwj&UiwtTD$eegM6J84vS=^zD)j3swVCk;2lCKf#VxZW zJLgz00$W0%zarqt@#Yk){Oq4i zHNDQ8)+6TIu;y5tjvQm@jxK~f81J48?t;xIEce5fru?SEINFCb?Ro1<$xcR&fZ@5U z+Rqh;_O03*c|W0F%K69-fzy`Xyui5=9s#KM`QxlP}u@nA)5T)}mLHv-B8+E&!bD z9h+Q(K&IYdkGwLmg?^dAYFw~M*73l%eVntu{m!{&R{hEFE~7)~!t(Yc-1~D|7z>kH zWA^J+1#8~sH4wl6R5>Txl}*Qyx@URY-h~Qdc6y{R0(rBNv2q?}n|iQWg8FBJEWr$C z?$f$*n}e+~UH3HgiXjrL=oj~nnr_>h^Ye3?&tJ|yO{?G;_{1QoZ&`{_XmS6ehj@zFDc=Y3)<% zL-7tn;8>*%$uML0>DJ2n)q~t3xg8LhGg){C7j5e7PfYs11zP-Y?rLS~ZW-4(JyGpk zbTFT%t&xZbia7aR2dnQL+KJ|oI#K0Fkx(?fB-ilHnFn&;;)%Jw#!h znKj(?{`U?nFS@P@|Jmm5Li5IxL}6hV@AJvtGkUoI7vT<%jBR9=x>#}jD;)^rn3cIs z%x@`p_8q|vdV@{WjNXP=BA|cNF?b9>Mt5}6?2PlG-#JCFH_N@P0UM4PzfihY0G8j> zWl53%L$IuX3fr5N-}H7pYeYxV9r_I7Bfni-*1pBKo@`AQb3i4(0MqysR~AOS6*lWh zIi<%dfqEY{5s<%w^}?jRj>uqs(D=~cpk!>l>gzVrC^;j^^=H!XRY>;pG~%_Xlb@(a z`81`wFtY&nbbpWjw3;$p1gHMLMDS-oIgzBby{jU}wTbu@NAY*iQPVtGMBkttgRWdM z`6v_7>Z0~Xb)9u%XS|K3Hca7(hAu%x?dCUO4=N;SXhyyAh*%CPNeA9|2BQ3Dnc3S) ziIT~z^&R2jDvPsC?w7z;jLAIe7D@|hma;dzOorar)Ybl!?n8@SyD4C7j(7n!kgQqL z>uc^lS^2zaygff=dnd~QLjyr-iBFP~3sa8WFXAEBIXEeMkBE?S$PhRRP3_vd;dQ8z zr-k*>DyJ`P8MIBd9P{)*;g2$>6Au~cnI9B;^8QtWkN?y1D*mNL+Nqsa_!7bWb<^jg zgzc`A*W1DywD?vYr$_%9Uzk|vdpXQPg<*_DJee&My8exQw6O%QfVxsnl% z6sew89JY7G?PVk4KlN_kZFxWq`%%bW*Xvt8v|t`I(zYDK_vw+{PZ2pSLKR6?Wj!ht zIuSF&Kt0EIbuf7fINzAj71$iN!VrHl;WG^j%qst9r9H*3Xn%w7eDsOP=lxWWZiO#; z1?y^#6={O}D6Jsp;pV27$wDgCXUG}yLvo7*we*idO(0Qp;&$|DZ`(3;Qc1XU_lxTg ze>w3<64|tNN}5Nt^rnxXEgzM5`CC=x{BPV*$h@UZpF5K|2JHoDE4ms(&RUM-glTeR z?BA(e=?x;Ol`hva7sI!sbZDB{hk6S`AiGLGcqls)^u}fgswlq{J+N0FneJrXuN=}B z*#ATTt?qsCQXjqAjp(0$kz8OGbZLojZYi^?2tL15;uYGB9D9ED>h|R!jzzg*MtBl) zxW1DQA-0x}oYCCQXg^-}c+zre^W;YK{xekg59Tr6iV|G>UDMw(p9A@p`AutLWXDs* zSfg5wP-7E;45S@wqENHGm(!+cbsH%Lzi2HpK@x_zlE-e$Dp?gwoP_6e6%!?7yr$L> zoo}$>isCllf*0~fM>zMg4mdwCN@lV%@}`hAAorF9*=7kL>2tL^W+lxJeSK%%TYAc! zh%ABpQENJ4Dl}>6$us*34xcnU7fD0#J%Ww$^Vf!GPsK<`oxRnd(Wx?>}a#}qf#ao*9J#XCwfqW6?^uwsX_}bI$oHZtX)If=KkD%?@AP^-n+1v-UA0+J z!VUom?V#EL%YA3tb@!a!HJv;Opx>s*1AKwSJACsf1GAK@y=5agbw&i$t)dddpyf~4 za=Xl4wT7~#kn9(ohPYtMOE~r+F6psd>^RS!sFKh8{dS=HDLCT9G38-jjp=Ijh zjULoWI!*@?aL?x7U+NQi(S3Oc35QZ_Ehj&5dJ?7Il4X3uQ_+>g}F7DF>g%pw$3!s$OdgJ=D{rv4p(NJ8{>ihkCxZGdds4 zirglZJ^I0)WcZfOuaHC`IX4Q1dE{*Q9qYwre|HpU!=<{lQ)bT%RRcH!OO{Bsj_K+Z z{w-`tCK02z`%wa)d%u1}KtGZ$2vr6dA&?#~oT=|66kP3Ce81@ROX_`a#Yg_XrfXwy zuSDXB^&R_n=Aw*U@2#hrWZ0p*Sg)M9tB$j390$;UBoyAd!bR#*&Om8)a8htbAOJ1o zyik1quwHE#t8O+p0i!oeZ+5ACJXE}mG=STHJhe@ZP{oH=O=#@}zHOmG6k&Q=Y{Sah znhv&h$S<^3NVA&PL_(n=PXc8z3D}FuyImnU-lHa`K_e8bcg>8Dwo$+$tSwgXFJ`cQ zGwBIPaDw)l-hVNo5MYEHSs-GQgf?gXs;wA6oj&E;=#?&3_zZxeA*~n*||ffWm0qgwn<| zv>h5B=1oC)e`WxmPa{eINuSo3FDjIi>bUxAEh)W=PoxqiuD+HeeQDJX`p3^Jqw`s! zPP9%eT=mcN(wU4?(Mhe~_x*<0x&a{o2=WHy!Fm1zxsmyH%cQgihgmD2O+c~c@5>RMv06TKivL;8XWtk+%ZeapmZc8lYu$Az8$ zsh;Af(7R(TWS;`gekO0Y^#_3pq!TP(%^MJL3FS2vjKbWgdfU1_96z;?w&?YK_$uwC zdn!Jh=xPfpWlpGp>LRK5xSDzIztCL$b0VZ0%!6)J^EQU z!eZA`_6Y$zKTWoCfXnx%^RMd|)NL34K4_Us71?I&S(JkBLNl>gzs%<+%sx{RO(ok4lCC$7GF}}q(9pgb;y$n*35tyc5&YoUm&SDp{fNfpf%rbtso4F4^W~V8f zebd!?!CJu6^52M(Tm5v){~{0Z(C5ROm@xc3q-Wbc|vJukmtr2PC&L0-iuRn`-OS9Qz0b=z0lzb1^q9)%{fWN?0x z%-%uL07^$pDkTfSE9vwHsX0Ct^ehh0ZJCscu_?6okyWXKwcPRIQ`@?-O@ehc`{0DX za&^b))rUQg{Mx?lKXZW!e;~ge;^ukMm(x~0Mp1OqT(HR7M( zdDzdkF-AN2OTwQz9}gmzmC@CP3B!>Aap(7oo_dY`S`F&qy>FWKo7LXL+i5Cgd0FOJ zA|yJ&&9kX5FPquNtd32lqI|&)`XGvGYT8&e{tla#PU0S;j}M0czsATHHhR-J3rCEX3p3aQ~~0+A=y;bh*>NL2L3hBoCw{353LGi|rHR zFHOBJXbu`h^bD2_u$p&JCOySOol{bngVvcPIgImr4mN!r_EY-U(#KrzC~CC_WKJmM z@62O^3ycS_m|MW#N(@D9vc^VQi1Zi{@HAeH7ehlK%Vx`6e~&UDb}|!kspPuke(ktV zV&wv0PEXPx9uxZlqgu@aJAvyH89K_p?7N~XdT9N|t}04Z=%w|yN;*07MS?7beW}DG zWg8GfJ!DrF6{m0+nAd&j9$*%~Bk}TQ>uYLyire46F-kW3oc+Dkt)F|r?>y6P!jD%H z;Z(V;lx&AFn>y!~p2XeXAh38Q zPqv$YMqz0o5am2;)sIb3V#ipFkr$ay9bB-`D|W)kFH_dR$U8%~I9>JV0lGSc|0#9J z@E&Z4;}Wf;otH_H-GqEnkE#Bx9T0)j&G^Nc5u<6z)l$(Ejp(`x)l_$i%`K7rz0wAb zXN-9uhrz}1NF7hWmx7(uQ{M;x>`+RXlv$`W&QAaLLL17D_3 zR-CL5s_hY}pSz{7#6gXiKA^gdGspJ?w_SP0i%OPW4COid_mp-6Zwlm+aagI2-Aoj{ z)>dTam$@0h;4i@3$I|6k$U$~GYoomI;@GNczpl=XJeBdYDAeD5k9O){N;k{BYWoH% z_$^U$N=IwG7u!LCE^=U}`TbBqI{lzu%d0Jaj6ppwvmisF|CB60{v5_) z)=vgSbq_Hg>>k;O5lt`#nVvnS9s4EkB zK@6DCqTsgKZMojU4$Gwj!uxfi4G$FH6(S=qM(JG|ofT#6rwCJXKf5Ht-Rp#Zx#3n$ z{*RUIzv_%8VEy!IqPthkx1~^DcTVpE%>8A*Cazp+zqJEBJ)i8t^{PA#cx#`OQh1@( zi?z1Sz0@~MA07@ph@G&{)ip}YUb|&~$J^XsUp4(o$mTVbi4UNA)1=K0VhTigzOmgo zjhjH!gr~FdHFyAEPL zxS7y^*bgm!fgoT)#6=00XmH2CVykO2>M=M^_({+K_$5e)l@re)=NoFTJHk`?2YzpK z_-jC|;r+2N$=u2O1*nXN@~TTuaQ+N=dAI~r-5(jEYcl?70X-$ zNS~I}7VKy;j4~hxp3O{$t|9VKXxOU=5GWW2)Pu-&?m&RIS=v z%y$_Vf@KfgK~{G^S9Id3K%XF9rSw0mxi%PWV^#E4w?l1d5bUL43>#y;s4ZhyL5Wl1@Vf*YCwr#l$UW>$XWQLyq3-%ho`($FuSZE+sDxA zPZ^mG5TuGYu7CNBq~vl`USwOg6RZ``!M^6D_PiE6ux}BUEfxxe5 zbZmUQ;@26n4)mK|YVn{5MsJl~O0_w)AnyI?sThRs=HJGjXD96IW(By3h^+L;kY9kL zIDS`qg=n*-=JWGpJFzBW);wOvVcm2b70C2_wkXyp*$%`j>HEY;()kvF$?*%vDJ+S- z4D-XbeYk3yNDYkM1k|(){MKez4$@okGY2l7kUCcMPT=em*gr(jpWIgp+E6fOqqZD4 zY0ESJCgAKIN!^R0MRgrg_oEW!6UBE}_En`KP7f|_0 zt~7IB2xJcC*yrRku@fpXs2V<~Y$+0YZ`X)+SZJ!_q%d`{+1-(8&S3aM1W_EQ^$X_y zF3v22T2sW*hn!LTqqoON9#SQ#?s^}W_W;vv#$^sHTJG;10fEsSnEjT&oj~wS6D#B2 z_6Bf6`TzXDKRy6{N_+LM$AMRZ*Z_fANjf^uj@%^c3&f2GK&HNlGiq>Uz>_rfPtS$m3v-_v7z>ihoy>Pqb+_FD zOjjl!MtZmV9-e3S9}wsT25bL5!He~71q|drvjAWT$Bb&g$Kk%(I7{!nzy-yh^+|07 zOUuj4`Szp15;8KaGt<;nTFJxNrR}=(t@o7k+eKb)Mw4%4$WPRzSo~v(>AIKGa})95 z+tN$SWawwwqxsT@1GI5S=ZFPKo2P*e+I1s~DM=&$TWv7xd1hv&D}C$!=J&Ipm;-AF zXoa>&VOGs|EM%pnGuB-4mO_JzDrlKzSxzH1$wU`UYyu}w&l@CVWP+Yqr2|cbWS(%q zJx7KD&YHUpatT(LU3#3)9q%$IW}Su}zPvii!v5^j)OGtxde;g>wrASLlGr^Na`7J=M$pTW&dV|w*3()tt%Q7Q(Jiqz5 z|6Fgk=hqfgZJEs3dF(_&z4 z84aDkf@$kR@yS?&a?4GFq;_j!#g!)Ul)T6~=*Va3ehu07GOfR)n5CmUYi`eOhW|G% zq*aw`S~PGy_?_ zpg9bI!GG9WWeqzH2<)Ii!Ob-_8bC#I7}yJ7|I-U$!1`50$nKt(E(Zt^gKzF~!QYeB z1G+IQE-sG6r(SPuYIyj`bmw|-aRn{S|HBcoO991=`x3l9yB2^4UWBv+7}-o@+*0S% z-X(jYc9H#CB)#UI{}&JXr$r^yza(u;SIXHEHlw%xWXOk=@uP0z0%XWL+~tF=zFD#H zHJ3=7{j7B8tXoO(7Rnoi!tVz?@m)lx z!jgMkSe!WTYnx5D+Zv20tA_x*Z7psta`R8#0QI_y#0O~=lLoQGN|Rg*&bqKVzT+^S zvwud0YShJz+0<-i?tdg7PX-+ZrPF+{k1SoJXs8>?+Wx2B#}qI7t3ls&PEETWcT9o3 zOq0rHCB*I$`3zF;{_WI5Dlg4B%D-kYSQJhn;Tbqv$@^XpI?oIDtU-e?E2DK6Q9{`N-f|`<`8Z5glk4nj$7aw6-QzC&vkoP-Z>1&I?jHCI=4Ty;LCr;Dgz`PXT{TfyenRc66GVc}zxQ!oF$qB_g( zbB;N-D|%nmX;#mrd+6g?uHU-#-TyK0SX0!Wdbn9(;>7yu)agEZmR7CdlidP&&dEvt zY$5hT{-CsSXa59UiMR2)lWF?Urx#$ME=eiMqTrwDkLV`Syv%`uP93xt783 zJR$W}^{+MFt*^KKB16bxP~9fU_!~lUX8kLy0_4IUH#}fC z`#Ax--q?elbHP1ZY|n-$?-6AIEl8~D>$1hpDQXD%r|3!q!O}N9WBytj__Q$G)7Y@H zrthn#@CgMQJwv33nZX_rhG~h$)%iHA;`kpVGIaHvTLPc>sC(SJFebnSM+0GnunH{g zy-NmRYy{--qGosBCn!8><$a7)=Ysje(k(V#ByRu#TYJzTr}mkHXnF1Tvz0Ggk91B6Qid&L#Y3K$7~|cevMyb@w=6zkI7W?7}d^ zy)H&YofN~pYZnpm%}&WbZDAjbm6Ym~1q>ItOu-)hm-uWekm-4_5sZ#ltYR(az4!0TK)p@1#%Q4YS7 z15nxkQNh!l1$%0{L|W?hqz7ddMY$G#cN>z>{w5Fn*iC8qJJwKr+m9^0t}eC38+n^m zI_k_ej8HikHE*y9nrZxRtdxWT{sb!djmT2R^Bd3#x*Kk-_Au`g_3#{r2?UJu{#|e> zU;a&=kj)A@Vd#@jRk)>I_BSK)@xVyS`n5-@Ba7e`vv{v52Qyh2jy&$yj1oLR8E>!3 zW@_4BbjFH(+R3#r)Ym=Y2Yalt5H}wZ__*{Bxc&?XB~@XEd`c7YXIkV8;Chuld}o!! z9E6P$X&3!Mjk}1bq@h`h?NwB=?(wWovuauD*}&#G;8JDFe0zg$B5ohGr`$Q~u)~~$ zr7Y1DE1X9i{!OVsf()(1*+kRZgao@35Gn28j{?~Hip}}2J$hb{Xf$$MoJTqLQ{yW- zNLFoQzkAIm=m3zj7dUywTh$;0Ijm|10b?k>5aZ92f>u!V@fob69sZ41Hez4g?%O}o z`6R42k=L;x3hd`%j_$e!eE-@-f#!{HZ0D^J7n8$vC;vhRvw#hWb^`pojgGrQBtK=f|QK zSj6nnG^+l#?V5|6F-s2R?dz!W`OR6CKO#jdN>9@@?Hva1K9x@Amf)=|ELpZq)>WG%g1Gc%_pJa| zEQyR7J|OJGmzLNBA*K2DS_7>O-PZ=IPigXjO|&qvizwL(yJWjy0r+kOkNH?5MUzJ8 z;4Unn!Ccq1(P@7nyGtTZ@2umegO)N%n+g(F4O}Or4wzgg@VW=^T`wUM&)KQZ-k)0* z$9zXCm(RW%Jv}-e!I_i;4RJqE0XPzwkaiP4t(o1Rd22UlA7_Gxu#wg8Ju6;m*Y0+(<-(_%^vt!X<)5pJo zJ)xRgziwL0{!Zb7|1!ic>QlY4GUXHp5u^_#R7rVlwW)5e9@9SV@K*1tb$yBb4IdHK z>HD;15k{elQ7byH7sEt?Ba^Ro+X(LCFi(_&k|~Q&S$4DN>9$^tfIs)ZY;~(uRgKpA zWR6hkiKh}6&v=Opp@hg--k5VNJnsa{hR5xZOiIk(V4%_w}ll-pWCc|dHxGkk}Tyk?h?)3x`IeXH>dsLj(#*M%tb#0*BcFF`_Bn&Pr{fG+AmNF63@E(b<1 zJKDdLTX@TnTh_rpZX!4fBsBG_QZ^CUyPmxsVra%x@^J<*YodqIlliXo(4-{YtGEo( zhVRc5+dcnlF)IE<8mfu?oK~_JRm6t5!9!rp!kvd)4e>N-ca$HgqBRXL91`$#BfrJT z@&pIQ&QUkkPWPbZN6~Y@CZFLiv%W96A>HHuj)nd;3xXhSjI-8R*;4lVw0eb^X1&66 z|1U-8RmkYrs+(u8QiR)XPhZb3){ZFR@s6N<#%3t2A=K5c>CS z(Z-F~{CkmB3%6*YUfyoa7Q;q84tVC*9SFTe+8TOyh(j|jS!%S9sQkKIb-MW6+psr6 zZB;)e!MRM@1zeW9U|KkHq!KFoyLSxjL*3SP;UQ4|(<0_uw(O?bmlS@)iI`pf0yFkm z=Z<#~zV=Y^Qi#Z&D5Dw0QmTk?8Dj)V)oc4Bg+Gn!3?J(7hNQ3LJyR8z$gj)JcYU$= z%ft35pczWL((>@gokjVJxu;#5xQ$BXi%p$SBQomGelZ7_ut`Ir&9XWNx0Swt1kaRE zw``?>`1fVWS2G#?)VLi!5kxi=@w242_cQ7_1@MJtDglyv>bJR#HuXWmrgQ(wX%UC> zD1Fjo{;K@fzvKBUOJ2>Cm7Q%5hIm>4T^`||Tgu&g zi-xw-%c+B4#KNPDB!iwAfe_9?eel^w^5dtmg$JYB8zVg{oi}e<>%U;a6M$@}9p(t%r%*FHq}84?gc4G=u#F{-jhS z;tPH1;I!SWEO{AdPZ;5v-sc1L5(J|ccIZsTshvD_M&vwj@cA78j=VBlX8ujvU*H~pU}GmX&QxB=#}0P`J}GaHDZg+=FJRx~nl zZ^v-sZcky@p!0pAM;DvrSMu-<5*hHt2y25p0t*s9fCvh)6kEA0iWk&Wvb+->rICdK za2JZdn!Hg;c^^Wo3KkNkiP{SfjPA=BAHb_h3)z)jvJ^BXMZXbDY-lDMWhgTk5%^0Z zdgpB%g`G@pAT55?X0b1s8cdfCW5?$pavWSj{UV$c5w8Hk`zKYmJI3b_=Pr(IL8U1}va z8D{y^?NK6wJ8=Dd74b=Ey>s(EC?!QmAAveZ`<5@a^$@Di<+i+!$oQL5Vs>z3NIH-C z{@;vi!wpePoD+?sBX%c>19rSdYixu0pQx=qwKJxR@1D#rx%?ehhVJ>N6GDo$-G|10?H4pMFHs^I70jT+zX}5*AE2=7Yv`aZ?&w`Un<&l_>n!p_C*SgY+ zAqL{c6~z{`3fiOqx(Uz*t zAfXbbBjaTxlI0aDy3cFxSqaTxeM036S2eD$7xS3liT5G*UEBJ?-QJmv6oloi(sdA913k!yn`~p_@R<8$908i3 zNir(}V=0=zBMre?TxO`&uNVL>ivHmQkc@9=T=34V-O=a4++a|kt?SpcCl@18rjekG z*JsLNGj%kmeRi+UfWg|&4!5x3J|j9U;mjM^ey_e{NbaI1mCp%Z2;!nD&^f40`l6rF ztV}A@mC$NjcZ$Cd=_-i)_AbOT9>J9oo&Q~+RNs{d#{z}$MLAIG-BWq~UTeMHSJa(; z(M`<#2;ePHSd%P42;a&JapP7)Q~g-mUF+#rY;RcgXg-=alcnS55i#NB7@y1%JgqKl zW;qVKqx?+(Sd!<6Qdd@siL)myz{pK-<{gqzRt>-wH&_7tDEb@m9=L5)KGpk_tf5lr zr5Rn!i=qFa_F*v&tQkQ12>o(S`_j;Z{>nX6b3ga%tI9xa2t4_R(BX zP)=iUP4)Y z)iB9nfMn}c6ZcPNXp$Mgn-1p7Lqh$x_z#GbO_Cp?wD~_trsAG(`=+FBAJh8$M&{s6 z_W`jo$30wOP}8gmI?&)wf>MKN$`ojPjnP&n?xx8VqA1$X~SCmTIe(UJROh zg!*$+Z3V=RwHb}BQk_Ll0`BA^KhCT3tC_2yIM|1;uQ);ky%mX65-0Lxb(k~uWj^cj zxwGq#K9c=GUB?v7zjli?CTd%#pStvV-yV^yz6|pw9&arO_21~K!fpLVm)37&4aVyx zeTH#Lr!)bcVA67^!zgKjD*m#ZI!IUexpF=+2S-eJEZ(sFLNmRnz4nuR_1+pct=YJ| zoUs>gc+HHX+Ux;Q92Gmrs;wTv>%iq8^G%uejzFf;QV6w*%B9@E4)M-`NO@$P{O8#f z_^Nnhi03dx3q(%70%w~(qa)I)t`{UAH zoG^79wy9TUAlwiSo}z1b-jt*@bG*kIBe#~n+amz1-AkY?fE}bRUzwQ#P};Q@iln_jLHit;N`d@mTZi*W;_T9$t}hh zxzfO@L9k>IK!cQk@mN@6>>gvJe)yspkur@8eLVlQJ{9r^*lkXhqN?}$AZm}N7nwda z9bEubow-Ov3Ud_vW3k$Df$#bD_yX(}tC>%4I)avv1XB786e$g5grCd1^+RR(ThS*! z4y|1Xb3NVkIQ1L%9SuSU!!+=Q2t#|ABlw8=dd3A9lh1vh;y|&j{IlHR{Mn(|YYXBp z3469tqa>1m{#i{nEe{;M3Mmy$b@NnZ1y!&`x32O%{=RPJaK5Kui#Q%gCGQBy9L?`S z8$UzXmx8~TXOx~dUl7F(va6RN zD0dLz_LwkP|?ZCa*s!rLij_tJFL7^I`3utr!ajF3Kg?B%vJ ze5%;)s}Dz%u&TVVGLPK1&fsoaEtPA&(F&!DI$lD@b`l@X&SZc^YFr-DyBDFtm?W@D zgKrX401h(;ydIp!yGwdpG3V7V!qW5G%INVAUpWtAd-K9|#u(@kE99_9J}htfgeaot z4$Tc&u1^xIdW%nH!P9%>#4$!Aj!m6OLs`3gv}Q#_kDjP$Q3apVT5WNH3}G|n$}0oB z4H_ANWtst8Nen6DkxBQ9-7X^5zVfcbKBge8wDEbZ#ki((?@OChh?|B=%~bw_WN z-`xL>eXCxWyzlLOd3vHr7fNJ~#fCj!HIA;AZeI>X$S z0M86NEjtI@jDQ|3 z;QSO6__F3(2^WKp+gU!xbUo`L9AMic(qr4q_zOH{LSMYMT$BYRm?cYOs6s!c*L;%K z`+PYB%HSz(%=yXKi`*Ts7_GRtA?-5RoqLMyOsFLpHLHX@1qc`nMmdHfEzWAe69wRj zx9*tni|U23i$g#+%x?N7N<3GROUt!5{4pFf_e=@JihI(~QRhcoaO?$TbS%0&i>~PM zRR|vtl@`QlQ^ia8Aj%`Hg;Sb*6)wX|NAg>t`6XwDiAdiH3}uh%2%^F@_^b zN3wuqz5rfdV;&J?HPQAqYKg|5JRWdL2*4p83;j({(O2@;3=_Rn(#tijL!#LjHDs@_ z^{9{1OGRpzB34P1Z+PFl^hnS01y-TCF;yPvqA{ADs`F!_t@%cRfHT79)u4YB^?Gs8 zoU<07QPIa?AmUd2)tU4w9s1-(=##|71d!ENhU);kI$>S7V;`>{W*xPMpLNT3CAwkh z9BB)~C39e_7RVdmIhd(a52a6o2*Up7$om0q5+9hlT?XwCZ)SfLJbtzqe*H{A?`;~0 zf8W}J=89EOmA}4%h(cQfqI*<(m5^NWSqTY`Nl@&!+ldUbOT z%EkK!%;Dgcq5zJ1DNTF_^B_mSuP0iy*<+`-5(d2p$faP@K97tEl~CoWe8;J}$CL;* zBgI`JEMPbj6;PF9UeAaCe-1hhPjN;1w{mF=ZOdB+$_%~EypHYz^v8oS}`B{$3 zO0o8Rc#fLBqk-=mGem`mMsLyeK(q-HYQ zH$7IJf`o|R2G7S+z%5LB3I|00bT;|}xKSiX$^~@1cdFKZvf1Cx{ERXB`lb4SlPHjf z&{>NSJxq8@Ao*ixsA1eLH@`Sbo^%beK=HNf-LNb@^W4DV_~tudL314E!KJzzG#l); z&tv{xFoVR6XnvWKuCrh<98LD?LtF8;ZI><&&DCNel}#x!NXTBaD6pqeG2aIn{TuAJ z&8U@`x;HC85*NV6IlYv7|5Y49;m_dbTc?$6$H`!g3V>};HbD#OgiOAHDX{<8Y|mdI zpa{z7is(S#N6Gwd`j1%vnr1GAK~R|ZjT|=yYM;t_GsA0rCIH4<9^}}##p-htu$14b zX&<%}-Ks^4D3@%gT+N&IuyAD1BLzZK)f0JF@G;H|kjes`H+e+vIoq@}StWh?1%y4+ z_j;&<_h}LJ=JQ!y@hF7U zGIzy|JmQv&|-qIcp(ikH^W4?8ioG;rl`RH2Oj11+Vv{!+!3Yc1e9t5 zv&o~E%|IWTL;zl^5tbWJ*8@&upNC^}vQ5&6KmOQnVdsmqiiV#thcx&i!}q+>$#QRC6m>Nuhdj3YhVvttLY7fHDW2ChP+x(ox%`e2+zOwX zl<3k)Mz2BVJ6BVxVd}?W2oa%NJ69Yon+x2B13LaPH$ycuWj)fyvL7qv2!A}lL zB>)HxUZPIPWV+P$`*j|P66o;hB4dbt=SA*aP6;cB@_IzGO4gd6WS-Q$ zR-yE1gtH|8v@>#<7^(TqqoBDB%usjfp&myv`>XqpLxZeF{Bv;tw)Dx+JFj<}hepV` z-z(^iQo|lRwB1qIvF-$v*)_LVr6WUjn|l#iOBSxlSMoN=qp$a0`ib{Ck0ByzNM$uV zK1eF@Ti9~MGLGDAxFuym9BEdV``vnU+&W}EYQlVa$TEDU)o=709(lZs%G>GUI?p8C zV+r6cVEpif5NC#_eH1)^U;im*fT#^6WRmUgTvQRjv+d*=rz%F3NIH8i7)y~YUyE$| zs)t}PQacXkjKIci`j1UT5vTQtfhd@K42X0fgE>Xk##{|~cQVl`27(vvF(5F35185D zf9o_f_=^%rLuOX0a1Gw%?W5bYS3|r%JitxSX}9+e%)N-622;U%rB+^akd}pzM62u}`7Db|$3YKg?a|p5m;*VT=@12Z z2t@ioLv1++esJ7LJsZVb6i~@jaik(SLF>9?+Ma{wBm=-xa)B}Xg3QUomUylAy5Sr2 z9X{O=hXa;o>HH{9u$fd{wF!Zst*8-$GO#M(1R5?~F767ws?G$y4fP5P{CLOC3%6Z> zA65UW$_;z2s}`qWp89_q{V8*0GCTO6Cw{(>+!aAx*r_dJyleP8L%=b?92c1V3d&#H zj62w`!_E6HA)d#^#c8EC{{iD#RwIffQkaYU|C*qAla^uzhRMXP|D+2C*V6p^&c(vv z;i11ux+1~vxiJ0u z|M(Jjy?n;qu=mH;}_xf)5h7>CnJ4~V`v zzBOsc=rxlAd60-6>0|26BQPQ#XSvoo&1atu1m-e*D!|!BK8zBK>nup;Zf2wiO+@TSq$o2>`Ag+mj!MrEs6|`tJo(1yaVO2|LZ*2q+Zi2RvNsc;XjHN zp5;h6C+=|z=&Ct{#XqI3wgZLmtx3hiKihEa{oO|2AfV4+15y=`GLFG961+8`U>b*p zZQ1BQCvghkFD|xcPwkHPEY2j(e$Srk9e;fuw39b`wujmm>ziAe0@`o>V?;2B{~tp} zN_nIzL_)kLqpADy z%-{W6duOQ$4E`V4H`hrxaAQAQr&g|}j!Xn!4LGKYfWu8zp^}FUBxfv2wu1LjtnPiI zoP*v@bR26N@Qn^wXP z+1<6F224QXR^^D;)S)h^6N|;=fI>p>CpPi<``NRrzr&1NIhm%{Tqit~`C~U-Cv;Qo zPu-u?qYGKPovOzyr^Je#V6gE)K@Wq`iHV39cgaZbPO;{BWNBoeO~cDw?*P=;;1%eL zB9zw%i>JJ2mnW3tK;DpUio6P?2zi(j=sM}AP96I=8x#6xt7e)qoB|#-vRYLQ?&lz^ zM)Kah2DELZkj7{iZ;sb*`(c;U)4ut*j`Z(+k{5-ZhizX5>GjepE$kc&epHZ=lJxvT z2C>CLzb(J?BdmoGUgSYyY_{4TbNo)#+eTBvv{xrSIR9!e&q|lIDL`>U=wIXKZuO1V1U$!N8Nl!a$qvKi927 zyvbsF=$&+E2_3S@)U|Y*?TkP^5rcOps6gqKw)sxqTY?#0gyBQc;?7xgrhPA+w5{=Pfw z(%0xoA$59WQC?lG)jbNUm^OlmwN0e zoWT8>{tF+L_l&549s)TJ^WvP{8aYkk=|K}rz5w@2IB`JLset78F%K~@aX9tljT|rI z`xXW@=Kt+BH|!Dg1&#t-#FZsjDJLFxsvg;_+at$O%2!dPu8>52D*@>((ghaU0I#45 z89LRMOeyJgrs%tS6lWqQfMXRfxlX#nFu$s6h3#8`*AT+2wSP!bu&{V#1KcP4TkHdN z6s#fjPVBUu5JKm)a9G{YW>H4hs%dNkH$+g)DrTrCP+-faG)I6NdR)Epb6#cc%T!() zk#*2Oy(7PcfvO@FIY6X0V0=*80ZF&w$YPC9MVwW+b`1<3qpU8+R<=>4rd>j)BDz$V zKW)I?>qq_e(a(9Mt6Ap*^|7#<=RBPMoxp}b>WL)dHrG+B6noTeh~W{f3QR_ZHLrE# zXtKdEiAbH|1P;o`!)$_qqQribePQwl{21KJ&cZ78eoji3y9p7Te-(wQx(S5xF379Z zOrMd>mJTEXpQt%udqHyu1!E()&7zpuw5e;nCT;2n9905)jk!jTb-E4}I!gXz4Ws{- zK8u|^I+w?m=WOh4*e6)g?LT0YtUkPX9M7bHuY>{|h(csln0&xrP+8fxk{NL2+O=zc z?XJ05#_5U}o|VB51eJQ1*FQklOD++ps0hRof|Ai0Eeb+I2UvcPrqKBWm)>k>sdhRBcM&GM)*{zBnak&~8hBrWh63Afz>T z!-E$3ky3KP@2-!JPg_(z1OzYTFa5P@%Nc<7SC*Y=`A$QPVa<7dt3tM!T+}|>Ybl;KdCSPXAqD?DW@(paYBp7l3yFiht*R9@9#wu)L@tp1+01Xo z%{nicJbq;NGc)QIogRZ@@tCE4*D*)JI~b>zqoeAopPW3Hr3D73F=$(v_$g|*AVV)AGDFa2_nU`)Jh&3^| zUh9=_SK}q28#XWcOE-I30{<&em`C@a~}hO})<|B;iCO|DHj?3@VN z4NFOvN#LH1(PI_&->s>FWl6ZDV5{NxZ$6{krI>Qbjkt1tJ)0PM|7@>yDl%_agC`kA z?X8eE;eU(NVNvs)+>^-{plv9U$^ItmweSJRH|}tn{zsk2+l9`jQAL3k#hWr-zO>=R zB;vk;Fl~5$=DcsfrI&BjX=P1+cVbb-B!L+S&cG`4ZG;e*;Liu~LHSlQDmtAv(5!D= zQx?dbCMA2%3qZ7rLn`dy4(NhblE_!}(5!;RA4WBM85zI3Q!1C&hQGDKN^XfcXTCqw zS$L*%Da7s}{<4Qq#Y2k2UYh>_-lAJFc&yp)>Z~U`s2{G=da?zB$_5OL2^1LyF=5qU z4L`m*XO8iSSxc0--^FI9AMcj3e@H1&@Tq_Uk*Eus=DPvOEdZM6-r%>6`(@4P9qPfD zaw>5;D50M^t5A(nc{ruM{8I2#pGO4s&p(~k#ky#$`x~@RHLmBTx^gunN?On4AoLKb%*x-EkJO+=uIg+e8FIyLU)ULFN#xB^pKC37 zD(tmnMkv6t@aE*?fKs+(j;LqA~Sxw-vSfRtvA-shr(2{$f*$p>n{11@gx8L5r;u zCfEvh`+gy(<_lYP92a~K#^f_+2Ddi(U3zjhp(rNpZdDa0<8&QtHgYaW&#ZMZ*?-P2U(Bd0$dT;VrzvK z-wQV|Y7}J3C|2UV3M@#cwg2+|e0hMzH<8ZGQc8)y4+nE?5k+XnOP`aZr(e8NE=>Bb zBn8S+vh-mv5j2WkHM5|Lo5Z>PlqB$-xdgNUfZN2}dJ8T{JVIw;4R`y!9xGU7GG+_I z9{{0oLd|Z66FcG<<=*%^H970bX(8@j7wF{Z%RDhG1o9i3|G87Gv{f-7ME2ic<*e&X z_+hbI?M75kd&|O$FU~G0T&jaa7C0IEowds=0nuSiV%wul4=o>>?Fci?r4xxC8WP^^ z`ko@{veXQZXCH;vQ&-u-^t z%A$J!TC=uCHD3NLZ3v)*i{M~Qn+o+({Zjv{f+mWvBxU(8acm0`n5>Zh>) z?mSlmh@+pOl@u*>5Zg;i9cC7hk#eR(HgD7L#-pjwyM z+(1zjO=Gw1NoCO#uH&)bb1ODFUXWY!w>AI}NoIwseW`yFD5rXH+UHh9j3V|bAz3=% zlB-E>u#00NgD0BDKB@8S7m?9=&#jAnZ09SNz~;=p0bt3Rpv1d59}l_6uUy_d)bi6j z3)bfI2;zuTqK;nD6dxCJsA-O?Kx#(G$+cye8i%F$Jt%Y(hkngSNe{?LdKcd@TmN_H zHR=7(fh~*&K{!}1oWo8AF;Hh~?SF!r-65(`bKWS)8UrGEe+Y&hJPyWb%Tr}_Mq z-y)p)xfc=LKY0+F0-)AiFRfqjAgoQ#HwUIGAm51CT|^#vGR?hVmVbm(>U{mZn>x~4 zVCG%=i(sx!V%RsX`dZaV1#`Wd*xD%YW9%ali9fod0iY6<=wAKv{SOU>uCvm=Y2J4l zIk}7zSilM{H;V7dHuYyYzbG#E8eXI+yZWcaW~2-y)iHj6P=Y80zRE>kngr$E-d>3+6SKNg0cht+&47ZL4Ke=_MH zgs9FHfAYX$(uXapWyh-gh#C^Y3UUR4=r3+%@kYtCR@a+Lags3X)m+*G6y#2~Z#bwiv*QSh{T30aWs z_ehH95bI`U{MNEmu zrxxGQt(e=5ElPj77##}07H`et-MNiwM{rWn;!H_4_DXxWDHSQ=oYLgJqt46xkrC`LLQe45om|%r1bMOo@U@`) zL@8HK5h}*ez^9O>4aIy4j0*>*SF=B@j{9b7vekm`oL0fOwM)=F48#v4Ricm19j<-N z*jGXB{?T|e^==WgE_Fw{xOJED)o6{6X}mHB9@*ae18pPWG()yOk|{ZSum?|f2f3b~ z5A#wMagRMhLI)atI$ar9)RZ|X1B}qxOoTviCble_8j{4!y9PLjHoWxdE>&ABaf0xW z0yrO6^yw6(Z~UR9tQlrGVa5GM=@~lf^^C>TME{WBtvddrZ#je?Ozcb82d8&HEk{GW z*8->g>+qk!x&nLh+e?CFxAebPz*+6&4f<(A>ckCu-)Jd>;8v$P;hE}KZStQ{#5k42 z=tv;N$RLrbZ{r#8OkNwSq+TiSM)u>jtHq-t38N6;7);{t6+ydSzkaQ8&8*sRTbIPL zI%Hh1chUN0qlP*^Y@Zd!Ydxr8Xz*y-D+XU|(S0sgZT(A-7-E39-_n1NHNX8AX%jR7AOfHXY82&Vb3P%E10oLBsuu3t!obgn)Bg= zexWHtqA+(#z@o2Sqbm@Sqy`CaPxd8IR@vNr54@r~-Ze^+U1SjcK_Q}1z8?g!cFh7u zfkyH2$saeaI^_Jy58qs>)cI=Prak%GShXo&Ht=A3g%5&THP#Qm`1wbIjJD;N%C>)ef7P+ zw7eWaJroa!8e1%>lf<}sw+9yAJfTySG*r>xs2 z7N9tq0uxu!`PFq0|F>tP?Q&v|PG*slz@M}cz70b@v4=f`IuTOCe_2_6n^CvAw%!Aw z-t%W8rx3^IfO-tA)Qk$#2Am6~3oe-SxIUC^h$h=_?Ut7vOe10{l{i zoSI)2@v`3?GBh|JD|PoI*jPhN4IDFp9D}DWM8B@Cq_m(?a5UcsX0v$?`L>h5fg|$D zH7J()P4=D0(1NFJ-OpH_lDG&N&nUry;AF(_#yKtUvY!QAwyA@$UUK<+Bcv%RJ zWre-ROD%6fSf7vLm|=Y(J+3ACcNbXmj7Mxsme*{CB#@l?pCPj^JDo6>_j4Ft-nT$a zp~FT~7X1`7Zn+ENG3by7t>eb-eoZgjj8<;Eu0 zzJ{GWC*$8Hysv3wXX<0MMC-A%NH>gZa+dr@* zPZR_Rlv-~aeChX=!c*aAhJg<7OVlVfGU`H{#S7Xt%fja{fMlA|v96o^$2XnxBCqO; zL=^z~n&)T|8!3RHO>3XXVSR`9N;mIjy<=x9z^g_T?v>Ar(_Oi(HPI`y9r%zGJ#enG zg3ry|Z$_NU_qz&uHd{Z-4gha21SvaR$e@p4#hq!+oZwjG6&aHOeMLaW+QT>5Y-9}} zz}GiCpvyKIdb@)q@6D&{9WN60cG?Go=b~~PzMq|)t&)`WCGEAi?SU_r_99v}aC^zf zjVeptR9n*Vm?D92=fT0xH>`C8`21+RIijxMT{>oI23vXY;vWd1ll$Jl$ntoC4dV3v z?ET4Mc28l|pW@+(=gmU}!Pr}%A^jxE>FP5K(EJN@D|a5~61ZY-p=Tt3t33@`oQ$~`+B`(dBrY2D$qzz>=eAPh`9fNnz_Jk-{;F(u6qOJ6o*2n3sUNHQvJ zsGq!!aoTT*c^l&7%ZX=7a)P^8FTEzWg#i(wYG7x3HoNH)S=gcEa#o!LB`XFk`Fo6DBi*c=G@#JYqPQGpytENxHxpKdnff)w(H$3+Z5hZp5|3R;&A<)jp+T>V&qJ@7ZQEJntI9-4D)QXGU<7 zAe8C7I$H9}1S6^n@RaN#>7X74BA1=V#wwRBH)8Pqnl0+3KFu_j5tJ-@%U?EeBdmrE z$XkBijy6{)+#5a}Pc;yeF&j}7S0E2%vG4DEd4Mu#9Q1goCuZKZtZ0it*;jS^Ao`I+ z*>3mxtm3chlgB?IzX>Nq+|p%UrIVeO%sK=YWFw6xeg`+RYa2PmnO0z2tvSVst{5cz zroR3ArG;G1Ob;k|^}p>04JhKh)b;BKB7en?bTFtH6y}~xn#Co+-f6tT_-BhDo5Ylf zL2eeZiHZ?RziiW4z^pS_rB$yKk?aXQJHB|C1 zx;DAwob^>DgEwl+yQDhF%kTEcreCIZfEGI}Nbr4hxsPb-Ppo&Q;)dWsP>fKCSFWFH zf478)=qcu(+eLi@RYyiu`|1OMjbfdoSTe*Ey=P6ev_A?IdRqawPt&#h^QbGJ6bmK~maKL=cjKL{QPwb!FSb@!Kv zj|}g>*=#8Rpk@w;YEmpz`Y~bT&8(0VVo=I}7fUSrH~Xla0Z>!f$Zo3<%xmIe3_Q4* zHgl~9qQXX3NJKSGC#U6yoj&_nzsUYNXD>(o)mjb08u<4h93vv|G0|WXE#^7I2~~&s z6Y)Q&mr-FGl}`!3EXyE?X-CuvBpup5bo>wruwZ7|6v0Pw2Y)W4&OCJ|=olfqdlxd4 z{O>|x;NDFhr?1Izd#;o8gGKvz!_VJfI+e^nMpwogO2c+b3J@ z-)#RWd5AJAz8DNNW_kcPk1#x#LGQa7a4c@@m&aX9-kp3MzxJcx!OZ6Alo&q5DC8g0 zV*biq= z{GU^}q&fe03K!!4_JYAKpFVve>G1mfk>BFt5HLj{l2V(?pZN^v=LK!RAJM0`XtTxy zRDS*Hg<6qu^sWzxZ9cum^7}l?{K46UlZBa?nT^S7fF>bI>v~W0XqJOp?Vo2a&ro4p z)Hnk2R&aP+{2BdJ?@UXHFmBNk?Ov$y-|@fHPX3VrJ`y-E_LW9 z+3u^pp^nZNtM@IQF7@@@x{h<70=z>S*|f`*wg}3Aoq;Ra5B;4Yl}VQ7Pdxo1qj>`8 zNUH|(V>0!n>Me%4@?FBd`^fou_8I-kS5^z4CcWIKNq03fTlCJZ!k%O$-N2dpvha#7 zt0QjowUaUB*QK6)`5)AlZ@ry=EE~w)#IZy8rvdPl*UyH*Iw1WV+~$UA`1& zg0l#4DH(xi>_88No_!)MJ^lb1iUCgEStj`6(8C4niesPWa(VS;DD~|5Ygw?kt*6&s zKRCW97tG|vI_Z56d$7W$DU|m1_Wt5?_vvF!zvhSZ^z^(d6%`RVBJ6nD5c!D9px$%m zmFZ`L`T(9Er`L4;{R@1W=jJ$es0sT*IsWJM!4r?#tHjQ2Grwu|rikvv7=q7oFdMkP zpO3s~WR^Jwc9|?X(?=PeFzj`p4F#CzCJ{39zf$OrI2M$Rz<$2yP0U*suG#)yK zzX;^Le$3WM_TOv@fjAO?Zp#TfZs`k#BP#O(ZlkXNkDXj0g}vd=C}l6B4C8vS)207{ zz7WL6I?C9Bdnq^ZdI?F?VFmxS!3}~OU8Rxub;7R*`s3E{96bQEMm#AhxXS^$$%v>h zG~2b$UWv?e@F$Z>GOJj9g4#C`YY@df2vaD%W&U565m3xru_nlB-~AI|Q4Ts@Ks{ly zb!-5ILcgzTO^5MY^!gqoZ~0x;?QCrMib)>Lvh2i>NGb$e-19%1r;pjY57*$S;vy2& z2jma@0Ml0vKql& zia$?ivw1u)=rrk-DNIS!OV}Xg(+kK?z>L!#?dgG%BxcX7F_{vZkUt##m61X++^qpi zlvvQPZ3@IKKqlaOVJn>GxAV|lYNp4x2ZU4N;H-#UhC5qC^_%@!@;j|k{=(Cz z{XRRJBr~*!W--U&txY)i$ zA7D%#rWl3=Yf;Yq8OywUSo-uks_~m%1R9G0;qndcDPG3EE-v<5yP=ZAf1kimlzth! zL4iwjO7^VaU^mJYn)J_%jEGRg@*P$mYr%rMOI`9EQ4v)dw9n0%_->ufMEzaumHsTx zE%~iBA1QBc|D%qr_AL3Bii=L?aUVyR5Beufb*eM4G<$|e(V%1SI&>`(|8sKeFhGK6~D zR73r8Ib{+g z1fdT=x$`2(!t5+Vn)tcE#adQ61x4iHdT@5Y?&XCd)~b4Z#)?4<{HW|I0%D;XH#e0Z zs8fzCms_##mZ1LTowNPcbYep559-p%x(5nX0^GLI$&}#)a@RWcuwtb=dSco>aE*gs z*eWLOUDdRwlv^@2MI^BcGpG4k4PaW`8V&rgBru=>k|^e)3FJaoX~nkcmbm3WR8q#o zPSc|#Mv7Kw!05#ENb##vI;}xd4{+}8rn~f}HP&Ep zESKSP?SR{H>nrv-{|VA^3epkgxnj*%N5DTK!|Dn87hQWUdEBH-I`JR(>)yZd^uq~- zE0pE=)|9-XFODi9>bg9*mNnwO+rqu2pMxs%S*auzTxV4K*obtwBX!>4dAd>kC2ey< zcCE$wjcd>cuU8cy7D%2fw0_LL1zp^U>%a=ccB3Z%Hv zFU2i>I;V5?XFk1cdfH%kPzx^`q!m6t!lFDk$*H>N>Kc@ZdQw&L-oEmaNzlc|^8aFh z6U!Rut^7yJU$goYpD?s5&?OkLkjKGH#u?n$tF6;9<4rJg+Ilo$Ww_u!;`-o91~EGb zw!(Ye5m7310k>n)#U;;WQh$M1b=GmYAFzlT(+>|jRgT`vse1bHRB3Z#<0j!p^8pYl z=Cf3sU!T*gK1m}2$}8vcUxiSv!M_4Vp#Ce}eSCf&%o;D~&|99O^mjq6gi zm!ah~H6llvOBj8RRi?%E+4+XUN;4s*p*nySqJ3xF5-6TStwhC{UUEe-ZUpi-Ie_Mw zjo(yA$e5DP(`jYnmi2wPsCm#6WMiO(TP5p|ee(ph z5vr0?jZLNRd*|=Wo%<9)Q(1 znV**OMxE3NtGDr+DrK&;YU-(uKfSSPpfX%u9e2l-IDuW^%MvHy7EXSO5@;uHX!tZ+ z!f<2LJp5Nv>nZB0ZPU_b5LoGA&bXGdo>+`zFVcU={`lF)y_Sx#gv6Yx(_{PT9XqUt z6S3*ttv_yxW52+5*lijZT%l~N4QY?jPMig}4|C9wesrf%lcugou9H#7M{bkQzNxSk zyiPH;TYAsvO)tf`)u(Z{U;1fsOfl|0M*9>zqlaid286othDO&kA3ykEh0Cd9$7P--JBr;#jU2KXd3mN-?R=8uA!EX}CeoDcwRc>X+j2H_2 zlDuE|n($j(Dlan1B}er@rd@-=0iw#5Kp;OzklTG&Aj&9XhIN7Z^TP=p74-4VaL?> zz1uqy+;GFm4_6A#(qX}u9e)PzrI$Er_1x=mxG7n$H!uC?2S)$EsB#(P)ZOUQ`nY9w zvD;VSsAA{D6w}5;8So>o3oBB>n|N7wZpsZT(+E3$&Ga*~N`Y&oIkl+K20gGzER+wO zn($h6%9EDbO8PQ-H&7}6xkxhHo33`O3B*)7zY|5L*N@BqAoMzoDH(Qb`pL{kU z&*O9cI%bJ}z|b=;Yza`ST~NOQdO$ScJ4zJDeT?2**n~{P5zG0g%Xt@BNmj`1jfOGBTP%4Wox_LYY( zvOd~9EoA4u@cu^kwVnQz=a4q;LR{dF#z1}f>&_AAySY1-|iM4)* zpk;3DSJcE&;V1(K_33x0@crts6%}u%f@F}flSZ2tCyxVN&9R@{-r1Lci*+568Ke&b zTwe5&IC0I9>C<7ehyXW$-e`A&Tk+s`A#$e$TYGttj|_)J?6^^A&QPb4Jmqx{(SmT9 zMgf;s`(em_DpE_jXGrNDv0Tj+(R{DdnY?4z!!tguAnTd6s@t^5Lz*0sz-6 zU`o@-W;!A2;sknf{`b_Xe?_bzE3k{=bQ8B}5uA{7=EC`$!axmHp-#=hv~}}w*aC~d z!`R7>M$e|wjHRxC)6H|>CyoNenvY!jw?PH61Kw>nt?|!JnvM>%_4J^=o-hWu6u@mL z!Zsk2UeOljUZ^4~(SQNNAUa`(N)&S^;YR8Z86-v1nAV%eJC%$$1IEd1pxSyhne~8>X@^_$4nV*KsHdI zYb}&7^aXl!=@RAYb8jsE3Jxz+lf$Ft9~p>yK7aLnmnqzzm|yw+jz9~qc}Kodk5#vE zRx}$r3la|pJ#$&%gJ5IBt^zv0Rd+7_KMLQNSufQM4py#m{W4b`w(jh6Pj3fl+v#;O zqo|D6Vl-fKlAN-Fh(K%vkp7^8P+dMaOP7BfKk+BE7t!{rWflh5L$7uZtb~Zz7&NPQiaogq-?wu>AwhY;rdKl z6hR;8ErPm7llBp{n23m~XtJ@e7+K--150a^odFeNDaxKV3s6CP*X6rRH>gS*K(W|P ztN=jW6d7{qt6gb7g4Lv{KTQqx2Ug#4UHWFc5VXg8k37VzE1yokbG#H)RB>qVOHObC zZ1bNjDSF=;C6)^LM#|roRgvjk1B#e$;Ofy9Pp+kJ@E)F+$^ldnWvUq?Hy+^nQaU;M zoiQxSW_)$HJ>TYli9`N_$|%=e2mx%#asOBF6Xg}zC%(%cVl<=W+Qto_#|BqTj`=7^ zH4n9UxEH!~o-+YWPv9VL(cNv4Q*AyPdwTwiyLV%A%dE5mYll~>wxAAS@%>4oNp;@8!a~6Mc6e?>;p<(1V-2eW#2F(!EBKym744*8CPFn4a8 zzyUThkz_ZMvTDpzUS_tp!Z9IPFa+AICc#?H?4Ds)K1F0WltjKojngf(D@nM}5!mdv z=g$Zm_i8avVLj83?C{?CkPjGd94qV0WVA9yhCmK)`6XLzv3)OS3cj6Id+2m1}dLG9S`Q^ohZP*ICYvDw*azXgID+6 zVciGBA4#aEFALKq*!E8#m5TTztJ9ikfB$2E3(3*==%~pcq$js%8NYDV1{&H(!s#a8 zrTttr7#4N@@!tQ@0{F{M*ojeubdGo`})j7djY?tCF12C%w_BAqd}?T!It*Hy+339x9&0B z&69e+s_|5Oe0rMF2{b3c77Y;N)$26VhnoGt4oPDbgoxe(sQw@y>JfM>>kFro zOKXDfLvm%vo$RdUou7+hp9eDN!ZBY?>k|c6v3~9I1yq+^#|472*|hhdp8#(8 zbx?aN88=e%>VEnXD7XTJ~G=_DqZ!(qU(eLIK$$54Z&KMpl^}4m6|T&HoH7QUzig+d~=76 z0wQgfaL{`c@HXtS7$X%!ClQV<1=^xnkJB$YnyAzN_@+*6`Zv;m;)=`l`MaS0NXPvq z3jXBPWWbcwmFw>Fn2(=DZuXzPuR6iZ0s74FkEPP= zHoS6lT~0a`C;?&!vkoFKrV)?&zWL+rEFh4=sTCC$G&SIhZ@lYPaN>I=KQed7!lHvq z^BL$k^PFAR_vOLDgT;%27Twh!RIWZze>NF|hNwO= zdqEOizj|@hbXEgYm`~v3`gs#2oj-rW(D3Rv*WfBdbU_efd}CE!=VE?@i@ zz}>fXV%wLxh-|nau$oWoa;L4`K*%&zcu}7gG%#P2j5`olwGwPfy{GL$kM(665$}6H z)I_~)Vb@!|iT$_{!NealIJJZ4n|~hhnn4L+Iy_(%_!;qDzTVI*r4+fRECVh~Nm_V? zg#Kg-Y{6d@f^=u3MgfuZ{RBSfhUzy3}!onX8ww^44A5nTPJx;kh{jIf{ zqq5=V*rxn!_b6hWt=l8k$wmeM^6_Q`Z*xvVHMt@K+9st_OHNp|}IV zTKuom$HjaO0S`-Gz*N#*9xKRwz4J?h^S($}I4uAZs-Oc1l%v8YbOxbS$35>NxliLA3(M{Ra95t!^Nml?xt) z<6%TZUBwsP1fo9NxvRB$@9@XdY)1jz_WJFNAZ*f4L25R;Rc~b;ka?=9iTZRMirJ5P zon9ST_2}@txu_YkzRwp)3L(ahIoQw9KWZk6rW2HsN=L&h!_mgnnWKf^@j}f0NYwG$ z&cszHf*Fep@jO4<;>AiDI-C3a5J)U%@Jq}o-R;QX)-ogK<(iH7LAQ+qQ#cWqFj%Wj zSf^b|M*S~8i;m(*{6HP`aj@U}OQ%?#788nCKkOPc+L1NB;QGht}TRzua&(h{(F z4v&X`)&_hxQvTY)nYdmzNH#bAj#FVwxwCIY4oef<3q{dN^t-_c6ddTjFsXEk*T$Q>~ze>!Z*OGl6Q zX^Id?v%wgl`t;&sXXi7gugA4wmfi%>6mULEC?=82@D{*WI4)Sg80DAnWR5=&KH&4m zUl;sK{>*`Vq7T4KRMGIK?Bp{bYQlZKh&0SkJ(^#V;&ONm-%GL7(>& zl-k>7mW~`u-okWOvkqLXPkqeYIy;T~`U#X6Pk*ibX4?=JW4E2*+rm+W1F7nKSKJ$B zPPxCWjOmzZ4trI+TG!v|!x!OhNMfkE8s4yH49g(;~pdj_7>Rl?ZdNK5C7_|qZ-J7d?{3I@)hdNrCU zl(}`j(lCyf9Z{tkswbMLH@aH#cP>yg-u%Xc>vCG&@`WEG_oNl8n zhy-dxrDFBpmT(3N(jD&081&IjOn_c@hVswg;0Ujc$Lmxf2hbY&UW>Y^PK1Lz@kzWtjDStTA*H5#i0QBUl&J% zr1bQdwh^S3qAxHfpv#n4F8~O@?`1PED#szBt62@D4O6RCsInB^RS!PvzBWbpZWMw1 z`lM))@^A>$L-@fcLU?NwCP*NCUy}s;on zv&WFrFJcNr=A)(cnQlVN)hv0lDwyAVr||9}(2aI|;9AD>C0Uz9c&Q@Bd={3m61)2H z_^-b*G=aU$FHavG*H?84W3{O~podcX;qW+e=&dFD$S5dxtO;ZtfFKB~ZRTg9Gk;@* zGW_$XaNyuDf307!c%Vse;p??kNh-luif_y3jVx-#DIP_ly^#D!m!~8~$siOV#^Zkf z`gDR_rHkKEN1;1g7w1h8)ieHeH8^R%S_%ovkH!<~8J68sC-RoSw4oUIQuEqhXUMi6 z|A{kuQzialRQFgG9uUEvMU*b1uJ;zj7uD5!{l+%LfIc-$MRMvbXi3iDdp^c0%@?12 z0Hi0P8U~P`Q;t#PB?8-I1l^XWH?6V;?P3UKScf-UOBwsX(!0%@A(%n3MPK+(Uvbwv zRhbqcZvxL-@a^dj&|~MH-?0Jrw5GU)lf5OzVdR#Aqsq3(6?a$Gy1;wk&FpIu|JzN@ z^IG#$^o_dR-;`H&bGpjor}+3QRcN_Ni>;r0&xt6#dHS_C8*Gs$c2|Pb5`tQ%ekN1O zojtdJmpXeY@ceDiF=8tTumMt^AC+E5SZ^vR=%GibbVqwvgQLD9>GzjdsX*(#0Td1-F*LEcM*F{Gk!s+FWLO;fdi+@k z>RImVZ&`4LGy;)jbi*%ZSg!?MqlZE6E;YnljT30XYl8&%#m|ZZ&UJS=FT^izE?KhdMoZgA?R7KxJHoK zFXCl2*sGNs=USwQ8dEGqb@ytMItd4_n*uSWhYS1i2xl#RMz}fm`Nz-u=Kd0e*4qF( zQZ_m7Y0YcdUs(Tz-PE&zd!;Ec>6h(&9U!IbBB`s@AtwC$rb@7cuL-X|a5Yj$VcPph z3V(>f03LG!!RhGO;q0s|ydAX0bU(#m&)DcG`p=Ys9C%=EonJ$(etWH@W-kcc3cT_O z0kM$9z1bh1)eB#OHmR52N4=;ptgacH-r;7-!wTBA*f|qxtPYw9d&GvWm0IPzsggR6 zg!1BIvD4v$)LhVJdyRp3qdR3e=D;Vx^pbbdV67J?C;PR)&W}-b=Xv>{LkC=aef8en z54>N09d`N>RDM1)m-c_D-kqgSGmj2<)38GZwMXOm;izjT=S?a5U+Si8E8Q!UaeZDb zv;`CHtPZkZ4++trpT~M|ts@&iRZ#fP+PG*uN;#IH)hc@MH9#Mlj(%hT03UY0unl`q zhem?|dhjuzg!U%*`8Obqm$~OWov6B1n=){)C^ufm%WZVZ?hDif^Zsk8nr#-(a&A0h zJaDd6@#OT2%%YT*RKToOMQGz-Y6u#ItDe5MNNE~4lhiG}IhU#2(r*NI z7V3-KEhx6`RczJrzrwl>26>NiLE`c}ID3rMh8uk9_P0?EVF7KW z8O7$&sUpI81C9xRO-(Ul_MpK_10>7f6Tr!}MJ~-4gg=`&iJ=D6?jOAeJzzdt1FWLJ zK;qI&p%&xI4TK)4Yf0XFEg%5SS7HRQmb%kz&2twi*H!Gmdx$_p`pw^uuHJAJFVeX# zrUL>{+Z#!A)O1?gxv>Y98qVyKQZEe>Ky}bQgdDMN(=tbDMdC=+c}iEcoESVCXl*JwoX}WFy+RiyCihdu z%bm#-U9?;Ko{G&x3(uUv>(8sFqX9i#deQ9ylDvq=;UBF4V6SM9`{_NBlvR~w2-@2p zz5RoigY`?WV_1cLK8g(D~YeOH%)#kk@DGEk9-G@S`z_SZw3$Nht7AaL#}WZnFrg>wPg8KD;e zo86O6kAKPNvXNoU3j+B#aGrIrj|15U3To`3A>eo(I6~hoyLgGGFP(lmV)u=VwnLmN z3pbt;qg83!TGad|sWVtbr~jwD_l|1v`T9kJiqb))cPt5B9&A`rTC2wgy0Xwplhqm~APCv~u>pIIG)Vo0IyeWa7mivN z*8HG)tLNG2V8e-Pr|TcJy+Wvjt!0vbxuaEaeK!*KSmLAHABIBtN6#z`#`=Z+0ynX~ z0O1Mm`ag{yIKF!%C-KUElMa1*Mttx5K@h`aeJ=_ctRaW^EulW1$niA(Ph?1QM!~@! zfLd0i(SeKZv<&;r8|N>#P|ccAlHA*x*x8Tu@;1!sr?3zD#Fo*xO-z?;^v5pUa=F zC`^wj7azr{(GZz!yA562bwAIg!7!ynACO*USUxqqed_{;?0W&R#ojHMa#SXLTsE+X zEfl!u;thbTSt}2lxF&%Hh_%FazRSeDfi$^Euj?>SaoSbg5STOew04!H{;^{5T zyZB$tQRH!tzCpy!fB}o4d0q;`xkqBe_-JLDyS{e z$Ui3tP{&N^gYF5$qY4t|@z=M|%9dO`5De3+&9jG;7$*7$vjFQ+VuwXF59dJ`J2~y>Jd&W_`Z^pN8(VtG{o+d7+xU|Z@L4=Zfc~{FpfK#5 zIqc#Ow)0;e>i%60F@q9B)3KK_9K`4C>W2C#35X8c-W~>BFCJj{EM=3^oRO((`47DL z0?^x>@qY*OhW-~ouiI@CmoaO4(mi8w;;Ru8O0Av>0ACz}hoPyMlMHw`Gy_-sPs{%+ zu=D>mAOCl8>=zF__whW4pL+ClA76x4x~@+&G@6HXWxR=q3nxydZ|cRBH6tT@ zcUQ=DCuy9g7MP!{JGMp>_iwVu=e}?=ogk(*}B z-gZ1D8B6HsEB~zN|D|_<-DZX|4CP7dg=8($WG$(O=0CuhA&rx(Yabn8T-y2L4Ro?& z1T0g1?gwysQqO4v^Wwm6Q$?NL{nSj+W@L1A<`!Q9lfrr+ng15HTaXg1mI0c!pR4c_ zemwb{)4G83nwxT8Q^{H)qYIKu?nzM-%4a^FL6fos)*qS+N8&a=!EJ-h&h}GRuOK%O zpk3ZIf9>4&ThUaJQ64uMB0`#33Fd+B0_X!&V3r!S&I(*)@z%kefY6qZtsP7-nL!(_ z!^2+$VSX#EhxUgKB^H@=6Hvsz1#i zhl_mGp1W1~puIJpI)RoH+X94&nZEaadH<m_R~68b0cx$Eae}ecXa1jWX}X zVViS$S$7&aggmc%RPD`4POxi!vd@(7F+R9Y1N4Sc)}2Aty)w{9pu3f9>`hs<6D8)pN z=%pB#LSkb2W_LO}6VD@c3=K&mDipK~8*k8BTN`_yDrq$$8EGTljqhgoZ_4A%=5;kM zzRO(s!7?_GljY!b@@w^+@eEUx-C{$$g?c8T^^*rroXTNug`$b<9 zn2#I;G>jzQ`N?I@&(AyT{xTJj5F_g7=uq>CXzqv%HcQqNZf%c^D0yRcTd!9A@Ws}> zz|H=-gSocfJ!~;3qP2tL*_9zxkw4DlQj83*gBj#~cmv5uT4zVA3~QZ_dfe|humx^@ z9k#)ID=pz)tZFCHFgB>JEfrl31 zGVI~6&A9{9*nl01)~IFqLZ{FhJ>L$!%WVc(RLip`JCMYzbNtsB{A*3NJ zftx}$2Gc~V@uO@JArFI&Oq1ClkR@IzajEdWNZs3g(c&zDr(`V#c*Y&8_g@cKRT{RJ zx+qkQYI81!`f9hi1UA1uF61~lYqw^c8r>k79(A2huP1BCw<|8w+|It#1N|^-k+N$@ z3B_UX79+sSoX3vSx?IOnY-{U{h`SqEk*xu#a54Ix&Ix?ns$Px1`4&lf4Z{7m84t3U`51 zf)}8ydo&9e;o1U>B+`t*0dCivNWX_Q%orbT3))XtQ-C)6@>%O|${vc^eO8qxfH*vq z3!5fobEcs+Fm)fasyvXxD>liY3hP?15}YJ0^H)W%*=4xv@4FD?%gSiF$T56pRQ7zN zIsRoONr?WV)KD8qZt}a3$>HZ-@3)-{_&gl9Mc2-cSWimNj|VSBt72(w=yNWJ^gP#8 zr9SvPY||J{YMG)|aKkI@pk=Xj_IR^c8cuu-LlSo*JQBO&W*CvgjrB*G1Mc)!SQtN} zELUdl^5cnvZ_dWf>vv(ZRl<$W`LW_h@0o*DFHm8^)BYB$sCd}!3e z;=G^7VC8r<ePoQxPU1i>9?G{!Jj@#sgftho@1$+HeEP?5=c+%}w^GtGh zQKsF8#s~AK1rN}|GI6h32HdaeF3I&~h?N++m_9i=T#Z=@LeHFN?7^B_x{2ljo-akNyJ60C z@_PAOgE@PgmI|9Ul!4YUB9MD**W)>R5IJf_6gKOx)SC46JT1SHF)RCk9x6yqjBa0E z4WIK(rG6{5u`g!WN4dT)29)HoeAOt7Om65jv$2e#4$3|V;ot<>gqiXOY^$IToN5eO z3kwTQhx6+g*u!28uO`Hg=QTv~>uTTx`Br;-xbC~q62+PNb56jGo-@@9>Lf)Igxj$% z1$dE??z(v}7PgYfuQB!>g|Ee(wygW18Afs~UDQ3sxPCQ3A|SUNu(q;bLd6`jr-Ko5q^~Xp_wavuntG4{1jJug-~_+O=rqbiqBc*(ZSXC8^21k zF+Z;W2U#viS8~24^Hq0g_WX&J79Ep4l?ciP{%_Nbro&E9%XsZqDxwK$?E8;&hwZlw z$dz+j3d8Yt{C%At&0J05UG6>0|3JqYOks1PqD=zfyVan<=p9kcba{XY$lH3m;+=9J zLy4-G*7n8fww_gDmkf&f1EO3}WvYJz-2m5kvo1fpawJJN(Nw~8xq1E2yu4yfiMF=b ztPr<8;fGR{j6v=ny=D@XyYbx(zVhF#)5Y)<@*93i@*Tn(8c*dc1W*f80YRAbuDaUc z_nI{O`#){7C(M+$!CyQUtT=y+-a`)Cur=!P0V!_YO9(9lNbv-!v$JVw1+*Yuu=xy) z)?*l*#f1{oM+5*(W^7w{-40zV`KKtP@b$4aSS30rP-vte#r$?30Z>~VWn7FMQK#%0 zfv*{VJ(vT!vBAj?*9u`NQs53NGQd-^45(`e)vIsg1Q{m@`9%e9-P*#hWadn@vA5_%>kkh6&D zA#9_w0Fqm6CW!lcNb3R2d=SBP4RW|YC2~@`VhKj(FthmyOwl)vK}(@r56QUb_pY_e zR@R+LT?EyJ-w-?3N`HHaYouX%U~-QPs--Z+F&_^xdurgMA@}onXc2S7K?Ravj|xbW zPornIme&s|MsX3JTxTux4w)t?%FdUQyQ+G6_>E`9$LU@YT{ettsJDwXZvAr#H?CWB zEJAp}z*@BTA!TDr@2<(k<>w{$NbI_r!(l4cHO^cE$W^i$}N%lIVO|eI(M?fr7kZqhZsfdK&Kr-^y=-8SRLp*0qxDe6L%5Bj6gll^ zPdYQ=+WO5GW~@gPjEQQfN6}eab>!%-ddUcE@hzC~=AX?sRilZMCT>3(^kor zm}&L>Fe1jvL!^AaoKCiyDj+K;xgRxpsDsQt5%05FXb`eWqiBO2REe_}NnaXP%Xbi1 zJ*lp(e9^nTY?x@=mr6cq@w!|d;cYfn;_uqsH&@jDF&sB*Dd@9dZe(u=(B@W+eyrqG z?;DtwpFw=diPNYIdibdszSegYQfWuNw^INR#O@47Cti(kp0Ze5yWw2K!e;s*h z=6+hr%yCNDKTA^3eji)5OPkoT0f!$Rjv+VAdd$@$&a_duu`9r=*2uQ`U|WR)sjWvT zOVByI51W#I*tT)0ktg4{x_cZYfhspMBTCDxG^Jb5e)qWjRpI&?Z!SUF_3ks_^p^up zr)QTqlEdIvN2q;cHNqSOcvjNzTY(I?OA{QV#gaH6w52*V5O>K>%Ndwl`5Zj&_^-|* z9o!CWMKe`yTv?nqC-V{UN>>*=DoL>#f7Rj$@x9Bb6K>Gf=Cbrie0rjeNp_5zeG_Gy zSGTZgB>!zM75U~mrMjw~vrk=ezm9;)L$KGVjM7=nDn#<1z$WhJ9K*l4X&Z*|;h ziT2Ns2!a`NT6dU)PUBt$-3kI?Da&4YJKPH5E1#^Q|M}GH`C|FdutJRjAONt7Uv!_< z-jwBYQmSQzTJe~iB?`;{EI4QW_N>JWjU#)A@k3@;g)Tp>@1Wj7s$3~B(Ox+l96B7% zAfth^srbr3L{0F|#;M^f(D*RT@=GK$46o|P0|+5a+iOJ@x(Yn^f;D+VW2E^kGVIFr%G;h@6Q6|q%AyM+q4Dl9X982q2R8*5i8_#SY;)o<@+P() z8&-~o(diOKT*-Qu^eNj;>(Sy0dGh6rJ0&bkxw&BDkoL`UAl5oHFB31QD-6R1Jl8zC z^TL&0Du`%xlifiPn!9!bEOn=7jQik5OIO!~z}D=h_edYJqNvlWnEpKCjARYRfXqXk%mS&{)*|Nw3kaha4Se7UYH_I_vIB zAhP)90lms(Y@R6N+ALiMp9J#82%!zA0t$c;oStMlucDkN0X}S4z~`Jb_$vfNLvDy zTc>Q8N7azq|7QM3;`jqNkBdzv>@|yY4)oRbZ$|%DshL&=)T5Y# zvUb^Z0OgI#i5c=o3W02fOyV4Wm&4uJfDgRyaNk;U0d6bLPvs%pytlkw=YQz@Tzgk# z|1ekA_^Qg~SYohvwz)mb^F3p=yzoSwVulSwbuY1eAFCFk{A$)isH;8e*p^B|dJ0E- z-+HW|1!vn(x(fMeay2&am?iKUh!DLW7XP?>^!+LvjN=NoOXth|vSR^m&!DWPt~^H( zkJm3HrKC^<-rq?>emkq(_}ItHb|vrW-L1q@pf{f6uhWY*-UjuQTUOl3J}!4#${AFZ z&_j&!fMpnSNb3pC9oy~^R=OdaNYdy!vv1#vZ2Jukq#Iy+g(P zbv;qr?~(w!m7+%UAPrApmD^4LF15>Q-u3lf4K#~mFj02g*2TJP=E{%9cR`&)j;cO3 zwne>X@~_nUhFu4AvyDnq(6LP(t7&*WZrc^`#6!2?X2P_Z3vye_uxLz-chE7ya2bA3E?_V02HjwMjW zQRTz5NFXIDg|9vHhi#6%ve?OjikJ|e%xKm_*zh0?K4Z``OlN9 z&u&JJWZt`+n>Z1eb*a=uR61QJIO|e$=Yy7Qv2a}DvGA{`SA_$S`|o$maTjA=1uary zKm}W?wQ?3x`d+6b_IAL>(yr&7TO?L>?D|B@s8p_bt0e&uz z7jZ9f^_9@8FtZ>NL5J5=I_*dA!oQ0+l`Er=n?*+lVpO6#{?n-t2#rbh%0^M^q| z=rK#6I6u6$aO?}zdW0fwK-rBqc)sKb%=~7)P>xmL5mS+vN(yq$Pvt||L^{786mm$^ zjhYqW=e|c2kw*o20Pdy9lu30>Og%ENn&H@OvC@<0B1%^6Z7nGkMX@HCzzspbjZ@>j zRDz@GA?jlxzIM^_?Q=tMeM%k_E^L{K@NwQJF{~I;>x}I$2z~6QRT@tAxsrDYcd^Y8 zuE}wJ>u!EwIA6ioA~4S^8l|$rl~%=UKjA>`$$KfoBo;%^hMS9gm)D)yRSlj_r_#Fk znZj9%wknfs52$>vLk-xa$}H;MELy=dzs}J{C`?n;cQnLI$ydPNj=}x zN#0}4kV2%;uaQWL6ciVSC}dp+9z~62g@2LVtE)j~Xpv?1oq|qaG%f1&hIWrVDbdWT zlo4-TcErXcea_4bzJK87{juKmJMs8iJm$!@Mzj2rP+3QSKFAO3><9#IdbD)daR1Ut zuqjX7NF{4Y)+nprB?N0GCr0WESQXsMR$Ap+vpLy3Ww|X$1{6itpFE2Nt{SDS6XkJ? z+ugcbxk9|TCP8CQ?v@gia>euLqYz~IyvftQP8265BWq7Gd&Ko|yeaS`jC#>9l&jC<1-{hqTLA2-03J~GH z6Iki>7lNY{)9dW4`(>W1gNLG!d}GL38rlei&(ob)U=5fG_K$@qgJefToG=7JsJ1KCed)nx?&1np{q)hqbj5?H2b_%7j;`ea8guKWx_;%QMbj$0>9n6rhbLK$DWrKp531Qy1$i(Z{^Ll+S zF{POGr9HYuy-tFzKic2QJgx8M$0A^}$#kRkuPY{s)&n#&phw^aos!4GMP{h?6%4>j zna>O;!#^LiyxIkuFD zMFo*zZW_A%B!QCvH?gZ8@j<`VS#+uz?zG!`%!tH+P$F{0)WMPhku~e?KRO`3;S4Pb zdTE*0U1L`GE)tl)Zu3{SVO^&l19lPOu{ac(M}vtVw3P~__~=2`OE+rwD#)1EYU-t% zk>@UPmEB2U3a9pUT)(lQMZ?}fBq;3rQrddEy@F1gA||hg(A*$ApA}VD;8S|Si354k z5m+`29hLD1mO!e1m+I`jp(FtfR@}tp`+1H(4skU!7F!1_xX8M5gB2CB2Jl9Oip}e- z*Ot|Vfx*T_K3V2pM!KDuPv8q^)E717z781$7{?-xk+3E$ekQDD{Ehzlg;NmN}ZNC$% zIo(A)m3NLp_-k#%U=Kc*#*FOOxZ3E9YU5kQpZfS|%Q$HH-E@2RN01FFCwlV3xh>vC z*~6^#5zC-rO+vNvFGoj0H2QSCu0Xi)NeS&e{ws=kN;W&czsPXizJ~^c`hmo<&{EI> z;z}7R7j-0=SE!KnzUoab%AD*nq~JqAS8Dy?+NICSs;Eh~n-gQG6C~|=k{~!U;YcFX zUzM)JFIF?zL7+>*Qy?}7Gg)O90?0cc%mxeABkJyx&?Axk6t2`Yp$&3&K6QySzIa_X zK~cr&>R3_UZy9{@rf4_Rl=f3J`i#Xo=6hrt2>51q5!xoLY;5^GAPTSf zBvij)-PDy2RU*G!dWWikuq7=AKiK&4i^pzHDFTBlRLL%3Ts>145UrrRax@d|NEetK z$K@cvZC2mKvcwibf-7D+@b30sf|P*Od~Da=QvRFviF(S-Z5`WWDv{#SB5gTFRB45s zK?$bes=91p%YcP=MGyv?+Om~3{H#YzJ{c7rB!eqmtn-~_6?g|7i+Te}?h;th6tQ`_ z&Sa&l=w-X(F>{2NWbY#!2pEXC=~B$aJ0`OuV#f?@udTFKNbbdDfhYPvXtxBB=&8_) z!;&+qmIidHhHcQic>B|%U~~I4hXWGlPT8iu3VzMdy32<~nzmG>&T=SsF3~Z_G=wsZxe<&TxOrVl+P6$GE? z%)qjiaCz=Xtu9Ao!BWJNgk245igBQ-#h>|-$~X1neVx6%C8ru{V#lQfuRipmA$?Ic z@qViI4JXirT+5#*Gctk@T_yIh-$)hE4M2$6p%J6V=uwUFje7gJ*M2=++mltlZ$?kZ zT!yGLjEU_WeNT2{u!sB#rDNTmNd7Xjo_E7!Z{c%Pq9)~LTnwbEUm5(Y4rFQ`%SKO=wOlUlW3hxaPY^oE6{d&>B6E=yTPx$Z zSPXClBeA+tg(jTsyY8U@mZi@(FtGKz|l0rF5zdnUx4r(o9?m4Nhy>gsS9W**M@?-jw66Jqu5B`(4lG}dRex1QK~v>`bVb{rMr$fI*k2}wG||Lxm@@*H0-&KG zXg14o@-}!Y{sspoKs|ju{L=l8T}dzY3s<3C0?^ECXe0^E>K7`>f|r*oD5+<(|V9iP5q+7 ziGHw$W7RKN18a`o7;I(`LpFQM3!O0E_~m@Cr~y;X8UAJkEV|?Jk7brX!v~bnAdMTj zEWk`&{$mfRasB?!v@hG+0|MqTJcIp79V0gAJAX}EWD^w@0{q&>EI9$*O~Tr`bUIm@`lp7$ZD_yXBL?@a>6 z`nIoVZjhk|6zR!nolhXh7`7T>&_N(wzNl{z9KlI9+rF19A8RvN%JG4nsF$$sF5eG31wccg?-AsaW%FV>#kI37q^Q-IiBtkZkZtMi%6O~bXs5a!8z z|B*QD$~lw6tWC3K_oT+Mo0G&Iwj_borMK*7N^4;9ZBDtEZ%X~gxHt#74a%1sr7|AX{Ha~@4$uc3W6umLEu~2RiIDo6s(gSfEIE9yi)>m zaWbQ9#I7iiD94NESv=#{-jvE|F3PLUcUxoi0a6d(5oo078%QxyNWxQiV+vseu(9av z3>xp}O|P0l{mYU;y(yrE9Bg+ivX_E%h3zORd}rR#vGx!yW4Srx(hmQ;@qCv8V7%+t zKqjzno}wl0%Rgb9Yu!@==}YFJq3V7~TijnbUk%wj z9U+y^T#yA`Bi{MRZ2Ihe;iUX1Hrp}fbh|cT><>%X>0h&sCOSVnOV0*}bO_MSjzs9r zrc0lc-J_l+WtqAeo@Ph^&Q;7$He3$+#QZG3C@f5ziRoL{$$d4{0O4MC#XhyTaXydN$&e4!w+Cf;qTU39IzZ_m_ApNAS(VX?vl>eun|Tcne8TGmULgIy)@kqq}8 zsHhm<52c@q%8k3lVQ6e#8=C{(T5-r62C#R15-h-EseS1)Kkvc3e-*QhvdQAljF&bx z9h=M<$OafUJzRn)2Hl+p+l+bI*69n86JIuJwn9(u)jOaQ=yDYP%c*5sZ=ay8ZY zTF3cDyGCGIcf$As&W?MUHoYFg%hPFS4HbPK>G`FM4j^|ekq6UEHRnN@$CGw=v%?wF zJTm%sXd;8lNM(3U?!_JTm%;YBWu5V=E&v(;Ndv~aJgEF8IB3IfeJrly@@PsML%lRu zq!0Nj1^?W~53ca@M5YbwzlrwLhg9Y%;X}N!-_z-+(FNXK%%(d)X5bwy#(Q&C!dt`J} z&b09Q$Y^n3`-Bb_<0Mpspx)PpK#1QdK6I9_xyj+S?fh9vg6t!(dd$JB!{geog2syKquKMl8M}l3DJOELYw#9r0Y{dgLNo2QY zSDFA4sa5NKT-_lT-g%dNhwsdt6f8NN3>x@{YqMv-#ux-604mx6ud+f(U+RE&i3@w|3Q?$d4km=?rBakcyuatBLX9CbMsNP5T zlvO2$1lQE9uLiGy>YV5jA5@y+`gvic9Ny$@zwfDpf!X2#TVx%y6nqcX=--GirrDKnlQvW$B9%J9uB~m8>o_3Dj&N%4KJD8;dx}?XruRO$Ni70s zqI8x3x^eh0zA}QB=r1?g|hQj6(b>KW;KJaR~g#UQiD7P)Mg!9dPK0y*Z zx!J0%rcHMh22jKZY|;LtF#Z=X8^9HbE`oO8Dl`87j_=|P8{wxr0!-DFmHX++FJ2I$ zrPr4~2{k0onO0)IdW}CS>v9r5T5O2sKE9>JctHA~+Q)%$gXvx3&guNGdd~ybW!E=g z_%detBrH?uOP9kaS?7)$)Aya;e>~UQ$1QO@lRJ^SH6<;=c0j67<8$XV>L9AHMh_ff z1W|$gTh~`n>$uA2xN44j99$pX|2b3dDQ;$XrQqvW?a6q74{v4DS;Nm$pOmz;v*RG% zXvATZnzIKVQ_g%HyI9K!hRlCnU{G~zuT}CkcZ`ADqYav04t+*HJ}ax93kPocygvPW zurcjPW^#f0WwzB_7X96HoggJ(tBJ3}ax>ObL5HIx$cM~mu9FU?M&Y44wuVpR4&c{-R1hD{63b!LH9frJqu8tO?pB+XE55akU z&+e^`-jx1=#+=u^c^{`ZKexNPdrouH^3A!?M^H6+N}77{40|`1|GyCS8$2K&3ogn0 zay1umHBY!rerRQDyWW@z$T^D7+wrW%{Rp-{1@RR6*Q={8X15`Jdz^|KbS^QH* z`;KwkUsP=^Z+bmO$RJBSCqk_7oR)Nh#xil2*1x>koX;w=gSQZn)?fj1Ax5Mo3FX#BC|GKlqo zS2rv6Xf_ZQD6$E?Y6ShOls!ZXUlt0XeEwGjeF)^AW!|JDA}OiQc}xq*75*D3?{s(J z%EfKfB|8$E?oY5SLyX4%p2nW(0=g{Q&sQ(n>}lg7JDtdGw_VFEd3;=pR-xQxkyXEB(cEO-f{lOC=WngZZG* z^6S?xo$*mn=>KnCP2glIw`mvj~!joL;oBmvabc3@OjCfwd ze78!l@`tEQ1Jd-h00^WwZ(_)g?-LU`8xd^*SX1V6rw0xs|KH6-cmCav^S`Gt^FL7V z|F=dNfJ}liQ|3Si3#_i^ zCmb#4-=xy;SE2BgzZrQU${-NMzqcus+`x+BF0gy?_uqoCNe^H2uiKPnSuS=6`kH~; z9E|+|WEx`hqhJ+xgbJdC-6-gEzx#4{2m|(V0(P;Ot?%xZuUSJf6h=ly9PPOnoxxYk t&qoNsNdGfq|FH!B-757@Z$Q5e&q?q86r|5~a{>246y=p470DX;|6g%Z0sjC1 literal 0 HcmV?d00001 diff --git a/docs/html/hdia.png b/docs/html/hdia.png new file mode 100644 index 0000000000000000000000000000000000000000..08bfb5ffe8515da22a16750d333eaacd6a716570 GIT binary patch literal 59145 zcmdSBbyO7E7X~^Y5=u%+h~gEbLplY-pt}(ylp2s2kQhP)l+;T%gACm@=R0p?lW6wH+MHHSG%|FcLfFb1cdkmJIqHufsaC7 zJdxLZm%1?{=ls`HJ?paGGp4t1?>lic-sN|1`%=kdxB$_&mbfd2Pekax^aRoqK;1!- zB;oS-l!=s@>2KzDiT!N?Umjgk^w)qk^3l;KKV{8Dj1qA-ZGeyX19Q)ze|~_@_SK;(GSd80*4%bAF_? zkw=pD>7Ubo=Y*hJc#u_vBWSO53`H6_TR)*(h^)2IAuoI1oVN1zB>p3-IGYKj4+?yY z_zLUqR5@qHu3uxb*m>~&VKpKEG1<*`>6DH!nv1UEc5&+4v|x&aT9`;VvcgXhP>tQh zhHn)c?ClCErjNE%@Um(~c2Ci7nPcGaMUh6v<fh-c@a0hh8W9x!6RgE zac**-yi)D5drYR9$%Axa4%3ne(R8*aJ7wRkO{@C(SuUM^(y?0NzZDwIk<<^X<5zQ% zjlJBIxBGiPTQRPnA$AC6UaFBVq&Rd$HpzB6pgI zIhlckj=X5>&M;=LOSfFOa+(Qv_TdX(gZ}vn__ndP8r#0DEO?>ECKlmE7Vg zdK6-1*kRLg#BVwD#@+J$nOpyp(@jbFF9pOw)1_LydFzYKU7`k1=+~pTQ|<`KyTAUz zXAs!CcH*T`UG&nDMa&Zy*BhvFkgM4gQN4p62SuY+22nwO_@Sqs&Q zyk*|F4E%P}v3NL$rEp*#Gg~!oS(AZR^v1_f`@#FeHeX8`sPnrUZ9fd+yyF(bqPHwH#lG_>cZ?n1dBeMV*fS^G7W z{+2mAO-s@qbdKU^ihSYS(bcliUna_{9ir4LBL0<)FohWNu{`U_Bh(k0iJIiz*je*J z_@`EME^Uiz8s=m({qmPtI)>Hf)WmGE`=sQD(f2w3e(d>}$dy{!PiAZw_Jn4vF_OK; zq(qHXv{+drZmb3|gnqi+K|b}P=)(8@M_n@WV%l3loH|JwHqEeTiRwA-yn&kE>xGi6 zxTQXpv*#uY=95n_GjLhmJ3cG;YdG63_}ySMGTxM%~I$O95{%+?rTY2 zvW=PvhpCUfd-biF+ohwj?!>$MjJ09*LUvtqjRO@MnYlY{nV9>7*w8#uttu=Z?du$m z0ks$hCIj~H#dqKXjlUUT#=1Kbts^hz+M(ucO|pNy{7i0$(|DBgQb$Df`7&Lx#GU9a z7ha~B=`X7k0Dut?L2`Xvy3R#WzEU)tsBhP}Tt0Z60|Mdc*iQB<+Y}{+k1zhxp0Z`K zZjOk$nh&`_s%Ls67}zq1qAE2xkRFr}aBm1-Guurp#CNBs;+i_#CiUA4qdiVjTTNWN zK)b6=d26!G>NM$>Bn_>b>{Ye3OoUF+j?!7gD`%vrUF5Dy56U?Xi}hm9Ik^goPE#A# z7TqeWKg+77W1N3-bjFtAMKa$q_TFXHIF?`R{UsA!-@6#N*rWS8QIGyuTTu8(!?lJ+a0ujUUt>_NE8nurk2))4U{zQwEs>HnHk%DxQsFSSUXVLIr zH%k$J5rN9B&fj^CX!fnAw>(benTIV*-nTcyiU!6^T1ypM4~6>DEwBR}=sD-OBz$+mfnFg{zu`Kiu_&Sntcs4?ET;S|fqsfV^0sL;#GwA2(tbpD*d zjQ}VK88p{!uhE_~?NE>V&oIh_tmr=bi#7JnZjWo8l=6kH93DVYHb`}!ENZC1X+m`I zHMhB%rgQ1!BT{0A#)jURnIVUJnSM$%&For+2Z@Ki`Kd0e zWVqo#AmUM;=NdI=hdCT)5APCzN5;r?Svk2;n4O=6l5KmjM)F^ah(@DB*hVzBh>Ma* zWf>~Gm8YeP>R-K^w1Fi_me;r*h%l)UmD4bAu?|~bQ$#=y`d*@3Ka%9@r40qeznIKV zbg;GM4=hRc5K3O}pqqR-w;&0Z`}@E6G_-cX{g6P z2%3}Xl35-Lx%3t!^hn2e>H8vMXS);v8$v#vuze5=?Mi??bx{Yl`2%|nS6&^H;lCvw z>2@K~U*T*NFQQj%HpQGVoW6lnTlTvA3^JQ>!n2DIH}WpFOLLqJPkB2_=g{`(I#OIZ zm=;cMErzd8H%Jf8VZ557yVfB^^0u~aOX{sA?(+t)Ji^ueC$;&T&f6jaxO_NRK-`dS{>7pP^Vs9^B= z0F9yx`ct2;k670__|Z`9l)~U9@MfTg5F#Mu60OUkBYpdwytCjhQ!x~bHD+aO)C^+D zyef--t)&}o>oDife)MTBayJzSM286XWZCYSUAS{4jofd(MJg3thn%vdfF4C~2;SM@ z?LPMr?7>uVD^~YP53*vV<>uyPv66$>G+xxFF1f;GhW@d-x=3@#KnNPW+{Y5oK+gR_ zgK%x9>Uv0e*4p5#645K&YpZUgq`@eUw{nj|i;HJwHde(~dfKAiznm+jvYHC=3(XM- ze963+&29YnTHMK@!;P^4dY$&egtH=7KG}}8>;-# zI4Rp*mVDVgBRM7k3Ary^75JPqLF&{q?h0Blw(s+u&K`ECZrww-S5;L-0L`u zxT~lFQ?PmC!-vNPM)3ztnYK}b`SkbgW5yh&DYhFEXaujWCA1zU|Zqphs z_#0fm$9HIYr>eMqz(iGul$||2oP61$@NU*4172#WlU7aJlyl~6md^D_4>!T;ricWK zQ3VH(BxrdP?#K#!5K}Ty-(mAmh>D@vQ@t;nXQxg3dMh zjZ}*T7_v5m#DP@LJM}^jQK#E(9A*ll31FQPQL;Ulkp!)(-dev@Pf};Ityig;l0SB9 zUkKa^_)Sb!%-Q~6DaupC@h4sLxXQEK2fL;W)+QBm1Pz{r!QPHZhBj(R<3SCW%yX( zzbU3PExdN{y>eQPN9cb_2P9%VzME@kWH3f#-gy=VnsAu zEhhng&8E`uiNKJXvGtg-5@L}a;|sOhye(~Yb(mjRf5j~IJ-D^UO|30W~B z$6I4Kbhm(Fm5(;y6m63+q@}z04aDDqkEz?(?QUzoYQ;ynt(uHJwh!5BC)pJ|n=xA` z%O$j4&zKJlN_@HGUWP$=$X953z!0 zqxPL)aMfZ@x@Frnqb|2&wTmEQ_~Dc_kLIdS#(^Etb+Qt!n|rK#(%ZKz@hz-Bg#6NZ z*>!=k+&gbZVq2os6-|hiIq{joX$l+OurP#?TTBrwOc77YPt9etKUrA`QWFuCTm~|8 z+%K)*ITLCZsyGwq62(&yUHe?z=i8y3m%lZ)udQg|lvyZ`Td-hV{kl zTd_WaiY&NER`E?GXl$NqEJ&zjs`+hDKHq?)ROB`4rajuMFk^(=|4X z$Cm}Pzd0HAx zM;@e-7x&k8j%*d@?4>@PVYhneFgF#e*CXWgtn4vM02v7xjD~u4Davn3lnN?0XY(w# zKl>*`JDU%vOyxm7Nq>t$ncAMntewNhD5>(3p^dFQ=MApzN19@6XYBoIet3m zh7|&MK8eG35rJ4qR8aE~diF`em>;Pap_(`MYTYvvR<{j_rme`wTCkPZghBL$JT@@u zf#5#hvqvGLf$RU5GC!@zCG;e2aa~PWulODD6-35oV8`iWg_CzAX6mBClh{R;C4(`- z<)SC$ij)q8&^}A8GH5U4^;6rtpkqoNjrJ-S;SFg1tQm|kN9d%Q9@(mS084VScnBk% zR9&izcDg0pUs1`^-yE?!p1cnq@Jcr?b&=s|2>bNYDA)nixS#A_$JkLnR_4NPhW7cU zW@06q%QI{}X|4PC6#d(v zW+JD^d$XQN9_9o=?f0XJ1gMriZRX~c{RF6V#?zm!uR3q~+6Wa6m(A6F^%|3vJe&|R zmOUP2O)jPb%W;hn?p*gTSMowz;=9b1Za<8c7{o>!#UsfSJ_8OVrBK-}elb`pPo#gs@PnpQ8=Nk_Rxa7FA{(<4~+eNIV%u z?lGKN@$e8@ETG89ppSYbNwknT=ZF?3j>{6{^oosg{x*dk^;nBUWYo`s-d2DhNLl9} zPpt>2L+1c1Lx+fo1ADtFXd%|+TK!dKnV4J#YG`~SjNG>&p#J(54i8MzNpcQdc=_8t zqUTrkwiQUKB)2$!T@=-E@d_Dws-S;{#l?&1*i%Cw5!!YB7rJJj<1r_S{qJYPK@H$3 zPIrJt3`$|jSJfgeQpvgSd7wE$eqV-!(}(|>vW`%!(mUcsaRNKbOXMz&y1KcFH_(Yf zAReez#2<0Prv!wnxN0lH(UdK#j25+@^jKonH2KjlLNDIkehyLuqj;Il$ZqzwuEN?+ zr}It38xwyipt!Iw(LO8vB>o3Dd|Xjcd)UHh?cb2~-gcLv;l|3GCv(OnP51+PnP&oD za6ojVt-~b8y6KiIpK_k4eLA@(MGvxO_i3{w9h-FQFLBvc-|_S!Gg!+|^{qB7r6s>E zf;Qmn3HsEeYj?TQQrG{qE-zeQW&0}%$@QAoQnO>5pDG0_QN%)gL_VQE16z$eOw?Px z1__vrt(ox(4^U5ItF5$`-&SYGf`l{dx`&yx`+`t_8$i6c6gbwZx=3GC0JFj@!GR`b7$%&!i*)ly%ZEx4+Yfin(IX>-G<< z^x&@xWzHrI&a@sM?>W56US>B(#GX@Eq;5(fs8)LHqu>+!2XHZq6iD8qPn09tU2p!J z@yb3bVY4V4ya8$bJC37tJDI&<@vN|WU!#I&%0}SQ>H}H5xC$qHYIiqcLifk&UCY${ ztfjF#-lk8g+^FBF$2iAOZP?v3V=2mu)lJP^8-%J53Ot$$2@y@UHgi+(c)q0G){&!U zo265^7Ib1AE_5aA6aFHpg!+-v+i%uW;$+Qz?EjRJAS{8ry^(m4)X%h#6b4|R1(@l* z%UO0Y5Bgod={Ij}pw;;(H2tIaNL~8V64N3&IrCAB?3B%eLFvJ5 zF=AO700ue2e5l|T0N}$hLnxCjc*L1{J3;!e7qQP6nP=>y;D?_2#%?R~FSBDJmKleB1A>CyP2KT< z!IdOVZ9=#dLgZ5$Ij9&^K)BXUqJRXDU1;vI(XFA@$}*2!$EE^-DY$gZqvZiB-Ct2k zg-T56O&>PvtJwIfze0nm#0XY;#6bJ%nH)+Gfe{`Pt@|ZvL#59%$U38*pG*r`r(!Jd z)LN}FwK0l!K8w?~PuXu?u`BcEqJ4en6g00X@h3#owZ(G{h6?9;?R~BEZ1Es z=1E~`a~`_H@9{Qn;<@lJ&F^~gHh@!}lVrZS7{8{d586yxBe^V&oxAQ&u7Ot)^( zKKQE|Y*kaud%=#$fiRSh?Lt<`EPak)vW=OGOil2W+=}3{QAD)Ev?DX=JLqgDF2{+( z1?w&@M-Ui)f!`sJj#{1>`X;D&Qp?(e-YY29VyA5ihz7_D#H7hUlpn$mu*OCL^`=^j&5UVUYcsco~(7}lP&9tekT)OMYufCMUD-Zt+q$?+u$Y(w7r z^6-|*HTL}wsq+(8BAKIj@J67^E%#6PA$mrfX#fJ!ZFYRW9 zs?k60X6p~7J$lD+_R^LKzCalERv5UN>DX`%;|e^pKN5N?;pv@N#nKJ zncfn2yC9tgM9SA;;$JW$IQSq^3sYt;CBQ~iWJ_jXZ6vyX0Mhs1@Of_xuZ*X!IM`fz zk(+NDB0NimM3?2Bn*;U&X9mzRm?l^EmgVoc72c)Pc+;Nq4nW&f#txOTbl>B9CQBh$ zh^cB7O_Ge_kMI+`1-YPPJk>f0I(-#NE3zf!JMN?%RaK;8_?M(&xY*ddX7;Gb)MS8I zER;GE>I11{?K)duZ-0E&6~aDy|hKm{h=lJT@GAisAy zeRycnS}26zaS~edBjs}m@$&Ud1&5(`De8*8^Pa2Yj!f#wk=kIAH>)~5B#nX$Hd1*L z_iKNo{aD}O!dQ$2*6~pvRT7hRb?$xFRro02#)bV!giC=kpOvG_rJaEH!>8V@>S_4D zx&KdC^}{i`uazc1*6rl+&ep_aO`k__Plv>M~b~rMU758(T62y`W0Pm_ToXIp(d?u3=`H*4$D-QJ>|2hso!URV;%%17(30HU0W(@j(Z#@t znQ) zE=Z}mNDI2do?E{G_kHA^CDC6IBnUmCr6rQ}I3CR>2z8Popgx?Ca5^*5aF`~pWL**% zwE4grDsU2@X6oF(9ayKU{yrbaGvp170e%Srp?lb!9p|YNXFVs zz(`SI5+5@>^Gk*s#ECMjxI?40+FjoQ4C*SczHS=knvA(`zI1gDxnHeqU%A27U$Jf` z7aFA(A{%vHZJM>FHmkN#I9o`5jnOR;a-r+ZDr>(kKo0B%VCR&!>?u!wMGRa|2^0r^ z7sQ>^ldqLNI6ZE7x1yy50=LD_zHrf5k19jU(c0}7AXm1!U8wGVr&OKS>LR#5cIaR$ z+Mf4^T_i*F@n`MR3d;>yC58|rHPXlISA&UU)cvh>uHs}X|J7M6PHcV8?6iLF|il} zUa=cBkBo22ag36z4g*WKjMK@OEb!PoW1J}*H%9u({QULm2|)MAzR}{IQ0#8N0uIV@{0MiXv3fmpuGd!Fj@6OZ?b(sr?DG>Y&l>wf-Y47qZ;vfUY zPeu11v&u^!(0|nNoy1(3X zO{pbTElP&W)LY3ekG1qZiK^Rb@}k95X%rF)-#OGHaiX_;oa-IAdGY!k+a^i(hGIYF zvP*XM4Hs5T-^`|YHXjtwtpl451I(eqP&wEDha!5s9>)%(NVX0F)e6L ze|zR&IfKvs(@UJ~5l12rU);Go;iRKG?H$cTUl+Hl$=1vr8;D%a)3AaBp&+1wqi_|Q zTT6?{uL7E?2j6Yh>(eAEyj>|-I`N%UB9gh|HrhF1rV&@t{dUuMY$e*OtGc}AaES5` z&s1J=H-l5wV)u3~-dC35%-iQD?Xg|Ta!e8v1lFIJO0ORk=2_s!!Wo!|n!!ZK76Gpl>R zUj+rD)mLU}D;{3WUz=px81e?h)Tt$2$DN?6O8b26B%wDT7Ft=`rdzq2l9Y3zeDDbo zUmYX&u_KwsdI_?74;2)3WOJ{bpkI{~ikX5QYprFELwB?h_OC?JW0#ivL@r!gAJnd^*M%NfX9A?f` zfh(F8^R(wa58tz;xOXbc$MQ96EFbJ9L&Z4_Q!9UZOas;dmtzyut^MeIq5I{84s*<= zaBI$dIIqB!ZtVRa5<9n_-l)dewC+}|!rW2t=~uz*WOokxy8*M=$~P{lONV_s$H zb)fQcP$d^t+nEJH$8R_VrF}PleI0hYnPIEATlzT;gMw@i8Ky-n5!J(hfU3p&N_(F;=3cx|?_t zNvR&z;=^=hVrqVttm~u9Y4+?v5iGhqZ_7>6z3T&=iNVd)lS;SkO_CgT{Lh8>Ob+G^ z%A;*1`lgk$+RCh_eB2o}6B&{(#b%FKT&G8Lqa;cOp>3I=umb$9H#^_M4hVo~7K72kdh5-Q%Unt)xJ~}v{=?hi+WcJx9*X&*tass4=b60d zrhL;r#({ZIeee-v_zN<2QkF(^^rR!ZgJ@X42VH(`2(*|Km0p`{bK9)OvZm#Pr{hC3 z1n;@l*!4J#L9M#xHiafC<7B7w=9FU+6J)A_dJ+xm*L^B%o^4v?vit_^B?woYgJFV~ z6l2l3w%j!)s@_{`$vBp*S)zf3!{h-|6oNDUW zMVYT&kXm?dGPNk3?4%{EI|)@=MJX$o6>7vy_3P|T7~hz5Cs}r)eX>dUoN>q`9n*4n zky&neU>}KTbjiF832@3Pc4h|U2xc=ThI4XRVAz5t29;f6PJbO+0XRcW_YG{rP;SW93M^>gOOFd-QUt{0w!(8R-lJpmB-2TM$=@%^p67q8*Z2f87sXo8nFyv6) zMmFw5AJ-{mV(@QwNW@FU(Nyyzi4q2@On>{6YEY=kIUb9f@ZT*wPqRNpY}^~fVw*32 zu`j!;`y4aD31ZY2Wh*ydzFzN>aPw8SlZRGdtf(0A4T%x=Act&D1Q`Uq0d<_I`oC)!bvadsE8LmoX}K`usJ-=2KG$bFj8qyHVlCI}5FqO~Ihg4jO6P~P#&)t*+IOWQ3M6Db5(iq8Ije)|?=aC3`+~&5iehTE zWHNrkW97wVQu;8v8CT7rHbQ(5+fW+tm&u%a6VGr0Yl*aIO}woa~#$bj@p3KF)(~kF$6S_2FIC>{wjW253iu zc5DlWb#aMtbH&R#QqimqGY{Iu@Qqq&oH23(5hS;6nj4rK9%f>G^0 zf2`tI&s5y)jA3?q7C($o!Rr2!V$XapX8h8@tkPTkrejR;f=feV6unHw8^}}Te|yRx zUjAASXt~-BHT5kEa5IK)#DKTmNduTWt;`R1&ljcrSI|Gm+1~Bdsq9!e z4cgg3T_de(RXO3C$LmV4Dg?ehWa9;(bkIGx{wfG$z))2`CTiMD&D+3#U59HkIYMV1*j-})Qmd!0;q`bk{uVFP?5+*_RJlY>qvX- zN=|p7MjgV(E{7Sf{@j}!05RUvu6fW-5bFB9#lOM^hRi+o=GF_IJT|>1z^9=2&-AV8 zO0+SN9L0d=_Lh!s=6LE9UM8>m_=IlGKLc|@F%{P}$d4kXltIRBEjp9eSMO^7SdF?P z2O3w$NCI`09%ef%CATZ4V=({C6wqQ`Jz7U-A^jobpYb8yw_*-7K%wJs(!}=IoenRn zzVG3Z@=zpt#-2ZGdVMh=}Sj5WwBb zvHqVUWJ>n>;F@Y5hj!c%$F%Z|r<4A>2Cxy`ht?2?k3Q$4l1_2C{C|JA@ZJL@W$e(4 z-8p^Q!3;NY@ae}HIHeqJ=jgsZzlY536xYj*6I#Ul)XyBK{s8Joz15aVyy)PIw?A;( zzY@6#Dyn}U7O(!NPH^b^|L`>ml+8x*HeO5Xjw_3nh9b~~*DF@|{|)%?|E}Wyw_l$N z?E`^S!6}oNjO?`B!>g-w))cSfv$(5E3~0h8bLWXja3HzLK4+b5W!uMxtva1uhqO+Hv2Dn4}2eBP|v3{S_LbfBWYy8{;Apc(eBiT%&v zc7l{bfyMknzqxzNniG3De0+7Kp}abGa6CTHQOb$**FD|#PtiUd4(RC+uZ!OS0{?$}-9ifgM-tHWI{fqZE+|^>BHe%OaD(=Yp4g$`{&nndIz{vsaY<>} z=bUNX{^^|tzwccdFeN2@?O0qe%>e4f#txYq_$|$Kp7{HC!Tp$51n}6^{?kH0T6h3j zi2xD?2)`>?WNiAv`aPsO|6f9K&q8~aM2C=Rrg4|hU4K^Y%|FmwqVK=h?z&UqwYxrd zjPo~5y%IlC`nDs8+~3(dJx8AsLvC^i|I@$@X;ps$`NPu=aR~mvNE7!AT!#s2n=@o!ucf@$}~pzC~yHw(!lS3pec z3ud^yrKie}L>vcKr>ZlQ|U`+YBb1%YXQYoNCCB zgGbwmBbpOgqdTn6#;i}^A13;4NU#>%{HT9eO+96Bz%$z+rc|9l9v1crXdVQ7RnCj` z*daN-tA+k`H@dM5UCARel%&+B9kh+|solxB(-%DHD$(U-#vwzZYexaMiqDqp{dr z4h{bb;>Z#ZzgY8|0NcoJzLnvM?2jYGF^@fla(aS zw7HzoH%qdZNcl9c-OKcr#PJi!eV14gBp#%wl!mYtB)4o3*Evt(`|sJneywXip0@-S zPuOT&FZND-$1vrZ{FA*HazsS~arku2`RwF^;I>Xprjdm7ew$&(GF#C>{?hDaKFwdg zrn^24;a3`@QtxrO1+Nu9s4e?n67)CCC=^*Qoi=ztdXs~R} zho^`2FjFew@B*Deu6gDs2T4;v9qVxBsZ?fQ0#y;)(p_G2D)0F5AA901r})X?*U72Ajn*q0NU0g=udt*cUC*dYP~xMpvL4w;H=VkQGUUI-hhK02TLdO z0){paQA2#QiK$?63evc*7 zW>O&WI=820y61n)fNyy1%k^LL=?eRqav^*3*V09k>aHK_ltn|_R|tef$-jh?c#kG7 z_(xC0;^PmuW=n~KY1#txXk!c&IfaMZ&0l`#d#criWS7>(s7l$jKFol^sD@P0>ZXSpY^y9pL>$cB|cdMK;3KH&lCI)#yF93MdOEd zx`>JjZ7Hvoth94;psXwRfDWc6?Vo}57Nb_ms~12_m?O0)7u6kOTE;W$2$Dn)+KiJ1 z@F0gc=i|};N$#Sb^8C zEw3A!jrP`r$Sx{VX_uPhzq6`dDQj%uyuEnhAK{}1fiOM^M$`*@pVEDQ*G91Pr~BN= z>dMZ$h*803#SaABSvRePF1Cmh;mMfJ8su}M7mRxsr8 z_LkN4LHLJ={t_$NK?aVLi2=QiJ4epkhtbXlKwIF0zUDgLvscE_olnOdyUtQ*3aeAv zeJBJO&h!~$;cM%&0)%@lqUVMn_-3|+DRmuD5hvPH{)L>|y2lFKcu+?Rc#yTFG z=op_SLXlax+e`ezKu~e2mM!4zd>{1E>sVrxs+z}6L)x7tf zjMaYYh5CKf&KoWnGsv~RIS|XP`em54`6UTLZWN-HlN(Y(BszPELi#KRviO*@*=`GP zD=40K0;LKls9g}{*o;~%{KAOqdx-=?;w3+eU7wwML`SW@bk=COwAv#R?2)`Bh|_7_ zPsICgJot*i#Ns1RPdJ_4(>$uFfUzdoD+iFVj`{lEYD0i)E>}j_rvCA6XF*G!l6uU! zi+Uj9Zwe7BH6^8r`Wa&g`!m~*k~lyw*V^-3U`zD-wX-P8?I(O-pb;+jfkK&;}anW;4WK%()0m#es_2t<4%8&x)X$BLQ)&+%8{Ka>gAVskK{y4>J zFnTQ18Yzz^)%Fym^fbV|;OR^6Gi$f$hPkFMnbZ~-tHM{UsLm`{0_1d6cbiMp8quI{^EsJk%H}H3`chO++?C3X3nZ}snJgY= z0B32bZF_(6DG-@s&uaIKR!;xGMB_T`D`${tMhlkJJ4c$tY@UENh@33GowAQN*lYwT zv^QHsud~xV*xD|%@^ls{9rLszCx_l*;A#XhVkF{C>W_Dh*n*rsv{@s$iq6<9y+0h1 zqh|XtR&^O7&=@UPehg6U7#H8gWQi<9CkY2m$#Zha;bV2=3vGE|kQQgdzy=-OqZ$?F z5kiO|=X>(pIA=K?Vam^JG@8Zft~NylK-Ya|r-Fo`C=${*9yaMAN;;Loc1w`Kb=e!a z>~o&)GzHRb`yldgQi5zbXF0tZ?GtW9j$E_&}*h5EoJSD!39F9YK2vvs*ihDg^<~Jb%F9dV6 zS%O+jj!}wpUE+EN)rO`IGw(s%8tmR$f1F42;VV9X{m!o&fa+!REEJN`GNJ@^N%0c@ z`gfYAIqQ*_7s{(Sy6|sr2qOk5P3Gw)3F|OI0YvA|@bDTFjPqgKDQvGb4G@WrNn3?l^cNY8VSU3mX%{g{;4uY#bv40ZzQr|M=?$dhn}Pzd zuZ>-DO417FPb=3PT{Co&=s!H>Ws3|*bGqO^GAmJ2($GAaRxlJnw;g=Yx)CM-Wb zKZ68o=^z9Nhytd?Z+V`rO8doo^zu*Fdj&kHQqjurd35?U#D2RO1oH3S1yJlVd=2A+ zuKgA=iGk@H`6YT=km5)YoU1B><&DW)Hy8w$fx&RN0Pg$slyc)c`u9HHH6xYVe}X3* z4Drm$L~cV`2X(1dOPualaUufNu)bluN#3KNdv*?I#$k_Qm_tEYBumY;@p^UiTYwiO zBhG}aJZ2WHSh8bFfK_1(wTMU^R>IaLzi|F63V5Ea&>cgDowOaDE%o6Q6k77nW zDk|rn10~>HQ_R0-DmuFJQS-NV!S>C?AFbIq5=(%D-R&v+(CSNi3lebHbtQ`2kM{PP z4uAOIf<~;-Q|l?i${Uty?{y$BD0?51G(99*2(1Wtswm7Ulwv#p;ssXD4Y z)r)1l+BsWnIWKGhH&?d-ccXQ(iwn@XIrD?9frx#O>H53BGra(Jid+g7A82%|8+g;u z;OD5?e&lp_R9imZ5Gt^Nl%>VgroRNmG5zm*LDm8kgq0Tevb-&Z;a^9~AP`zI6ZX1L ztLlsbRq*wU+iwVS+7GQs)t+4oD6a(o zgR8CqqWQwy-!+c3v5x1ak%@Yk;)#U@N@v#nxk}>|1y}|y1C>senzmp3(b%*$zxNS_ z;$qUWc=$ap?Ys*9`*DvTE$k=_w8i#PGG^TbF^H!Zcpy7+KNlF51{pFj(4%-e4Im1F zD2;T!wNvR^O+IO(UK#{)u%@7>IF}JQqv3X&j5cN28jTryATC_dicV$VH&X04j;~Y0 z#ZX+({?7OH6piybmC;aeD<&Dm?gJPU6tEi*Gj9lS376>CK)kmQh=Qh?FQ|D<F9o*!2YlUaG7ac7Fx(td625k58lyHX4O&N ze)%#(F8S?;3GAIvJBLZpkzatHMz^ea*!Tcdo@?kRUtz<{Pu9VH`PcO*7GQ@RfUTVi z3?Lwn#c(_pIj5H=Xg7SOk*NFdZ4!kw9_%Z}`*-f}&1 z0UrR6;G&7Ml=pG2w&AxoWCPLi&WSZS#l3fe1Mncfe%Yk@t;RDD<~vo3JeRT~A-(YD zM79{5%%a;674kLiQ<^j4`GWBtqYfX=v@L`jHM-I6!VtqHd!h_lP@ZGK=6eEwslit) zd!&;WwsJu+Y=c9t?-w-IhPDsVU?WYkDmSlhfa@_21om;4zrbqz&9Rj&EVhLe_GR|2 z2tHmP}^uE`ucBS3-%c)*Sn z*c%PbRu7C?6{_mrPRzbnga#tOdi)R1d*sf+)ryweMsID&BNB=buMFal!8Za)DrIi# zWV+#(sb+Y;ivYYTt_;oMpo}Ju#+qG4g6}$x`3Lpdu}Fg&Lza5snkc$TWCB#6m1oaz zGsa9PC>HX%tRw214_0OAzIu|7(q8T2X1hG*zghz)dsl#EFxU0-oHL2jQ`FOea%jYf zmJXrqVXx}zLT?1#*0TGk9wTukK?54{0PZU(+YJw9)M)D=C{!p&+X6z;Yqwp=lGf%~ zc+7NczrB%#JSHNeb^Q>cUl@WP&qzG(g+#zW_D`k9oUBrtBbl>!S~jU+rAgO|r^6=v z0T<*~UI+POLH&CSY;7HgSFL7C9#ldaMLGl<zXwVE~a91qp#cq+97$QV>u{ zX{4kZkyb!NV(1QGq;qJg`OX35d4B8t-oL)@kKe3yuXQ_dUT5rm>|-B$Ul(iDgsE$u z0W<7cUSDqj@VC_FBLUE`Br%qwemA{Za$yAqGqRg5wE-)5=?8Pt49pT;Q~!KDebK>-((myuf2M()QcE58Ck1rrQr6pu%!9qJcS1- zPOf|W?n80)kjd?1gr)0m49@I%{E-Bg!199)ctp46gzMA;MmzW>0gq@vZ*vDL?($h+ zWPQwKGyu-)5S;rSuwfT={6^Q7!xtTX#!5QZ@XN?%PnHiZ+vO5nfaK2uBdj+)(l`HP zu;Zko5#3(B0V|q*CwUPT;DsB+te{!YFR=4VL4enpM$>8k!Gu%a;0R{RxDro^7l=yv zX+OJxL5pT?TRpnAIRfeH+4ZKslengPp|216+T<<;1kOL;-5;Rd7(w%D`5}p)BL&ia zKqO=Gn@J5G;k8{lx_D#zXY(&hKI~gqm0|;H{y<8@jN$(`3fa+Ku^OU4v+~{pxVCIc zz-sZM{>j+g&)%%c3d#loHb&KP_h})%$paIVm$8q`!;RiL4QM9jXbtF$af2mD-~<6S zWU)lRsL5hQ849PQWxq6s1O-6f2d;D*@{J(MW7z2(ThWxK*JTtwVHnRycWdL*3Zkf$ zXT`UFzWU4;e6g?B5ph87Hv)m=uS;fb=WN#Lb2BG@w3OO*cxkNVv;~cQK<&wL2o{r! z-Vd5$M9&%?t0d3+00kuUX9Rk8VMEwvYmfP^CjC@gdt4K2R>Ix^U|(_L-rcKdGH{AV zXUn~$3gGu+Zg&N(T3k@zojIw6VYVla$3j|f5^sL%ABiV~q?$PxK3Y}N-+drwN(^kc z$dW>mrw~1&?lAYfuV7`Dnu>j;YDT@n-95f%Tx(+MlSh5%)uA>^f$>&g3eiY7#Q4Uk zGvR>kJbDlfi7Tkw+vUbd9|L)Y5HAxS)z~A8ABWTsxR_I#ujNi!%j>(!`od3hjuACK zeT3>HIf%%>C76ZDUziR2`jMEMCW!t05`m8uI+Wzvg{|vQ*{@(nAvN8D*|f}wg(;W& zN-eGK=;>aBSWg2xD>Qw8!pM!P&e!AKR^M%$c$F7CZmVZw{-aUd2={ARLj3xKt*dML zRf0p0%WpdG!=lrwhw$N7`~z>r0ii0@g)gnaQp1PxVo%<|2ND8?-vK!qCU_9|=Or29 zP$KMnyh|Odgr$Yorr%X}cc^_QrDET{Mi&CtcTz^)e&zb2mz+xPc|}^arS*`r8djF# zS;hw`*2}}`P#x0wFwDwerTI($2ZZ$o&c}qwz8AD_a3NH({&cxxQAvQ|Joe*PO8co^ z7xsZZUUSps(hJPwnvH!#*ya@^{;_Y7wz-f)e z+Ys_lREHT&I60t(aN(n17XZNq>Xa5CC&j@yzRQ4bv@~5DgI**SWaTRWM3_oN8YZ)> zwsAGRO(Et}_@}49CaB&DKjMgdm+=L-#(n!+uS_adp9$R)p>aa2KTN^K{k{h5KZ}Xe zRP|~cG>xeqbBRXc0U}NcZg;w8miSb7SxwDAzD_9|9vlUhk?O-}Rqyg8)#j(55=nhWI-3aSK&zg8q+J6vVr2TX+YK5?*^4WcA$S`f){r)jVTxKc1U;4 zwTq#*amFeOEvZdP5-#94`As!eVK{X!Ef~M#*y^FcI~_Y1`8C<4gOs6*bX}hW{O=Km znNl$snOE^eMifWv-G$eBY%x8S~&2jA4GyY7hB-b2UxU zK_w6LG*bJ93u+8K_Tmrbx1F5o$HbIR)KOe&_N6X3bj#9$iBG1qxb}RPLZ^rH(}zN07!8kGW)) zX$%L$Jge4}z>dH&j%AJI!DmLInT}U`dz(-q!6`R2P;Sw>O{($u{#^T$S1`Vzo=4KM z@K17_7wksAe6+suGEGLw{?m#>&eu|4YY{{(xRQvQ!sL3ZUY1l5!V#-vY(@gbIw_0&b8g=S$cl6aSwGh{*DF{g z@osBfHa@dnNOwnr0g59MoSWj;G<5d{2f9A3xA(oL`6q>4lgN@JKCnBIua{vHTl-Z} zs|&uffabpgkp(VWO(?JG-uV1wq4AFCK&Pl50;9(e`3bx9q?jDlHvEwuw;RN6nH(1b z7gpP@TcV((1dY6@d!JgoerE?tf~#ABYITF&gXnqt+~^#@VC=x*-tUY!F}`$@MVk;{?)}0C?x*1!Gu^x;C$vbeV8hITC(D4{>$0L(=aMt5<<}M_&u+g(kOZ1lkjyx44Fi`5 zLhfv;jIG?^UAG%;_I=*xm30kdMHhn|jJ|7Hq64kpESQ(SzJ znf^$5PP{n;5%iJH*#C8xmSdw7I|m6Q3c%*Pgham_-?S%49gY)#_vxrbs6~YQQYY+s zgiDGDIWhb5M6fDC(sW`;5NGbVlP3NGOijz{B7_h_O+f72&oR;~0;)(0xYo|_D~Yqx<0U-h-RDrr^2} zV29UP;vN&>f!jV$i?S_W7zaGKatZV^qf{-uk76`vCY*>;?bwNW+AwPa|G6pV*m9JV zRXx}TiELu4f8%^$*8q5oP7+A!ASMs!7bb9`WkJ1g9*oa;DG*K(kFz)dy8t1~fAk=O zTi5~6RZLj{!0;=tV6H6NkCw_0;R}*$Z1=7`jVp~)I-t_i8o14l@T6{vF^P8Wb>w+Y zxhe}c4-np4fBa_oiM;EADPeTmNBqs9xG8IV!XyGhETz<5QY$x5$_s~X%fZ~IeyI(7xTf_^~0>*=qoTE*4dIG;d1w}<1B{q-<``>B)N!x48(ia{0}!t5k= zm0VR(jGXWeTepTthP{>5@&sm7F;Q{vH_jR^3_xMUJpE|xaI*f<%cat+=UO8>pawnh z)SU)spgwPMp9u*_ot#uNg4!lAYV^C7g%HtD(KparKm0-#3);BXLl>{InQTfbab zqkoH6x-U>-epFcV>)(=LiXekbEs9HD!U|b^^Ee0?*Yu>t+>JJUw7SWTeAo@^`5Cc4{}<49B>RT3LF3!0meT`(V8`xJ*D#Tia_Es&Dcp0|{9 ziv4)(8L|W;A-ge>n_+ z`Jj*8xfWXaF^Q1F0KL4e*YYV+c;1YjJ<8@I-W5rFe60Sff2RRw`#6IzwpMf0#mA+y z);9Ug#2zRv3z5x?dmZ{hn-VHEQJf?mUH>SWx&)7OA}h+4Mcnzp7s-6u>mF~Go6P){0+%?qeHN9Ugw7{I!qOin5s2jNyYf*taSo8D%x&KZcQOT7a|Gcbm{fP9FGSxqLKM zio|cF>t=>n1T`*psFka38&8ft3x8W{Hm%fZ5pH>gLEgLT=TIn2H*4g z+l&f+?W+Bs&Nbfu>SE*NjW}euiy#&o{;tM6lOn)yn8G}>E;cNfXH|qcK{Jr~Onh}l zhz;ZXodK`CA2Cr_EArUwGuVjQjsAKV9G0Lz1MfvZ)epI;G&LbzJ0kSdcB;&@TA}oD5TKwM}ae zM`&4T$taz8c(OBa24AJ&ZzB^3?3*9fmCmQTK{)M5D3YC^$!1E zXN3RlAZG?i6_0YQ?c1Z$wgu(`WtR7f@SB~}{^lw^84-M3-}8HtblHC=#&x(4GwMd$*UIkXq>(v(O&lSdH%2}iH)2r z0~z`gDo0AKHDqh?b1pZ+gG$ha*U2Exdn`COMNo1F9U9D@M8eFhnnVeyW`Q=tgsZ0` zq)x9gs8IdEQAlNbeG*B7+T9j&(~^~?%Wj)N=5q6h((`ubkzfBHX*xdWJm%-7^+Y+G zT2r6FMMz`rofhjzJ<@IZ9vLb{Sz^cob>cS7nM8T11+oVCNWjmsnJ*@1P0@pf@!M0$ z@uX{`w(er!eNGjMgj<2vqDTE%ncf$K3pJ3DAy{Uy#vQ5s>I`@`~`R%<4EKzj_)Z^As|4mFDR?PNh>PF3#%B*!U{Z zBooFJxhSc6^~8+K!8vv?2#Qtb)}Co2_%QTG;dKaS1UbcM zjY(N|OTWfb%D#qHH?1Q(u<0s3&oyJk9x_hFStg9sxA0kBVQ~KtP8}m!n_YhJ!@!cV zx@UIQq}J__m7AyGb-Nu8a-zn?XMfmd+stjN554|ST}h2a@{72DKQc3;E4Ny_ zetk%(HNJ1#IZj5l<})6>WD2xbyKBidD<+}F!C<3v$Zlp;$PT&u*=~~H^29*D+q5vV z0XXJCBBja8(HA;^lEAFYqvU+lxEy`jsoh=D-)ryKlue)gw!4n)*zSF$#D`mx@yff1 z{q0q-t1($4hW&BAd7Jsd)l6{Lci((lL-BVrHxTXe3mbm~n6lcQYw=LH(dkhx&bvA4 z_aVmYCcm#w9tlo*9-A5=<3xE@vVoPk+0qU%j%MaodzKVSW97dE5yE$;S372fnK{w^ z)Lpf-ScWmEC>lOr?4M;%fv*k;T?#aiZ|=+G zW-Py)pWBWs?-)?E6tXNMBP;6lq~y;?LFDPYOk1x%6!dmOh3?O`@KDfDSt`y490$9K zUJmWt5C?;F5kNpUow+6b@@qneC0o>|@xVrvWo9dv+whT-2uFFDJ@}fI(;v5Ll-Hmzu?GBo;Pwj<^TT16 zDKA>XiNtCwu(YBd_)jEe9jo0yrdNLO&)d&hwu`#X(cj&#b z)b_^2le;n%@t{I4La!A8=(R|46{dMG*)%HOSSQ&U*xE^V6Nlh9`& zbt47iZx2-{)OqZ2dvA^T1fw@*eS)Xz{@6%ev~~zpUo^#!G8lS;T|DR?xBws#_hFku z`}@;X=YJCCsO1F^NLcD#@j^;*>dc7K@IyKm-rlSbdu3`YUeJQOxZjW zy5Fv|nt{~o(~|Y%+b*7V;?jtXjYYK=d6PAqbQWQ3|Cn$C=V4lJ<%=PCS-Q+)D!9FW zV4R+N8gE_Ya@t?4(ozGke47_)wTWJ2e}XPp+WeT?Zk8jR(fRHdX$KEB*RHAOzML+z z<)WsOo3jdSr@Uo4^)q|(T(>pF(0+`O@iZLBJuxuY<*yP`JVfSuJy)~Gqhd~Ds^+cZ z9I?=hEjz(GhU+Q2yJydMMv`Wo@^d zthnB6f}CC4b#HXhG6cE8qjbZ%vzA6NA)(fyS zWVplskvQXcYw^v^#qdN*UnVk|nMqSlCFe=x+fy*EN_qTHC|Ff_R3_Ym=Al_{FD&Jr z>=w?`z$0{~Ka@FkX#e^0F{jw5Pb>La8O_L`;(=t5zZtjpj>*wlr`;j- zqo7{zYJL`B$M?H0d@DfVu}*n_OjSGP6o=s0V3#UQstq!VJ9R%TZ89?_S3rsNKXcOZ z&c@H?vNAPQ$`?gPjhc_r1c7Jp_f&F@jVeq{MVnODAM89v%`FQZ6Ix>Ro{SF43jLT} z@HM(ZGgH5twS>p@7yMss=DSV`^3jnUE_d_xN6Dz$43)V6s9rEqD7G9OH5=sSg}$Q+ zGN|-ixQ+;^ES~+TG&K~BoCb&B%taFzUwE1La?}Ms0ANX(lmsz1PtR7{$YL?N@^Vk? z@;GP8H+7q-smbawBuKu_5H>9!qR#~O_3CUtplz^RNTv(UMN*?8Z9{=xhgF|0p^bTwlS%SK`DRK%Qh zP^tdiLx;Fg*TIRhVDrhl4LwzD-t;R1vB`&PHFn6E$A-H?A93IBA}d7@eoh2ISjwQ$ ztngn5oY6>EE#vzw&ed~={E)22`rb${J61uV|9$2eVeE}T1Kn>+I_+1qJaQ^*W;553 zb;zk2m-U%;Loe#}$+~4bILWP5L}#YR5FZ`R2CfWjJpNzkmmj;dU=&Zq=)YZ8iOrx) z+!WPc+Dq8id2pwDEnk=(_ zn=0X^wIbi_jGQie$;r2!oXf4Ir+ZUyE5BHo`lv@*slB`X<)pUvaIfID=fGH?X$9f& zoJ+%^ow~&RL#G$Sj)R+?5Dw4;HYRFWd%c{K8nc&;mX-VyyDoYPEksm$xTZ)l{G0rF{A{ro!m8j5t-w6ip|Q-(tWS!Ana=kMz61)D)Vquv~E)N}-LP zqiOOPweiL6V=%RfQl5K9)txn;{nd?_n^ua4Zv1qWd0_K|z~(iVf?fjfvbwI~D99#e zXc?0-!d~(&e&6WR=q_5CTWK(AG%%jXriPf6jS|YW|E^Z)X&q+--F9ak%V0#)UfQZ2xJ+RxdW@*OiQaoc^_+;e@go! zZdYWUPJWjojmflxa|FMw>?uDYL{p#R^e+Nv7el{(8E~!oQcnDsEl6lH-~418jyHPY zML6|CjqZ(~S4<>`=IaYNRd&}VYEI>jQ%Zw!%t(#Jo!ad8An?k{xwi!j-?6+b#-;>_ItRs*3l)EC)H&22&n=(GmmUF} zueb@7mGv<0bd#mq6~1BHeP#2MzyO%cbULN1IC-n1skHKDaSr?iZAO6^ZXLpEO3Ag9gG`utQRQ>I zq~zKXx+Bu@Rv_W|k>nFQuDQO+u&Eshf0Lmj2~t+J7*rHL|-Us}q<{LebO-@cs|F!kq+t+x1N;|U`F@tbE(Py|6(_Gfe2`6N>M0PzcnPYi>tscn&_17s1MBQ1Qoz=&!&W#6%~|- z=~)K$lX}=mt8ptheq?2{9Z9Nf6Cec4bew9eu?;4twTgEyQS#lqe?VRKu~#&210O z<=$g?n*C>HJ$#pW#wq#QV*|p|9pFfF^RiFRcF2EU@cqzCBJAS9^&2iOozaXj$7cJJ z`;z;25zP{uhXBU_5EjpA{QP4fOV?HM;r?>Bbn-RsTbOA#T3V=D3tr@VGEhq_iU|8q zY#Gla()z~gw@wcsVGI5xqM|oy_6zdgG8%QS(7eJjFDLGd^qf?hX7agy`tzl#E3O~- z+Bz8pie6eq!4=9QCsf8zjw2D&kH4F}E~ORlA&EH_!Ku3TuIFX$((`R~Lk>g;s`QP? z0!X_$%VxHMg+qcTJZw!lxE?#ly;3+G^tqK)+emM6pRIiAPecgQmtIw1O7fbqGFMhd zZOaXIDdIVh9-F^CFx3C}ji~@M#cUoM&Sg&X>gD|c7=n3bq_zO&rd2yAdV1%fQjyK3 zdrgo-en=%PcdP^iqHjq66E^K|&ir6Ka#72|b3F{zhGaV?GH(Nh>udl-Y>d|uKGKYB zB|`xqL7eSb5bTs4iAJ#oC8SYe`HsX#Hc@Ko;}h}Su-U>1r{>zsA2t^;?ciVEd^<@y ze{TwXi$UU!lQ?Ybq=6m5-&jHS$EdNK7;|=wa44IQkUpY@9UBraMR~P}P!XRdS&+fj zN;mCzzV~Q^D~|(tbP~k4!~cGE$TL|jwoV2ASEYX- z-#TeF3%QjkZMk&(g>SZ+*1d)i3NhNF>q0anML&TFu%t0vmsvh7IOJE9*ABWNjj(+4 z3Xha~RbqLeF}G@x<`BPKHEGOyg{M4{0|Iq*;t92_WMGXRqPFy-%l77{C*v7EbW{nJ zxnjZDo;w14_B2lVEK1OjSg|F%H)~m=fp*slP2P)OXt_7_Syl)3qCrZjfe~T_x(2JB z<~o`CbPcBYiF2qvIsHRSzM0(=zHXCE!Z8}j*KDLhH3=YhW8)9eSvP-9mi<(URCyx8 zV@`XJUfv)yU!}DQ3<>Z_-iMm8L1ZY$F3P)OXZVroWAVClPURI4Da-{;2sOt9lvQAa zI%6m6y6E+$8+u%6?a@}L_m!gRhHmY!F?w+dZ{O=8XdDG~fMy6wt+ zE?B2vPT95xR9hq8xgv-}&4pQ%p95f4 z3=Hh8nzB+j*zjPqqzw_abjx4e!-8jCCc^rhbp~BpdX`-l3BwGVVcs5#)wAWXd6gZc zoHClR*<3J-WE+dhO8Yk?Vw^_|Tq zFE6+K*fdYplGaktEWggt*qHywz61vgvY=r#rf#`N6GhHCVkz#aJ22~@23wNV6JBYH zWJ7^n8=p#bf4{q2`Go`HZD4+TrM`n{UymJ5(MZBl&)q6mXrI2roVt2lKusG64j|cy z>crQ|FR=P{W4SbHI(i+6%jk0@f`Q0Z)U7DfWWnS>0-`PEC-(E=gkk|qjEoNvnzkiLw#QhxkGW~l?LaedV|BX|hHmf-0`$_iB%&c+sXlq26!+UzKPF#s zs?{UxKBMP`@X0oA%8t+{+?d^YC*nY?Crr>w9R<1VWx3obU4K4WurPxNg;G582Jzs^ zOs(YXS(iaw-H_GPS=ga&;ICYul9+X1(E&VoQfRgMTgRdM%AGd?PfC}5?rrt#Nrk9M z{V7lB5l<~={ETfz(sx2q90T^aE2E-2rE5gAJn!t_D0nIqp|Nx!P9#_v>#LgWL8=cg zUhQ5ho;^VS4*XS7M=gwfTW`#45HR=p6ppWHYLX|?M{V~Q_xL~IzTKUA%+;=$Y(aft zFsYlg`}SYY7X@k$ACk2(Qk)~ceIvitu@3dgg6pxcbw3hLK^e^ z#i*ad7MHQ|tvr6@1)Epi(wA-KzuIIE_7GuuX$63QIl)kVzT=S<0W7Ity7ZaKq0N?< z)WJ$C>ML%`0i4{F2qsAI&L<~gNIES(gEm70_C&Rpm5Wn&_ zMq_l%zjWJ7u@xC5+{tNu4w`_QsvS}f(`D&qjP4@J$_+0Pf@<_es>X7+s%#<%Id$8( z_w=H9F}Q25wsy;Q zAM-6{Yz|yDuc-p*c#J`L?k<|47J4j|MW(u$onm&)e5G~5ScHqUdPR%3Dv4E2Kn?aZ zPSS&|(NQ7r0Fm=2Sk*VD+Zk=>$7Bvr8Rh6ghw#Nje!o z0u{{h7UT|Ke*+R{8dUnI+~$=qvop&~s0qOZWfgp%ci)E{fwJD~bUzhlaX%rQ~{x>m~F9xh-wG8_+4&j9hu~hAMh8 zrrJT6s$cMEE0u4wAJh)-<1-#a*}Vm{MGfJwT(Yeuc_4hU(&|Hz;=O6Ht`q|h1?ZnV z%ro%sZqlLTzQ8UC|2oohMWu=kUH4nPvMV>lw|`0d=G+I;S>(5OGHPlnCrvZ@L-LW< zSvT8N84h&4SvVc_mwSvfV-cL11Kgd9;x%ig8>hgK&bV`x0cf>ih>%D}gSd*QEQ;H{ z&%PW?`%34X@zTYX11~E@wgOntVn7|`4#sqNgB-(F<&v+NEmrG~-B^*wzd)CX69P&M zYX=8UETN++N|#p+u9AoJGoyN+Kien<>qC@1WuxawM(j&Wga@yRb8xe^zLA5z1SUQr zZ2DwgoJ;ej!gB&m{k3@=2T_j1t%aWS{ohE&W;RCjb)ZkPayQhy5i)Fe4V2=_trC!f zChX?uB~f-TVjY3UO%*@t4J~gY=zJ_xTUFC#aNvgw5DUi46z*+tIW{&Y$5@N>dT++6 zEk0RN=G{gI-NCu8-VF3^zyF3!RSu~;)TsbH`^fcEr~JY27WsA2Bq7H*vzEbr)YD}O z^lx!`9?pkO_in>?+$`f0uCa$j)T1AGk~6ksr+Cs}6xH2?B4p>7kON3Qi;BdqT~A7+ zwdIe6UM2u!6uWVh8q_K4TZ?<;(|ep{6QK-~)Df*3KQ0Q=gvB>#2O&|z6Pzwl2HW%# z(=v^mL2Im-G%nR{*-=77zotgbc>lMDgi;tuC+&$~_M~9%vkm?Hql^u6sHlbKR5lXj zn%&A&@=Ob*{r;N|u1oEtl!$y{HX2>TJcFQ3;n!id;-KfZy?P~n^5kb_7bo&bKbK7A zW1z-VT3cJ_l@B6Z{p8ItqW?ny2rncN9RS>A&z(q*R*YtW^h<#*vIBGuU`+j{T$VL>u zoN{7%pl#OXwo{U!nu1;V<^^hNM8kuRgZIIE^e>sTy-$VwFOh|R)<2hrasABDT+op{ zD$Z^9dYRUOkwptmHk!7#ygzuKkWA-07?4t3sCt>j1i$&0ki8WM>Ai zd2kgiGGhmHG0zP9OXU}mZ;HJMZYm$2-sVf^n2$7;uNL`$8x`Kuy0|JQ%43~5?^@Qx zO3&ZbH}-)xD7L-d%}6SVva(mi=*5yq zJ@+&B+1j_sX3|W)BA|bS0Ya>8i7F6wSAVRo@dQK}MAY;wE z0Y(kzzq0MvB^Fu9f=Mks=!yRo`(r?RQn4U_gQ2tN0Inni`0`-*nMr|vR$*dq%o)ak ze>Pf!Y(e)g%LM;^3>czSQIDvjVI-Kio0hesusC=d^K!KZUxh1!#J;oS!^Ho=_rRwl z;Q4J-QGjOvrb9y(^upQ{5VkYu2A{$;!MJ_^!RyZ_5{UT$=NUf&-yn0*wTBS1e>xe7M_!%P9 z#DSZ4Il1k9;vC6Y8;D5>A$|*b(3{be%Po=pcUbW% z>7c;xNK{wt7*&}37@0$_tE-(jgA19$Q#r6m2V|O$1VhB02Msa3B2&sSNX}ShZUy+R z8S^kZZ{@qbi@AI1<6s52{0m_ByqqRPqhh2mZ^GNa0HD5KH7|v>7&8W2*Y%*p)O*-d zYNC}j_`t!(pk$Tfv`|kO z%FGO0T23?8?92*%!@=N@!MwK#Ao=AeS56DOW()uoF9D+r4+b@N*YHkjx#neY4r0;s zvn&Vq#Vvp*00Y~X3=IYqX};F1U7-FYnYCyIHT_%29&k%Xg)_GV%gDQHj+bhDZZX=< zGB60i$MbVAxe2{geD^;e%Y%G07G*+4s>Mq$V$8ro!0kGmjc=&{(iH=S+KR?*huy$A2mEB z+Z_8M{PoUeU+{iU|6v2@zS$FIh>0-1+;gV^6B0g4gargmi{Oe1(L}U*MPR{;do_$b za-rT@4nt5U&M>J+IT7EWi(1p=ho)orkBhR-DfJlTDy4Wvb8K6@qEU z0b*2lI=~7jw$GYXuW0$uboXEL(l!R$ApsZ&0DIsb5y7EA&S7CD2DNP|VBr+whaO}o z-G5haJOHq%eHDbjtl6J)7WAY2{{Xf41YCG9_}@4XAj;#xiO!Mkmnhg+tG{U~m;?9| z1z7oC3iX2iW9<^)VWm{QATazdhW_hi*mtDElriKN7NBor7)ukI&mw+E`uB-*zSlrx z;Z+BfsjQ9e?{o>ELkBocvXe0M@8ME(Y(9fr1R9BhtS19mD?lv{W&;pAyZ>|iyecrr zx$a6u^*{bqo81KF2S#-{A64|D1)lF7*sW|b2hmnj)&kgn0*`Mv=&*xlkspjClsPRx znGMdz)@A_v@dW6Ho#jtPcIgtn1Y=;1TNJLBh6je$?Xt41B+Ul>e=6(40e}ZV> z&(yj}KoE34zj&$?D6qAgLl2t50Sw$b4#&|pIiCpwi0hvP>B%iw#4nAC# zSQLzle);oEJ!L)S<w+nyB?0_^+W@BUA%5lLNEQJS7-{?#d2M3Xw~0J~tuG|SlV6O2DJ_Opf;7gpp>-OJT%5(Q z2lVg-MN7QV4`oi~r>H%ACYy6A;s>hP;HNg1+Xnn{z?FFr{aYS*I)c9?gbxHy#W=Jhs)g~bO|G2!8{-oubw+QhHLst z=PYf4hNo=$a^kvkkI82T9)olLpEvV914|@~`R5JrPndzl|ItcJ_X^hCy#m<1e}?62 z!U}G*e~1ZTz*@FI&zD(?^SNCuV4m6q1)5yUE7(>!v5kB)AQ9)qFkchQP5N9wYEb~9 zC{U(X&p|5&)bRK2(1d*lRBL=s7(xMLO|bYgbZV>yO)jeOMrmz_7i1enDmZ5os9 ztl-z{Uzy+&=R*x*?f@A7dOX|gEbAJco%^=jw&-1aUYleIWdPW?JE_(@)kZu zgIU+KmK4)@-#HHVdio#BwB3xvuEnXMifBDK0a_H5FxdMzAoGJ}!9;NRH3^}zXi#nI z+ce{howvCEMeZOE+b(5bt1MV}(Swwe{_s*4WfM|_kpUs1g66DqT7+`dfJCOzR|h;i zR}@IdfB}KJkdEw=j*^@@aSmXeB?ITYdotYu4<{<-AWf;^e1g2 zDGRri6uqptzy|w#s>HcJ-z1Aq#OGa;6duMFT}1%zZEq7xN4bhmbyN9+rsh0)6LE$J z3Mm@jfwy3H@JUXAF4*+}rs4@hhWCb|L6^!3SRCvGE{)^;-LgS4eXHetY1rEq+2s;Syd4*y;j8M%3(j*EpAUr({S6KxueSn&n zo-aOQaj*&IksAS4<8oX{tHFb!mk ze|16d~e12k`ikkKwXjras6JiD*o|GCQe#SJTT>Z&7tKg6iCmmw~rGT z<(m;%ljsCc(nS`1T>|yTqg%&lP->l*hzL44mXkgD(|@SX7^>}ketb5S*(d0A<{3M& z$rGK{F+kYi*nc^UFKCi^LodLMKQ$mHOP58#qJ^=t@ZiXtAm{JM65hr}Oump~{B;qu zPA|GCY+FNV=rimsWM$v9A|Euc%%q7J5=P8>x{A-2Q-{g-QQDAz)ANpKnx^4uXaWw< zzEbxEw>=W;F33E^iUJ}p~w%h)v5-C`57}y}bi|L)QF<35)W&IB9Me43#>?(Ei!w z<~@8u04ng;U~C35HGrgX$OzxCVAtpi3gC=u?L08)zI_ta~YGYJAMrIY~ za6U)TVjRqc?}JVO5dHcd!QB2w$_@_zY^Cx2IixsZo))X+`wtFNl>j;x%baTg`T(+h zuk|@Bcfb4h6)N!bB8c{b)5Pfi4H9EO8wn)8_$)pCpO^@z!c4IROtI&|a(D9is5og% zYtNuGVPCnRjRs#ie4xp^(7()Lj z7_tD0g#W=%j5LtH^t=Jdr0S7u)}mvnh;e@{^?1mrst~V@YMuU`9mv6ZpD#xy#Dr6N zUAzhjjr}HF^KjlT%rUuYZA$rM51K>^M6VKFto#_q7l(uWR!s#7wvKjgHm>iy(b%2q zm}fcTShwf~g@gQWCkS&_P<|kkTKqEpYL~-=QLiB12ZhP@vKeu%hBa0e-)k7O5@lQI zvb(70HZGvzbADMm^&qs2-jGbBI^s$$yu z93T&U_kGVAYYbB=6z!gV)~zDQ@|4G`GZYBJVR%AxSezfq31`x&}a$t-EK4nRe_&Rn0km6dTg92`7PhGJ)~$GjI13t4!Od<~Q-6J?v1$+w?8gMYH%@ah~| zbl<6HPsW-6!aZmMO9x^XGcg4nm1VqtyCJ9<0BIh(sf@~O z5bwmO2Dcd_0jEjKi3O^i?@6Y8mjV9jIQyq9Obg!UDKE;{*eO}r=Go7&sF7@1KAL0H zWIuYY;kXDXb=-;;KAD(8dmSx3ZU|{WANo+Dcc+fV_6Lf*o%-h3kRvKXHx0UWh%VTh z<>)N@;vR~f^4=0;X2$f}yC6}FU1Tb1$(}sm#wgV9wmoL%8PHYWn$T%?uh}{IS}R$} z`sf`_Rd*ZLw+6J6KQX=fv<-(%>&L%cySG6J5Q)NNY{q3256AQZM7FBU^rX{zJQwV% z?cLfw^l{I&Glu|=_T}hhQ~@__tQ@0PCRD8TEaG`~A4gf}#Bk~q=GtDkk#%8+{fCPF zc-55~l%EXRD6779OpM&;ooKfr$(mog^R6duHr2@FmGvF|;3N|lW7b{UYUe@LzOQw6 z)ql!a8eVX}dts%Js%sx)zS|)Ampv>E!dm++^~L@F~h9ql+x4h@@$HxygQG={g-_!=i`x&d-6u z?(U@}%YBsEN!7ZOp(jdizfoL=CreKtJ< zxbWWh((CGQJ_T90nk^9hK(?*{+b|15{-FY70ZX`V4{kc!j|7mXMwm{{csKKGVYeI&=@0P5)WU;H7b3Rr)8-&IW2A+S0G=uR?C#`E|a;O z{^qNZU<-U-dU};u#=v#;W9X#h>E-)7IgKJgoa*+hxTp`x1Z?fsxFH=viNUg z{(>Z`iVJzZr=YUNZA)2~kU|H99;Qr~O$El{5*WEHDgv8*yIoqLT~@94Tm4UCS~y5m zX*v&7IV&eZ^%=T8PB<;x7tiseTrRnI1zV$_zwj|f1um06&%LCGj+I<3ODmM>EYH0U zE>mehrZM_gd|(jTC1p#d8|f@tCNr;-!JHs1inc z(OyKl;H}v#-9NnNLn(ROlWoB>_DRb!B2nx@qCVZdZMWWvFIsumk|lj;y!Uq2vnZ&Rpx+!U zxeUxiJ1cJA%BP|_y2Hg=4vzWgBwEu;va+op6Q`#*Da%;hvL#B$GqWm`RrQzu-cUS`h>gjrCHysG^yn2C`fNk!SFY_+y z$+r`X-{C@Ho5mOmK2vUW^OzDD?CE*1zce-#l#NIsX8)$#k}Y)rJvvTrAn-vitbYS= z)vILNa-qZuACgTj)h?yrUPw|(j( zj9{~X;Wd6}E`+7jBJ~fgnEdbgsW#6jFB(m;pf0lixy~7-M$gFY!P9NmlG;5XUv^R@ zVavYug5kTZ*=LXHq%WT$rRllewASexMznwj9}>Q9slLC4Bp7=tYLy#3S`c2Wu0iI) zh7(XI5Wh3Vcm0|FUHN7tI$z&cVDt(Ol@|HI+~N=henbg`KC1a_TkMmd5e+1aK1U9{ zeM0_SSn`y1C-;vyr`EWkp++xNS7$vBjx0u%1ZOG~Qt#EC#M4lXd+n}vV7Ga!t~TId z?7jT>k=P?is}wyp%pVMLWeYPso}{WKo^Wq1gnf^ieOs*V-SQ3A8`K_DqSV|tqo1zh zL)Y51?flmM0h=B}W(6;UUj?&wBckd)L#xY-w;we{C`rUjJ=1N@a@wY)l;+g8+%hB3 zHZp%R6Ex)ZjEeZhP?@qPC)ift3sHp%(^zVqsWP_`sxKuh%*}8%G)w><+C_7Us zZf(MS)a`$TTPb&Q4Z^P!O_g8t({XTgB=-g#{k`ATg+iS5A7CbJ7G~;oC|*rWWTdR4 z`iP@qM;l5`@iPtUWg0*XZQ2H*r&mHLCt3A{4G5SPB%`tByG<7KyVQhn z`GikQ2;Z}fgczlHe3Kcu_IbyJUPb(}4D9W79CL;p$;lBpW-G17kDKNUZ2~djf>mN- z%|JJ9hP2V5g_y-_{sk;#oqx54FTpUAtHw&1v<0#1H8i4IpoOtPGj`r#!5b^C=?Yi5 zG10!{gZfa~1D;{(hcOO(oZC0TIpSMhU0J_H!SG&&uI4sY>dU9|Zwvgh*BW5+cUU#zcF5#12+FeymDLuX0+FZXUzSf{=N(Kop|fOmTyhzSo5BH@{2O}SUYA) z+{RqR7AXDrN!<53byk?){WkMKl@GzSn+ub`d0Jmo{8`d4-z-8uQn*SA86H@z5~yQhcs%3)hw&yO&fwgiSg!k zd*va0(miKms%QKsJ(|~%z4#~RMp=R+L2O)AW!6htf9-1R zHf%m<7ep#4YU29C!6n;-{xt+4fv_)3rBe31oe-`mG$?hVUaB10K3L65(u7X2`DMLwx2=nuf~2=0NsTie30?z-dvO;&&R7FuQb4mblqq zjq#3Sk1M%0Dt@f)_&2BR7Ibn9QBkH5%7@K@&JrPW9gBS)IXRkG?Xb>dMQla!9Y2wx ztc18u3hU&W)Y6{GhpS>$*M4T%jMp?*pD}mUmk)}9IjJD_ad-W>_x0Nd4Bf?^kpgT8 z$OicDJFH{lSUSU72{nidR4uomS4a$~tbn{dX8vSoAU(AQ9+y?svz=OA(Efm$VbN54 zhi4o0TGn2i4rf;7&8tg(tdq>0UD2B?V=jIvidxMR$xgzEO-whr=&i z=zH#ESs_|I%p&9YkCxj8+ZD0g9!mGfiAQrwuaa())ohhD9T*-Q%mX{6%Y;vS1t0%N zRy%NKcg5N7_@N|;2YFmvTbwQT7!@zAXx6Wd`%NNy%_MvKB$mWnpn$a?|*MdfMjh)y^-Qt)> zSIX5CvJk=im@*5Ax0{Q>eY+K-Ep0CN;gXLT%u{ZphjphDH3+3pmCgwB3e&`J_EaDX zGLby*c9@7>jz3V9Cj%*LR@22VIO`w#@<2m2XU+~#i5c65*u4KV3NHh%bQ73$9}V}u z{e5#zk2^IiQo;(3TvNC{z`=&bvJZ(zAAabfo1q)XSHk+HZXlVj<_iSUcwW7)nw%{R zBxQ|twUBU`e0F&vW-4g?)?8|5>qZuO*qin({iWHsZ#%ybT{JOpcrCwhlD0@>iEM|K zu3a|!6o0nyT>63%7*762IofT{0=GLGDEP^7!l?UaYm0KqjS3O&nJX{0MNv;P-Pnqc z`uwA4;EyJNFDw@2B4f7GcM)JW7MJ@t;+<23faw8;IvW|L5OnB*pT}(^~7=`zP zA5iJerCvH%V~y68OIPm*JVOwQ<4d27`Ld0UccOZCWS;?Vwo144QQfrLW`l(6$711n z;HCSJTvG}>vXQ&xW8ebkecdjos6>(&K4!{g$u#_0%YET){np17(PJp^IyiEMu2!Pg z;{{HZ=C>Q}Kt81TY-V?~>+0k*m&nf)*mD`NLuABz_95*Ns1~_@zoQN`qz_d>p_RBKd|elV5O$$lAU5TPd4k%U?$^X51m*%*&rtplH!(664N>W4y2cN ziCjb2Yb-fTbY>W&$PpJKp1fHiNz4y@NSyQSQ64j+sS~%p6zLI(ae{ZO* z=7u;8Gv6RLt#3bI&jj3Ua*gamW*5JxP^Y_4hJu zrthk0wC7f0HT=GOciUyigM0hUhG>%TAv*3EDiqAW(-Cge3{Y#hi;Uh)y}S1)Q(Hvf zYd0`6^_F3$Q$(i|ZH-lwhybp&i485w8>;aG4{YNZMclW&S8uwbN~QFaiELM{wsVp( z+R@16e1v$UqJ^9G7IG4sdZwR{ER~Xfhlj(SwE`|e7z>ZTto4GOI8)jgy zesU8^LTPG}lK9hi2KRpMP84|{?A=z0R-;`F_b5t1IzR7T_D4B@7^0Bx3du>|`0|F& z@wm|Y7vnk7z0v(SW3L0pB!?bEstrFNR2~+8{pH}PW8i^`e|mUuKefCWSM2u-@TKH| z7fx4%vazE0k-DproodS;?%ML1-f(kqgobI1skVVFtrQMi4?pqRYTJ;Q6q%{GE?bqP zkh7!s>pfpR6I#i%k39oyRR}_D5D8GzJ+K+!UDQ{`zbaD7Z#aJ1;!E+yi)l^rI?v>d zTdvNkDn!6S75yM2+tpZZTo{udAtH-oUL+RQw%T1v!gz;u^>;qi@6J&0tf{8 z9hk||Ar3f0oyDiWOKB)wvi@vwHUEU&F6ks@K$g(*atN8o6g|Ee} zyt=N-Fb=gcq{l>94aIPW47xL?>Jev!Ql2iMIOBNT*0C1Uk2Sw}D25B$)rPM7jqXq~ zWDV5zkx`g26O!di_#}Gur5#c|pv!LnjF!fZy6(ED17^=R=FfQU3?J!Km=%7#4jT6} z>D0d~$Ig?1flJ_xs9IdROQN_WwqhBL&VzE7yBl*pq~uc$Vk^oNx76z8Wj{|JfSqSe%YI-t zo37MGH{Q;0cL+4w9(pkyQ8wVsdelzdXF96xL9gx!jOd5LeVMewfh@>FSO;wb$jc#$#Ki$fn z$aL&pmeD&?CkTbwJ&{Pijjj01w=Sq-OK@dtMc(2k2@LjoT>t_+NvC$Xs=3j%cFHcr z^p2(Y@dsTP+#|xl%yZ*^a0{&{s}1uWrVE#r4G>M|>JCcM;t-cC_EZcXJJnuZTH;OW z`Oz0~HSk=wHBIXyQH-3m)3@1b2X9+2RqdyIOr<`U*12g|y>jn4CxMm4lA1{-8noG% zl(fntmMCh)wShz4Z^Xep$LL)la69WZD$CPac(b$DHvJ8bwT|6JNbE-y3DNVCsIS)& zcpYwL%z@oT8CI4Zw_EP%V;p(<{VWNyR7P!tpFC;N{dB!%uhA>~S{Zv;N{(4!!;%HqYAKOb(IS#Z_AZzvDn#}BR85zT6XkKtFPG2r zI(J?PJxZ&3c6JB|XC34bvc53y!HW!(aQHfaMgTfAaHvD;_$GwYKbyU!r6n%T4DEO} z|Bv3STXy51M}+@TE^8dN9bpAS!575dApFTBVfVX$sAg2TQ?HTyG&WaC@X6B@gWI0% zBK5u`^lOW%>K z5tP@hI|2AyGX=cowd!>h8MDc#i6)1p>LI})21|mIqe{B;aD0s=F&cM9Y}b7IOr-m+ zkB>$I72i}iTXe0g3M(^L94bd~W#RsKv@lW<;CZh`CeqpbP*3KRkDKdc=Q`>^uWNHx zmFo~HZhT02BBC4hnf?wKZU{m~jt@=7r6H354k@a;8QwuTGX-WsVU`?iYFr<1i7XYZ z1YJ&1a^I*-OnS>vaE42|OkDcY^$rg?b>U@YFc!H#R*T$^cxl3l8->w zY-%tyV5G%MZ7r;>5g7?^PA<_qyxg@s06uvg%64QUed7X7d~Ob9z02q56nhad*U^-J zR-+Z#w5^ig#Zon``uG)EA1$Q+oI9AOY;CyRS<}ARza}xV+CpgV%F(#K&-e1_ss`h& zN`7hLr>BY21cEPxv~1ME4}QHbZq+yUzqZz)y3-F?Czvof@EqjVJA(C`;@>i}{5me9 z?M#x%c%sD%3;S9cXvV5T$np*N479ir+u6Xc1b*WL5l7LGM838G6OQ=j+A$*bNtqQ> zPoOoH6}luG_z{kG`72_9K6$?kjMh{qDI$VOWSc3W=dgE3O?pa36P4;rtt;m?6FT>S zS%xz_SPsB{wYL^-0$9hS@GA&-wUx=pmnB7SbHs9k=!&&rMGkD}G^()u#ekeyGi4r?84gT`5IDZTagw`ZfS(elTqZkuN(~HuxXkrV$^}14lAZhna^S)r>TJ`Vg(yG<%}n11Y26Xqkt#4d_fkobPnE9oJY;zCp$>vny`~`C_^Sgi zdOcTqog1c#4h*9_UFjnLc6IkGd31Aw85Gv{cwp9a=d;~?&Nfvtc z%tKNGr#WW))jl?CG3So@YER;_e~=bnmdHB-Gkjb52QQnA&MJNteJ9hDRGUwI&V){5 zyTy0joX#NxUVYu|%6Fys*XAR?-|o~}zMf<)qWKy*NyiZE?vED>B^dI{IO%V{))2@! z(8O9$(--eQ^p@cX=SvpamrYu3hJ}sny=w5wmTutcj?6{fuD|#Ee18o246w1EJeXTa z+5Oxy$M}jzd6fZL(Raj##~0&j*9u~bF0l@cY*Y^37uGRXn9mHr zOe_niDRa_5cDI0yKR#>YI*z;AbYi>$`cvY;>|9MaiuQG}wfytE?{c+w@IT8f6%x3Oo$aLu%`O|+O$ zxx-wB4={3hp_%lH-5KK|J-2B(yx-v!CnnPvQ zm=Qs-;|}zi0;tak*8Im9!L$rD(;y}fDO#E@1?7YHjbk60O2ES>)+gnDiHRAs5;AmL zV{g%AwLHAEHLT*0LCbq!qb2tCqv7hu@En!%X6wdr;co zRW*mpNj!4glPKZc*#HRkYOia=Bivq@4O(BXvb{9=s;jnfW}>x21DJ98dzW7%^pUwy{e9H9DQmdDQ%CzT zBQ3)v)X=`dr%26DW8zZ-&$ivm zXX2Xj!2Z7&l6h1$_<-7)TDcNM+~>-b9cmmfMX*-E23VF+cP^ zfN{{zQ>nm~Bj63HZ|xv_bkR?=1h?thve*;^Wxph<7i8~T`@G>X7C^5IqRSBkp{H&J zEqRDu6f;+CTx!#4M1O0UO@Drsj_7@mX5~%pX`q-Gg?W$C1=V~kmrmz;rO=aH%qb%v zc2f`-md=LEEr&Cdu~}^b4a1k-nrts#m^>EvqQ*`LC+zAfj-F9+kmpq^@#smv6zeR7 zS>cvatB-5ipLi#RW7SSh8G#pDq06{`b;2)TQr+8+$7}{)Ys1B+pwFg1BP{dmM?Zw4 zYk9tV*u1T4WjS_;8*dNhkUFDxYoU}yRjhUO)ih?S8)5VF;=`uZd|-gZixoNWggdhf zv3O3u6_E>MH!(9I22pQ8-5oukJ+0e6+F^Gc`^Y@bb@g)PDy!GveZ>5uA`0*6w8J@g zc}4@l_Lai-u`^w`;5}{&#^0(Nj*mV#v=pO{LA6c0<(Qw&>CaX*ofIrz86-8RFHaE* zurijX;*E@GpfEA5kpwbg=d;x1Gv3a3!k8PHsr@R^2e@rVU}A~ea8}+r z7}IAuIy67^lOz%2xc5>&fu+&&R0G@j6iizWoi+nNm+7W_6xIzmT~jWb_9P7xh!zh< zHWU-Mf9DI;eMciGB)BJApIUVuM8JfJ+K9b|m&9`4b~)C=ptzdgF(&caNAA_1;xwPImX z8uitzVGqFuXBYN!Lx;HaRG3eRx;PV=&PZD^F#wD#!kwP@d^F&aUFLjNv2*j-3@rW} z71fbI%iwxjs1wSiQ6e@1eq;~cPrMk|G`u88cVWa+2y@C5sW4EEj(<)mj9)4-dGX*3 zFcr9ux9OT*gBNf4ysfTld5|6VP+np0VuzljnO8O$)4gwr!$Ig5SO6$WS85aJ=JpI_*-boCR{mCFR>Ew0E z%#6$?%IUhJ(_PwMOfS^)oE`Rz#tkl6>bhPDw>pwtYyy~nZjgtdscFq?nuKux;?_u? zV?QwNOKUg}BV3RfaIquqWY(W>-sO`S|9IJBoN4XE^?IT|fph3rm%m|cJBs2I{% z;2xaQ5)lz!Ik|YHKWq!|#A9?(CmfMrlDK9s!5rbLK7)9`fyy}z08-c>Cw>>&7?G^~9 z_+|{-**g08q*PFJ(Q&L}r`%D*=E5^pyC;LpPZInhF}vl@6Zcny1FC%cr!pbJ!UF14 z&1--4`|is}!9N#;LMs*qsV5|9v6bkge(g@hxPYX+fRO2{K16!RiytpayzE4l-}$!Y z(kRu_?;@S(4gvq-Uh#ssL^V(<2Z7l(X-{D~$X0N32Q+K)a~8*1-@mmgoN!O)t6i?w z^$RRJvR$PhNXg>it+Y?iY=RFlxMMk_DGUlP9F)>Ole=&JD4Bf){z<4%Vz-0NJoIci zP*bwwb0wB%&qgSvqn6D(*h7tatOB7A4Ik?-&lYE5E?#8$g1(BB;OnaVI0A;q5du{l`^~M##Ed%t8sLKJ3>vinz&4k68 zmtUyQ{(4bSE_}GQEb#3Rl-j`9M6Fa_Sr*gVE4(gMtE>LVqVgH_IV@yljp1}c%kll|yM0Wl_e4XdRP94H(n4086$bJLk#>alA=#A`L za<~1H8y7;d>=r$097rh2*nnhv%#SZWK0Z|bsqzA7{^s;IG(l{}ciFQ%>=bW*4uJ{G zurw|KFn(Y&suKW4$4k5+i{cFbc_bSGjfUtrMest$GYzb?lA40R`IMiocN0n2(x|h( zQF}&?l3cUjpq<4ry$2k7V<5~O;6PqD^JA&&%Z+^@wT1?fzSoeR$uv3pB#``T3}447 zh@L~n{FgLWJ6BF zNfn&}`$BwnQh3JxdraZ^s^CN+T9)X8-0e67H1c$RhTs6s%biFoyVeEPWt9UK99~Xggg{VhK*kxLZqW=K)1NTK1tEy~0PZof# zQs^SA&{l)5=_#d@q<-?*T?xqSkn+g1Cr;96BLN@to5&diKjaLa6b;N>al^SuqMh z5u1Sp7ohE@u3Lv%s;^)4@Unysn2pDORg~1bb1g4Vd6~A{A`)hkfFlSYx|Xj#iehJn zF2Bs#<|{cVIoSW{Cxaq#e-ca|Ms)1y03WiNWe;^e6XUSAa%SVR-4zZ`Okr-5RGW%Q zSAm!?o?29FH7p_44Oqcfo@Bog{=5-b$RWMYhRpp%9C44of^o}#E6aM)aPerGjNR{& zu$V8HLZ`4=viGXBR7R-a>XP>eV}EFpQu8oVAfw%9Y4fjSXisS}sBEo%FWW3W`xeLu zQH5^V%xc-G-nJEGse-{TWkgSD9(@t#OuM1?`NwwF?H6lc=VQs3=C@Xn>xsGcbwJDf zoYz9i9pgkFs9FfqY%#R0Zd72wW-xPuyi>O8?R^|yvf^45HzInVa@omi>&}Ka#l>~q@r4owPO#(V(obWaUk6&lz@%< z3{Ylj;-%`saU<5-wX79Gg14$qYrsZ0x#>Mbq%Ai012+~N^FLp10>4kKda~I8W@@pZJ7GYU70LdbK}m92AfN% zW$pphJeV1$cv!nK24<$smI?}dJbV&wYokgZu;!i)ymX>{&w$N@)$;2tV=2FF3EG!P zzk$pvz?rK8Sbq%GlfJ!LTB*Lr9UfKR9C4W2Zm#gPluo^=i0Mt~#8LjjX-A;C9=b7d z!fRf3v2*DAh)w51lZ`|;eGPS$H%HV}5Qw4*?>(yHv+Or6T^VSrW_GGl4BV2oXgGNt zT08~tkIJbT!>cauNB|yL$DR=L{u*eU~K`yhBi);}wON7Cdotpa>BY~nqu~!w=8;CKWa`uy=E%0=! z6B@5p@POh&=($XS&aTTAT-XF)F5CBeura1Yp)Qoi7}aactVUg2{?CCgjj?2kFx) z_SSpv_x79GKypLpHGdee4%sG+5|4C*2@`lg^&yDvVX!`cRyl#LjwqE#^78ba0AY2j zP~g^k%GTmqmNP&RX`7C2DC(pIXc5pC8)oZLMiRt4!*i16NGtdv5&$vJI>XD!+t_z; z3CG80pR|vf^JaRqf4{dXF!vKLB|a{0IDIkv9q@789WV}dvn?b$J2o=nlTRm_QommS z^>yd1uo=-~s~r|eKypGeGs@aVfY8BnQo(VOw3((ui^W((+TBKHG21LpcUdf5G2EL8 z;q|tRFf9|#Qa>^1y)=?#;ax1^csAjJ8TD}eyWz}CdI$Dk1DvgvisUB9EHF|Z+f+?h z>%_jsN|9ln&93k=sdgEMweVGRT01RY#vcAgPSF9oy|^#uT768)(n)z}&AvVOw}sXh zJDVOyP(r@xG!+|zME=8@Q3^16YV_!uBkMdl<*GYwm^)1{Aba{fAHqe_(^nb=g*%%^GkTYl-TcI3>)^X-(=~M zTVYy!y2}8n4c5WqkQgC19v+^J^7}BDH)y1%Txf~N8d%T~22U(ZOiWB2Hj9*w>`3Jq zHX{eMA5yeu5^9NSu*$MWF(X?;*I6yW_n{{MYm zBfJKRfq37pQMUGGG_^HQwi+FuP4Nkll+9WaTH243wA{I2M)Sl;fW@pG%sir2)ld&n}z;94 zR9dWB4W0qoKm&)cE5nM+8QC}WE1{GuotI{<*%Q9gjGp+IoY@$n*NW|mF|h~!6Dv8K zn{|J=00Drqwh`m>^cCJW^}>cG1J(-{R6mZ?6fYfQ)UEj|gS4P@V;!$;wJ*9IR(ys`>9huGJGajJsi7v9lI6bc;I` zKypSZC95Mrf`|>PE1}Ljq0TBxln)hEC9<1P=fYVmwkxAx)d)qhDWm+->mttO4F%ri8 z`12z1+S>HL-!K=1Bu|x^pG);XbZ4iyXAoy<0948hrsjbEre)0q%}MiWY5?@fo}`{v z;_PDi)v196VekyEr7jqjcQtyRbNdGJ_Ytzp^YGA9zEE zmE4OU#SbPv2|xd_JwIR8t*OQw8W1DxdnNyw6su`4ZvJ3V^sze z?0)aqBN4;LQ>={v*IZ{k zQ#MBZLA{iJ-8!A1VCdoD*Q?1^n=$ZfVgvL7fNHYSceW1R7b+uKU2B40bFUtuVyT4y zmc|`~yh8fE32<0j#wsz%1Wr(W)8?wu@>;1?s9TsZB827@lwaf68>i8cyh_$;dgqLGLsk?Z3A?+b=w5 ztZ^wKA)p7lm5D~n^-k%h+=R)4Oj;B z{K*46v}bbKmrq$)o4gs}H~jo1((yF60EO6qawcFkTt2FLJ+s}B-xRPS$Dq=dA`DVQ zq66>%d}*X+g&&r7opRDn)qT;m&C-*S-mNPRL?c&CDboM32vS%i2BC~7wcy${gssj% zQtMySQ`;_Lri~yR_07Z-z3PB6h2-1Jz>-1^#D^fCdN!b8uWTro3#)gcxZ#Rym;u39} zdnS(qnxL4xRb1Of$YlH-c=;^FS*YQ2CQs`B#v9X7+0Rwfl$w(XFd@MbqKiZ1a#*-ouUh;fSuCrNx-U*0KzerMx!rUZ#6t0a4d z(vKCcKFJdPSVh#PYpc*-3%3TW^>i-m&;e$K<(=Kb;Z5*w$Na6QJFpRNCt6}DgSR+P zg_w%+L0wQ~hwovPVhC7(py^HIAPtw8q2*sJS_@}dI#;~SZ&gj&n5q79WJFR@pQ>lf z<(ws;WT?`zZ;u`b@{WzH=O$keGo^!b_WyBXf{?bkPW3vVCKR=rk1ur9va8>j2dU0i z+t>{L$Z-9m-Oq~XG4SMsjC+Iu1a>V?*iAE_t2Wt?_d{BrE)x4QFB=t(WW_(^COxFb z#qYlIS8>~$2V(mO#_`IN1Z7@{vPpluyre=kv+6=;LGshp)nc~-Nj&%s&&+DE9aRB8`ML`m?>dC z_O}%WjmwOS1m4$pEnY`tSrlM(Hmd+V^BJO!zx($Y7NtF(l{ed`#0zOS}CNJ(Y?kf@{nAM3%Jc8C4;jgh{*<`N8s#wF7SWj9I z>HP75fA6YOy7U6hpho{Ai3A4$8%+#g^`;s1H=h9$ge7Eu+-ZNM`GHD@T#w8HOqMZI zZz7zjs|Vi3384VrqubZ!UTz^5OiAz1UbbKeC&0(Fj{K|1CX6BiTTmrl5ssn*q%Y?+ z6195qaiULGWPf-j@B?#jMWUQVQ6!hrpXz!77BZ@-(g36j%j79m8l7^y7JIB!=C zHXc~F>Ri}(G|{G}Va-^UQ6|6;rraa_;?enP?7`b6<1f)utsgK7VKY1CuOCM;d(?hP zxX2|jun!7av00)PX`7>`u?z$BYqN)a1U2}_glRpD3$m$nA639?U(>B)u3>FF5 zfMJaDv{c`&2mSa&AQhO~^Q&&^f!YN*yWEjl7rl2!`}BNayCxb$sZ&oar2*4(?X=_6 zY^(308TaWIPvDxwE))l?{;?@R!D=f^P&w{7b^n({;)vxxZ&%FFk%R48lBPLc0D$2P zD{|cc1o$D$!(L2yJHMTT4s>>|Dq3qj92= znjXN}NGV@0*5|*570>ICW~z?WK&e|B_qbh$1!Qm8@>S;veW91LvOy64jf9DBE(YCK zT9U|VyYn&W(m}Y3p3`1znFe15o$~u{>Jv6UA7v`IbPPNmT`Ek9e~?Y{PL#a^S~vna zI)+9?a=X%T#_`?LYObYoy^d_m%rQ|R5HGHqt&J=wJA8P(Nb z=Z2k9-~$XBXVJTXZ|sb_I%)$|{F`t#Qa2hTH75gqNgzh{()kHV{WpNd4|yqfl3e{8 z|3~E%zkQ**EBGPg6xF$x7Q;*hXS|fn+xW6<%iU&hZ-*EQgGPmhlF`VFoLH0@?4iX7k*2yWO2Rk#bAK zfxOsgi2cs2?Wue+rMxP7l>1o2dr3m%*cH1rVI0U^bisLy#FkuzeT!A zAaet!NCG)OZ+X?0Ev;z1p#0}@eG)1#hK=u?m&qfFsCi=`wD< z`&V`uox~rM@}srd%WoBZ;stwFPBpD&>-Yt@?x$#t#V{?8qw?IpwnScaz4^ygjLLo= zXzPb zZY)VHF^VtUeJOq*`|^7XY1I2Ola@+39Yohxr1sB00i^uto+pLyn@3r>TMla0(^s>K zHR7}sL%3q+)fgrc-KIF!+!^UFXAkSpqUmUiN0G(8Bd-X5CEWO9v*QCzJM6Qc&+10y zy`sP(j3(n@qeeJY#pJSGoVrk1_+>+gH?0W$Sl8>NquQdLnze%E%fD9Rv}PxrGA57l zsf#rGm<=rL)d|llXwjoT2OhNI*8lAMuxj`)c_?jL{`M6U5b0j}Qtw(Ly{43cC&f{Z zpEaPK*S_;^YRCM7gj6QT{ey$;td`gSLDdE_E~-9WnB(MybUw3;1RYbthU{$D!@1p; zoE#TgaSSKTbsn2;eBZ`fByQt8iGyk-Gc{dCe!~)nqodb2xo8yKEdv@iN|nnpI$&Dn zTc9weB_Ti#2l{u| zvj??Y2)_1;G>$V?O~mzKN$4eKsy5br`U1`v6&CtY2?GSo*f#P@>5KCg%lC1`ej9>_ zi&S?F$>ZEFiiFAGYm8mLN)Pu{@t50CVph@3i#g^WSCzUdXh)jk@^V={FQGJa;hahX zo=Kyqzw}^h2#DwYr@UjwkLN>$AG?4*xyPH~7FzHq;NeE9J6}_(z=kMqQ4x9#V$vBW z(0kgkZ<%CA0vo3FcihdoESf_B-p4-nr@Gj%XNfFdB8%3k@^sZ~1v;!;sj8ist*tyi z7NPyrpQJ5h%_4IQ=@>3W*H&VZ*tqx>iDMti^eht7B>lCuisqZ_Tx_?!h-M?#a;co? ztrosSUZ{Pv3NmPKka0^#HDSF8d7+ny>BKn1kNPh@MnAm}AcU=WAFMWdE>0SGQdzjv z)tdl!&(k$O-4~9swt;2v%ycn^P?FbX7EyzQ&$OqKQ1Dch`Yzv zF?qP}Lh=4(4#}-m!c~_WlOkY2TkeXvG*5O)*84g69_P=iHih~tKd;k!PGO2^0HLj# zK_N-L7*+ zFo*w2z~T1x?#qYaAa3NAF*&&8yjuSC;&niayZ^zG5Wo&xYf>O~^~u-=)@?Oqcu&@> zx|&Ub#?(+<+7Y_>-}&MtL|!vA67=hQ1;SNmg_Vsq;~R|H->TO2B)Mrm0}LV8qlx2q z>mCa8@PYRONsX(D5dW+)y%BZse$tw33n*I29ay-h^(jSHr$d(H+&TmWKV?rTvdW4R zeV2}Ny+2Tmn}_yVT|mC>yE_LSnM0PprVoX>A499KsO79(2)-Bgd??!i>?TGU#UCfS z_N(&VZeJ))T(W0>pq+)^A3L1obwoU&v@Ri<SO;<>Pi+#$5^Mc2_3a2@-bHuTOPVv>3@o-C4KJEG5O{ksLN`0PdhXllq6`^QJjmc zy>uB70ruDcgkRsfI4Ca*Xb1&WI(iA)ZX05+J*M7uy~yngRg%>oFpa7Ik4a8_RYFfN zxd8Zk@k7_xH_~yAKHbW5pX{yx+@eb!DGU*fb6$hBaS%xKvHtaL(hSV|#n`J7+ePB{ z`bD%`CQpKS`)>c<|JjSbaQ+M6JqT$M&VXbwyA(mY2MR4(RA|Pp$?gTRtK(jUCP;8g zc>XWqJ5XZU{{1K*YA!y0;LT;n_&qK?h0SwFlG?1{h!P zK0Fsiw4iBj)9PAd64^Wx6tw)1bO!PB2j;`?6^KYO1%f-Y*z z-2Dp7gpe+52C2&~!KIT(0;K?r8(z}HjMJXNiv~aNoM!0OB7b}W`M7fbxqKH8B1r2m zt+ZAG{?+IrF-}~>GsfOpLB)l?VLpukDc0^q93dgWyeZR6I~)W@9eZf%u;D+;s>Wy> z>@dUk4sQVOnWuA8RU@XbHTfio4mTt zY4IqAtLE?ghDWDzcW>%i)5rRRI;)vqFWN2IpydXr6giYG<5Cb-OvYhdRt9YGjZbsl z$tRIC_6;WKoexuaZ~poy%cRY8mKOVn(C> z!a!FR!kI;q9FUeu0JUn(-*43GooL;6sp%rq$Nh>yS?Bcz{%jJksdX+qe7WlVkmEG| z+>JI|&2Fwg56D_W>WXZFbhEPumy5JEcX{*~$mJ$vOvj|Zj1nxRtOo=ez)OAo)sBn$ zcdz7!cd!_tZRiw5pSEOwARq+9s5}n_qF@{v!*L|5E2S7bSqceuMdK3i)+%&H16|^E7(9J`KmLRD9YcVw z_%=yLFA}qwU=+qrZN(kY!ND|ty@~kC$OX04&%^XXuO3onnWivZuo{nh!rpO4)e&@S zZtR^QAAVJ`wQ8hVL5kZ|-p6nsgyb-jshCydvWp*TH}(;3BV84y&Q4+25EPkuO+F&P z!?Zq&*Z<>=Hp_=7Q-tkzeT3txE^l9@0!)EgYz`ddhYlTcO_yf)Vn41=bj*NBc=3AW z2QX5=OytywB0cbg7XL9sFY@CP>efjDf09tn?<@jhSd^2?QQsC!M&UC@gighjhUhWS zh>^);x7cOylWywh#9w-C7J9yoD6A_6cn1K(lqF_=+%D%22?+F|<(@h}w44)Eb@{$6 z1YrB0Dw#l^37FK=(9+SW5HYT{&oz;YZU7pNljjGF6nkjJ^k_K*O#|FKrK>g*=nWVH zMris$+Yxb4DgtV~LMU5(hU_7>A%y%M9Q023e*#cxsh|p`p#}i)N$bkSTmCs5^l1tp zX<`5h2qnSdfs*|f&A_r{qbe%YH}nlm3;-cRDAuRG+P zjtmELwO&a*_r-vTMKXUgl|+mB7c%4`VX6$aeis$DepesaHtq%Ow)sej+ zNG8eUGqf5Z(+w@Pskpj-0^Pw#Xf~>S?|AH55_qs*OC6lpz0hAhAHkEC#M;yj+d1?; zbU(Gr$*idozcSp=a7*hbdHphh%7$FMvG;qL>s9NOLV11_Yl}PHhx5a%4`k(pcA^iJ zdBU7Z*HFj_)Lkq55!2H7Z!M>@J&sL=^b~7rw|a(ohPQXS%^Rc07wHxs$Ywh2irSPJ zzFNQ4Gq3WjhvM02>$R=;dxIR$Xm1ISiV#J)b-SHaK#$a&j_a>3#U% zdb-25H@hI-;BRNKBmrzPzDGa9>Bd19`*g~Y7<`=bKbwf%td)BYf@dP+vsFsV!J?^OA_=N~gmWm_S_GuC|c}l$g zWE#`B8$3)WC40{x6d@}#ga20$i?xvp)tH?f(DTvj_h(zW( zJ7<6PwxW^ALSWKd?$DIV=5VJ~Ryd&YMjW+aWzT$j*~s`rN65HQ-PFMOZP;|GRL@oh z=D`PpVKw`;Z*xy3{*@qy{~5L5+fb@MOeFwS{;&V}6*(X}Lx)$EmIhyO>f&N?oA9Q=k6*qO*C;9)P z?Y;S^@LP5#xKDH2eJ!G9`1u%`!KV!Y9`vXFNs1t`^XbOQkPq zQU0&bE&sEd$QO)^KPq%sasf?tmIHDYepqoW6Z&+&&JnA+;$G8+E%*RWxN!-_cP`#` zBXnT25e-n&E{hC8`3qGLqb`WCng&c@%?GbASn=oHi9Ba?3;5yVp0o==B9#BdDe}3I zy~mOW_}QlLD?M!C-ws*zlMV~;ge_4t4qGfVDFqqOhJryfmQ=$Dv%YO?ulNjng5lq_ zZ*X;e4D}&f|J|LNZZ~0{e(MbQ5(cxJ{1=zZtR;RWax^DnQX=o9b?q1!8c4-kll74wDV^G(_d zc#+xjrJkpztBt3(`CDs{g1R!Zf}N{}r-$WRdpA#Jem)*P0UrLJrjyaYTS1C1WVL*< zb{3?gxDa2M{%WUE&PH!=Y$>2tjcvMm4({1E#xfB8c5<_LV$;ZY-XER!q!#B+j$)R0 zn!8!p_wPa+HCBmdAvoe$gt+(;v#jELV%2AEy}N%&X>zf`u*2NMo6AN@G}}tcTH9K0 zf|!I*Ig*kKr?x6nr42!j6>KUKfWZGQ<)5yti~svep5#01|GDR5d52B%I%$nvF-b@< zDYCNZ8>eED5xe5sRJQ(9HpQGIwhPWZ%bQc~n=7S&TkRw^TVZKwm87+jY)K1K#iXp> zW8ViOhnk8HUwvY`pbhTi#@R4-mEMiEUS~HU8yNU?c1pEmXds`&rr#R&ERtik!LKJyTBhaM6gLo~;cSYX@5zTD1p6%E zWcC@IO{WVgfUHl$-5$y{6R~J{o$2}zvOJFOZbMK{32us z3F-GDK0EKT^qCl9XV{TanLt#EVxRZwZBLoMfFeasm(v>Dc!Ls72gz?y!i$D<6}C2U z?zwa0^6Ag58Ma4ln+qH5UyUS!Ak&dxt@O_NCSncv2DBXdt7Z>gQu4t2_*$lcNsa(j zW8pR?Q*_|}cco+GeH$6VEqSJ;Q^g^tmuNoFaA^tHd&=RLg*A?YUEdC+q%~m=7rC8~ z%F9aUSut0$wlhvT#C~90_IhLx>a5Mgp}ooD(0uuC)SbjfvjWv07-Ceeon1yItRcDZ z2o@KQk~H<(d#EjS-NVyFwtkN*|9%suGjW{)a3|=FUF}|S1+0FKLolKhitzLNy|)^8 zp4R%E$2J{(HG)F3DVhk7EUUmfV9jbz;1=lt!}LGdbF4FcA`tjxq|O^Y4+jLzTIKgJ58_T^G4sc16`b-oGzwkI*b_9x;l79!QipjG7|#OBJlF>3g?BbnnAzciO6Z=Ed;-W z2B@JDPX6Pm%7i@KQeU%L(d5hY*wcwN9I(_PgrKRY^{f|pXx!`MjJP~@*kt_+iumdZ zEgn5SOUd7y@MUmhcw3H!5^eW(&GDRCY^Iyy;RW5mq)pbkrk&KUd_D|cNI5}_4_D4A z{wTohGb+I|Ukz6HgNmCux@>p`{1<2}K<4@=8?7b_+9tH&kSaPsWkK1&b49YxLn_Tp z=KS0EK{YF0YX|=le4$DCr91r4F9C);tmfvuUIfGAHf9KqjQ@Q5i*lMsm<@TMFTOOp zCDr3hIu#t*aC`qOt$*!jC|WXYl@5+y?$I$Xg4^ELRq#kVG;At~wo^i*?qs@KE$-fB9^h}$@wC_dc z@1Qb$PR;4!=ZA|bo19@Hradu-m6rYwPH4d`}D@V3M(e=bESM`=% zo#Eue3FlYGX(5aH!?cSVxis-V2K|M8zWeFR?b#ua2NPR*3qO_t0&PE5PHkEi5XzK0+TVq@TUExh$mKS&PraPw$nZ7yFUJ>czs7%20My|A0k25P>BR=kFvifPU zyP}#avjgJ(>O~IwD^GatUkZX?$g%5fTO23Xt!$|rc-DV`udt=ztHq1|dY6buD9Bw} zCVOPx&*5vSaPC?9^!nP<>1Lwz_QCSILw%6O9qeSivC}VR(8!x_m;^8gSKQljMFq-fOMn zxyE_H=Afv6gRAvKRVEPaL=0Mccy!M2vKTO@<9~U(#!mw`mNloPW5q;fxdE z{0fTfbo(s;L`u0vFwJBQ%^#`LTID`I&urV~NXl5bwth*Z6w2P0GbDO@_4>8{;Nhi@ zi3EQ)*oqqmQDu^%aB9Jp?VrdG?b9$X)3qTHUCtB7za^6z;e)Q2CVX9kX z(-UL~-4P)XVIyDA{z{|k%(X=x_NeVC=t=fs@k>h*qIz)#oyM$_zn5;|rK@cns?ulf zBB2#N*)-cw^34*KsS5ND{%6|4VI#g-){d|6Yt(Q!KEZ{-hO~kAqgnfFHHW9OkElZ^ zw+=BvQuqa!veCzF!by=@MwGe;RJnTwBUj?`bT#!7 z0O_jk8~-hHPG6g4NiYnt{@sE>*z9Fw{tDAwcBVQfs{b@90|}k|&Ay)bJ*06Aj(~79DnREOUPuyQ{yZ#Mr5!vCm6o

ty&2WiY_*9Me>m`_w^L~}6eTEclcUODnE=}S8$fPTH1upJ8VA+}I9$16M;r}ft zG~iB~9hoczN%B>rwNuVg81Ms3)Eo$)zaS@pUHCeH(__ho;#l*}NhkEg-?o(V!Uirbp_cTu zrlITyFMKNhmSj4qaLYNP(3l&qt|{M{chnJ5Vu*^ilwTq)RPLiN&UI`*O)JqkhOT@r z(Z=%2AHjrTB=XqT+7Dk76NAhg)m=0BSt|N}CG^U`MuuDGu@m35pyb}QYcip#KZy?6 zMF#-w7-=o^v}OHm-=5Efb%|dVls;;5@?(z@QjspS&*(9u0I>=rw?cEpY)*#A2iF*p zwR@6N4&`@kZ9V-dDseX#L3_G$hNOHU#{=QSHP1^Q;Jm{|59<6fIi@a|C%AZ|%Uyxi zda^u~s(LVtefQAj)H)W}yQ=u+k$K*53saAfnPSpBiE~LfPE(7juP@D9jo5pZ6v7Zv z;%_bsPvU1ug&~Dur5j{sx%m3*maNuO?g+=ysb;2&t$Y=XcLa8s+PjJ?FO4|b;;TnK ze>H+~OkCIKpea;QHzUIO`ghSHKo zicDF%qU>?4YgH}Rr8Fy~*5o!(tbUESH07c5RrO6f_RJd%N*xst#*$RD@H>pdQa@nUL6Yq!;- zS&*MCQTnEUi10;VPK5oE^QM0hl3XECCe>$<{*l2*_6hj!lZlHzF zF*=!G<51UZ7t6K1v4{7z509wLMr##mHjpv&Q~*69mtqnxB_Uxr*5@!Vnaqap;zTr? z;;>_cfZytjbNuJsz^6X8{LYtDUH+364qock*m=%4WrnDCmO6tfE`EcLpz{4Eihz}@ z04u?MSC;6wzJ>k3L^1zYOxJoD;b|f@+L?r_KHG!ffOYDPgUg>zbt=JLs!w&5XRAnt~>A0B7MmT}0HQ7k(AN z+DZ-?LrUo0EVS~{*DR1N%?5B~9^eDmFw3&h zFl%U|WW|{ml5teKj~40^NH5&F4%Ms(5^Zpp)v5#4I~HTINgK&&dt?4SkmF8$yZSJN z-P|BaNM$fMjcr}}sz9ws)$Y`auGVp`tI+<^JYnftXUrU(za{Je&^Hc+bUrjTXa_8mgP=V%it9{aAzJX?@mNe7pNo?Y-)fPfM;cl)NXFvk&_OnW;HIE#D8j#>%L{_?- z6N`+dA^Z>R7FspqXRCVg%gV$YiiYbpEteE(iJf!%jMxGGBkpv()NdD3ew$%HombT7 z){EQez~lTy%MWE|Do+-Fs%!$IaDpQ1P_z&%KU*P;uXYJ!9Jl_&tUX`Fi3uN?mL<;) ztiTkuer=b_GYIc5?TPYvt*fln$@Iz~y8G2mKZOEbPYBJTsO8RS+s3(JhcT>o)9pV-B{39SJu zr?rmCi!*P{GVGH*pK7pLRasA*$wnx+WuRyYjWYeGFNIh_|s{fz*2f&`JpPA)l3omHX{~e6{B{R7Hvep4vqHY! zn~~3Jl0UJ-%D7UC>0&qM@@(Cr;1ROg?^SIL2j2@6I^9k262hA?<&+{ zgsAgIXi!pdkV{0>JwO*ux0ke<*S9PxGl8QFg}+zJWM_IaJ=KTCyYJgj{@MyKF2||w z9CG(-M@XjN%(f~gt?h+pdJ#dmC_zv7Kej>(=iV5l7+M=AN@_Q|W~&1hF19|834$&4;Re(4HyN*#8hE}zu27gF5~xtj@n z3Y8Z^<< z#IVNsHSAjBx2j>7T)r*zR20IHJ_HQb|=+g_E{uw(h7Y*Ul%JT zSGm0iF5dFE)BN9B7wD;1z*@n;T6&S?)@JB$&#?<0aU^~M3K=ZBNX2Le=~-$bav&Mj zV5?^)K3d(aJ|LUj)nww`+80?3P~vLFu1dy%vwzQ--HeZSA+K5@M+;W(jy8OptT0?p z`+R9+*;hUoI)%7X^KZdFL8p$E_O9+GcS*jG`i3m-rydc+j*%I=%8(gb-`t{eG=--c zooCv`+`Jn;X*39hZ)*MU%JX$ydHwC`OaIRcj)V|YxZu$rx(13 zUNr#j@XZxrY9)jIs*KHtyQeG8iV@EPUn_EcQ05FUyS#!rGb{U(x{`8jWz(D$|4Og+ z_0rJH-toA;$+6nSieW&{y_UE_S&U2+N!RkN~@T7&H!3l{BP)CRRxZwTO$DJBuyf5uQeWvw7-+c5M!8-%TyzXkJz%*u&I~7j2zMiou0o=V(Vw#CXrlvVtp@! zg=QNAR9zsFRTZ1Q3`b^?i@fU;G>>INO$SYOa@Eq-0?KO^RkJ@%w@q#{?;$TWo#`C) zm0VtjU%&8Lz@~}ipQ=Etq)m}4>}kDM9I@!= zDD&Gh@oWFj|0Xf`Zz00Z#BgF2yr$vTYC6Em_xc^S#?JZBqL%7{xc+A`Lq<*U80v0t ziHPWD4$Aw*fg~p#NtSf5;H#6}@2AZFow|r7|DoU` zjN^VO)YuB8qp#}-tQ-qzWcVgZ)iL+qmX9!)uZ1NSPI^*GI-9q+gK@|7yl&pw&ov_Q z{8bK~ub2#F%LQL1ke0^c{J@5!HI}IABOjfp+kRj}a-YJ^LXRe?#|BhO^UKS0enJSA z3)RS0y6c7Mhv_UAE+^^hBzul7TZV;N6_d0o+~@8}&i}XYC1&B8{9O3-Nj2$gRL(AD z;gmcX`l)oo$gRW4f;30fi~8p_+gW1QD%uLjma+0K0OKf?hii%7WqE!o$wfYBpl!ES zJ49u{1=n8RyV24Vb5Gn6KK!d?D)8AOhfPt0d31)rBt(kFap5wVzV2s_AKE#tiD$K= z33XZfpZ1e5+Ryr!1=l(KMmk$B^>6#xdEHFwu8*AKEGjQ^>hR_Lmf){)7odTfib-qb zYMK`?++Ao~6X|0vH`4;a+M(ALQ{ElZSrlPsR!khJHA(aDzk8R$kj`hWd1}#tW~~|R zo$)O$fluRSL&t@Tr^-baPq%z6Q*(Ac2@rOZ$1Vqb>sa@<6sxL0Dl*i%OU_`NuAv`j z1Ua^y@OB=vPpR?*scGLC)5vb(=QjAY%|0Nog#S(P?=g5m^i%~M z|7XkTz7EdeQBC169Re07TS18d3_dJYp8jTnH>$?k25@?Iu)9->QWZK~q!PKJU;iloBv05lKedQQ>L)2BJq zHejL=xSeaa*Dva9Jv+O)w|bES@OiGsx_9Dpd}0O37#szlNPGa{HoSHHFhU8HD|$fk z{5aM<_ec1EmGQPV=vpWPJ(Y+sR5cUYfJR~n;bv1!js!FSLq5(?Qx$u-i=X0cS|P}v z8C(#(p(OblSv(;dbdr4FAA#4IV^%R(CkCB{D}OkZ9A>L_rL7un6xNYieRb8!;0GbQ zo7KDP$`war%jq*$vnHd-5~_aOj;gQ_Om^mJ^Lj2UMA_@ju=Nner8s79tDYtKt@tX} zp|uu_Mj()s>W2E~dtZtd^~T)XPKu*Arz;YKYb>YdIyk32kV0O(30c$Lo@k*O0exMj zxtE|{G+WmW#k<^b54U`|1zN#YLZ#Ok>i-#IvPO_7t95ld8KLg^$xfIeMPq}#&dH)9 zU0E-$RQCK)_<^uuQo%$SKeHR9+LSJy`pP^^+KRu! zA)PzL#EYUd_Xs3gymlQq5Jp~c^Od(y^>0EcpCTF1=K*U;RM2?i0;F$mx~2=zr#2P{ z?rTk4l6-rP1)DwfHu6+?=6`DmAqo3bZpdW+8dg5#uURfS$qAqCkF@^ZSpdm- za$>IMMpGAySK<==ORvJvo)jbdm4VIRKlI$I`+H#N)F(5!s#V68i+|6(4IARr&{^=A z^oFvlK*_(YO&c9+EG}vltMt-nfzHa6L*HT!)Dy{E|DAm`l|{Ao_V&5psdK}w-yOGG zT5xQrBdk31C_}{9I|0=LMy=%n=LaSG6O#jLj4(hJp_}=wb>PyzqD5XiMD219Bh60} zYo`7==FUQTW6zx}| z@I+_zbF&!3?^MZ7={V{M-b1~0Q#I-hm#Wv!-_LZQQV$lG9q-UP1fZl~Te`ul&Yw$I z&-N?7aJeYRoAyS=)&1Vjfd#jf=;zShO2i=4Ckbnj?Lu!v3Tub_wUm$7*B>1}v`w$A zMTQEKuso-4>Ch(YHSP=of4^v%^9V+fV)^>|o^CX-#bfF*;t{C%9?u$ghm5j!4c-|?^hJ#ejwJI&vtnMCS*gEU7WAc74`+gw zUY4KGxAhJ84`^$yT=ot+zu(0*t@)rtB2-(b$;AY_;eM|twY1t$aMpk%hs^)2IQKrJ zx;+-=fgNVFbczUfOga)Qi(m+=#zUT)3X>C{Q<%l$RNlL|-cXS8c0sT%3QqKt z8e-AgJw=tV)Hey)%WXhCp}+WkTxY!Yef|dxFi4M*(vHO6&pD1fQG7e@4CM8#v>1bo zLPbqF&v$_xuIDzMjIJM4wB$@zRqTPzSJu%zx7BIqe6cO9TQZ$}P&Sc$#EyXN=V&1H&|Zjg6T?Z=&D} zk&@djRfn|xJEsZ@erlRvo4|4;y2G1<}~qgi_90l*0^q!&FbIH;9uELuxA|0(tlZrZ!wztt}&?#v>0l;Et9 zAW(?KoFOQL2bgsl{lKQkvjV9)AOL24WN_-6K%mHbRR1&g6GC{Q0;l`mfuMQ?_5ZzP zfARm_Vs!EUtRB`PD@Fg)aD+>Pfw4~edSHuDXMMf+NsmJV_%4f)R{Jg#c%`vk@!u>DNpPg477zGN%Eu) zYAaLn3 z#DUoWhGyjs7+JF0^56Ve8vo}!rwbS@v3^GP!gZ39uJYl;U3k3iw9w7-J1%$*o%KTs3gFto( z|JU~ZZ^IM7f&igh79oA+TBTDK6CwS{{NlJ&v|OjGNK*RPy~U`Kbcqe47r(v7fXJaE zB@W(~MJ(Z&_??khKrd8PWPfR@-M+yDu+H%b+%nZ^uZVV}yskSA8Uwa_ z{cFh<8y9bL0AN&Wmq)Ig1HSX1Q`%A~o$r8tPWA*Yk2p^1-`Eo@7vXotU`3Ne1y`&% zk8nGaUcb?fyf!9S&TXpat}hzC{u1r@4*!MOQOe-DKP7x}s`PDnzuMsXn?M%}hvb;V z!mTcK5sXi2H)=n7u&M<$c2mjY#*8MaPrPkumpl^$G-@2D@j{f5pySrM2v}R7iPGQJ zaI+$gnFMa>O(eT?;vFPEFq^2y=xU+3$&`RVZp<@9D?)?s)s<-JpSb964MlbehsTtPM| zD0Ae#?qU1Bzls>I-)%r60r2Vu6Cm|A1tyuPKJvmf>5K2KqYUU3@ge_P*E`tw$?do? zsN_a_P6XK3%0;V~M!MKwa`YYRFQzLOT~1pW2f*UB`4qAk3;g=`7+T7r#4d4j1+(;+ z-QtrAr$bt~=B4%uUG5<1$H1Ovk90Q%dm6U35b))TrHFfb7Is$a8NgqIy8flq`1=ZX zT_tc(Bm~c44Sj?1*Cxp-q9!ZP<2gFV#OJeF2YbABFA^`7$VYZ(<4=);S&`&9D;&rE z)+;A~L=w!*)siSc&0P=W&Ykx*P0yWv=L8-tQ={Es)@SUvNamOD4jpwGvvY~+WL{K@q zzbVi;w~k8p0I~V3|Jg(q(3gN0fll1^LM3(ZiNW$As%aw96h`p_p}JDLJL5$v6+!L- zCCyRgqVyI9M=>?6fB7y@^8rz{MZ;oYvTO%4`tygr?#f+Zcs!n?i{v_Dg7oCwep72dPACy zyk+0)I4DK@7A`S-yAaUmU+3YDMt%SU$W_XuSBY4m{IQ*x5M*N)Li}b!TTpJif zj*qe-i;=#VSOT7Q`#4-(gy^=03@fx9*qQrr<4tvevQap>31GwE$?tzjUT1|a=dWc{ z0~^~}zA)9!y0va+C%}|iKEHp~Rz03wVeulk-?GqNA?><2ZC&6g@c^E4t1i&i{c?wf zD^nylt2g&*3o|GP4A|bP_8V5-dBZN1k{lh(8h)^J_bwF>jJCj2lnC;jN^g)fwA<(l z-c0v3pix!{0We}n4J9ThU-u?<*ZxreJbJ`fjnhPze(H)d-g8(&uueuYJ+ZsX-s3h5mYdKb9Uy16NMbX0DgVU%TmPs0z$5jFc32O(I1Yo!9VvWF#E zL!AfvnrIC0Rb|vc@@%^NT^js%Kxfo+Hl0+HtR`{S=({Ki-@%|o8)tE{`rMOR^14le8EOtPj zM!#)isz{ooT~o`?X(6S=>H#)>PxULgde-swEM-s-ql`6(lOso__@C$a_&FE%6QJ0O!IxYhsO zwbYYN&2FjSwh1ktI0PaO^*An>jJ5(xM#gM=MQ~ixrsAS78>k6|wBro5KE@oG?_Cgu z2j<6(oj|DOin=1<2e3;N5(pL=$S zp$h7K4VU^($&T5jPp!EqFCx;MSx0VxvuIOdZZ35|>+Kb{s>F%O(EDm7%E{TcBbLOZ z&b{&jUmO=FhowPR$`Wk+YzL#`%nfvd(j*Gcb*55hv+rGqquba^TE4mxNzEH5h5r2MNl6BoKB>upL%3-J9 zc5-u4S{GQ!KhC+Wb^Ku$26>pf@|#fKI+zl~XW-s($UDbU#64U-=>@wU|BZe9U_?Xv zRo>Y#gqSV(Lzm&2a>#=WRt^x(BW4gNOMUDzsZ5Lnwb|PjHG&sMF-TYyrZ>sr*X3(? z>u~lyKOFCjn9ymk2P&K1oc=1EL*n)`YiJ*{hsrNQhh$tcA`6*6oT`G-CF`l@4Zq)N zwYB#=UZ}KuvY+67eZjOEy!0TP51BT9WD2=AGE~XDd(FbKs`p^&bl!bT)(uZyQ5}RY z^Y#Hq>|4^u)qVFDr&c|}_hM*S*+t(qTM1n_I{iVbF8#ztN8mPE6ilxTyrAblTewIV z^i-CAetRJxov!~Fl-9meYXzjq4Ik6?CI0bL_os#~e}tt3Z<>Ju73(-swQ_m9&*F97 zcZ+cZ>7LQK*o|Yf>AiTXD!XCg$D8pZk=1RK$ukK;nCpoLlchI)CSiQSE% zrNt~B$<~i@ZUgxKT0d1HUE57N^GpV?U8BXFgN>_N&Gg&{++#iGYslxk?n4&^HW?pS zhotvv=tIX=uQi3M-S;2gA<`O>a<4S%cE`obTVnIZ3ekKucVh3o!G6zMX8x?|HV#g_-hJ1hkAYCp5BNwXa|B=i|SSPZS1TgAruh^b&iS8 z)oqGH4;-m=xQ~saCJBk^8@Ms2fqVh9M_4pMWS7}bx7RW*e=U;-J-kkIJ#Po_k}ZSt z8^aIsmPSlRvc~W6RH^Q9WNMo^S$`@1=sYl(cvZf0J=?zW0m6p_^Xwr&hs6aQG3+Zk^a;UXb$VbXA3?!$OULQeCbNEujLLP0+D7 zYA^e=P>i($U^mJzxx#j~NA)iOEES^KtC3;rZhVpptQHoZIuVl}d*y!r-g(29RsOl! zhjec{GrewRMx38W3yrwR%EG>>P-_CcN$Kw*r)j?~=sv03kAd=CE8xFb^&P;@XG z&`9G5Gt)@7E3fn|t(Gxl361?Cz#oA>cy$qcTCTwnd^4i(7U+>`gGh<$Pv1YzSB@x) z-uMd^e83^-hARtzQ1Amf+yPqV#=(05K!G%MU>}%e;gLuTwiC^isrj-h&?-PP_yQOy zBa0VexWCEFM&2iO9YAcM|KV%S7>_g-sJa12hCnuXa;V>-qCN7>2EH2!&OY8Csb88T zTyq%2Bn}X0^Z^w7trFldfC7vNXw>T)Ryor>soq;|mFX`|;rAkHY@~XrX&xFchm8>U z=*q|7mtIY!sN$aTz9u^u6w+Bt#7-l*NCw4WgW;E!y~F0Ukquk@_u#t#7&vAmy9S!e zNU^~XYeLL(olo3a_6uqE?Icu7>~dA7H3FRU^2!orpZPPL6=xk3+^9-Y(ORJ?wAc^9tv6@J9+Fz)ChYukP!jxL} z!O9BFxbjjQ2Lamx?sj^4cYrgdNj{N-?l)4KW?I88OTuQE%lM!?3hmR;##Fhy87UkR zM$+W4BW12kOm9SeWd%7wu(gZGSbHvKQliv^X6{)B(VwLN?I za6IipwTuazC6xb;Q(wEQ3UGqXPA7u{vKy_GY0Q%u=?P^-d(HE%j$^;v^GXhNWWlmq z#PELIG|Ke_gnuS)fKRb}q71qbkjm@qG5Z13zKWk&oZH%WHvWN@6}6iaUN0_TazFBW z1bA4PW4{HK4G2Io3Ip4dBhz2^i-r(>)IT@cQttl3G=x>Nk8xMtetfAU-x)zJC2{LU z3m9R@XB+5V#AaS*&F#5J8J5ctJVP?d@v{A6A4lWGb^~V3%F!vJk&=MdH6312Lr+qD z0ZStCzQ62G!bxyv$C9-XqfjCPT3#mW)W_!l5m4E>$ZV%r^^X5vbJ#bFd(*(+iY^22 zJIZjr{LzK=b9uoZk428KKr)#FIo~k=QZ+@spcAm=Wr*#U$Yx^MAo4|k-~oXB4Hp}z z=rISN>ZLZIa|E`5DFs8nz(dcij-NdiwDYVjf5XnE-nb^C1C+HfE@5@E-{|S&tH6BS zP&!Zu)>D7{8lG#n05CI`+ov)^%*hRa9c(^^+{gI2Ov^F(592KBKP;+|qIQpwm(X#} z=X@J2w~RxZhn>X)gjZ@T;lcU``^J%YeR;)U{>sb6n*B>;;ng4zDHXh#H$&sk;@rtk z3WeL?4@&s-D}I2;R^qSzV?nhfVi;il)7x+jiPEeqD1QsnH z8zy?H5l>b9QTE4;QbN>Aq8F;>WGfQMLRzuTT1ryS#|_epDej$10PNSz&;4L{L3Q_x z{QQPQ3#>6%1=9rEy)xRa;^1&|4!m5RYp&;f5IT}i-n zO9VyW-`;j{f%R84AHXWGX|t*Rn@JKvr&BJjMA5Z)3wZdoGRt#Z&}hmP?I}z6f!LS4 z8!Jo$oYUoX|0nj#Wr{rWe~*mWApfxenEB^l=RoO|vAY-7;DiD(jx*z;KYyJif7$=_ z{^d=T8-NSzWC^Xv3CHV{kis!lnBZbXU+npbub}Y&o-X+}`$3S*hJYm1$bSCFeKMdX zR4%Jvzx~&S%m47x2MwVSEa6Ly2a@~l;P$C*iOVw$p`+97ZeiS_KTlChmOJEEl{+K0 zF=d)@E8;?I)eRUGHJyKwzK%&6IS2fdq7)bfxDen!8C$y$2n1+49(l3M*P2Hg!A{>< zKp~Cu)Zu*G;(XZdgk6zF>VE=9tcP6yuWS*(Dgw~|Xb7;wMx--{=F_>1ug6N2Z=7ru zj8Mw$KGfl%tWlm7ZVydScR*~Mn0{)}4BQfeZ3ti_3y5}*!%e<7m2{u{TzMGkqhUpo2&T#vuBcr0884hCwp*BTvzhET zRsnRND@WL6oKkjufy1JmYYM-P6kfutH?w~%MO!~wnw>pA!clpT+Xyy@$SYPRZsivo zE=%U*_^lfcA;-3dJYHROM%n z9jO?RlEaJ#EK*tVl?5o!g{51k%i6R5odp0i=SdvIyQF_3wL-xI7~8d$rEm+txp&M% zulRkL`l+s8Zb(bN&DpOatYXFRauw0p{TG~D($Ho_Au}n%=TyH*FUmaH{tE#((A1G( z{0Jx>R->-k)sjw-6)%dvSz!C+4bU_kEocpu_n9P*!1vx0%) zyk}s>0x@7a&QaeHk@M1~nfdkyoyn5Y`xzhiqky4QE~V~|7;r=|1%O=^Wr&+CM?!wknXy;-A@c} zuB78nu5ds?PrW#T`>#GFc8l%!$pW7b~4m0}{SKLP95Dn)@72DGRdLg+(^0 zho18H<2U;RXZ!}ZbGtX?N|M0GyH}(M31ejxo28I>GR{uO3wqm+<&H@soTF6#5T04O zz1WOjLUtX7Jsy#F$Q#M+b%CA2GZ8E5p|5-`edRQTwP zUMkHTH2Eh|XUAlq(Zn{VSInx&lE)5xucmJE)?Hf0{dTzsrGY+aGWZAT(16Q=`9x{5HP1 zO3?l;)MBWKNf>v4wu3CadtJ%s_2|8KtLjCXozWhFOme_j{qP9r&LkTq0N^GN0?fwX zthobiaEk&J4VhJQqKWg~i1IJjSv$X$XC4~V-vWd=`g=HTqkwtnZPOoE7t!6GS;`}8 z3xR@uCBPLAuM%!D$RUqDNdn78emJ#b2|X+gxNrBx`AJIp7wda51Vr7T|AKlFDDPj! z@h=Q(wz**Uc zr3-y1oXs^zZ|0`y^|+7o;^O030R>%CjE!ouR~I&jBehK7E@7;uWBzh8u59*^;Cp7l ztbh6>|G^u5t%gUoV(U(IxCzEz9e?1BCvBjQy}BXbZmiDE1I$}VN8Z0T1LU$>MS^8% z;0!nzPx__|;Bk!)_E9zY4gxC|w?>{ft5N8&S&Dlt*>ix8&M1^CNcP+UAiSy(fj9Ym zIsD(5K<+L87m_`T$#k-$uJivWj9|Lz%bibDMR8miM~9K`f{}r*%p3v3!dSv@o7uP$ z$z(8P1X|QV|Jmmz=7y1HHHo31W;LT4kO#o)Aa%J{A(W+p1&Cyuz%Y4Cc>#6R7Fha- z=W@&Q*L4F?Pt5Y1IyS;5nzgt;s_vQAP&Hp#V)rzlYUv$$o8ht4}3m^2Kc`k-0ypyG}Xh1~hv!O!4l zE1Mg&6r*r=5xmy`7Ib}2e)E5j_ulbT|L_0!K}exaGBQ#W4dW<#S0Y*2n?o-ldy}1v zvXU7Yg{(57tZ-x{am>h$kWKbHzw4=9@6Y)D^ZWCAyM3Rx+pBtcp7VG<#`U-!_v?P$ zuP3wG$o*D^Gv&Xq#Stf%=n2u!SP20D;3eio|Y3)IxoQGk!fD_iX1rc5|txjQ@~E{34F0_k4N?-n41yU4Tu${@ii!1t)D5 zT{-+;-8ppXzy#%=?cQv5j;1H!rn#A1ZxW(#u>?`_myeF%$G2)`uu1E5=F)vGN}F85 zhfsIkv^b!oo!J5PAty+D+Fs<^?7nQvdCv#fCbIRAZjmFI0^?@-N3hkxkfTVy(1 zt009j#1LT;>>l)!2u)N(T+A8VCtV=5Z+ndS%kOcQq9haLekg4GPlZEKA@e zF42_ULklZY9{2w8=*pKiLOev5K4UAsr|^54;cJSl<6FI-DN(^W6}>?$$zGJwFVJHz ze+Wz^VJo&G8gNT4wjv=PgW)?~x00nKd*>5l(_+ndv2%w=v5W{$+X!~N0oBfqZoK62kL{(%C5pG!MFU(q>A79 zifa%VRekw7ruCESz&owabVov2Jd4YYm5H525JDjS+~XOEWzp_9N<(TEw?p?rf$4Q+1Hh}(=na0``&44Xl3Ie zmfKM1-iM|u3o-G>yF9F2?swGg%vwHugLr1;Al)K~q@J9LUQUV%C4P*sF<-1^@u`F? z6q0kZ2}o^DF=cN*YsSWJaQb2Ut(_}V1ilnP#gy}om-_m?VOM0F!#>gIt)lTw@!>q) zjj~cZIf#bVo3+{fciuKyEaY2GEj(wSbAQ}&{A6qrh(SsjIDDwje*EVKiNjW5=;l`) zIx=6rXcWH+Pj$VJ3_fJkGrt4Twhyq;c-pH} z`-7=bRn!`o<0$1@@HiQG9K#dgI^3mdkTsO`J5eo*M(7QsxHjOVRiBrMV=4Tv$h~#L zPyd7WHCSB#eW>@*l%g;Wj#yzu;~T`a*2H^Sn66Pimpz-MonbW^dr@H6QAnzyy+Pm* z>)rI)n;g6Dak@U(-je}~Hp`d}UG4(4k zRU8KQ@`=pLXKzH!UwB6k_NJ^*`D0ahirGpm&){{w@AI5l`kobIC*A;}w1a?OI&X}G z5ZZCZ`XWu&y=@&7iXm%W?H@xs@;i!1F6jv(orBBb%l{C@AujjeLf*6X|JpyL{?%i( z$M)QZlCm2QP--n4xB^}`d98Q!rQ`vZP0((TaC;FVbH<#Ka^=)5(huS#Z*Bm znQ8Myi=h-0`Vl}^oGP#+LD>=s`U2JElo0?z9*Ed;7s%&Q+C|H;ei_0XGJYj@k>4B7 z4AhXJ zepsk-cs!9$S##rP@1cG+1eC1}1s!jT*z$u>+a|vD)m=^@4BkqYuPET_yv5$ZKX0w0 z{OspnH6e5Vohd{Hyc5GDJX{zU>vC)ImAV?r(U-fD^Y zrW^6^1@ZE=XleOpVLT|y2i&V8ar%)OFIFUe-_35!YB*I5IaenpI zFM6aINj_*3J@K2DpkQygCe1OhMWgk!Rgugg!_YL0C_1S=iK`wp-pm$?j-p%$T-@n` zcQxGqkjU*wx2REBq0?~b9gnW>!W7BznR~H3ks5NEQzWJJ5@#iL8u`GUlJ(zba4>@8N__VIf$7KQDP` zt@5tF6>y*KDY?)mFV%E?l(>VJf&b=HixZKZy*??lau|ab4HpsXv+jL(U6s(UvotA&)pTf;-*1E!#ez7uMflU3eWJ`8Mst^Zb7y`p z!erU2Nn>B(K5uP#c&$zHS4)b8mfF=kF1BkM+uapsV|!=3$5iz0T_<5RRnuh)v!APW zZ85KxGk@D7FXxjq=Ng43GO-pQ+lGnFs16*Jr`vll^^jG|#bd~@29566*r8@O<=xwr zqv1MaD^o8kTHw=E{bs{z<@D6X z#$#0*9R*EJ(Hxb-QSg{{$dALo?^x1rMDvCEG@*;)>HL|DA1RqXaSP@;3X zfPYez%j1@4du*FDK6AA3TJ9#M8yTO+_Jdq|oUo>$q|(b6BPnm^n+awTY}9HY|6E?X zh>RlKv)C5?#`t#H4w!3mk+EhqyeHQ*bX~lk<*FI4l2ZhD6X!$Mj$`h6IV`cZZAGQ>P&Cr^*?+5Ek_R!W4l zUTKFFeRU@A*6Gz;D$1hMXuR4lq&dCX4iXG!XGV9iZfUx)X8h%1TN0GLTPM%#UddC@ zuSPO+t?G{#oU1qMF1N`pa-(e5lb!GaV6@Q>*$3)Qi|x(u8&y9zWjEsEv_ZvmR-#{m7Yiz8$WpX#lR>rz*)^=I7 zkqfmjrTg+(7sECw`yBPrh!@Gp4S2~tOZdC>wWd(aC1qj`K8M1Tmx$aKYxQm$h{_v$SbgV~ z9g%y?{+-{hDYTrHUHwLy@ey@~YSR)gyN*&BE_W|J`nK{$Sa~l!&sTbdSproo6q(7T zNrUn`A4j*p9xUDsgRvu1s?fZM+H(JVgULgSa>vu+{N9-U_RVJ;o~e||Ka*hvr;ND_ zZ>4GLjxIkf9&nU3rpU6!MjGU%9!EjGtEf##}+-trI$d7w8 z;8m?s`8(U|*X8i5$1GC7%x{2;HQ=)sRh;@KWi>h8V^HHhkJ2G2rscvkSi~G>bqVX* z*$Zs0_u4bWRC^{&QfS3)xXQYLk9{-qu4kNe?|KcxkLJ<6FNn`s-!gab@sX_58hVw> z6S?xHkJ+)HB*nbJRx@5<2Ncb(JG8H1PYd02Q0Vlow+$%Rag`DOQao1lq%@TGt?yi< z4vENZco|mCr)7D);$JKZ7W_ zrBs>v$#?NR`7@3M1%oCvhS%hwPn-GtIYPoB^r_XGH$Qssv|2N|$ibGpEvHVY71!5R ze7#v6^^5&XyOFoa_!>ou4~y0GuPh}K|AtuehV?@BuMbRCO^)tOuSq@$vCUO0lKLt6 zzE{dMuIM+*pn%*lTv&#`2Eo+Zj?Hy8f+IzilvGdb)Dct*Pq-8r=aOC#!=QT4gqHF9 z*D3G$A9vhFtol=?d=)*fasBu2->vnWnsO`VULW5`6WpLcZGL;oPG*tHS$h1$FI}b3 z+Am*u(^iKuT<6-AHlE+jBg&YuJ*GA>VdBbiioA>Fq~CXs?)V@v@XcoqjR5#32VHy3i-efhGMnJDSdK8r;L& zs_QCi*2y8mxB_Nfs6wf1M5Wot#%M`B4*3>4qcEOdoBS!yX7J)0<6YXlF)^OK=f+k0 zrh?qhUpm|I&y8GX+Bt;cjwdE2&*|RHF2AXFCiW34?Fo_F<5{gC?ss}7Zi7olFJss~ z*cQqw-E#8Jz%3V6JH%Ws6Iye>&?ZaaaFTlf)SYCH!SNbHGQaiSrUAvpC!H+`S zGwo9jtD*}&w9%>a#+Wm_t6v7p;M+#1_+?k}yvSr97e>2kpQ|tncSIjz?bRIX9tohh zO9n~MEV*7*K`(bUyrs zy|Yx7+wyImak0qLpoRhBBsQg8qyF%f-#sng`7L}XlVUW;=>0x;Bvzl+cG*hLwbuO&2kRA|{{zsbgF6QrJMN$chhsE^?jjn^a|D z2`1I?17SAK+FGW*BaFjD?zEVF(>$DOZG~67-WpOaT)n^HuHTl?%Nu0nRP&26=t+&Z zHwzq!S1ajS{n63SW-mUXLcN9c01p2g}#F}c@pR#f@ulc?O zXvFz2XHHBFtYbCbNh;-w8MxF`{5VWpCV7o0!5`-EwUf(&M&h9J9MsC;N2dr;x36ob zNDap|NbQ-1#BGEw?w-Qq@tQQ7m+Q74jgzBBEA^z}3>^0~KYU2+JJhSK=40^rAMgIu zhWT4Pr)9a@S+k^Xu{UbahBzO(#zZ{a&H&GeO5)(o;=k{>;;zBSfYPyMcm0@%Q;=1( z<=z`h^>KJFxE{*?ay^-%uVDB^YJ*;?=Ld$?pV~p zL->pawi_~~Mv@9fzP>)cVpXW}W>VM0jS1CGR@KnEz^!?f$#s|Bi6=0;0K@~E_nNy2 z`7@VopjF1R7f{?$w^8AVgZvjMq2Z+!0w&h&XMsWgMEk?yrnS)J{%^e1G^a(Rq$0Zt zl+Ak+xA^C(EuX_qGKgRP`MY1Ca^KB%d~Dk2XRgSMuEvzvM>%Z`nvk=!<%w7Fb8(tK zNYf(IP$f;8P1J+m$OTcD8#&@XN0lla)8eU;{?Y00jN5Lg*X!c#j7VA>MWR17YLn&I@rm>E{HmS| z!6bx1@1zsU7}sjunapn!p;k=nRaY*|hfKVyeH_%IrX76aUoAk%iqENO`e-YKnzB{% z?V0eqv`K1f^peP-&o)ANDQr z%F1@b`;ts!FHVi!$yuR;)9TyTB7T1f_hV+ICs4R~5yv+T28nA7?gptkjXQ!CFQ9h; zcgay{X0me+KAaK%^hoRkU5bx`gv~U4K(v>+xiLQfe5eWLLZN6n{NjTW|0iWDmcs$W zv(xT}Mvpus2GpY~p5FGA|1+`Qguq--8P(C0E`}Et32XIDHtoFmb~!r@h0Vow!_~=3 zcj=n2sfGF!DC}+awiv1WnNDUwq4ydaw)RXwDsP3q;keV2iHyf6-ZmuGiaC#}^3hu8 ztp2)>LypS9F{k%Vcuv8&ccSL~j7emASI3(#Li0_ee9)k?jJ2o{uI=Ef`up$1~MhOqv3 zpZ9}9HaUxJWdi7OjCUKEOKrYyHRDHuIrFC{(|> z*xm#9qF_09xGrxu??915yD-d?QF;W6aL!Dpa(qz6Q(QEhlvBDL&s{tL+oDi(H&bW_ zEU%T)pAb%TNlN&9E(vXC^HS0#=Fn?MOte^a7m-{iZtYj9$-i0ebA&P*bMP(uKtRLl z0z_y1C*ZXQuPAa!rAda9T}b>U!$n-vJeyi|^>QBK`f;VbYJvWBA$>6Se8*5fSFY+4_;W9fG;0p>12CgcC3R$xtonk6a!?!QvqH z(8t7oXFF)#$T5`^g?TA{S&(f^ex#$ka@uoJmA)xsd?Z{y*HK*lJ@H*$9)K_{*KYuGi`N!cdw6?6`kAO;@v5JPwgEJ|z>Z z?A^7AWE=BHq1_=SK@CPJ2-S1wH!?Ec;2vKf*&ExAt1>p2q1jk?eu-hc)4e)Z0fKwK z%W%B2pz%!_mf_VN-Kv^S&q*{5QxwaF?R)r|yBOY+bwYZe(SGAti+OkOLeyPy#$t(c z%c42TjK3BVW5`G2sCEWZf67+0l1U}SJN!_oDXY$0ps!m%FVuMY3Wn=42Y7$-rbFS5 z=RB0=#6Ov`9Pi0kd6|b^KBwto=A)#ii8T=Z`GKpaifNDxY*8|TrOZ3Hm*G(OJ8lfP zJ0{l~Z*p6E$mqYuJ8by-IC#IErN49}tS3m42xZUiq4fqrD+o*XG_GNNpLj-ioA_^O z_L#YSYS?`5{8J_x_5)ejPVjjMyS8Zqc&_;eX}Ea6YTlC(Pq`9Xb_pI~b4rzLz+;s? z?1ELfbLBDE(3T1{47*pW2*W$YR?ROI?yj3jMG&J#Wq-dZyY<75)AEKs%}Vl4#MuNZ zPZ~)(vA4$jX3wh4UQpi!AmVsR04wTz@7_vog_mB>L_kR2{R@~S#p`r_K~!X<+=n9t z=N9kj2Eak|v%izKTAh0~*=;y{XM;rw^Qq92hvq3H@?Y>8PJ7?1uPf=eU*NyoS*@hC za#52uVfD1-OfMgcxdY}$S9sTfhvP|qNZca)Tto-~**IKV%tY9?PV96!4t~YWDe+6@ zJ<4HGWCSCi+zh^53Hwr|G{HL-$jt08QsEIQ`Kr_e^23dBaLbUsa<6UE?%{2n^&jdmhED zwxQ%dfZY;p$FqI4KG3Ry-Tq-G7_n`)gv79S^NAQBc2< zQB{fvmOsmASgytQ2r6FqMXzmfx8_##gAoazQSWR1DXLA>*~4TgOtOX{b}ac9aoY-a zYp|4J=(duua_@tuf|E+`q82_2vv7bMv&E!4cw9~89?vs>9Iiz3v?zMn01F9yN->0-_yC1&KdTXdQY}*gRirQP6w`30)bL3Ad9-*{;9b>KT z??2Ri426v`5o#W|YSCmMFSkvpe1$nnK3A}vH^{=HrmUOOXX%F=*f8@c2fT|Z@d;F# z2sEB-U#)S|pOmlA!}f9@8Fla>@`%oj4Yn}{2)5w%={IrWufFtRa=Gdr9h^ov;oDeo z<$%kRKm9%^5;uP3YpNJUoH`0>Wvt$t}fCo{?zTTX&#%pQ0E%1QW0u) z>KA?8`wzz8On|O2A@r>xyEsyE_u9t?dkxl1Li&|Aaw(kE&46Z3ET%!@8^xP~a!8{m z^OAHvrd~LQ^x4F`Ozr|HP^L~ThE)z zrqCvz6;q{FPbi>W4Q*Ks4KaBc5?M`99jxqan+|YhZYsP_tdv}S1SS29xMiF!hDD;? zDE-pqjba|+-u5;zZ=DmCo%c1Kw2_Yy=ghp*2e$;)lJ{7~0Kmvo<4;gXW1 z;3)?8Z)3^+sB@OJPWBy665#hw2~4+T;_FN*8}%!fvW$Px2gqvDZgy}=-ET!7g5yE9 zKKksj=U%Yky|n{fY@S*@=ZlT?)S&TWvS?zFZq(loyFV zk>59__2-?<#io61(`xeLStdt)he|O0u(Qo<7U|=O^EMVoA|s`?-CemW*e{;5e;Tni z;}#P!3-qF^p#ZhEO`11Hj(hKyr$UOLkfrO*RuhxsPHi_3CGq#SY}P>eUxih+7D`vtD>t6xi%xO-&Mw zlNM3vCV#4ar;}F)A*$fwX`(JsgjgAni~s|Jdz^WLPybDF>Cg;(w8{Cc6fTFS*1n%V zjmXQ0D<6IJHp|#cq2^ZS!j?i0hvfDW`7*4=3#D{h-4xnN_UqsEIarBM3tu(wE`N*c z(!XaATbW+cOkIY!Uunx%uH9q#Y6%MYyqG2XuO5cczhvs2V7jy(h-bVQzb`YFW%x~j zoqLz+sP2N!W~EFG;52rn3d1ApzPlYw4ut)^@BA}yKXsQa>ScbWu#+RfdP4DJ@{N%b zK;m=;mF8t{ld6N?Yv6yE=$EWA4_q98=_GW~_dD28q=zjZ4x@1PmS30LKih__IV-lR zd7aK^z#jDlMtL<9{eev3BVx8byLHC8iG|XMj|N$Px2ucApiq;IkMG!tWq9HpdJaAM zXaZ@6!i1pMPn4ElTp?{z$cw?0kt~Tm2q>5W05$*AHaj?TvA1ig_ETIEzSvX%|AdpB zgRX*ZAE;pl{!7x1i^El$J*<201Eqq_kjkxhxtEP(`$CQW;H%4*Qbk=$B|a2n=Q2<4 z&v)`!;?@8jJd~fn$Z?M{UU;u_gi`!g^m8Rrvc^n7aDw2T!b40#)AMrrzh?W54c?Bv zP0pT+VF}+{>6EKMat=)wx$K*dN$s0>!92Q+WM2%Yh0Lzuq76GlU!nuS*w5+2dNv=C#{)JPOd;g3R&E+lqOgt^1EnrKu1R$r;CaOVa*1o+ zht>HcI;l}H8uDc^ZBK?69rFa;)h#GI0B#x%u8!d^WBRNEdiqpANE}2`u z4s5YX{sn?Cz1sN~0qCCi3J#il6&M`jU0Z4<<7z-$%j2y3w?87#%X07$$KeR^lkkFz zgdCG=L}rr%CWI$XPUQMrs{^0opK-I_gyscjXAC7!SMrIYzU^4-=ozJa0L9za@6}22 zp3b~$>8k$8*A{$tbMcsmOj!SqYVDrP=0{9O=7Fw*HRI;mVwo?&XV-mA+jK3OwLxt0 zqgWDI1Rx6Vx#Zf1=J%x7m261xJboHb2ak<~Z`!D3o&<_&BEOVvP znFk&!>wF5{1Hu7mED9@P_#TyDy+IeWSY{c&%Do-l@6bMAGdlO_;xuFxi9%a7Vt+h4 zR=sg;cg=WfcDXLnS9oRoFeC{f+LS+XIE=Ao0kU#B_J>f|$)IE z{Am`272R!ScW3>Hsg*Z6DiiamnKx_la{HZ?&J9xUJv0HbRpIQjm%gPN3qzsE!TC?k zw$Q;E0F+qzcAexR=3Vl2_SBp8nlEE-K#wfi&c$+0&6i!8Yzk=@ODD@sztYq>>w_8l zG2DU8^{M54UO+NtlIifc0-0=nJu(ma2wDsghiFR+2|5Ec?)zll3NW~GZx zuda?6EcOgfN$>uaC0OeDd5X49UF%nRmqHvJ{iuVws|`C9I&x+2;Wf+n-n!~%FK-wp zA?xL}RCSDPEZD)-l`hLpd3-gm%YA1(iHR`BU%q)E+>9Jw%rbc)Tzg)T#ILe6rO}-0 zDL>iB0+SU>W^d6zy>O*nQ5m6n>@(Z4qr%>O-}=3uDY?e>J7CU48HWAo93KynCg>%e zlTd74FBwKJeN=BxBZ)anCe{W}bS&p>{au_eXo#=oG zrJ0(VT5cUEg>3MZP5oh|YmeTqzVF`{=6gKe&8cx58?_S3b73`BX)N&vI#Jugf&O6CB0+$D3pQ{cknV zAg%ztysxQpIbXPP^%AFLX%dS(%D$2#XRg?GyzA3}DCFoLcUInm zSgm}#^JP_YRx8lCI+Q7_kOhWJihSrwmBU0An)aq`oqVE&mqE`&G$V+dpk4K#CGwZmGV~yeOh5*1Q&YK4vnmH29G+GZooYjteN94y44ps0L*+?-mq( zqi48tf&u$DOy}@iGE4Qh4S_b3ehRM_KHiCRDZ0|zq;(m5qg^3=Fv=>svGQIaj*I$U zG27P{wf)hkG60tb{LVRYReaKf>yD5cG)|Thv-EQ9V!vJBm_YX=P&{67M3~U=wAv{q zf{f<4-zg}v!?Jl#9aHi8Hb?snNR<>?TR1l~My`*#Wi&rjNM)5g+Hay*v$;rCy%(=( zxF1?+#Zvr~ZiAP5Da-H)!CS_wdt+W3z1;LA3Ub*H9zs&iJzXF0FfxhuK zMjY6fyklj90!gUVf9d1sCm*GuP`y`@YGr?-DG+33C{pQ9JZa zI?z99hLo)0BIb+|B?>O5j71B+ zD|At+c>@5tKVO}NXN@XDW}F^D)&iSN?wl;>bOmJKX!K_@5wI z))#f5DQVKk5n z=LU0waRbMAl}(U|l5%cRx2XRP&7aQF#ihYiBP>2lK}Ssd03E}@mr(FlFL1Cp91x99 zLdGxhyzggauqb@|=jKsJxB3OD!l{tkLJn~3_Z#;V^P5??kC0(wHH4E^V2Q+{36 zttW#VbO)CWq)Bj}a%UVl*%B!!#aoI^)Bp1=WQcTnn(>?n+J7Tge+}>ict7OAy#py6 z_97hG73xN|>AaZ2Z~u;9@>3EoxjEQdylfxXUtMjO09*mCWhkUip%>8|#oa0$ zFjUecvm1!47|K5q#9jRE-S1g?diwE{6{~I8oi}gZ?Dm&czcemUzp+TcnXEjO{gR^y zURRj#*tWDXiEb&eWN6+sshUIK^}xK5#P160g0AVgK{cncn_vi8(|h+4{WUq+L=Spg z7y~iUD78=9NOgK@9rLNu<1bQH2GGj`Zk=Z)8_#*ydCH0jtftsApp&Qs9-n`IV`Ee9>>%?5YLUNN9DB{VawvdoSPq3vv33RE#D}_3s zxT5Qnp)3>^yUa_y=iD=ahe(V|GT&Z_peU-n`2{RsUwDY3FMbAUnY#g|CKh0zkR&HF^P{& zrKhUmR8$v;kJBkHD@$-;!m$=Dfj6nmpr~h!^hi8@S1deFc{P=N zj8K*S!QPEy>!Zb(gh^J{G_!y`i{G~-?Ede6eAhzi2VIXo$J_GG8h$+r zM;$h-B%2`2a<)*S_MbgXQFMoV&k5E#gYhlqAc_w1jv z25OKItj*$prp&}!r2JOi4zdi`bsiI1; zXE5!_za5$Wx&mo+q$6~h;IZrc{r6xw82|RZ{R+~1ajXA;MpaUq#dH+C7ND z-QV6z7QbA9E`l7Bd|fLdwvruSUMbsIbrnravtu4n)lc9ZA`?7bl<<)gLyt5~t>N-L zJj50rD+icAbC!6;?3p}D=+XQtzY^Q0v?{*HBm)CwBW*v@u4(6wZcg%9T_VfnZ5Z%D zeS5xQTOYATZ$SI_wSo#0Wm_tiU%cQVnw%_lpV}#VV1D5>X=jvZa@&`o_lIqH@K5Hu zQ?n#)j|ywj|2vIrY-0zjZ=eM!dl~UtVu!OXDwWZp5!ChH9q`dF9VA@O95Quce*Mrn z#Ty4J!ig@5Hg7P><*_$w2;=+1^VDgW;A&_O=E&lr+%XjHAw=r+0Ym^$CoOI|aDcJ|6vKP36pP{(mg=i=XNNauDzC@`w3+fawB->HFgS$9x zzyf}BR>GKeC0a6w>X8c@0nMNwjVTM}_+Jx{a2L!MR|=l<1w28S@&4(9Pq!m#^wDQ0 z8Tuka-v3uWoseNOfJ#R>7t=0-#b)Kg?{N7$I9p_J40!-N9CCU7ou{kFW>N$hThB>4 zpr2-9;>DC{$x{9u8}>T#^!NPC48n@EWYkQ5pNR+h83ZB|@L8C`jm$>FZD zRTuUT`ou0E8{udcz&Uy}r@F!YKMXk1 zKMNYLdaMb6Xd1KU+uAbx4s* zW7EVYCIsQw8P-2=C_0xe1M&lTussPtf;@Cla`kqyO#QcphK8|#^~ z`H<9rutCV4n@$IifX(O?UtTt!yI-ANzm2Fh-DYJUtEnDaCNQH3Gt>ihy9~@L$F?`n z%XW*|+=IyuixfXrUcGOu;^bfR=$LmFZLBGg1sH1eYrn%iRW)p*{2w#3iC!Ka9v{!I zU11yX(Xiy@(IS4zS;df7I(h*~%|D(~U58J&E)?pboGxk}>QmAretjO(?l@%IJ9-Lb z5!$Mp{Si!+>dtz@#ShG1zL&Iei$BAWJt&NC#W&q!C-p;{1uXQyFCPob00eDgBq zu;^2|ML=-gzwswL@e_mB;DlOsUa^R@EoVU;P5(c_0<0?NJ08kw5_1^FFlxJ?(p&gAYuG!$Z?TLqqGFU$1Ag6oAYc0HSFkOA+d= zz;T8ILFXP;1|6&H#|NLbzl&g0Tq~Tv&e~d9)aYX@Rw}8QH5Zrxl{j9B(4a6r;Sw4r`BEX4-KjDZWt$=t)lOcB-fTG zgCL{{Op&m=kXXlZY9r;iB(L7=myJhcQoIq2Wn^5>*vGwXpD#ofsQ?^%xQ;Tvg7GaISi*saitF8LY9FX;v|`uu#TS1-9hq#%*eZ#1DLn zyur!JO&N|yT`GqhgcdxLmCdIj;K|QNC2FT`DkvfTjQHN?1LHgefsU=>abo^q37g?hXC5Nm zO;G<6mUSUyKmN(zec4j-zjb6BI(GmnfP(?NnjRIWqP*NYV`fy~t0?0C6A0^vVAYj}kdQxN^WC zjAULYI^@79`gc-vsF7_8p)T<4AV=?0Xrgdd%k)e7MM@@$E<%L6bxN7%A$t%Wver%S zi72t^LN*(yBqTGc)$c+l3g>A_dxmmheW zuzTm#M6QY{8&^r%GfZzLSK9EwL~scrL!vu4>$gcr2Gk8~p|S=HGb}_R^r|8oH?idH zP``kqzS`dz)?39dE3-i?3k3;VCFOCBOr5heQQJ5rBf{cpz<#gPn!jc4YYZ^^zq1RB z6?5q?Os_qjcdq)i798S!Nf8O{w2@`Bh^od(mlk}&DcL|>5y0fWA1!I1wA!X4B6oYD zVkNLPqO5nutG`s*qQjkfHSmV*+M0?~%WMq_reO9cIVBa5QjXgYL`4zX+mG;^&@Kal zAKI^5W=(v9Hwhj1HMLDFo~O-Kq4JSim+7cdWO>=+;fJd}di)Eyt_~ZWvr_YXoI)Lh zZB~)fX4M(&R-(OcIQZT|As8r99FEY;|0x9H9ZGTm5T1bWjj%;-o7bsSW`BDeg5*Q8 z4Ab4%xD~J51*8Hr6%!g?PTG#d=ok2`_9 zq;OO}M`Oc8@8|c_F@t5hmdKH7SgZgWXzG^l$WK=aOcO6dZ5>Qlp0KwQqq(`c(7s2i z*YD<1TC)AoW4=<7NbiLR+bUyt!cb~c>TZ%$`An&+n(?6egO4K7)hhgJ+j8}#X9HmJ zLp(PY8lX3$Q20-Kv_83d;}uH*VP3xHpT^VXPCh=TjjKfVKHSG4)oo*Qb6kc~4W)ZO z0v5fGFMho0_yuZlY?)tg9BR^lb!ZojfCy;x*?}kZI*!%{N`hz52s`T0U+bX{eD^3_ zfl8{zz5UVLn`8f$oK5rkFiH{T%$GqmpF{tc-1~Pk=W=DuhFB2+zi+AGa%zV#ml`4V zI<1#JHa+$FgT2%YU#F{{dmu9(hngaX-&^LYgk9e8pDXyrdD%sl$luu=LB~Pe^m0l& z@DS8Vu%xE!vdyHzo8{fNOlO|rFi!)si%`(e@BV|zO{%P!;YC08ap#Q%O30saPZ z6%d-qI2Wkvc|Gb0=~L?cuFz}K{{WI4AUqiQ^j;peV9m_O<>9gUJYHkN4a()5y>BCg zx0`Qk2>Q#Mgh`b#);$%%h5Z)aEkclljQ}^L?U0(3!F|i7&FH+gBwr}Cd;W1h%Q(3a zN(T?@p^4S|1V}Ad*Gr5@pTR!cSSayV-~zrpkC?dX((zsiEvz0a^zzu;O|Ru`&E3(u z$T84N2+!mUOjpepK^?yRRm&Hf7ly9pXD*vATD*%65x!k>TCUzihIwnyCKbpNbseXw z#gtc4pS=e+Tuz{abER619t=H*t<%b@FnI2g)F^M`)7{(qU~m3{H;Y`ox?Fb$l%%0h zVIVbx<1%YcC`jm|24Slm=;MfO4mwc}Ezs4Kci~)4J#`(EI<>a#>#;3b?Y=kDKh-Am zmY!jgp3p=%MVKY*)`WqB1LVr1bEUlG=>yDS0Rfct^#|2(gn(n?Q3tGKrGSl`S{!iYt{Mh1l!|3th@;cY1h7&wrEi#r~Z{>D2F2A z#_TbwInju)4^FGF_B5axDn${_u&t|A5MDp<_Ycs&KvI)@iM~B;)lJknYO}T1ZY-;2 z(-N_x$epN$P4&iz&VKL4LxxAimHL}fZdvg|m`>Eak8f_b_TA=P)ixZHCOsmGAo zYfu^QndOz$6oN$U^4D44tKPLy3X4Cg0=JNlJr6##jg*6Ban^*WsQB>BTnJk`NoOxk zfeMR?SnxQFu2k1Z@r+qGDg_i!Z<+NLL_ARuZ#}PEyES^Ls`~ff-u^t?;nt+`l$^L1 zB^Dj=d4R=@x&L?-Zt2>QQ_^_|3b5x!u=TH+vsR8K_k_4jJN)>s6-vz&LD6R;JUAhb zbj$}_iTrK{{KERp@hrj>pZVVXJQEd=^_p+DSXjO@`!!3LS)s>Ahec_UP9=UHCch&|*cOMu*O>?`_bA#zH<>8}>7l)XQ6VD-;`OVb^UgT8!&Oa9< z30An-CACvC=H|QT79=s4biCd8S;;L?{&S8EwsYk!Fu>FlgYwYb$GwF6mTa#7xJ1Fm z8uU(oy$Z{UxYP7cRFxMQh){3i>BiR=vkz^#>XrOM6n-R$6Uar~o{7G4sHTckd+)Dm zLgYVkk=s%FO!ZDRe>T!_US8edgGaI4qs^*4LIstg_+BW@(zO=)UN@P*VFZglxW^sgJRj9c}pZIj( zAzAKSV<26cLjng+`!6y6M*BS#H1{jvY_T&})M3Jinv z0~3Nxcp}tPS~(2OW_}^An=H^^xI2x+`g*#$y2iS?9w}rq1K!4XEiJ7Sh(r@U6JuY) zIz-wDU+gYUlpL2OC6k@!`c<0TIT^-xzjp}`h1Y(=SS)SLm&N_z#jsqi_Wpy>oHxU_ zwdG&JS3d=?b5Fy}Us>OnpsMC>dUt7uGu8J)d(U~YWTIl z)hiz%6<0KGb?fpTmCqm@UL}Y@$W&&MFlf6>`qU^rkGK~gJGIh%6^p?|@0d%*_V0(P z9ron6H_GCxIiBl3FIU}oBkwloK%&h+i?JB9P)yJVTEP!S^uSD2&RbgIN)##XBfxgc z#FPE^K_nf0^P}NJI;W#$7!(4XPUZg2=74A2{ju1DjEK@R`|U(yY6gqQNQLRL^A9-u zMUgVfxtHEF;WaVCcDz+n)n03k7cWsz&IFnU_1&={_WK||{<3zcgwxV|6O4Z2!An2+ z)xc)A{Ejfq$_DyR>$FZie6QJXIytG~d9rD#VH^D368ST~bBIlB?*i&2S&W*b+blbf0VlAvj?6+)7H+Pw| zQ}+7hs44$H^4>BosxRsv9b$+9$w5FsQb`HvZULnm=@10z6ltUz1?lcaX^>PxQVHos zKxz=A>+X@?bD#g)`{KU2_k2DogK*A1Yp-74wKiAfz3x= zqeZn|eb*{tgL`h#go}%g=qLjK8h=+&NZn)EJL?i3^q0-R&bIzV7AA(3Bu;@s{Kn@ zIv}9kX=tNLf0NwCtJ>@b$&2YEWezq@nW6`s?{cbc9wY7{Ji7nfv-7;-AlJ6}7d?Ou z`DDYqy{WM5@tHfZ?I`}TV>h0YIM_VRV*-W_qf~>jnxSd>9=w>M3aTC6r+-ti+<4gM zv!Y;ul!bX{shlj1OJe>7psVOc-0OsRRz7w8?23^2slt_xwxtek;O6sUT(aur$L?DD zSpj8t$ogjo7ZUb*2Gj4--zl_m5dKiT-#zt28r3_UT;lWaU}wp!`@A}-hoLu<=Q8R~ z(w3kS4D9OWYtDiMb!N5vV^s$WC^>?HFkk-3*9%{9iCh-u0=QXFVAN}0#34ul3?fl= zDbE+R?S7hWKK#CYq*&2Var8ru`Wwg176&GEy{}f&%#DwDUgSml*ze6iu5G+3%5ch) zsw#FjQ&?H+n~Dnkou+#KE#FS33V+g&v4U=NI z=d!5=;>;-gr9k=!KEk=%+A|8n1)zH*5}p|o<5>P}J27*RQJYg04HMnLfZx@#d8|Ou z0UZumd-u;xZO+a&6fqZ611Ek#_H{9|@}{C+Ms^2vi6H)I<6Z$Ya1Vh>C$xt{+N*K( z2f-i6;AFiQzZHVD2;y5d1as2-8Q>?OxkALr!*Z@9{%*h*g1Y$mE#A-de>cD<34&W5 z{pcD1GqELo!xb~Wqhysb6WKEA?E8IdtMu}B)>ZhPwDw)-#2@P451r1|AGbvJs}7Fr zJ=Y27d;KAD;Lx_70n^CXIu_fi7nJ7H3E*op(&fi~#XKj%88C;t@~I~>V>2GBHsXkK z1D{C*}58JTs_3mfb zBkzXd^HH^jqJwd25ZLDR^)*|Lqcam-i^c*Q1kzJEZIccNWkrmKTOQemL#h)}+U2o0 z^Z`7LaW*ovqO!8z_grm&zLd{a(+O1 zY#rK&K-97G5F1%eStGA*k~d>dU519aEVA;qs})@1<(qHT`s-*ZflX#A280;{f3h8& zi?4Sg`$A7nPA+U@fMcYM1;uceUCVm7|A^16qMnMNNl#jG9~0@`p8Kh&me_B;w;PlG)~(L zDrRSRyvtvqJP|%*R&w$+$E9i>bxkmVSNXgY_gye5%%;>t-WX>StT?Xvhv2{f>*?;#cVZ^cDMbQ?+Ph)xaz0 zLh=Y!C9DdMx%5um=+sMcsLgn2D+Cfcadr6@spWS^ifT{tsLzdrKAOzg@(Mg*duO)e zub7)B5fL|zv5IMegACFjeWHtH6&FUk!C{{%(=$XZg>A$dj>cu}7F`tL8yBgs0OQ`T z&t)ps7gUzd&ssjKHoNKFY{8z~K61?O67P%v4`%I0~If@v*Ik~$a$m=UH@4br?`}tQoLC%@z$BHo!bn8838T_HZn-vp! zI!Axh=Ax@GX<9U-{Wja*=UubB;$xA7zjb3#T@dG3q?ZqI{&r%M)~4gdOovJLxIa25 z^!2hA?<~i;u3&l>qxc0i2K7w!+SDei?eQB?p{Fljc5&D5_+|7B>2%Sng)9>Pi)bZl zCgj0GwZ%d;(~jRKWVO-K zz72At*A?O>1=4#TodsIO&y$X$slN|-THI`tM|9P@CX&GgE3so`$c2=q-Y9|{1hhEV zb_^StKW=@#Rqfv_PQV}N=n=TP5nqx23c)4}vz>Y~f@P2Nk|F+cs=I08Dj%Pl+~bp7 zll@7-MO#j({5vmJ>>a+?>F=d4vK+7{^*0~viJs5zWw^F@yu~N=xk>wbWe|iuXm7uW zCuZSN1S%?p50{VrDpnZC;qmH6zo%`6g@x_pW8)N?)9qb%(9&b>Bph71{oFvWL{WW> zOx8l;1Se#d0N*I;GIQGLe<^omifh|Qu0Q;9y%|ey2#xUNra2IPB8S2~gm{uUtY)4g z_1p&wKgIfqdRTNXxB+>Xm#Apwin|Fp6T)J~AA1-_upPzfnZ(P2gF+^a$g^zR^{P z>i5x`X*;Fi-@f7ox}uf8^C7a_ZzLKOKx>5V*VarTc%R_JAJeI>tC)L_vRw#V6nBV- zs*Qe9K}Um(BD{?u1hVB+t!{d`E`6umrNd%SrAN`3ZG|5K84LU^dW_Az87Uc!B0zbNwRqSGO=*iWKTv!iX4wpkCyp_SG;VV+jDW_J1)S)Qp19BQ_?NhI(UVx`lr20$h%iyI| zU<42el-)N77G z4gC=1>!MyW@e*j)lza3hTV#Y*4bwK`jMdgm7#goPGbq!kZMS(aF%v)i+x2WF@O-_R z2?1tDc~B<92x0~qMb>$?i~6wdHf>9(6;E2GZA3`p)nM1cr5Dr?6RCevfm|iTzvQrW zrkR$h`kBP+eIP#`LDe0FVV}pNRnQ<&n8-xH?G+a+s&ugjSlTK5$FYf&rml%cpI?1t z7iu#nXkH8WkN+SVM>luG_8o2}{a68^jbGI8(%$U(U3eT&->lxuo=kb{I&jZsRwou{ zm1wdohh9s&>!U@@r-a#}`!Of#YQ$Oi+KX)~fduIfsic(6sAGwG1=C<>XZlt?_t4ec`w9=GZ7Pf z0#D-T>kUb}B-xN7sUVbq@rN;}FF=Og0+YZ@*U)Y#x@2@v@n>3`7hZWaqNS5ppWO<` zlv>H)9kH)UQsCs(?Db)McVYpHHGiM+DgPui+JNRTZ-FkaVmsallVpDsP973_1+yU^ zl*WPc7I3}+1VVS-g$lX{rtK6^UTVL8))^VdxZM5{mWjnxn=JlA^d*iEjDdAX#=gk$ zJ?JA?ro_FxN~k|L{?j@Wu*E>YzLUkLADzwG!A)|Htfvr$6D8CGk$=kxOTRTNjw;5g zOJ!BQN%J4z{-(U1`P|Y0s^7f@W;4IY@5^%AqtT{r6!mR@x>4a%l2}ar@8ScF?>SN0 zbcBiGTLVHbj@g*~5^zF%R?g%S(f{)4*!~ZO|LJ)s zBs^vLq`Q6}3j#4OTJNX6Bl7u`Db|=9hm6Iu!&3zyCwy0Vm_g8(@sH3JhLBM2szTvm z2yQ?&lJ-lPeM2cKH&;c|whF_E12xTeS9{X*p^|C>558P;NZ|~s0rMBTjvbqOZ= z7;*gWcj+U`vX5EI4^0QXtK3b<_-RVhBuYRD!wr*qZm*DC?i(r46GVte7eOh8 zflXJ8vcdNycvD;%(ODe?1VPqe9a9vm7wXjRuqAlThbM7 zo}x6v{xT;%ZI=4@NY*T(3;SuoyUM(vM;GE!lh$3|)xKCavlHJ%_H4iA)ROQed=nPu zpRp40ch^)-5qSCaVImZtt}a6m6N*%KR7I=7OI8 zmmj>9yJwnq+>V#=(2Ohwo3%?C)F{%xv_9O5e$fXECKbCuv9SiKS|}%`F=TMTlL6IApzOAiG~JYVK3)>lA~l-V>% z-Au#ZTb7w>sWS8|FAdEBnvNMl0ee{Qba1lHf;vGUVge5fTj0UR>{8Tz{M?wFtAs3N z)efR&$BS(2=u~;35M{xt&bzlf9a^$tWsGuJv*`OU>w0}`hmXZPWMWMFF1prfj>ApT zKuuw8F|onSjZk4)H9MyKhr1v0H6!N4N`T&~Et|Q0n!AjJ%2i^VKgB~q`nK4uu%z|Z zsWW`0EbpH9DAAFCdoSGU_|!B*JX7(I`U8h?ebDf0yKqu-)HRstb3 zXV!Fl!X06jOal5w1tf#fW5$iKu^Z{FV=fxivfWfh#YS@TAicEZpz|7Bj_LMVXFwo_ zn+VY>>q&Jk+N&yjn?zVypRrEcX<`7v{I-gTvT~HkCusXrn>zHtqR&xroY@vG?x&iQZ-hPJjv;HmgQLXTg=r3cEhnY5y?dd!fgt&;6o6U134r zYsJ~?k35-~wUb_HZ0WSE5(ees4Pj~BD`Wp+^%4Qzz>a<4@AabK2(1oIuoDh57ebXL zl#T~iEx)$7sAvDS&P)mE7gv!?7We~s{5Rtn0+D77H%wDTqkMLbtKa5r*@Qh(KZxlns!yg@w%0UVIT=5X&qwSU{sKAoR)kC z5LsYm_FHE<^V>;@9^BpYsp1Ii29?^!?S72psJ65-vM`-dp~|+QF}E z6A%|qYCk<}j{`;CoqC(D4|kK4=rIjgvF_t7XtE|gJ$QZe(Y~OS7w~7{Rr|ILe}KO| zTX3<-(B+&r9|bQo@u}=j9qMf{*RE(u~Ox;z{}SoaF+$Lz&ComWBWDsQq*W=gjze z$jF_7PjW&|YpirxdJ-0BbA!O!yK(mXsmb%`^I4Op2J2PxtBb|_X`3Cq7#adXJ!2o+ z7Y*U+B@C|ywwoOXQ-s99pp?&SV8Xo@qPYFZLY~PMa#8N>Gbc7qHLGZpr0zmd zd;Xvm?xDL9`zHe8KzX^jYXrHu4-3LRt76YAjcjmpb8~>T%8#Q${BueO(c|4OfcR*f z{p0N?O7aEj@e?oX;!nX!1h||{;%08*&0Z=>wpN>IM`x9;2>AXrn6&FsRpsE3hU7k? z8=>0f1b5QLlS1u05qnjCZxA+;JcFrV*w{0DSB#lSbwe~QW89awV$+zq*Qv0_A~}Yu zlF;-)OXVht+~DA#ZCKwxGHHb!2@PObvec*F!}wZ+vbzmeE-ozuRw^ zJ2NB~NQ5&X`>G2y@o9JM&4|nz;Oj)=LVM!QLtXX}%LXTNb(X}n)+5HkcvuUI3>w6L z44B9`StM71@Xw3qlCkDBRw!q~F3(hKoYP^M-J@?-TYIaxLV&SaR1L&C+=JgoKRMlRx z*~{3j@hgXc6C3*2CROnUcH8g3fwk4XoDyFm<5j1zLv@Md9|Fz{%JGEAx3c@*h#)GPe~dQVyzSaQ*;ss5H1^FNO|JS= zE#0%ofkONm8xfGg5IAm~X$zwaqQ=)=ZEt@4z@f70(`*Of;KR~nLRa!!Lt^jI1-8Mrf%dLnXM9?g8_+Q%xI;NBC4 z@0l?1HwlU^-gKFvPE|GzNI*R9ypNy|lmt#zHhW0co#BV1gl8*}9BUi1?Q#E98wG!U zcAPa9HhKoSt`E_Xq6b>BL1yB~&rJCvHQuy}d+H^c?~W#V6=+ekql>a(8#60sGyTtV zdNkM`v8OdhhQY8MYseu>Jh!PV*)Ennw*9t8R^xS=qh3XMYC5@P5ElcTz>=s=#~|xD zC*2QJ&?u^XO_~>*hlw!h-!{yyq;HRI^$;hBkat1I|8bA*68NdU*O#OR+s0QylX+O{ z8|$kx5m_Z^PhCw%RWEK*>~Ao;PZA3f4PxoAj@xO+zu%3Ioxb^^w)?_k;&76YY*&qa zqanbwN#tU0ig${U0oaQSmd!TGu{ng34Kcir@hDTMl47hGq~7@0vZYWt%p=BJF(|5d zBJmHaC7g^KiX~<6LdYeTyz0=T$s+d59Ho2_>kv-zbtQacu;U+wN033_>qS&k+s9dItGy#~B2)xtc zF5CMN0dHgU1T&3URPUV_*=PhqN6Vj+)(OqQiLe%02+ygAkGzZY;@elf>S|Zx8R-hq z%n0Pe?+Bh5*?l>&;kc8*f`^Uwg#;byAK291?m#iKwfS8P&J8MtO(VLr#5C2}P-4W@&1Slc|<+Yr2ijWv&c zpb`wNos8};4Dqo@Iu*_Ppz56ooBb40%Bx&t^mJeO~Wm$WIitq8a@%joN3*B2% zsfoK6({|n-Q3)QqTMWTIM^PseUozIDq-^Ji_ZF^ z#=jw^%|A<57W?nN*axKC)D{ekzx_&k22x}ivu!}rv2n0JaN`qD<44Difj|frS43Y~ zfjnxk8Z^6-LPh55_)o*}RpA5aKYY7M_GRDs5gXJ%xt*AwtIu}6ezz|MVkU0$^Yd^J z(cPovoT4&e#X`#$=!uyi!>Pnxf(ro?=IZC?XDQzg#Z#&<1)j})mfDCZUE3Sr+I}0! zx{{iE$;duep7#~xSRC-dZTa!J*Y=R<_V!Z6X%lX6&kR@Y~X&xW}owdua6)>+uaP26-U2*{%pq87B+R9=19Qf{bhPPo%% zB|>rPlH1i!My&5Q^41)r<}XUeBpu6xTb46QevlN=IhSAGSz-(PuU_%h6^WfpF8&_; z+K>bT)9SJn!2Q_&IUs-ev-%f|5#Nw6``6fyp{%S7S$9fY^gC2qTP$a*2n&-hc+6Py z>!6x7j+sD3_vH0Ul<=nr1Z&nPd?M$_K|x&5)p11Fi~|||m*s^lbq2cswF&Ms0;l;nB46d|xMpfZ zf$V`vm(4P>hV10VmmNRyioP_A3vo;-Lqy2lM?1*@zxFIGfxGVUuOLFS>2HoV4O+CxS6SRFoCNkaO(oaD(PzEAIftky z|J;4Ze;GY8VXXvVc)T<)XQAt8J@tjuqa`e;_ql5x8y5nnNfX?{7?N|Gl^ck*{Jo#rxh8T09H5b%l$XtdXnQ8Ck*CwOSHO2_#+JoC4J9(TrG# zll&{{9hc*FSEtS(LPC>QaFDIUHB*xAx=#9b!Dz)yEgWH7p2mP3CwLyAeKOR_Wp50t}Y7$?@f@2TB zwcITW8)W+;-?UyoUN+meu(H~ihVx?!s#Q(CXP?9OaCbj0sufIr2j4(9;-E}iN$=Q( z#?dYyz670Id@q^Y)~7poygc^0-GO!(n=@ry6!HW_qyP8y|7Z65e@)c>r&wSQf`O2* zpc`D9$i9-W+O&!uYmBroX5gcIIA479r6?VvR~qD+Rh-7+Xn{tH>?;gIznKsuB($!- z$2c}Gr1%ao5qM=U#F$=INWA@WkiSA;##@kdG+5^=B^ZqL&r1r4qrKQkG3 z>1w&EJieO#%J;9kmF+f%(;-t#k~5ow`du2DfmttOF>+#K1x%iHf;z!qi7JYK!2J*w z)O+Z%Yj%W!;8hivcq9ULw23-yAKP};6vvh9#G9ksq5Qgl> z0Ul4vb1F&$%7x%VyQ`NyBQuCde)Twn!JXY zjzdfM90?}ovgHEP5c-C8^$7_9i4vq5-n9+I+8=tsl12 zUYXPC7ENxqvJx2%9JTZU4`pyf^xc*>FF5`A0iAw1O$M75?%cC6rIqttq}vWC5dCq# zYz$DX!D~{7$2Kbxcy=#*?Tzja>8!$n5U>dlXBRPvh?Wxg#kry}V-d`Q-#VJzZd8fJ z8fF&1PeF=^=mGDe5b*8}?>m#(0t(6Ze1n3t0@UcsakYUif%1NFGy;tISc%XLbSW87 zTg-X&b~otz%hNhj4j!?nldacst^8>{#dJ|AFqlu~k$Vs!J9(@rY4}tF4G?B$Jy5Rkpq!AI?&is@Z~_+Bpuzs`WqvV znOv-+nVixfjfA68%ZtWxtlDF>H?_T+)JZbOk4Q#`ZbS~?AboQXV3ja^&S5|Huxpzd ziGUH2JM0072@+C7nnbEGooQ5cQu*3|-MIf?s4( z4depqUeZ8*2}Gio*DX~=cZtXn4n*qM={P2wV&LP&0V7JZ0ZVh~zXX?ujuO2OulSuk zkAx~CJ3)CiMspblLLi;NYOI{?Uz=kT>RGi%b95a^@*7%kY`m}_r#JuDmk8-lSH5qr zWn)kU8foNR4^}a|`G|~#qtmy{iWB^QKBJpgvd8~m0U%){soZ2urU0q523Cc9B;Y5O zv^$h>xvEri{%a4%WNJf`{n8Vr0lh)Yp0*r2L0$GbBI0p3-Xt+_TrFa|I zRzM(H8qPiK!@Ny*M83~;tM}yfPo85MU0!F-s|wZ+jZ3Nxpd%`sbMFsP9@a26Hf=7| z(Rl|@D=XdZ@t)ak!`*voWl5e0E!&=WS8}sj&8I-9fjNoH1>BhPUZQ4VO6kH)Q-@-H z15vCNqJ8TUcgT~B8E>sO1A+gnPn90JvY2fGEyV@EFqn7!)QogQ1o(#@(CV18TMw4D-0Cu0$^C5 zH78v?=R!-0PxhK8qw8}vsh_#s75L%3*-Oao|xrBLHH9xa~@CsIqv_RxIe zE4!CBJ|wd%NV}Y#Mgi1Ko7Yi=HEwtFJq_~-B?-uDCO0FV3La6Cci&J`c6A*Ne1S32Cu@AwM~yy$(SL<{Q- z2r`lWzWy^6=UM>V9kG=00#iZW?ary1p3gf{vN0i|1ppeqkPQS;pI9|aFZWF+t*-pppI4|Dx2=Yn?YqL|GZe}botR@8Py%`+0iE~ znj}qxl;y)5TY}3#Qy}l4uSapI>Zp903?NB;ogJ@k!ozm(YScA+_FN{IC1#s!J3fE{ z_5JF5@H0{)sCW8U;_t8=-z+85ZQ=>L{EBN=SRP3g$8rq4(!j6rYrrL#3Q*CP7GL#Rp2L+L=3vZotDQsbwu6fX#x=NdV zy`-dIAwz=j~GH%gx-m~ZkgCl{H^q&MN1Ul92UKLnj=pHJCz}a07S^)*sGewhH}GwmieDB`7T@mi3<4Hf~Zg*5!(PGFv^^a?4pC zEjrp-N&G!`qJEVrKwCK$l>>g3YdMsv?tl=I{#Z3^8t}ilz8>%*lr_pvc}N0OZI>Yn zAMfn8Pyv{Y9}69I8OaoJ66b;c>bE;ih@oCj${s3};C8Y93cXB_ClR^8yarSAn-FQN z%{Y7WdRiFSeh<#stmsxkQmSkj-+%?HS8YVvLe@=D2ZNO?=EAKPRklZ&yFQZ`cpWKS z9GF{$Jdj2Proi!Daw052#UEObyK0lqJH2p#I@GVuy-{={O15nAGv`NX)`&9P%Nz-^ zM}0h89VS$%)a$<70=o^}DM_C{nqjmosN=P_cTkia7A3{`x`oQGPEaezKp@~(VJKR8 zHSahcU@v=i_R4E!c>62mO`2>wZ9>RzjC6p3VF5NbCD1w8m=_%)Eu>j?eZu&sTT7Co zGW$!qQ)F>G+VbS6bVPG^eq~aFCgGVU#>EdfyzfQYC7s5+@xjBr=jEaS-=EI8sBUf} zNB@B9*cB>~wyxY8Zk@KLse?j77JfMhsP&T+U6v#WG%*|L5G*+RKtGr%Dy#iiFRaNL=e0TmJel8U+O(JG&?$aJO#%mZL~IPDDrAiN7D zx5v5v@U^(Bz^>G%t*~M*>V-_z4dS)1W(;nqtc8?md?Nro#*?Q0N2o&fYC32v;!(OV z)y70Ot0C9R*^j7B44gz`zOzq(gZ=T#*S`%9b?C%zx_<@ldGtIFSqyedi@l?Ti)dMP zbMZ3wDN3nE%FU2o%OVO5y%Q9SrTJj8=I>SyU}SmKp-wv;2@Pn|@~~ayTw*2l)9h-= ze}f~yL?XJ~uN+lI!dejA&uz`)qB}BNeQ+`^5k_q<7oUPx{Chhvv%2S?4-E|NN{7Xv zfSRCn5?Mn>vs5r{h|6QW4H)s%E2uAEl3a(=8OtvN zk~_jeOxc?zatNdQJ}!fDzC_g|ceG)3iHTw;Vf3UI1?}_1#(xX!W$LQ@Iqm#-r_5TKpfb1Hxv(xT@%@0m#+S<5p`w8bU!S*M^ zzwJ>`#$aY{HDhnKLVWU%K3j@}x&T$+D(jN=B;oHBTcrmX_E%SuYU@Qb32oO5PhB@7 zM;1j7_?xhJwAaOrAJL+iout*deBbp-s`r(|oxpDQ&3>!J9}q2l92pYr9xZ**z8eTu zDcb0Kx^*6X2tXzw`--$2*K!icOd;z{x;pJD!IrQvUXzKrt-IuQ@_91+;nE)R2mg>uD zQIKkS4uuA{l6B8L_@2?N)$0)T=`J^_l2bMDUl#mWN|4WrQR6Jozn`w_#`dzhR0KNY zqP|HGZl8F!>WH+@Hm`-XL|H zgm1|Nl@IjJmo0#e(~h3e#2t10U@~z^Bb@|Ii$SB(PAoDZ8&g{&P*`hx$VBkMi60x1 z462*EC0!~6MDjJyi3JOVWP%p)sXiJ1QEiU%!ZODs_~7OdUfyyaH#!^nh^4zdc=;Ag zsNmjP7kW$>Y1T2pN_FQmLd{ygLKdMLU}x%s<>}@vr%v9~yMd*%6(6qOr$zB9Ry|kS z%o)1{0e0T+u}5{Kb$Uv;;4t+8<7SN{EHiWNN12-D9a`Ap!a#ST+E+gDik7O z>9Bl9{?LRrifY$1uA1sJbckXiK(T2;Nwm{v-^QaQCjeSAYAoP2KH9d268{HO!Z|Do z&9?4#kag1KmkKcN9NeGNomP_|h11BGSqkvMNSQ{1DfXJ@ZTSiz&GW(_9@2BbfWY6q zp+0}CsVW9=4bKLicngMt`@`LAQvw@%H5Mq>7QYH15q~_A0bPbT$O$xH4$G*{+<5KK z6L|#4dMhO-^hJMI5S6VGNZlinHRR3AM{KN65{{@Ua;$tL+-3ODG-UY@?VgFNhp)0J|&cL;_De4<28fSue!PR0X8 z^=HPa{v`2#T!SA!(-yXl2&_$t&BIoono-L}Y?x_zib%)hhC$7RV7Ll32s=?mNXJ`H zhE|bjgU7c9smz0pf(e)z924*mA&Y>85H+VegbeLgFVc^*eHR2why|ysD{hr3V(Ni&=D;^mM)48^NzrvypRcvDB#0-My=h3&JRI|C~312-v&I zGF2glQjIpwF)kIz0^`_JfP2k^$qq=w)aW@NU%|g5ygsccPlD|W%Y_n-CG9Z@R8LUk z9o^lxW7$)J`7QQ3{lD5~S!KQP$OX9=48IQp95u655~_cqCeFE%mOxAzYi3V8Vgew3 z#(NvX$>{6azZlYy3Hm#6yB)Hg9|^=!xZds#p&r*87q9n2AK+m)P;OG+p3u>&%FSli zsgO$gZ*0GtJNm+D$phVGal-+;;M8c>D4O&&EuFAAOzS>Z2+bt8Ww-*Fu!KJr*}s+; zFkR2q5Jgmt9X`0p#`SpHuSfv2F-hD5An%8#^mNpbeJ+H8@c7XB9C3B1^cof*GPE$R zgS_{ibGc*h!#q&*xV-?Dfo{S2`+bAD;zjKYjF8>7Pn9#XW20h>JoK2unoQ_qMS!h= zj6YO&huf-{YV<^aW2gSROz8fD&Vy1kb!p&Db#MYzGi_kdC3u< z(4lc8tj3|YihD0??lW^@BZK^7fc9?1Chm3Oe7x-t=4=c0!wJAZLg@~cw4_^9X_YP}Wfl2t@eH5Kbuo5sVk#sz9`YE5jI?b<7SH!ah zr{vT&01AJQjKdF_qZrCG*FP-;ISJt0bO-y7OOs3SST$p!K(79S_EUdq0p}t{mDJBh zQFibEE|k@3-2Tr%3vV)U=0BCyXQ;~&m^2IzZLzZP~Uw!Z)DQff3%mQ!Yx_DSuB14|< z0RSaR!Z2^A@X{3~L2kMRDeU8MUkXw!Jp8n9V{P;n=8@&!B8jAnzV0Itbi-#(&+Nlw zeKDj@(3w)d%m3tOei(fBuXJ_swmidJM_X(W5ZO1%5dZL3_4j(2tyIR;Xwbld6H)?E zfjJ~ZotB;I5&QZDX$u?DgJ(}an(-J}TRsFj=H9 zI;pNG3#I2b#uXRGZReN$ESR6hG5GRc&C`OeSB#LTlzWn@DyKY8@Gagd<0an(LBsro zmH?xYs>!>JvivH;KJ$dKd*DGGkK$JzLd{y8J+&Jm<_byje8h3OO}M2yYaP3n66ksb zHn2cPl~Vi&p-)-u6v8rt=w4l&2gMr)jh1vcUlSX8Lu*fyRYU93FlDCRXpDq|T zLh?;WSCdk3c3aw`5EFFV?j_5z_zem;y@t3>{vlQ_fVbK6NXI*v9ou=8XwpeosAj~9 za9G(qU;_}eaWH^|^)ZS_6k?+Oj@{}k0EiF}qADPoYIc}Z`~HVbr8QAos>uE_J>U0l zBN(qQCxBy%zVx-+&_ilW#1xH8uGMm>^OJY^CZ|5tSLqPQ7~Dg-?suzzzi;6WI$2-8 z=syDoZW5#x*sN=>;J1E{XuMV){Ox(-(+7sMeWes2clEu2WkL1SHI-JxbmC(+VUwgv z7(;)()-Tuh^1b_@hZeNpgm2HtG@x`yflMc9Jk^&_&#d6;{MAmlfG5FXS;22j)jspY zY(Q+t()}KM22NK!rZrjPz&7*F>NgA4srxcHk)75AbPAS#N}Zb(7`}fts!yv3*-MH%|TYNms9m7UoFTG9Cf)#>keHb+5XyeeXH| z#xO82vS=|C%*9dGee;&ixu)*Cm<1%1YNk`ux!?+RB6zMb0GxmNjDQXVR-fie)2m-= zoqa^2^ooad3dVk0G{_2qdPV*@6A^>UFC^uNbY3!B5EgC%)i+q%mV}ttJN)!0nWYEM zgv@8M^d1E<)`thd_%N^X(SJ^bqlU3$832X^Z<)F4zo!S~f?;@|=4lQhWZTS5^63M1 z5?PrKq)D#$v#Pkt^v;76nfQ#>nK-Q4iDiG#d{|up$S|Eap#ZvzfRyQ@m_z}kHRtt6 zXCV}27&r${6y*ZC+S_|%7^q0~T;WVRANv!*Oee_FLp#Dav$(#|l}`YC@9^}=^Jt0) zQ)SRL)V!}Kyu4n=>-vZa;IpEb3F8wBzzp}8t_8Bu%?mvMR|0qu3()6kiZx#MX!ZmW zXJt3rD~=KqK_T3M5TP4)BJ{9L7Sa@v0hnnSXeCZ~i$8dv|J(6kEzwId=n8c+_Dfg} zKr-c3-jHewP#mV7ywzM5b}nxbc|%v2 z=C6c82RQ7+S$=Rpr@3u9ABOMu~Mri`ej-q2{P(8gs-&;UI&VZwo3us&f z7%rB?{-rOBF}10&-u7ci1Yxa2(^~q1|pRUVc=5o#0=yashfb>PME`Wyu zz;jn>=c{$swGCs?)!?>N{bAXovmY6~ZDJ>Np%$or8&~R>L54FRe_m)wQcMy|2DutX zv{;X0wI9mo?^&fRBvr+lU-|=GWfr5s+M{{vni4P*=&+DzIDHjDqG8P%H7eGXNT&hR zQ5hce4Dj2ds_pfWmb+x(s^Wf&rJ2ba143LVM5ND0UOiDMzlEZvd;+M{O(_dTcsazx z?3gE3Pc00Ez;x%~QiqpqRQMZX`(`6QM^ze2kr3V!?49;pBzq7%GYpc8SOIUitrMe7 ztrgAMXP6#d%>GFQliJT=H~a{?4Di}CQWq`D#aEqN50EhSM+h+>?uxxPB{|Co}(zo@g{#6&~qH=5!xU{XNq10fjlNxkt+cqDko2z=7Rb;H5O+v9aXpj!v|;+TV+NGvh~H!>mGJArNU`;ae)pDWk2^ ze7AH_IHe5t#VEW*3a{Sebh{1+ByrH^ABe(y*txC8UYf8g9VUqA;uj%jIjP+ zdaS}vzXlm^vAf1Y{3Ssd1>l)C+H_iU{KP<2D*&_|ai@h|ou|OUpDMIm=4t%2z=4YA~K) z0s!B&HiD9!?lH0`VnUSF%6yG+;m<$=7Kl5562UnV%=5i^B!%k#UUB1&5dSZYRC|;Z zH#pYxWVAXw{;lZ~0^pQ%aNf(StPHKVh4pj@JF&_w>U)6#_u;hN92o$03HksfV;&Y?;AVm|Q0D(F8c3^XyAmAaJhLbJ4cQL%5xh}5W6VtioGu%VG!h@k;P}#GG0t+R7P;FPfN+fe6x$} zY<=PkpXvqcP{bt_$#uVCvGB@L-4*~cE5hnaCJU2TaQQdC|6JO){Rz$@02Mi7V5CYA z6PaZc9tZ#`_H?)k2L`e4*TnV?pQ8*Qrk|#Pm)7JU8Q0brWJ`+hBzfO-e+1>o$WJ<>tikMfGCP~hul)Fz-d!9nsmW3fjCvnK=&bVi zaR&P8nx?H5s?%jq#~+ zFPy1As=I#&766J4YX?Iw_O$Vk;(%nBOPm>nEJeQoP~@OQoEueCq)j$}y#O>sR-rxS zp+KqV85umTBO%q%##Tf&foNnvV=x>pp_1=FUi9F6OTBr%Wrt7G9KbR_?SHikw^kx1 zM+hffEb|Kf%e zC0ouD4-Zc9=v=D2e^W7i1llp0jpj#P6-6|vQ~=y|XJ#8Y6dhb2k#f^4DIl<_1lSuq z2;jH5_;(A-VoN`P7x?w*kMl-?`)D4>wO{0PnHaa6fj*=ZfZ<1uacLA8@mBT(trG>o z1^M=v@W5UP63r}%=feHKfdHb9e0S2ET;M4HOT2=LYBymQbX4(cHGh7QBWI$dl+ z-e#MzebYeHWTvBY4XdxRx4t|iT#9=k#W;v<^oXck#vX4USQ$~DS*BNO(biyEvAHdE6STZ9zQ()Xr1ZwJ$UW+Y7ug~ zK#y~{y$aY6fC&Mr_ycIP$bu6IsP$sd2GEvJw2!R7fMJ;^`M=t``e-P(H9m9-lQWDt zqI@KCMW|$4$=B4FA~7k3?#UIA>GX+-l8+{ZL5)L+iHH;O`WPyo$yY;S4sMcegpiMh zB*V~P&VFb3=dN}Cxc8jaS>1K_TC>)EXWn=3=iU4FJbTaHzx_O^7%;g4v)@L+{+`6* zgyMVDIx1*$kX)^-MYym!ke?Mxj+wku8_=GCGy)R_P;(|Ar~q0NFdn#DuiC}Ta%Ni4 zO=G%)%La+;A&&iz_utOnGF|g1p8@~`DOYanJj=?YcE*9H>baU zJ3f<;xGdusJ|h>xD*1(R-DWsSV-TzPprZRmDWIBnjW%{AbEVF&1N8e$@K**bd%V%8 zi!wDPsq?O@Ww*#Yu5!tZY20<2O2}@H<0f;9QZQjG3-*rmd~YvYC_I&=-4HLsDF1le z`6r7Xl-rQVFklv50KUf1%pR}?S9D=s zQH!}3ovRIoC+GTFn*$~eAx;UDxg7(^OMq&FP$%#eK(P_f|yZ< z6@b6I%=@1ATFP7F2}A{CLcV^!O=%Fk$N*$kXqBnu5@u5znEA5pnSDRQjyFK~H1Vl- z1H)q5OPzpjcVbIs?E_qNYE5k=aN1miyUvn*-usaeuUYW={ z8%9E7UOG!iRP3{J&>?z_>7$*&0T_aRe`(H$0XS^{=)__pA-9H;?W|{03K()ZI!4}r zrnn{!<19tddlVgPc23`}JqH={a?`4zXFHYRPft^*P07A_jO(6t*!Eu}*ZM0eOS@r@ zULkr0WX)K>{sIsPX)*c4{ZyFZJxHT$t?rur85$SXAf?(YG_0M_#yY5i$mlCw7+rbw zTw2Yx@S0#p!LS1n1(1S9%GzlGuz4-7wcqfkIlrRtHGS1a;}Z zW`A)C)j^)VN?5_gu96~=-)+^APGelk-F5~t3Cdd>gCyT&eR;U;=x`LqU%$|_vWAc@ zao}?Nc$+WUtbyHUx3EJo&aon9(8T~E-oY44Ez6&@G>hA~r83&&0tj+4fVA4B(|1iV zeMVYQy7xeM-E>m_C%{!5RrO#wC`DVINZIRJHbPWW`7RQvtE#*DYKu&A1Y^ZSnCDed zH@X{%IW1lU#q&Dz1j24Nh7?aG+-d~XCl4R92h{k$%)ISzW;xl(i92WM`u8xie+Q}i z&w11kct}f|2J65GL^w?GZuoR;(HcEBp-6g^pd*%(i#LIX1*labEl6IfIvS6`$MSJ=9(&aV zkZAC|nVA{x8oWxeoW35r{$DzRBWC67ylfscq6EK)3vf(^Pvh}-ZBUDDtftS+3O8#8 zId-cLK5H(MYnjiU`S@{Tf&S{6kshqJgPNpTt#l;K>aLYeq&tE3oEAS6@)9LMxJy9f zVD1uLPx&2w;5}QORDr+1(%P`k=PLh-KeuewM0*NPo}QR%@Yu+*z|?VfAKq$-Va#4E z!q@R>v0i7ZTiIj$IXrW&75dVtcXEt>$v#!aoKnI^z*=DNBH;0pkO}_l@c;^S-Xhri z@!OA+30kp>r+JS+n7sLmr{yj${oh5WFSoyurl{w@C2fyb^FX$ZF52aR?^%Zy@p(XQCzc>GCLnv9 zD4J-)zh9Sz{Zhf{-xO-O8HBY!7*QYd5P7fo^w-s&rX$xfK}xGXoQu z(@&qJF#}6@U>l`I;5D?@+f7(FT>YHW%<0y*P_ghLZ?+JPUid~1m;Sn{aPTB~+*=h` zQImL1_a62!qPgNqRn7i=q{`ui)Rj_Cx_{o)h-=RWwD^^t>c;5XkUKQ0}zC- zq?ckV?u|o<&2ii+dvky-pJ30KNfkjz))4IO4?#<4i-NhbgOTvht^J>5=qZkNY7jaUm;f@FwyK(jasKA=nu&~IFdX~OJ1m~Ix*K3EUbb9VuZ%{qsqrHA~RGmcbtkT(@f|fR~Wl zg4-Ious9UCA1^Bi=|mZbD+71eNE%L^qpw7fi;rAX{~{I;e7zGZJS5rqHx6x4bF{k2Pn$0&%UP8)!MYEslmIYSbIg~{pm8uBNa z?_t{$T~U{MC`0dGOaV|$++cQbDr4ya50)^Ad7c?q#XH6OnOw}1g*#g|!tuK^;FJ0= z7h>TU0Ntw3$QNR>CoeRw69W6xWhuXN`}JCPEc=JYcP`F{G-XJF&OkOyD-T zhz?QZHSeqb8xIFU57DN9Emuqt+1o+|X5c#Xd2Hf(=tF6gH5efnIyh^gPYLAr=BE)( zlxX6_En*(u3}Va_a?B53Ts504Rx;Ez1h(@n zX-S3olb*sO2z@R@=R>#7n zI(nxP*`;)o^(0J?a)DNvf{cNbNVDO_HR}HWIzW{Eli}yv!RNOl>Hjt9e_NB{gpw^+ VT?j%#*c7a9dv}wF1qAo7-vFpbPeK3y literal 0 HcmV?d00001 diff --git a/docs/html/index.html b/docs/html/index.html index c4f777e4..083bd90a 100644 --- a/docs/html/index.html +++ b/docs/html/index.html @@ -10,7 +10,7 @@ -

PSBLAS
User’s and Reference Guide
Salvatore Filippone
Alfredo Buttari
Software version: 3.8.0
May 1st, 2022 +class="newline" />Software version: 3.9.0
Aug 1st, 2024 @@ -29,219 +29,39 @@ class="newline" />May 1st, 2022

- Contents -
1  1 Introduction -
2  2 General overview -
 2.1 Basic Nomenclature -
 2.2 Library contents -
 2.3 Application structure -
 2.4 Programming model -
3 Data Structures and Classes -
 3.1 Descriptor data structure -
 3.2 Sparse Matrix class -
 3.3 Dense Vector Data Structure -
 3.4 Preconditioner data structure -
 3.5 Heap data structure -
4 Computational routines -
 4.1 psb_geaxpby — General Dense Matrix Sum -
 4.2 psb_gedot — Dot Product -
 4.3 psb_gedots — Generalized Dot Product -
 4.4 psb_normi — Infinity-Norm of Vector -
 4.5 psb_geamaxs — Generalized Infinity Norm -
 4.6 psb_norm1 — 1-Norm of Vector -
 4.7 psb_geasums — Generalized 1-Norm of Vector -
 4.8 psb_norm2 — 2-Norm of Vector -
 4.9 psb_genrm2s — Generalized 2-Norm of Vector -
 4.10 psb_norm1 — 1-Norm of Sparse Matrix -
 4.11 psb_normi — Infinity Norm of Sparse Matrix -
 4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
 4.13 psb_spsm — Triangular System Solve -
 4.14 psb_gemlt — Entrywise Product -
 4.15 psb_gediv — Entrywise Division -
 4.16 psb_geinv — Entrywise Inversion -
5 Communication routines -
 5.1 psb_halo — Halo Data Communication -
 5.2 psb_ovrl — Overlap Update -
 5.3 psb_gather — Gather Global Dense Matrix -
 5.4 psb_scatter — Scatter Global Dense Matrix -
6 Data management routines -
 6.1 psb_cdall — Allocates a communication descriptor -
 6.2 psb_cdins — Communication descriptor insert routine -
 6.3 psb_cdasb — Communication descriptor assembly routine -
 6.4 psb_cdcpy — Copies a communication descriptor -
 6.5 psb_cdfree — Frees a communication descriptor -
 6.6 psb_cdbldext — Build an extended communication descriptor -
 6.7 psb_spall — Allocates a sparse matrix -
 6.8 psb_spins — Insert a set of coefficients into a sparse matrix -
 6.9 psb_spasb — Sparse matrix assembly routine -
 6.10 psb_spfree — Frees a sparse matrix -
 6.11 psb_sprn — Reinit sparse matrix structure for psblas routines. -
 6.12 psb_geall — Allocates a dense matrix - +
 3 Data Structures and Classes +
 4 Computational routines +
 5 Communication routines +
 6 Data management routines +
 7 Parallel environment routines +
 8 Error handling +
 9 Utilities +
 10 Preconditioner routines +
 11 Iterative Methods +
 12 Extensions +
 13 CUDA Environment Routines +
 References +
- -
 6.13 psb_geins — Dense matrix insertion routine -
 6.14 psb_geasb — Assembly a dense matrix -
 6.15 psb_gefree — Frees a dense matrix -
 6.16 psb_gelp — Applies a left permutation to a dense matrix -
 6.17 psb_glob_to_loc — Global to local indices convertion -
 6.18 psb_loc_to_glob — Local to global indices conversion -
 6.19 psb_is_owned — -
 6.20 psb_owned_index — -
 6.21 psb_is_local — -
 6.22 psb_local_index — -
 6.23 psb_get_boundary — Extract list of boundary elements -
 6.24 psb_get_overlap — Extract list of overlap elements -
 6.25 psb_sp_getrow — Extract row(s) from a sparse matrix -
 6.26 psb_sizeof — Memory occupation -
 6.27 Sorting utilities — -
7 Parallel environment routines -
 7.1 psb_init — Initializes PSBLAS parallel environment -
 7.2 psb_info — Return information about PSBLAS parallel environment -
 7.3 psb_exit — Exit from PSBLAS parallel environment -
 7.4 psb_get_mpi_comm — Get the MPI communicator -
 7.5 psb_get_mpi_rank — Get the MPI rank -
 7.6 psb_wtime — Wall clock timing -
 7.7 psb_barrier — Sinchronization point parallel environment -
 7.8 psb_abort — Abort a computation -
 7.9 psb_bcast — Broadcast data -
 7.10 psb_sum — Global sum -
 7.11 psb_max — Global maximum -
 7.12 psb_min — Global minimum -
 7.13 psb_amx — Global maximum absolute value -
 7.14 psb_amn — Global minimum absolute value -
 7.15 psb_nrm2 — Global 2-norm reduction -
 7.16 psb_snd — Send data -
 7.17 psb_rcv — Receive data -
8 Error handling -
 8.1 psb_errpush — Pushes an error code onto the error stack -
 8.2 psb_error — Prints the error stack content and aborts execution -
 8.3 psb_set_errverbosity — Sets the verbosity of error messages -
 8.4 psb_set_erraction — Set the type of action to be taken upon error condition -
9 Utilities -
 9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing format -
 9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing format -
 9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket format -
 9.4 mm_array_read — Read a dense array from a file in the MatrixMarket format -
 9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket format -
 9.6 mm_array_write — Write a dense array from a file in the MatrixMarket format -
10 Preconditioner routines -
 10.1 init — Initialize a preconditioner -
 10.2 build — Builds a preconditioner -
 10.3 apply — Preconditioner application routine -
 10.4 descr — Prints a description of current preconditioner - - -
 10.5 clone — clone current preconditioner -
 10.6 free — Free a preconditioner -
11 Iterative Methods -
 11.1 psb_krylov — Krylov Methods Driver Routine -
References - diff --git a/docs/html/mat.png b/docs/html/mat.png new file mode 100644 index 0000000000000000000000000000000000000000..d4f5c6f97763e00ac51e3febf2db6a9e5112863b GIT binary patch literal 92468 zcmYgY2RxPS`+q1|uTVy>O=*}7JM)m4j2t@04%vHeg`AvlD7=wTna4i%PLlPa%#N8+ zHXVEW-zW9{{!bqtdOYVI*L{ueb$zeLGwpjSG*m295CqYvsVeD05IHvlopGW#1MW1gsof#e&`o9P-Il2rrBkzO<(G_ias{u(~$_ zoo6pj`6?|9$S|EeY;%r#hQ<=%bt&ZZuNxt5ur~V++>!)O>Y&Wo-_KyT7r}s*h%FJG z=mie)le@L?R$xwF{cVY1b>5k$r!gnD596#5CfgT=)hVhU_dkI{l)r9&zJ*Bm{-JI0 zJghL*3a_9fo$Jqb^89BeSQ{fwU<8e__KZ38>t2Xo?$bIB?io8Ro`;Pq3MbEL%PGwK z2*YTAwOVqjp4_XF<+gk#(7CmI>SPxASi80^3hquBG^)Oa{AAzq>l8dg=V44>^_O_g z{kjxlTU$6=m1jsm`6O-%y*zH=b_I5c_+is%rxnvU*x>xB}^~2V4vh3Clw= zPcP30{_jJAK008Oj3Mp}zTexc;I-1)kO|ZKGY^wYkw2vQWrR+b@zRL^MzUDe4C`tB z8@TusmV2KChJA8!m}Uk*utolVc`jQ0Zyt6(FweqZ?f*upO|Zh7j9KK|Uxlf{e;=dX zx4s&3?QD~9%%4q|ZTm68ls|^n{62qqIZH$W*hJ07<*>Vf-r>JLSZq;G$$cNfFVe`L zh%rasG>?0q0X9s1A{~hDGn!79U;ktWIroHNswBU+<2i4kG1w4>QSqcb`7N~fcAeWY z!D^r0ReM|F7)&0; z_HkP(u?@(ai~y<1=K4eMc|VF=M*fpqdikZ?mVDM@MSl+Z`cPs*e#-wO`&qu;rkyYB zM6OT;_62cSv}F&;etkau8BU;ck)ye=-gF!Cnkil(aS3d0%GufWx{;zOWXE!rMvU$G z*{srb-?8bF2|KKO$LrHYJ8@bU9~QlBPWkQ|)cDq-eB9-_6bcopms^u|lsXvxiFw|F zLn=k(Bi=)-I^V2K&q3nCa<*5;dT=v2bz{O|t!xacGyFK(qv5rb%CCW|a*Yl6fB2>@ zugwk>xN}?Dwe018P^97>x^PyRRyU+mUs2d?-y&XI6J_g!`|p zh1lo5wX#bLZ4-Km>Cyd1hmqT|$>n;Ifj9>f->}zw0JlSLR_SD~B<{jJ1J`Jye?u4N z9o&#j1PBT_tGb>Tin^fmF^xZ9mzHO$SDFDzPY%OW_T|#u?8MchcFLxjskmQ=+_-&@ z(^D!2)!K)j$^YT1{ZQgUS5GV!c{#Bfg;MFJhaj^ruq$c1UUWnA{cbutpJU0ObYO^; zedTm(S9wzP10HbCRGu%tLhtI)WoFxuI5Aa(-eS5Ov`@{MEQLm$SGa%cFY`bEuc;LT06rRnmazcW_Zp~L|HGyGxY|ZT^qj@7=ZgR!dIj{_LHsp?kp+- z7|LgQ@vpF%I?I^WMo#);^U3`c=B;Sys}AfX(cQoD|mCet0QTXE27Y{MX0v{XusMcvu5@$AccG0be9#QTYgKAq(&1c#@#C(k= zi^aq^Vup;Xw4$SJ+a~1(7DJOAHm73QMB&N zVQ57TU)b~LGqE7ly=N-0FKrXdkhzswM3MN3o&Xl=tGAtDE0z3O)A~gW&y)_b09nRN z-=ka3VHh`@8ibo-lQfi2u`~Xj+#S15PjG3MJV);;&GtMs(~J>Cv_r93EEv`JNHMTn+(Rw8y5~O_2q{DS|&G=h! z8}U*5^<10^I{zf|?YzTHlYf@teQ$!%sa`obOI$d7huSesxVs|UE9utwc(%>f@9Iwp znUJe~bo9q=;MY+sPl1aamB$7diA6+ttOnzXRo6Tc(YD#Ze^6bLySHqZ7#{1-q(};V z=1rjhN>SS#L~YdkVWwbdQ$}-A*J8x_uJwz~e%ILth-nB;F`MpRtKkCYDbXF7p! zU0?LX?rcd*A_&*N^(cLmV^AW6aCJ&M;U(QtyJKx2N!F4(Q(8ypsUGKts zK@EPP&aJmq&rvG5HN|iySQZXjXBBLcp!#Cciex4@5ld~z3-6XSI;TJJ40WvYfChrP z;(iqMBnoIZ++`2o5OefL?RcGCli-n6XH2@|e+?y#HMfBv7kgt$?t;HY-!kB&`E8q| z((2+hqmkUBDlq=!IDX14rR-)!{#X}o=v^g~ zJ_~acP-GveYcv` zK=Q{RkiYV;*N?#1%Bfbo%dc5>0CtEq7l>DPe8ia$Ft3_9aywcB$(|yddeM=ZNBl3Y zSz3rAK5QVOxv9;Q0)qH}2}URUYpPXqPQJU~pNOUG`s*anT$a`%$NP#hDS)%8*>mGH zN@y0p$C(wr(5=O{@|x<-Zv|#LxV|;@i!S3ujf#Z4))0*CfcY|(f^|L?@irbz$#jq% z^0Vd86%&_JWqboX?E{$VLp9WzL_y6`RGD+zoyk7Fu?BcWug`0&YLSf}o$narCRC6w zv=ZZo(YFBFRSOsMmNCVpm)+`pi^q}0wr)k7hSJ-Stgx)qDyItPYh(^BnB}*f`|^dZ zx@*-qs8C#&(GrpAfVbaF$vP7&PQ%440pfr9!{(+rh4#_Sn>;Jq<|K|u?-INE;J{CK zXd7&WJ2(Om=w{#X;l;fo9%Ek3rO0@v2$>=eei;?9S99Wms8JXsX&P=cYX7Klb`}V1 z35oc+RZL4VGYTAPhCeMN@9go=v`vhtlFrw{FSoRJAnYXZ)_h}ey!+uMdAb$X6lTOo z+sLdG0)=}(9RFG_;yorDuqPl$x5T>Ne+v4{;&_&uvtK6U>{Ua-YFx`B&WvXct~YBZ z`UWzsnug1JVrxR%1j#Y6TOS0@lLGsbzy*njZwQj7&d`V0IMeFeQ{_A@6cygTDcx#q)&s- zBSvMfYl5kQc|R9D$BC*le*N2t6RA-s&A;MI-p!>z%+w>5@3MP7?e%%w+bFK zOuGmti>>{Oi`fVfnDW68g1&93Gb?A2Jq|P&j2<*P4TUJ%+Ed^&)Y~d5LasGD%?1Gm zmsKJHM_AdUUG{8|TimUImorGi`@%JD8uWKAuwAPu03_Cr}#&=#hNgI*YAlnCW~b}gNc2zKLrajA>tuz z6124efAYImF%aaSlRAqSv#=>049C!5m1>`(_I*<;T}d+-7k>a%2}B@}UeOMLJ)a%P zW8r5o)E!$7=kJf0fMb?ITMvO~ojs5L$isaJdY>@xw^BtQ(3GbbP0A3Ca*>edacb>@ z!wz2Bh}yBN)04G&(keo_R&#@n{HXkEAs^y?=WE|?&K^5Pf-z^+fwmETAy^!2;1=Hn@Hyozw%+P_SD4q*Xu(xJgn#B zmDNVZFAF395M04GGA>Vn;DJXlnX-;3LXd}Q@^-j`Aq75cARq=f8bM=Un(R`wWW@nK zhP=@v7mgL(0+Qh@1I$^r^u*P-#1air#63@VoE zUazx1pJoEz9s3+VWFv85(^1!8-nUVKZZ}v=AY@||m~BP2#P;g2bihcVfPe{(ks5;Z zu#Vi8DdJxe*>j#ej~gofsvMWsVe(E$Ypw%p#hyH{hM^fW2U8h;I|dR^-ss%(X?qoTK;-?+GLLrg;Ijz&RVv^WKisVG-SKL<$3D$LYWVsV%#Z-fX9r2Ph)2hdArp7eM{ z*|o5Icg6`@^6e*Knxf{6Yr(iFzs*qTp5u3st@r!GhQP*Z-q&w-F)+sr)?TFV=SMn92VIO4ad$q-0q7n4?t?j_ul7m8O+Cy28!V4pz1g!aRQ_<_ zZMY|kd$a?ceD|2GJRKs#kA;{!rN6^EwGS&75E0c#UI|N{28rd%(-+0^OjkSXMN%|t zjM)9HXxk>u^mcu5JRt`+d5#w2WOx?;;_2+B2g*Xc3}bOPecL+}h;@@Q*`gf&%!PW@ zH019FQ>sMm?`E1A6i{)M3hZkDBX5jCH4YSW{mO|$IC864*7~So${8Igxfij6w|mwP z)Zq_0;}3lu9yB5eoAbkxI(l2Pc>*~m%<}h+<|Yoz0}O3 zWZnYDU2d-dGTX(imzqj?utNQSKq+HN$rR0wqk#C+1IcGF?le?9i#5t5kbJfPe(%Ev z{AQ$k+`S${5W6EP!Z4ni5;cR7o_JZ$%N`mT`sFP{*|hxX=<|aq5e`E%T(d0al$d(x z&j$g+=b*MW0RG3yeXr2_C;0gDNJCtO`m|zO*_TJ`aAlw@wJetyu3V(GV^Lv!SuYky z!Hv#~@%D=*2GYD+C~!`+Wfo*mNCDrtmZ3x~5}`UWF#9(I)hAdX@@m#ZWyA7a<=VR8 z3PM-!&21&-zs?B2jeK{vSW!nMe5fd>b%Z1hEa(tk(G(y{`t~D|BuTw(pzvR{gtBsw zG#;9OP_t;GNP$q1@QR)FO?3X+2@@H>e2*;ZoR5<^Uz@eWhTm&!aU{dIi;~5*xX%Qr zjaY#E^H$6Ld}tZ`%y(ot3kQ^rF0&N85+*P-uU-k#U6Y%+d3jT1C3~ZfZjlw-b{we% zfu^v73t;!<#JfBGu0%(6Hh$UKW^&2RfT?Ft1k}-dBIt0VRZlJS3nZ>dsy?Qg;HQ4H z&v1P?-*L5&w5^&dv<*kjefa>ORO&u_#!~9=2mHA}6KQJbvj~>E$j*b}mDtz(n(rOJ z4F=j|+E~r|wWv8w3_Gl3fB{amRYyK)n_6^z#TP{ag^XP5R{7VUb2E?taMLQlO-Th* zEbOpBJu*Bxd9w^xR#iEvquFfx8l*XTy^l%_7@ahRw>ZWg2`6B|T8z)c0vJ&|!!mFZ z^X586rQwH!!Uo|~GHC7{5iVtgVGK5T!tO>sZV#fGKuHMg8l`ndRZmIcDrd?Oc?*!6 zQ-p#gevu0lOhT4F$9YDT87mht_kfJW0aVzGfB24P>qlDon{rHK^%hVk=qaRbmUaD5 zL$oY;0t^5a0b-0TnHPbi4OBknEwll~Hxvh@6b+^)*suTt{G$0iQb_yaKCD55Vzxgi$gH-5iArvs5a6kUA1yba&I6Xv@aZXLD>Pr3YA8}+CTjc8 zzLb|=?cFMnG?v?9OtQBOj6e-Ul(tV`iU7VXK+d!tZk-^cP7OmBHCau}7DAAw&K-?G zfzCB!P7790Zv;-Eu4urilW%N+;>qGhnI-SMS`QHfEYl!HnQy#p0(;a91^#IxBZJB< z|2qI+OV3wi4C2F!r^`YZ0BBOA*sULjD|8p+c=(SeJ~O6DdF5c40M;*!y^YyHoIDbpMEI_N#;PLr8V;291%)WVthiQ#@9PxLLXh9D7q5{|{>*?6z z?{WI|v%eNs4;GgiA5d5deyyX#Q2DV+l^QrU8w?-7yP?WYG02j^i_7-gIWrn6xyH8H z8B{&iR+GJPLvOZwC3TtV*eh>@yf!?qbhOPolwtJ-6hewm*^-!*P`87!7oVZtup~dz zXXFJ+=<|J0!1jzWX9uOHz46G++qPsNAf9Wr9k9BR0j>EeF^p^*DSy0J;EC)H0 zRd8L&;fC;Xm)AoZ)%vg$ESJM;DSIA{&jM7|_>yVDwkd1gCOb1@(E z&=Z6c1D4Z9$Uz8Nc)_HU&^3~pwQDjYZH$xo7|0#~j;E^u(?!Fb_gaD?MLnba-*+O_ z*Q8j^DyL;IzAA_vZV%GMJrUt~_ef)Xuhg!+|i! zJDrEFgX8~ZOsm1vwUWt&Tj*B5{elTfg!{-2HjzUiVm3wqULXeenZq#}8ERdJyo|Br z^n|y9{F)Kq)XScf{i}x5>=YF&6F$VwbVh)hS-!c=Py&)hm^$~k#Y1d&+x`N`Nwtd~ zepymb3g}JnUAPIVbD-7&n1FY2!f6lWsUdko5uvbrb2(M+87iXXEfOn2#PPHA>%hr0 z15rxzbZp`3?g~CAAF2yyP&#VW_^=@M`r^LrlVi*F$^$J@fX{3aL<|HC@75%OJRB+_ z=>mKx9I(nW|Ut0YzS7ed@Y<#tm~zDV5Z$tvsvq2da+IN2QSwfX;F- z1LZ6sN}h}5kBkRtvSW3bUuh*sGb5O407$kFy%CmwGhL$K=YxVoWP(ug#?qaSMCEwf z&6v%4Mq`$q5Gx~6!5=;lqrb#kbK${|Z}d{;R-JG{nRq0s3WbURiIG}MGsr~?xdjD` z#mFV?I!!ihqc;NDZ^gPIl?=D6fhYfD_mBc%bz2{f8G3oyhg0~~w>x|L{z;epRN$Ih zzycMh5@Zqd0^#i=E1B+=c$2rWmj|?FAA5$+wjIHCs<3w3B4ikt9T6jjORD0fSBq^^ z{FwwDxN-S*>y{v$zZn6%OVl`^Ie0se)+>>i$umHIc0oz-Dw(8;hQ1v~R!lYsP9pV# zAW^ckZ*sPSyZ^(-F%&*Dn_HeGBhQZk1wH%j*h7&oK~ikbyL%hnjey!}@2+c=Lf$M| zq-p3aGk}hd%v1O3X2yfZt7mhrZ6DaKwoWsmZG`*SDz-{UA(z{}sn(hkwXOy%j36NB zTQSa9rOv|ISk?2cbJCK(BP2b$pFuaLkhGDKtzNp0s;av8?Ioc zg3I+tk*1ofySMRL+`UaSTU7OTU#R=<4J;)+_RMJ9UcLN%p6yxq`la6Ln{qKuZb3>? zDxc=1ZD+^;uVt*Dj>MsPg7RX z-{$HN5e(~(EYH-v;425`=g8zM(_GGuaXdKe$(uW^L35uXU!3wHdW)HfZ}f)|y)015 zR#1F>Q6}1kt!|wX-0NfsDA->vryxiIqjq22_ICsr~#udY)zSlz$t zMpt#-01%j@A17<_fVZZxcy$_U=EiX$?as$cyH+Rl;G$W=iu%;mrQHX0%0@y<=Xb|a zW3lW2mc0${mZ4>0l3&4H)Oh7PGE}n64MB4?pTaPJ4TSrw3xCAwB-v0tdw_L`mH^Q) zqyX{Y66-b0sK>;oWT}*hn3^&Wo~XgY;w3wUH@GwO^FhWTAJ0VE-Xh3zR?G%p({aad z<@y8%tOca@wOc$qxEgYuO8|pGke@Sd*Si z;TZTzGZC*rPy(OBc~|7R3$W8_=^l`vt<|*ZWZw3qHB*4cV-hu&&>+bmUdCTC=-e9L zPUrmQ8$;O7!A=q`$Z!BfOuL(@Xs`Sl6QozSlBy;wFT&cW^0*W!eiOC>K)eeait&3) zFq0p?K{{VbPDV~oOgabS(DQg*8W|0o67i}Zcu9xDs{hAO7Fz^TbHjhn_da@ngJ`O7 z{y`LKohs&pmV+kR*QG}Wdb)A>*td;Q{TwiTe57d4dS_(x?+4&SGL%V_J1if2?mt#l zFP)Ox(kdRq$o`w^^efQf&LEZ-h*ysMc~~2ExCHxqUm;ZeEHJi};POAc0K{cLfBUB9 z{n@xaa3_E>rt&m^t-w9l{!pwQ$X_+ye>&Id8cKfA)6x7A{Q%%^^Gn>De%?AAO4V%Q zbgdJ2EZwu-*zjY-&sS(YdN!}&A1l0E`}#p}j+2B)lszRP;m|8}wz2Yu|B*@RY_RrN zjt#WR$&jM-aW#LJW0hHE>M)emO>mOEkXK;RXJntny`4Uhhy3aQ z`>&%~|KCv=cvBz-Ov3+AvMg_|3Eub4tsrUy7Rx2PE0l*kRWjE`KxBxJ zRCLTqPuORS3V-A#IA1N$c@zMo64vd z!q2lq=Mr3m)nm6~j^_RK`!o=#Lw!tQow!+g*9T2+WINW4H1v_?fVM(--}mzHcnKHh zNdhcZQLRfF;KF1IZ6ri6_LM+!*LhNlb}RN(R8%fQMA^S3_|$pb`*HZzv@Y1?d?7DG zb%OS%`m1-w<-YmX4maNDSsy;88Nf%EM?yFMbWvy*NGZ1Q*(jc{*brkLZ`i-M{5ZZg zl=km#0Qn-&38B4xVF4R9nl~r$N7Q>iUI5d;M0y5lss1idKzjLo@fhFK zG`Q<;;Z_v_8?R|C<~JTc|DOr;r+3C<{*0NU@SOa9mF1w zi3g-1ed2%SlJw(F3VG4RuzWsGIOU0A%@pa<(B2Q_+jVY@QzPRfBp6Ro<@Pd@f z0JkM@T3dgNH7V!MPb}YkQwL<8I_61*~%f(=oL! z7}-=JW(b$>^#}f^ICjHfsz~Xh7L)9u3O%9GUyKB}>ke3(KqofmNybGmKxnu=dU05& zJ6MYWDg4xhBuTg&DN~q(3<4Nv#g*!k7Qj$dQ^bjBJki$Yndh5JDWPI3W%C#0< zK5pBvem_DJjIm#Hn^s1(vAg;CbhuqM$WYuWrnX5L)#GDYiDdgkP1V%^L+P_nRwwEk;uAd$OL(GyB*&(TNgy!-2t`<0q!m!haI_4! zArBcHT70_}_h8whnohN5K7u9V*`e#+*N?VTY(-1c5}=r8cO#6*vS~!`nl&Ht&YI7J zGAJXfz#iE3zf{nb{B+=9O4a;#Rx;I!1WL&`7uPZNcF&e|tJgKASMwN<(UQ65Y@csa zPfmYH;`(`$r@OaR|MHCOmnK^!Y4!a|X;sdBji63*B{s17Qk0LSDS(A@2_ka7y6dIS zRQCsB`qnAIrZ$W~O`Pbar@tofDYk{uG|UdJNqaXBYK!LtMaIc+B*J-@h09DwZPK1@ zY+eD-RCWK(*MhR0Z}_C`Cm`b1E%JRYWA5}-k{&nH`Dm_G2g+qJMj$+=9~PEKtlf4V z4>&(j=X<`$G?<;9zNjIHQSSIBbF9^>hsZua?Gp@Jm{HS`pRP`_sh}j@Mm)It*uS`) zhgFWH^c2MU0j=s5#d^@^%h>OTjbXy3oE2?mcUOBLB0fNa6s?tP{}4p-W5Nn9mQ}+2^*hEAj1Gz5W56MGvuAY zW0{K|-|#!)1MDEsL! zWv+A}-Gxqg)g!a;we%ch#?ME|oi|Jy_Xr%YKM?V5q|BfYGbsu=ES!}P$;mj9A@CJY ztGP5(E;ROqBdy1L_lpQcRqqN}%C@5Jyhx&qrkEU+IqY&2}PFkM_PN5?X`T z9Cxbnu&tVc0xwNA9j1Ih4!ur1@?>!2VinjdTHXELS&gHHp8GpHIV(R)Y~CDBHS^AX z)}dk@U20~@C0oSW%P%saGssC}z|s5qy$4=ZL>_r{M-5*$v$;4#oTHV1pG|8?7N6+9 z^_~~l@u(?KuE)FN&}T8Q8My6O^Kzl8KZCTP?xs{~_hn8k>lbL2rxhhc;O(@PSZ*5d zW`Qz6V;S9vEbcBMi9&|Bu!AGFY=?#WEbV?rxYoJT*K?HNI%xrp zToR3nF_2E)`3Nj&uHq!}38rAf{#~ytr>zmmVP_XP!WP0hcby^ zRa8~H-m0nDN=f?efMP-1*5g`EdMgWNC743ZS!w>=UW5Utc9vtuQ9^{^1bo8B)rKBt^A0^m_cxTegDBwB9vU*O_cy3V>s*3|9>PM$t@peLRs(*xOv(Q!c;5e=?-VaJM2cXf5bW z;3LLp;7rm=*pMYXuDYy+c?BB9C(6KZ_UU!>ceiu+sp$uO^m?fRgn6RvSwNKE09`?T z$icuxoFd7L(Slg!MNk;$gX_Wc;q<+^-}2f)6m*?<62S=9(sDeb6%$`jC1dN)|3foV zy(fYfZ5DkW6o8;5RjTnqaP|l2p6P+1qJ_Wvqcg3~Psa)S0)aXZiQJB9O77}}4YNSR zPi?P@qr9Ei)H2yO-e7cdQ0jav+?!D)uKV2^U#=wz0^XA9FzIB6(E;!Z&xIQf;4-1u z?&&z|^X9>-Q2H~_RF;Q)#v7Kej1S!`l|4)Mk@fyZ`6d zcp$stTdgg3R*OssuZ5}N z>D6A)Q)lzCP!C4Mm8Y{Mf9P7XyBr0qi;3PkTx-?PRW;^i_)1FAQxjgY;+Ab5crRFD zpuv+0)5e3Fz*!si*x1DZ@604yFUR)$bS$Vqd1Ri`1q;*QOB32d?v`aNv`5LNBo>nG zhij!}Qo2mUla(-C`4w<>+iq`vCB{9|gFW=#8F&#+vdwk))_Ex1%Q!b|3C?Sx*+E({ zt}mPUKJ&u+6Q#6KW<`Uv3F+RuPeOh^_brASAb0$wguJ6SFEW{SMz4BQGfH}EszjsIP1^L zP2=$apu?;7EyzBXB&>t$Q#+6%R@L*nFB1ic7RM6#sf|mZTK8nu7}XjC*tC7PkWijSEVh^ z$p<;Nmp&t}K$AcV6^kH+jwpn+jnTsj4f3hV$=Om~Y@NSm(gzeWzFw#hpmi5_gH+-A zg9R&@i?)S0vbR6Dq0bM0Ww4VKUhFV^o*wsjiQ4;vk?^^FA^-PZ$gN%&IKI-zkUSiHlwm$b(~m#KbAipv-BRwOTadl-{WdAIh%}59h1>8t5@cf{rv%s zc2K>(W@{hny^?JjcnA+XWtU?mh_Gil3&|^~5V815w0?eaDLI?5fpnU&zd`NVUPeL% zP^e8K%3hO(-bhc%kroJw>J3*{El}{zz5q+j77I19qLrvAd_XymEJot}U(kVCcj0P^ zwFXiBfVCAmi*)G^7JMSLh0?`p&vK(_3%nI5s&BWj!V8!aRVN*8yrh@-%-gc~*nZW{ zD*o-|V<4Zv)lrqk19irkn;Ki$;5M-3LKezY`KRw^}-YG38vlD;QyqUs`{JJMDZ*V})o3`Hxkb}701&uH1 zJI_G*)_d{q4#s0Xo};nwV{c7R60!$Qvc>V?)GzmS*PkJ6?4Jk-G60>}v!Lg9PonlO z?mV6gBT+ayTz)-QC&E$H;NUhGSJ>`CYgSOv3W)$=mP8-+Jej!EG z{Yes!=40cv>R`|_iG5naaF~MHSr6|ytWWx+yJ5=6#B-n!W$`PW%h4O*=UyhKEcS0S zU8_E}mr8zaf1&d>aOuIwzqmo!d!oo$X#yl|jRrgYzM6_B9~wR(D zpDhKZ&bN3*drJx&Bx}u!<+U4taTuw1p$=kN*+a)hu##5l<6ywE2boO!umJ9XUHRks zhwO6G4F2wmQ)ZLLpzVbHWxzdN5IIIajMaKCf<1!xbH`frFEYr?Iv!(1YwF!}3b1<# zL;TcMdW*e`Eqt;=VFO0Mp1*Asjw()6zhhZD;{h@ryUFe-tHS&^ux@yGr=usY2VjX{ z2;3;CoQ3dU=J zW*AQKT@N*aUoCKl9R1)dt~Q7jg51+3&>(fcZhQQm7t_K`P_~kH95$i67B(6?@1R>@ zDi5D@Gi2mX8=dRpEu>^shff1Lk$C{tn$%JAtnPT+fAg;_nMh-M0n}+=BYAYXEk@!D zie)&N?j4gG7}s}N)*0==gx5^41b%mf6x6D9smDPXNWN$oXr6uLIqw`WZD^SMb zjO*_K%KKHrtZ8+*zcTzF^8#%6l#^t<)`kLJayhBkj7Zt%OM3|d(@L(zrLU^U)fx1t zCiM{WEFCXKn7+;pYu=$1{3$2*7Oo$lAhUDSHMwIiH?OK;eDqry~-z%fa%8R6h&h|EvG{9ePnBKEK&mRLm z`?QyI!h0nbl)K_9I<7j!%-~N$+Ia;LpeW4Pd||kY!)4Ld8! zZ}IunFkA1H(P#d0pdI6-r_~KkpcUn4^)@Ui-|{lnoVIGS?DiXR0Q!8}?S>WiTa*PT zEj0It@ug62V=T~AWAMoo={#B__!N|*nLJXW`V!WG8Rqf%gTIS3FDBuwmbTQ z<|0zM<;pKEr<7fNx0JL~^>h*>I|lW)h8o;w?;Xsv%H;xAtSiP@ z0y<)H#)LqHm2V~tGw`>{N2^J970?X!V6%_^noG4VS?(C6j1E(=%)D*(weT{9tQ?ZgGtq7Ff2)(2aPczj~f{C<2f>7sMA(FMKe0C>qG5fsgPcXNuKGT5f<;$uX;pCobdW zp>N`L1n92O(%3S7hVscFzM14@4SXLOh-oL3^Ha=fp!P)ig3gx1hw%+a{#}`9SpOPM zHgfx*`_TUXP+xEmy zxkyle{901Bv)V!W-ZWq*nK16ZI&>>1GDA`gFRcMyar!J_NCSGY`0X_-sY%oPWQ_DT z_4<6ggy4P&V>4->A-`4hFG?0GkiZM${bxWkoXp|}p71idH%n69pk;rl7P#tx8UUiZ zQ*IO1Cw6>CtNXe{w?P_J8Yi4W1pOj9hN~UpA{TonOYgEC9ujJ7B@UzR`nAjl+EBj-l4K2 zft)}h5nv$aKf-Of$Z!1h-z02ycbVy3@!cMcEvfqFsEFa>54F7uBYkng>=|4*Y~*gi z?~(yNPXFIR;JLNmfs&s(*|&PmF*I%PU<#L;xvPh(dA1~IL$Db+%WY`*{ljJ^Hrus5 zF}hZp{uQ<&Raml!bJ(6F!LMv)`1*w0R?gEK#v>m@zbUjQf;TsU{N_Md>$gOxBydjb zbIBgyBMO|m06)6bSC`?SFniG*yam%f?03b>YTb1qc1wYOe_w(~^p|;ZR_PU#3xouK z{9zV~V_T@EKRx(*T(ZRC!a>jZYUvate=8$ez<>#Mv@=dl z0IM%8SFl80*lmyVZJ!2FV=(*-=v1u;WFGuV-m%o*yrJDN$HgOSJ0j~sJ*kKYk-C>B z@L04rG$f)$1{p_APXI3m$=LQj3m+(IqP=$ZBjr-`xx8#mX3Df?Z{kN(BC{@smfEB0vkQVW0T zaw^wxzEy)S>!R_+==R6v{PNJE(Hk$qkd^cIBR z{Q+91Q`GA(K(=a{wQT;c1{5;*)r+Efi9|>yR)1R1FxI<2EHe8}IP_M4t6sH$dASs- z#BiJmcwI=_iRIbuvzyQYyIcn?Uv=nk>fG-OeL$r1Nk4D&eu#`Q23nUcqpQ80<6w{S zm#blJ+)Qp{yygDEoV3{S4PL3i;d^^Txx!FJVmdBghUF2#ZJ@cA2Su$z41#) zyXDP6Z?PN&4#cpx&f!8DVq1F#vUXH1=SJJ0MT_!`>zHafw^O?3{t-wwg6q7 zdg`9o$skKCetD>~Dyky$GY+RFMSD_khrG^H(&2Ph4EKEBCuDZalzMbM$afnztYUgu zZdLz}fp+$>TntJhz8gReqd+lNkFoF=@6~a^{f&tEX{l~zWD)WbCFBw9mEZpmR5RgfSt$UeU);fDRn6Isn#`>>Yw4?eIx$OKWPtbk& z;6?010GE8TZR>Bs(g8jKQ(gZ3f^Pcc@L=@je}pk4-e2*DOi!PzV1%tW66e(UZ%&;P zhA;~`5G~r{&JFS<;wAF>&phw-t$b4_R>(nzky1cSIeo$BtaE)?7h+nquvk4S{v(*u zfm_%1Yq$tg(xOY((D8MiBhNb)Z(bQ`09N~;J85;-8a~$i<-)j}4|tusF=cjFcrYM& z8yy6g;Br9Dekc#SxpK!hVmrn}N=8bGWk?29*oIs3njAkj(mgpo@u4S&@0(jw^{Y^t z@flm@;I#bpP8!myE`sJ`q1=0|wv{Bns80Saz`jb4LlxJ+J{e$peIo2B#b=PPSpP4n zgrhGSv!1jen?Sss>j!Rza$2|bPxfh+mqJxR4}>yleR2b0FNp$W8?Suk{~cAhSh>X* zk{0>NqS>i@KM81ZKw&IKeuCWs z5Ww-E_r zLZgP`5<*e34@*(lybE*sxVmS^p+KVYZrB?IB*sZx#RR zAAnYE?*Auh74X`2uQAn0}!0XHJ`E6-$)9CV!MY#IX0j4e|X^^;lBemH(aRL*cdcOQO67df<{92WK zGO@L>U$W4FI4h(t-y4_gA|K6VmOm{Ru7FJ5Hm5HyZOOAc$ zArDaoP4}Rd2(auc25FAW^y2)M1+nE!3-(O;H371?X?9L#$ZMfnFlhCYBk^x_fM!!C zkc)$Hv*`g@9L-O`h5oT@k_OmWW`I9(rlwE59~{j(=N62f{`Gjyo#91-1pJbdUI1ZX z(ExA_LlcMFBL|LkL5F)C+hf)4S5_K=w^K}miP?_-YC)h#ZQ6%6a(dUBDVh$3Wb%1s zZ`kvqUOh%RUpc)V$o!*`L%@NwaV`(FGr=?EW?s6DMoFZBu1!W-V9Yi-{Z6jI=1wJP zL4qUejz>R_rMpByRR=I~K29WpiCCJnd}_{#-z+AG&YO;NJ6p@ja>v9l3f4zYPq=9k zw7m<0tK7=roXP0^WPO$Y_6nBM^#Xka*r5OqN9_vqnaPoY`*Esz9v!oI&_o%{MofYu z1E{cf_ce#!7heVlNiO&899NfUVc+-Ms;h2C>p~Z&M3yJKYR0~h*1lxAEoba0=(GMK z*=KiPyJ=di-gjLyfbhL&ipVAc7_)nfudV%Bfs?-RGg$YnB@!nPpI6JEI!86SpG4|4 zhCbb-e}xtcQTna@P^W$L!?px_=koqP<@e`W6`Jp@N%TFiFVT_-{i{P5eb<8d>zZP3 z{Ct4f^V6F(tfvMr5ZpW7sx!{^)wya~&-tm=dInydy2P1Pa)!~%*Va`v$J~2%gI`zO zG>zJ=-pw(|eH7TPtaLCiys)Q#?)gg_8u>TImnb9`g67^csNRen5hr`PtiLvqQa-SG zrTtNpMKWl$xc(|`=9B)z>E&CO#LwFC+D=rxI(><=oL82LPF{g`Achu~>MX)SzttuTbj48x5OnMchO&w z7`@x=Z_{Ivpaz2dc`h}VI&Zc1CrU)ht#%ao6)qVB&=OGH^bF}0eaI`xcdi0`T>HCP9;fwtzv0%SJao?s>@*zdJ z0)6!6-eT(PqeqY483|dFox+XY8@x~uroyO!d6#i>uP^`fTw-~%kE;(ISk606qey+~ zuM6>Z0)j#<|K9Dj zsd|rnJO#b?QGG&P@NGJGpJ~%iCfcM?eemziTL#iyJzXl9H<<@^?ezHK`%Hpr1t)d3 z8QYq`$rqq@E-fFlzIZY1vv*+I6QroqZ!++93(KT|)+niR5nSe_9n>FkUS4tv7TNnk zB44qt50j!jCy}U78pj%?Tqm`+^=%tnChxmFw%Ss=8YDb^Jh@LGXvs$R$Q?@u5sHH8 z(+<}rHuGK@sX0xpUP?aQ{rof(@&svU+$gU4vjofp4g<_7WHjH8%wuC~K)F@38T`n* zAtyeu`d8)WK(e*i(_C_zwaWdAn>%}WgX(e<8T4W3p@gjDv&@#vtBIXsTF!+9g%hLr zu_LauFC^D()iZmQOYwEuovW@ktyFhWGo!8khhk@t=pB95jLx5+%+cb%TF)G-r*?$z zITktEmD)da!(YV@Uc!i%8xUT~g-sh?aX>;poAZJ(tSSFDz= zr#9|nSuIWt*drL@md6Zazv9Qd{-QB@8${AZ)Pjm*fwaddeVxPR{V}GkhC>tT_B}Nv z;P~b5T!IwWY?WRLtu*N;;E$4!?UlkRWxq%O;`)AYj>@%zrb&w1|pBjDa^%{k`i_nmWL+z!Xu6{w>2J^86ypz!s} zj;%QA(4FSx6~CI0+alUVgp2}JIr$MtyW6iWdM)p)R!;_rpUxO=+w>mLj2uDulciZK z>Jw<&7&$4H!loXBX?_#`hL)wu5V-&q2F8~NhV#45rlau3(LAKs(<*7tp46;Tb2}}h zl0AY(K!yl?K#s|~$+v5zI;Ci>KJJ#e9v$Yp>G*qAAX_Un2YhKvM8m8Sf3LVop0%)$ zP-tOlh?-54Mz>RS3>F>sxv8W`(U&Qu2cTv3)-xZ6EO}w$YHbPD3q1uTB%-y+{b_hQ z>+EG%!~Qa%uf>*PuoN&3GJYPR4)lUGrHUWv9AieD_kukj9WI52aWTPu2F9hB*%X&_ z^G!JXP>Vt)r3Qs9C!c)8(kUW5YOnPe-cs_k{<0sAn8{p-32z{8!?QOFhq9gxTk<|w+wXth{$8)DpkX9Gzrev+M_J%X z2wDOrp80}7Ag!VAN@_-A2wBMsB;;8G%QKGFAp<>7h)$oL_?{F#K7Fy4ExOX+hCq-P zy?-=;)~OL6>!zM>BjVqB4h;CC)~Lz#)z*Ekig4d=gjE;x@E$l`R~aTnY(D%>GxM&S z=^JJ5A>wj$3ByX|Tko<{+hrtu^4zD>lln|8Y2kG$1mZc1!sb%W_ng{X6=g)xELc06 zBT7gn&=L+>LHP5|JQ38Z)S3-EhGsb**Pk!9<=M9ymu}#aN zKhbjR>wa@3SRS_yeDehc%Gp$BX31S4lH$OA9wE&9^jFEU>1T3~CM3 z#=88)924!0UAGm;h(S-s_YY4c` zpT}5NMSZV;tC}Dx6zn;t>XLo<8se~^V{$_G&!!)eqhAtpX)(wdCFQ5mRx`|)*F=3^ zPX@})$kn6L(IF;VbT$i8*6!<8;vUt98^MSP=|xh@iH9ka8A?NwqV?1ik`*~<`^SB8 zvx-_ZpB&q-!)cDjG)J>asG-B}`iHWr@BQuRXkT>MlQcPbe3j_5O+1wPxI3(h+EoJ( zwE4I`bw^+8ZkMHgr=crn`s|#U<*2XL#qVW2le}7UOMWbMQDGC|$DXOaDAmM2pA&G^ z?80X6*+~My>J*uE-ZGK>-*ev&cZ;L}|Ht`ere;56r{&kS2a>NxIm=v1@$WRtsV;oA}AUZSt*=|zP*x%_++2{?B}ax+E8Nh`4Z2vZGag0N}* zQHmp1zom{+9*Z9to^a-0anf+5MIZ>7ayFM1n4Go_EUiaBxQYk~spc6uZJjAm$juhP zL~Tzm{_1vq?NOUD^jcd<8zKAimZW?FH|Hz_T7jxUYzl&z9LM8>?LVtBNctY~E@8$& z@9P$lsuq__JA%FppvL*NZhxi}eOX_slM~~s!7*#$)IrRR2!5g{KXuEan-*;h77W;% zhra8c8{of|d>(%Zfjuutzg*oe&G4oOOy;ZcWfar?prpFwO`+whaWh?5laHYxz>+^P zv!*3H^Q&a5dGS8W8W$otGI%VMauAe|`y46kMYmrD&RTs+fzr+M*=sfXytp?t@@Vt9 z;yv0nnD28W8O{p4!R!!Z?%&`-=vPVM()vsN8o7GG?`SB@XI{?A;`S&F)H^4_s|hxR z*0~8JA+q%OSJc^L&^4E>EE`Z4X)2JWTf8OJ99q zFH{WT=mXcWYa2jaE7oqoi5=6c_|d`+r~iGgrLs4{ypKWlku3uc;^7Doku3)MU-4yT zwrG0(b{yAUwI1Bj6EoQz zmTkFLpwtxJ#_DAd&g!rPZIgCn0XH={9Tnw;!y~KS-FYzpYf2q@JV`PwU5mX-I{P6R zQ$F+5CoC(*l+eEVnuAgnco~`iF%;>ijqtY`EUt515xk#1qH!+g8N#_;v2-NMjidgAy(%9n$9op&43$x!oU<0|YxjcZ zTHV-jIJ+54Z6IPf-qD=mC1hEs_~LmC1WPWRj+9upCqm*y3IkS5_%Wb>lM6ob=%QO5 zGWK1DMWO!udJj<$pYR!kpOOY^j0evT9L*UThC4>w(LFNHxnU1(9`o5nX~qlqmJp#ns0S zP86`d7nRg56#3|b0hrLy%X_lXT?_T$4bNkqAnNSwlnn?~IgVBm-Dat!yo}(Kwm2tc zy@AocRBv_KvAk@{&SB8w!0UZ9Yk0zXvU$@tW|y<*XqT84T})q>l&8}1lU#=S1rmw1 z$F{EKOywKFL6B}9-%;bdjCii_r^#S6e#`z;snRyFI5jvm487L5t;MKm41?dT3gy&L z?X<}f6GkPovTS^AELw~TTNuSoiDIR+N96C1SPA2kb3d_V06{znN9gAkXPwR0?-TU< zd3Z|qQ+Da*TJ5fZ*HM&j>{=j+_{qAP$jJ$0JCI6@H}|XObT;+L8X5!2IV$ z1+g1(9o^5cx-<1}$YrX(1kba32j8k-_f{*Utz>AOTGT)g{+g6-oU}cj-A7rVGpZgp zX$nxJt6*cE)W&zbX{g!l{Pa8=FQ$5U2K7h>iMV!gekqMvih;8iVwcD@hQt?Kji1B7}=iN3@J>ux)OjA!-@@aKBhd_{6*&&h_hf;Ya z-n4w#sfW7e{bW>5>`+e5p#?4??(GM(|H^rA_Ejc)d%t*oKiE_mRz2?CFvQJ;0b&Ua zyphReD9lW{olV-e84k4z-#uTB-Q;m!9>r*2si?-qDJrg>=+BxiHuQ<@F>0311Tu%f z9@@yJuT^r7^{WK%U%zBfmCrfo>RL|i%6ydjZZ-NYRW6)pNDt})CK_^VQGeuywoz{G zo7k|q;f;pe1r@fY6Llmj(nLZ>ulH9>ZtgFBig`FC5Whh_KEl6vp+e#K2-P#a|A<4k zU(f-34^E#(Tb$!f(QR%_w%i1oS>d9`E?lV*7nLjZL-Piot-eSFh0IICF=D_uk1iI~{x!Pmuz3>yO`n@1S%eK+qCZ6;% z_I>s{P1f9mH}!`T3jI2H1RD3xB@hQpMAjf0gpP+%UJ*1554A-vdFRrKXccCdvxv7@ z@PapbEc%I8vQ>5PJ8SEOfB*+Z|I+G-u$wnI=6T*Eb#-=TC1b2BD=#Kq_Ve%*JDTjT9O3Xup&p@h zwZca~V1@^!;n4bHDizIp^f_iSe~5Jq6Xhc-ypg5# z(9Ws}-+TN07q*!8##AER9L)!_2YOa&qUzXq5(Ze1$o#6zIbEba<^{!SfVRjl-EfiW zvF@i(s(%_^6HA%3cA;gBQS_m2@9pfg9d#t~v|3^EcVJvvhwQE%dB0Uy%4|8NWAa-KF7Z#lme=vYwwUKWQtLf7LuK9*EO?`Za<0qsaW zPV@E-gUAi=z7`Tf9q%wF6n#`Bs1;@n)wUxw7?bWx)mHu|AJuQf zO*glX=}xvBqgOMX`VzvK)f&Zkf7pu;5gbVgb0r-?)*R*CwSf{*o
    4w@Zg$yMp-)cbNb4H44i%R*ly?tt$lyY zT%Nm>kT7!302xZj=FoC9?lshLjTxggr9cyL=i-@(oH)hIm=0=uPQ20cUTco?so!6w zp7g58iN7Pr)nwg(v6`AuiS=zl>Ud)U*GjG&oR(?wuz}^QlaO{neT-IDWt=n8v4B!P zEyLqOcDjaA9%Y}1gRcgK`-p{XY{#rXOz&Eaw>83<4Nu}%E0)CDa~^R%OOY)Y%HcB& zyJ3Cjy-uUVPjrv=zD$OIbmw_0c!bR~rr-Xaq?g@&9rYfkr6}Hef-#DWpb}KDkR{o> zSY$@d=$HzPVqbB()$A2xY4w;2XhzndhTNu;%O_vXX3oB!oaC`N9%CDxd^NasR4RG0 zBG<+zm4=C+*I|v}?>^v@QNHASjX~KjLv#_>N?-%Jki}>l4(ajP*A;t8DN(5}mS5YC zwo0OjEr@QG4E~iwkt%!V^QTE)`KX$*wQ()9PDt?9$gem^ zt|vo37}}5b^ef0U;>c2*oAv%4mDf#X2VDjPz zpGp%_6T{H@7!p%{h)uI*lYh49BP8dhFy-{J_SReYOvsk;Z%+|M@6TNcr~zIQf(e~k z8aSQt6;Y}mcZN=@b%JbgPA!imiMbg8uSr`nIDDhU|mVEV` z8aza(FgZG+xm)d*?0ydlB#QE5y~C-%xoPiYKC>Go8qzUJ`b{aaTs%0iJaXEwjDZ=! zxr$@g!g>}@AN|~85M>KA{0DB^p3a2%lA%L%5a2{NeYS{vF9JN^OYV_3^q;P@PEKRY zzIAi@$*d^Ga&iub(J{G4HVzt^9KwXH>P#jslF$_VG4k=&|OJ%cjYHqWzZnTquRC3&gI0e7d%NL4-MCeVQ3tc zhNqTxF2_Da%@xTzRp2WskjWvCHCFiK6BmYBZ&VqC>ByB@@?M$uSOc)4v!{zFa{SvS zi>}En;bBV5N+rxly_J@3sNf12KD8!#zY@A8)0lEz385TCheKtjk>PzeEsko;{Gf008K|M4Nx{u8<=^a{fX*dE`V{7!iSNYje?Dt#`e%0tYa z`7Vk@bC208VeLKeq%cirN7x@nnqG#QQeU1#&@1`~QY}JV_lDto{Y@aUqAx!@F3Sr} z7jetWTO}Y4p#Q?fZ}}IwH===x?Dy4h2X6T1$J;K}7m74lLpE=LH$q=Vo#&ux;C}KS z&I)3Fz?NSrZhsBJPe^f1UEs>S)EQ_#{l4CO$+mEJ{P85b%Ut8{Q=0F=?|=Tg;V~t{ zS@*1yg8%&LtnqgPJ7EkI(!VcG&3`_9=Mu+R=u$yG_WQc5Va=a-Yd{BFZ)YiDOC+t^jp@~PoORr)Aj!{Cvxw8`6w)uhcM-@rg$k+hF@lnWR&3N%@3 z$87n1z1nR~YDR2*xT@~nMa&UZp^uDS%C2el;*#m4PKvU@qtaJuoid2Fp9@<=Sw4EC zK|6XjS~2s)zn%a_km=t-cUEYmtgK9S{}fyVToUDniHV7BsB4Y9Eb14EEdNq-{J5C^ za})=HaF9l_RgWIc!be}6v^aQD1;IvFIXvmTZvOe(6v@SWs->u!|9k|YuiX2ODB?*2 z{;VJ3c`nGmIF&tyAJ`1Dztz0q$ zQmeCn6=UjwDuLAZtG)l|Yz)^Vl?O+TG26Z~6>h zia)Wd#T~f)zpsk;{wLvV^jH27>;LnZyYK$ebsF~m6(Mv2|9qW1mkyhjdj6kL{)Dd= z1Y2GuCjPHwiA*93sNqZdtN)~F|4^^|@M!*7-~Xw=W;nD;nSJe+FG#}Vj(q++vw?9z z2hZImVEWIm2z*e>iK{a%|488}$v@VAK7vDLr_p zcyR=IEQ7yUx}8|%$d2LAu=H3}Nhy=Jr8j;9!_wt|t9+mx!XcSJ9EkT?3_;VWf5pWUnYGVITyaeqb3K_ZyNL=Zxm96jPlhUT)lm=KIQ1{It8 zV+RK!;7~o=4z?n;uOp(e+Rz%fICSs?yKmRGu9W-`%FK&>j zZhLJ~4^YAJvbXG%6h?#zFy^eX?ZswB*ATszfiQRFq3o5HH-@qi!RX=Q8$%|=gGXqf zJ;l|Lh;1mnj7G6!8oh&Ry%-W(+>nw#iK}Cd+?aD>kD
      _r&jN_2NpBM8y6k(D0r z_)(D?Q9jsf*{O_q8>kWj-or?4)qwtuEhrMaT>rcZEcRcue$_NU&cbp5;LMG zy_pc7obWOs!QbB3UbZn?p}O{vKMLbj+Yv?sXR?L22Vgoa%*+mRkq3^V(C}-qf%rPv z$!KveRhGMcB9@(?p#s#6=CXobqUTfgRS3w_0DwVX@YQJEsKlv{hfxs;bpFzW4MU*s zv(}*#xnrH!CgouBMqpMyG8I)T$se|~?J*E)eb-Yf2$%-`C&$PwA9E-!(b+O!{V=yf z?|oTm+Y*k0DC+ZqH@mTp6CzLeY z?c$V1Bg|1<({&t=Z3^pjr|N}sw@ojm>a1vSjBq~wo!7$v(bc8ajeH(t$!*Nmg#9yq z#uO#VyKQY7iSCRXKelh71v0fR^DlM5VJ?P{4VG7xJNwqvb_v83NlpNkk!j?w7PUhy zc0lGIoErH~u|7jtbL`R4rPQ;u{2XkmfSNMb8nO%R>iXhp?_G6Venv3dwQaox07xI} ziUdE6!@WHw*xffOj|Q88!UMI(P@!m_x%$ZF56EMum8i%OqG(l9DiS-+FjVhAr=o&h zTqO-Au|r4O&>{Bhp@%-zLB5)UY$G#F9$j@)-~ZKsAlK_MFNP-SYxl6*ppOyqxp>s3 zqT;zt_g@1x4_qF{_06fS2RFYYGiYctei336uU9)L{UHi>kL;S}%|ADb9uKl6Wsoe# z<@>N)Q5c5o7qo&gYHbr0k(jE1i~uax9|Crnxpx(GFww>nK+J#^F9V*5W(AC47H0lSao71bCx|#{d=nBI74p= z7-gj`O}PlIyFjtf>qa8+c3C}SD2Vw+Y%x8Zv?%BQ6RqEvy z^p|hiywJTlCO?!NI*9ou93Q&Fw)|$x2<3PmEdt0w&l_B;v3iMQ$ZuHaK$`P?0oqI< z$dB4Ghd&+?`H=U|e1J@0adYRn>&`r9DUyzZo`BW4G2Q9T0qi}5taxny94UdPLMQaChYRA6R>sopVKmntLK zx706x@M7Di$V|aLfYmeQsyb87>MH%Es33yfSlo_jl{xw_M`w^z1S@5D8Uc#|OG6a!-?*fQxdB znMH&}hrvPu+r;&stI_>5-+h{&`^<5K*LHYxy>tB~kDv#mcvQ-{84b+pXG5bIt?xo> zV@ljGr!dj_J5pPMn7OV%_ssR-9^Fogco$Mn{DTnEE~9_C;_ix0RM=bFYJ?*1L|F zyxqwix*2$|IQq*&?Exnu_%$S#x1&nD7j>srsF%ac2BltWGv020u&u3*A-p}xA1?Bw z2=o9RR&J7Dm6X*pe-KJ0kFc;f;q?9E+gAcB`j7X(*L+Sr6?`YQ)IUTKL1Ko{ijMlX zBa(u?^$V|Z)`FUX!$ z<5S{)kSq`+>2-+^LQv5GXgQg#CKK~aJcTxD$_7o zNInRMpRQsXEG=g#pal=q3TSEyge1OD;1n*MxZB)gMaK)S2zX-1xeb+s7DP6WFU0ug z%LYyy-hYvQ9o@+SwaaND-7m1FxFI*x*cFE};vUi}P!5I8#E2zYlQO4EnV2aG5P83GttX?Kn3)56$F80ff~ZFM>?M8YEe&SX;j@J~m^CqNWQF&YC7 z3mVF@;1zldtRG|bB)KSuGY!xtvlRXKV!=Md+Ky-AI+`|XWu+e)R*!)3mh3XWUUP{87Y&z zAz+sM{6qrT#wgBwMu%F1)P+Yi2Op zhjzb@6mn@qMnkUEioMR|7nsO;jQWs$fBfVV%1c%q0iMdL9+&J0QQJvv2i7tlr_aV% zR?-%)k$euXLEa3TR!vly#BZ?_cw3QWkKN0}Mdt-F-E~Ehs*>J zPkbMo=|fnUp2&k_1*>7)dL(*%W}W#_HDIAeulhn=ZbCiwkV%x%xZh()wmi6LRoeb4 z)1{A86QtbT*Af&d2u@xeu_Kb6?Wc>Qa2pb~OY&pCZH5HLmXxD2)hu6Z1fkB|C*f-7 zq~4etVoK?oXHb}J_fGBSJYwp7C$%>yV)y&$D+!KN z$w-Pv?`MEPl*R(K(kxA)ykyyg6fbMA+5Pp7`78x#RqN7T3K}CvxaLyT`V1a_?aH`+ zv4NMzH|aB&UHq2zd(q*qIogn|$foxmuQL>=VX)A^y}ENP&U7{#XUnE@ZCoN$V5<#X zQ@Es5IJ$+Efr0bj1Qzy;k&)Zj$;IhdatE)V`k(46$(6xF(N;k~M(#P{4kuI#56MI& zdFSmi{!mcjGh__Z*lMF6$+n8|xI~H(AS{ee$iqPuHFKA6qa9B+85r9z@7zHfgN$5e zjkdsXrWPmQhf2L0x*xvv#NNG>7l|FOJr{-nX|n3zBbCky*T|IdG}3E`IH=3~iWlz9 zP^Bi#2t;y)@1S(`?o%H=`;PXo&5n@LrhW>38Li@)*xE3_<(rhef96eLs7+d9>`C9; zcu6G%;&~2>WGk~f5yH>kS#xtlaQS8?m5s7IpQ##6Y43pIT5z)%`MHPZ=fZ+`d=Xw9 zd>e$^bNb@95jV87FE|ZQX27tgKVi-ia?1T2Sdc=u)X+Rlk$`F)15 zc7s+34tMrvcGm9rVN__!00l#01Dwch#j-^PELsuFzHS`RWRwnqu9F%<0z!U1=?6+7 z!IRk-0;ys^)U)S;}$K|(^u{SWo_(ua2#a1~iNvqk>9?cGTE|8vQUYXBP&(Z#nb!A)_&GyB zyl7R=n|}RHL(R9F0*8i3+Y7C&JGDFl2THm#Dvg*u`)n2(H{$v+cStvM5rPD$mJx6c zs)JDsY?X|OkDg<+K24y2LALU8#C80J26?gF%tf z|E=m`^SK4MQ+??5L>rccP-dP%PL|mZ)xKuq77Jr@bAHxKgX~t^qnl!#W}Jw99$!Kb z?N>u5mF0KOOT<(J0431A@1Tk?)t8A)>oCiC9?{$YnS0c##A_LYs^A@`sFK#DeS^9A z0yJu`)Fzmc(Sy;T`^Uh^<>j1|WVsgxIu>pj8KCdc)y^gq46eHO zDZnjPN|5srk)BB++@=x-|Je7GE78y6vX>*bOL0{`7vv}=`D z?;6(q6rPeVUdsKXD5}DdWohrCj)yow7%0tdpW!)9zE0|Fbb`cG$?w|9b{)KGELRJ@ z6ecGL+R4bNDQPFZm;?&9mE_1vTbE)$2{`G5`K1dv*yjY^Jb`YoyhvE{1L}B>xv&!$ zhQ*L$2zy+IV`)|Rq$NI+S?Gk)+yg{~^Y5GWJTbG{@DQWF9^aG~WW_PdIa)hf8aH1R)`-pFt5_dE5SMBG}s?|TSikszv;>_#n0ww#tAjg9wJDU7O8oauH zA0ys%F3bVk`17AWPAgt${sxk7sxh(ohA84<|JDl08BSg+3kURV^Pay)Yk9(s?R(XV z0c_t)^)?4-q1L@+H5tw;Q%RlFAdwwf4E0n!f1U8!T{Y*}a{wTiFXki27%Xwc=i3A7H$sF$dQ9=h)mSAa zEjgYrd(&5{O&u3Bi8bVOQbV?cm=D(%@kHw0yrJJAOz-$$D?x-2ZkgC-0GkF#pcICe zt=-+724wH`f~z&f*xM}A#ZW=7@hQAb?XoHdp*EE2VQ7eW`;~OQg1ayxw*5{#w9g3MG z=;6J4_8mwXLzru;u&`fx!`|JC?3_+<6^|OiT#R;=qi*NN5v2Sr#(R-Ai?|a@{R+0b z6U@f*b?qLA%~<~Ff*DW{CJF6JUx4<~$i4+#0l?3MR&w%_C+y~J86wGQMKMOuFl%vP zvmy`;C_5Z>ZQQmPR`3u9f3ki?whQj|6LYG53W|D=^8QREd_mb@RQs31N)&W zkDCg%y`z+VQsjj(ln4|0975&$3)^kq*poDJ$*##Y60xrrF~XfQv`T{YT$VTcoV4@O z<-5-euAViL2fZTHaLQMy{k|jQl>SgP;S4o%ey!HfnLFg*mh@knzKt8rat8YvzjxjK zFj4zMiQ3?2?V<)!zgse6vQ9k|hWWp~0h0V#yAh@A=(586GDpW?iWV z+?$|Q>^^Lx8a_7_fnY$$Xm$=>L6Fj0d83||Jo^sF|M)$35y0#(L5FOg@Pm?#<1WDa zTQ-!QZWeBOL5V!T^!pC+^n^1{*cArjwc9N6EH^w~ll3o20q>AOcVX8y^bF@!bWMM~ za>plJKS-p+us-Nwux*zM@Hn_hO`-eB*ot{a9zC@xKw)vMI0otHz)&~3$JQD_G6jQ# z09I#ntZH)T->LugZO9zXSlvB`Fhrw|z*G?9AgvmFrIW?gaQ1h97<#SgCG&c1MIA~V zl}sH5`8OIx9O@q-2T$eN?y?~6$jh9SLV95+`-9prD>N@^57{6k8$ezW$tYePo;J^LPr?%SSuL2Lj1LaWkBlN}49h?$iA0mOU5g1Z3477|!uy=Iv?V{<) zQ%I;}<1GwWyFBH$T84)nhUx@}={NMhYkB^s7)IYBRVt{VUGBgXRGVm3n3#|d()7%R znAq`nw49*s&-ORCm;d)dfTt@pL;vgsOv%jb_71A2d(xMvTWH788b=nF z23=I38&7(Xy$*RdymY445XgtnP(^o&k37$4x7||c#?ZnhxC2-jsE%%`7+{l=fq}TX zOa0n0KVj_p^+S)N7Mz@%yjHgtkH%w)HFfP7QLi9qK2v9nvFA(d8Ysw~sQw_Tn}K+! z8#%9G}WwYk^D7hC0p+u1QquI?Uy zZ;Vl#Rq+$1`w1Y`|OGm6b62FItC+l-sVmcx}lmzWv z*bC`;%V+C=g^upm_ApgpGDvJBApw=bwa1HmN0G)si5vxqFK~>#!4|w2l<;Rw3p}yXRWOl zH1sH6?L~>Icl_0JX~_Zz6D6peq%2Fdl}7U>6v;d(t>$IV_eRYmWyv8kFUjRmsY^FP z4Yl;>;bEi*|1DLreIo3)7|qZ@@H1Wn4N;zp8jGuew+J0stuSV8Y+dN|iXQL0xbo(u4Pl@`#v=e+=Z z@%IdgzWMyQ${)*QsSgq`S*-%G`m@SUysi*pj{6ptv2<~eFsJq`aNNd225jX=ad z05A0WYPfUh?SJEzBW{7%9Upx(ny1q5hW(@cV0=y8o*T`xp9S1sVRnjNZY6 z%botk5EZ-3QDgPdim9@V%`CWI0q#!ss<$dGEp=g|)84#>`}BH@;;BjT$kE32n}| zE9(pJYfMt}e?V2`z>yl3!5#-sk7%duC5W+u7U4$_miEvIKm(#Af@rtE;P;{>pjm z8RX}op&?mmFOT*Pv2Ww!C5U)Bfbf9 z^1!#toM(HtwEFMK0}apqmCL`9`!5jJ;YoO=x)tN(+dslXM7^mc+0;J2Rv-7p^FIQO zV_9>3ujb_A`{yG@t()in%Gv}F@(E>6{l5YiEIfU^^9I!P_x0Z#hkuC||8p2ClC2?E zTmCN0S@g-Q=^Yrg`0*LGYUZ)kUvUS{+adl~qG%nK34HZnCL&wr{Nc`DjE zEW@ox4jVf;byLIBpviHvtuN4*Zb9FoIKQne&q7R0gB{`X7@!O)c^Yz=jR!akzax1M?uekizP90a-6^OFI&ACvU&D zD9v7n;K%u_y>A=FgD@MKKm+L`#%m^n0=tr)f30&bxkS9nv5t6kJ%MMH;Zd_0PHa&L zZqhZzN6qP1n2o@E}euprR|eq$KXbZnIw4&iW^g3!m6Rl?QRZ5xD1_-JQ~qd_Xb@ z`~0bAIFOVtcB2yA32ym45wv-jV^AoUJ`(70>;f40Ob>d`c<3-}f3HsvK|YdQ@=bj} zPsblgHtELrVTR4>sjI`+9=WA!X$HPjy;Uu(dtN1{x;dTa4)!Z3h*L*tuA%pT5e%(fenyHa|hurY9rp>?!6c#;*+zGuMo0ymgXJlOnK8eQs0m&j(7F@BRqqDxd z^tc9cQb7GR3pGv5v)1xARti)kDJAHmy~I|XlLzB-K|lbH14_Pai!itH7DM`$0q*;* zTL*XF>l;mtw8hAUg4$e};|j6$?H^;F?ORLe5|L&%nq=H4IEnFY89=dl>ByRbVO)c} z6oxh;U7Nu_3FOWpv-apvt{apH8|y;ryFtih>g9So!LFYBFBLN*w;3H+ zFG5>s?YtdLh9dtf>Ll0m{kmt=p5_iO-7Oq8IW-dhCnLA6?~yyidF$$NIa;6k+LlP- z$}Nm1k|65H!vTw@I2n@+6_sKVT<40kMTLY`<8_|Tj+Op0UKXWh={05);@Je?{$8E| zCVKppqdIEY=9kVSnS%`*)VuQk0fW&yiz9-wkDs5w#)twAc5}O{6TyLMk(B#kvfP4x z*W+xl#1?d(Jp>pHpkqRpVrC__sf!<}fg${{L-AX4AWL>&d{uS1=Z1eBU77v55uok6=>6* z6)D{iY@mU1M<-I|zV+|J1YmA^UBiZ<>~IFbAAq&o@!?+DB<6!hAcTI?>9c-Hkqr4> zk)?|$Gz{&tZ3asfX4;=U28@HfmHx@2)8&aCRXyE9=7VRSXwL`WZ_jfugjdBp!k%A0 z?=eLBi$wi6k@{uT)2OO|nQ=+lWcNHLytWJr(Z}g^PXyGJXzpq$&|v-a`;;UZt5LQ~ zJ!Vu>zI)=8(RUd8_VaVwveeQ#8&S5asaNN*^=na6+-_EmJY0SUIzz`?bCW&ELiK_~ zYoXttyU97AkahOe;`U5p)ZAxU4ODanP;ZYbsZJgr%E9>&-TBw2-QSudU&k3w7S#4m zxFoUDoahHUP_>Xnx&KFZ+%Wq`qZWIZ@x={cF)#IP*2juVKom9 z*5aTfaF=)a*Dvm)?USAwsgq0vdcQHsQp?#1M5izbmnTzU_VKcML7b@s44*N0DJ67{X;VY;AW8b5c%MxAv#~V4`WZ$VQzEg|P zV(rSM)*-n0SX%dWWBUSW4s!0ZKHUBJjl<>r=pj||WqLBf_bnfDsUqmn4~F+X89nf> zzC6eo+voyuKRzGh^Ms z3fA4&ASa~4E!Aq5$c@?N<1%@5jFO%7W9ztMOY6G`mcR=n@$!@Q&-^j#hFLk%^B2ck zb+Jc8Og>GWH^ThLs7Yt^dR~T2lpg6OTKw3oguI!NsZF&}*n8w5jB+0&PPi+eMVS1;#pZDB?QH?c8cJ!-sk@Ny_R+k$%DNBzU( zMw{bMqg#G8dm}=axY1`XejoK(r1px??0BUCflLo>d_kOJ=Z;qS*txa-$?SRco6d?9 zdGW4xoIA;x8^aKxVb?Z!=&?(@OOacW05)t~#}j&-`TGtL)!e^J}7 zErt}@m=qEgifq!MoRSM~jm+^{Ci8K$DSEpxuuX!duW(lG2 z5-*fBg(_)&W{Uw%ic{@!tu|AzTQL}Pb+-bE^|=-dn0*%95WP0|KI@j4kSf8r+wvX% zwoDG&Mc3+>$(5nPVFj8d`w)@ccQS)%h>OgQvgJIq{&z{(l+=2}h8}x0W%go3QMD1!z3TD9X45Ubh_snYU*+g{IP?C`4{9sL z@2ObTURBb?i#zT~1kP>8r%LY&`dRWE_!7Es%-AjM*d3U-F5B?ZSFv+6 zaradfC@?yTS=U@UB3;Ean!7_r1&^bzG-#I&dZraOXYc5)2-|FR#9!3W_`FAAG8p_3 z$zkfenzQ^fme;zM;Xj(K-h}tT|B{o-ckGcUF5|hY)QIXNk`g(}sS|SbIPEpv`JiDT z_Oq_<8uIUbi@agtsFdt_JD2Y{I5U=pjvto;!X;uGvE*fg;zrxZTP;(u2efTzE#ov^ z1;5GECeZmgGcU2US+%CT&=${GfK-WiEH<#=wZUtxSn;uOO=03o)`iRErpZFL4)YQ? zN-guPR?Bs0W$f0p;g~^$IUDfrx&8 z96RRD;jInSy~*)!qpGL2e(jX;dk`$OQ+5^Hd(QId)o`!Z8r;ACnte5DqxfB4z(&KQ z@8Kgoix4pi-%}S4u?Fb}Y(xHU#HPK=%G8@zUCt&AqzMgx5-Oa<%TTzO&P0G~7Etu+ z@f+8ln8E%pIimcZg8fnHgu_%9q)Tk)qF+Qux_;_}0 z4TenRSg`{67tb0eecRWh@*#?rSY%xwDPlx^9Npk&F==tX^j)LqolMJ}X|T;F;KVUL zo~4T4n$4Fy8Wx9R3dJeF!$(BQgg9Cv>zkqCOhido)jOCFlE#3iP3by17S|* znSkC4ZAr8i<;(NYv3T~*s7;i6(n$g$Ci<+r zGLE^Dr&=;9+$y@!G@EQ~gjLY^)fL;~KO4@|bToKAf5x`YjT!HLK0#JN*;9nRU))0m({(V?@EkVcj)aSj$+Q38(KD8{$PWC}){Fd4P$drq!cwP7$%!U!7RNd(KH^n4Tlh2%~ zc=T`PNGy0F@}JQ5ODhwG#`a0sr`Wu5uiHt;GZHa~=dWI3j_&m+w(hwE()nco(g~)! z-8zizW9OH{X^b_XHM-9=v9$EyhIyzayXlpt#;E{xWNi0rqtPfs`7=9strFx1?T^fa zSyB3TgT5ZXWX67GqBm!EoBoqghv3pnb(R^5*e zUsO+3a=#7!==Gpeyk#Iw;Lbc~;><6$7gZya%;?3mJkITyB7~wy<)(NDtD1}%L zFwHY72jByWg(EcrM}2t@3QP~MaMCl6LK&dRhK5<(2G)JSFbxp7=$z(iV|9#*8r?-6r{lzIIh=H3FT%B|}k-HIY5DWxEy0wRq; zhZsmJNJt|fDc$Xn1_31mNtIGcN;;$^m99;9BeBW7?|eM(dB5-8?;HQ|k9)_sdkl^T zczD)YbImp5_nT`iYuCNBhOT@m{AvNJR|5QbRIh?$)}nl(nZ2YhskhqSN59F)k2sKz z8^A=LDqiU5&4yKS%8mMm+xLMJQLhL;5gL=Mpq8XsZoUjnRx)PAQz02;a>@x+iw^q! zs{Xja6Rn|fZSL8euipz^s@yAFSzc@>Vuj;S=kfwd$;)tnc`rYBauN zl%!$qImNFwul|{9`0LB{jx}Eo>I6E5%m7ZyHa7W}9%irsET7Ih>xYIem+;H;{U1?L zK5s~QE%j|O0+RRPnZ-p|3%na z1w)~Ge$q!?yG?Mwuoml2vL*Xk}qD zpjF6j(+W^~4~^H8{Iy3RiYfmr6MvQv7pe!J`SAt7wG0aij(Uq#ei~biI};Q1-GM%&dhQr&<24f78^!zWp8k|4i%t`r*Vwo^<-ZLS*ntzabvF8aRWEhQ zqrP{%DRlQae?GSIryZY;4l&t*2!5K-jLP4Mi=>sV_la>GxxTU!%S?+#i%LdN^2_yG zh5-*=sFtM%R_x<<+ECGubYCQ6ovR$8P)_2oo}#O1{g=c1+~Pd*g;tVx9UtGh<9W{n zx2CqXEXS0dY5eJ4jb~CeJKxRu)Rp-GNXag`NK)hR76J^LH#*R%fp( zkdS)tzyr(fxrw34_oKUBywZ{rvHs>*V{9u|f{>;RTblY=#M zx{G-=bTOFAQg(EmOkGWkX=;mkc0MKD_ZMXi8@*u>gh7P^A3)#!gd9a<6`M}TT9+49 zH_*2!@vtu2x0j?q zXR=0+^F)EddFQN$Qb>qgv9~xUH+P!f=^zvKh;z-lubKl!t^2C&m73~e)NT15rG38N zg9N5{a)0@V2GtdqM-S2TOasw7lLv1@tId~?DES4KE}hrWhM zHIY-XSJrX-!&_=ebxT^#(Xbxc&?X!Wr@~wh=3(u0Ov1%y2@G)u#$P1o!F-r2`4X3SO=l4K;j>U zENPtYpU!V%XIG?S)2te>trIjfsnczAW*z)MnUmucx%+!dSOPTEi_z;tSWdM)jpJ1> z@mnc0uOS?Zx__*EO8>Ii{2pMX!)Mo^g{5Bpvl9=xJK^d;Uum)di(;2F(35v6-x|6m z;se6X9lecK{>nP)^o6*5+-vIvAMKNf&>JHOJfs2aInm$kbl1Ohs$hw$*N(L*iHTvYM)44Z`a$>|Ozo^QD-3jFRbV{*w0z#G-T8=8 z31CmS*1|(k1-By`ZGYd`#NbkjmG^7)!LVj$tRHwd+LYp+?W&*ZBX2eeGFcd2(wx<@ znd?<^d^-46noAf-KMVlA(STLp%{W#@{*1Wz3+b9$q#wKTnR1i|1zRsw*N-pj@_TE) zFlIRw;oY6;xrKrT343u*s@kMFefN|chvUeT$%2A@N?U|?4@apC2kDOHoYOwRsbf`N%2rquxXg)s+G`R50e&;=UZ?dN_DbJ%5Wum^IRVz+puyA45=jy=gEm(ysYi)~|PaHsW-^^oY8Z>c+k2Z;+V`)jw8` ztm75SKVDF(&r{DqJLiS94zE7XexLFOy!g5ly5CSe2cc!FPI4b;c69M1H|Hu&;t^4(ceAi|y1n}7$~7Y` z?-s{-vh-zkA|dqLqXc_Ansh?&@-=tr5e0rJwRdneL*JYuMwZ8?wX{Zs?Z@&p4&GSI zrSbumtA=N0bkTUGrE*VKtLHsm*8i@WtwjA+uRs@o-AU0vU^P&a82_1 z6FU15jAQ^)^)d7MJ{VUKnr5p-!@iYVfwY~JV#;c*F2*cYDRq2k^^))M^0MFM>b0ZL zxtHBR9LvgQL_vlf3e4jd9E?bolC_<%Qf1j*o{K{*{m(r#^u~A=wySDoHa#g-Vll}U& zq!VUmW_-Z{^19Dn_>O1KmKUK14THK|sMg}?din;~YnXY@Ju5ML^~T4epC?Ej`ml%G z>5-V0T|I#~+g#E1c!&5h-O}x8jKo}3`0+EgI){d!!sG&{Jox1w1?)~3?=`(yH@q~Q z>8^&lm0o$N9OsC5o<};T z1$gN71ET$28pEpZz0S+yCTG*vv~k>i2~#>*H+@8$Y)ZWyAlyEG({D3*!1)U;_v=AV z<6o_eff(uMZ5X~+H6p->-o8#wEs2)YC1;c^sD07y{*@NyPV##beHM0ORpR$wH6jBe zv$v;)o@WcQfE|rx1pM$;egw{>Kh+=*uac<-$0w@kehzfuhS}%-^6`UmJDUchS>l{- z)bory1U5EjVsK{a z#}1&(6Km9k4aAv6i1`u___ixWP~%7L)GZEy&aB0yLF_0gz>kw#t|EZFYIz?+an>47 zNgb@huaJr)$b43}ee}+>`D86xSa5WFWOUGtLdu0kYEa=YVQfp(mmDAn)8vx7hxQ`c zIy#E8=QS}kl_sNl7cFeti$pDo7LET zDp4k{$0|}nji~i`AV(7Sw0|(dK}uf_cgpy4@|UKjx@!+x>2w?CgZ&De?*VP%EJr{% z8Z;)`^BAv^oOq1~@2j*P8yQbtZe3rW@q)IK-5Gb#(3|hx>)}cunUkqFM|B^?Le6&D zdkBVnkJ<6eyu;MrlAIB;(E*2 z`faI1#F`jwlc)H$d23^1nv9cF9J#*`WwUqQZGmq|4fCzjMP19QPgY*5cJXxH&9L8e z3>99}a0JH-hnx7%x?|Xc+`FZQ=#_z4LqFBj(0G{5iXR(OePYZiK~SD?VrQG^z#vhP z%^~m18J7~l`0w{9Bncb88qARTB|&?=F@a|5^i`)h1TJY`531%hpE+gFQYPW{@=1-> zC=>G-Q%$+Vnwq7+fY!t=r|;K9%>Iop?BP-b?w!ZJ`>?8Tmwh*$jpm!&#j|o z`$~L_+1A`fWGw3AjAhs9tq;a-6iITt`9N65wwav29u7Svex#y6C@-fc z(uv-ak26hLp&_O}s3Kslq!f>Ng)|A*Zs?~U)V1sg){ev#r6m6h8El>Nlez4(-yM%4 zV)h;6&%q?UsZg1uar<6J94R(C;Ou=w$wuh@vR7w}sp|dI4x{JUrkhVxc5%n!stWsG z*4M|kPF!aVqXwUpMD|*hO>=?w<);tx!TIKd$a(`}N|Jf%k_8X8NAIWgxLv?*eD(KH zj#olH<_pF0#Cl^lz8yMUpZWIV{P$D(_qP=&vFF#r6RW+gmQ1g?+@#l#+QfXrcr1{m zI;THUsowSQIap-MBIR-@eX+M)j?oTsJZq0=v;ZWqvL9+v$n=5_nn8?=>O~S4HHW;; zu>}CTW-U*8viI(&->xH@X2PY>uyyM2!!dz_9dVp0!DNf#x22^Di6o3}$oJlZM>@aq zaM%UE_w66BIvmW$`@GsJ{(G45EOs#_pFDpWVPD=Mjh$tQrZ0UTfvT}2CMI4U7#Pr_ z?T18FIrvqPz;Wb5R0OLcxm(R86}?Q;d6^Cuaa7Oq>{j0OTPR-r3Y=&D;}tl~=FdkJ z`u+F+rOWcJ)awnzkXf}9`kqK) zF?tlj-+m~4Tse$Jd#Y~V?;WbEtJ~qMR`kY}kW0DTZi=r>)IH^FVr{){Okj|)u>7~s z%$N7q*4Faw>)lJFrjRoK=eDK)Cy@2OCZA$xkVeWyc$vq;)3b_qIgij7hAr?LW9#fZ zu&&s#y1J?pz85uLQBhITxnOsVxr=glch^aZfJfS6SP6cb_uN4y$_GQ{4^LK+Ft5yb zhG9b7;4&ozml)pmLf@O_kNsV7K0{g>8gx6HNx|5oF2#Q;8@~h-F^3Z!l>a@xT1JlR zz8*S12TuR}{aWllMO(H1XH)y%>B=oBg5~>q|5CO`jIYV-ra$uht8gxp^xr4s{O@Se zf7jgqx@vw&`v?c`H077&7gI@TkTF{}zx#GWe8=%l6tlWf&&HE!eQ@J$v)M zf+se>Y%f9b-j}G&>-@~#aM`xH6vLo^3(@w@MIRgdLpxL92olmcOHIFc!b!Dmp7sjhPom;s&$;p;0p@ z3gxU5vexh)FX@FoSrHguzHx-GeZ5#SW;NbfDkgF@gjzjLqUw9+z{p@2P82;lxU?GM z=>@Ud%};q`C`p8KN3HoIh06^$E_zq|SenYpYO`^5j6TSpQ@#Nis|>c3joFv&@5rOZ zG~}k1?U|Jx-NR9tM7;zPFU$S0-&eh#SWlTb+Z$SRogzm`?tt}1t;N7QyKyqGb73cD0YlM&Jb^7$4-p{#BBd8rr7k; zF9HBFfq0Z$1^LDXRntgUxF!!==uL)xbj`2+Ty?VE-f^SC)5Bt}LhS1Y`8f%Ty*kT! z)14aX7U!$qCOn1BX;&boF@>^-U03&&4cEc($b0jXDk8q*VRx6kI3ULP;8;UVLwiH+ zLo>7QI1*PhF%}H(oJ2j5gG)&i-&5@DCqrIb^0Ad)hVQ`!Z(Rg zti@&dJ6gIZS7>>RvFa>T!8y}Nl0xarq{`vS@<2zNzDD>f?1e;%dSo=;?h@cS-){r2 zIk~-JxVMF7f`6wI6%I(29fT)mwrAik@~PJS+GXJWx>gx*rH?|J1gq9h=qrP=EW;PB z)x5?H#)mWpRXkyy>8(0@z2d~)mB&|*lAmHPPn!BCR+qk29;mBzAe9?f4~(>+kXV`R zA$nv98V36L2&!W?K6}ROn2p%}Hp$+Shcn8PK%^;xuPbk#L)lQda=gw1iXRFW_NG^v z{QSe~-6F}LoS~4F$q7~xmU={xFCA8x3A1l6%DElolJ16?M~Yy|C04^uqhhH*LO#!6 zBXI4L#^@%V9<}p`meh>lqD#t49#v7dtcQ(1FV-I>Wj>O-kos#%ad3Qk=sh7w?~~>v z%@-)Kfa;|BAB!^m8H0?(s7K1k`|JQWa3~ z;VgQiTnGHV8%BGEXe$Av+NK--S3RN$P{On2)QlBB%sPp>6oQOY6l{+CeUe#a_32%; z^Iegk4It8v_bCg-ce>x-LKhaPRYEL!1XVvj7afAjzFaW(wwhlJ^Q0W{IDvXZigf5L zbjZd6CMA5@eEP)Jw{AfNMW5?|@)T@$oC5T4Fe~d}^M-7e?|rNb5-Bcr1$v%-p9dvr z`v(_G@qn(6_FLDlhLxkOblXzO9H5k@_=+7E3l6@a#{#z9S14M(NtDdzQm z`)xu`X7^U}gB|j^buSF&BSQ6dk-)sjE0~eTT4o&U!og-Hy&t`yI0Kaiq0~e=*94faqNu=j4>O9N z;Wr`I7J?~f@D`QTa;?;=KjD4ZrDKRoQz&`Z1X*dr0p_$1@lfY3 z3A~|ONlx&}M2^LP$=>p48PZdLD`ywLuF957E7(I}!nba$jCLFus+KXZctNrXkWg8e z!>GT&6bD&?QN18;p9U0rd|%Ts$zD&_shm&YCK+ZtBH?FH6Kk3GuOQM=18FsV14nsL zYLL;Ziu;8|Pp@Nb4VaE6bjKwi{UUoQXT;c@v%e3QLZ{I%rXte>F=*}8B!BY$Lp@mH zawBIoE(qaaTi5(U4IE_U#t!^{F@L+N_&9#I5``*Ep;5^AXKt_JJKb@rKo67c-W5A@ zR8hB(3Ii(Y!-VHZoV|UwBpGn7B#;s zp+GA2ba!+GTxt>w6vUrSJ%Evv;^XHfDmHVsh=Ua`N|vTpXJljLQ)U%5=bD|**PXo~ zbprMLA(!x<*`b1Fu7J;aL00nYMRG|ARdp*6t`ClH+(MYU#yDjAD36VipTNb$d3-m*x*P zG~MY}gR>kU7;1m7=^&3d2ToX`p>H+#mPK1E*_me!AdZ;c?n)B5Yq0IQ;8fA5qYz}g zLi6o_0W~oP3>aj@C}Sv|)Q`I76Jysz@HoT%&90rQRLjf0dHQjVUw-3MC?7NB5JgEu z^a?~j4ED0@HR<$2nOgH&4)&^>biLO4Bm841&PyB8LF1j zqEOEjxP+MzO~VIGV|`oD7S7C`6~^I=4VFp%#qae%Eg26r3SGSU&olbZhT)W#N9W-C zS`X;P-(mLuJw>X$_rAmZzMj+l$vG+mxuHSQMIEgA&Q7mj(Qn<*5|SA4o*(C zwkaa5v2pU<7v4c753-K5=fLQ)@v}B%i^Jkc&~%XcN22l#!PCF4pYLULvM;Coou&}V z$grl)9D$FDz4ecDG6zv~#`-BsN6{V~&zFK%%9vJw!eC?d$`h|{Vsf1ehB~lr`ac#pjd;A&faCne7r15{0Ja&p0b21 zmPB5=l&k=5+bVK5SaWVTi8vc!R^-hzAbz_>)C+yKPk1j^t2Ubfm!heaN!zI1k@(Pqht;dwcqK6Bzj^+i$m2Uk z${e1Jw#N8}&>w4m^+y$C-DyYK*X>~+%{dsN_&*(KGU(f+Q^r5Eo-&e&COOp&h}$31 z>lRBCAH7<%J%}VYm4@*SGABVWg9sfN?tdniem=CfACO`VWG2gkIl@#^&%g7j? z+E%Unm$v=ec1)iqV7~C$e^=U5O_Qu!H+NJY2RVczfq}lWNfSm_4&q*}6mP~?9&oGi z9j^CENNca5HCvXG#eHYULdxF0g;S*_?^qb`%?jAQO%29h^8;k3XuT#zjVasz!Ij6l zs@$RYsB01b$u1fOnOq=U@dqdk|A^VQxUabU>@Jl_tEl!utzty$9!6EnrJysP5q)jh-4}^69tJuk6O~e<-Y~;1Q}Zbj4l2l<34!zYZuf?uLBz#OpUjw=dWjtw)TSwd&X`4 zz)lL2Y~l?Uq3nD@7%bGR8v5xo6i{Vwt!m|O7nl|g%);Rm_X_E(hjpGsIWfO452h}p zC`L)u>Q@DDaywPWP&0!QUWMI(@_LLGzY^9J5Rn9fNG^!L{6>E3zHL%6G1@vb>k42p zyMIA3!b)1`Xl{R7D=8S>6dyPsLh0`=sf9dW`TYcml>nHUPD3$ag`&EMP!uB)g&yNbFJ z7{s)T+dSOdvpF|OZPW7m*M>p)9R}L_f z6+7)tC4S(Ej&_FmPj1m}>O#%VKZ(K=pev}uFq+qbj8umM+oky`vj@5t#x^Z~||V>%i92X1AQ5eAM# z(dU3Xs7(y|fL1cg3a{V8TNT+t=JyEO{Fv)m&dsF1N=0sOF?4{^0A(g|t~uA)NUXmd=`~8e+m^16&g4<$oww5N-{>8Hh0e6BV$q18@(Nh+=Yz9kZ z*1c^KYiH4Ws{XOI+0l_Acd9=^gj!o!R)t}{cWyMp?WY<#u5^fvfWH-lEhh+*v$SNl zG*AHZ4BXR);?Y)NlZ^2!^18gv(G#dxy~zHLiEWCIhG)SXSV0|JZDPpkUtWN(42QhJ zqKmq~i601WtV0gFwg2wYm*Xuhg-=|}9^unK2q|XmCDCmtc9-2dLA}~fA z-XospShKUUo&`B2P&o;D9@VOX=)V*Em~}LhNda2}V`YtbJ*d-Js=kjOsG(>A0vl8b zR2b?%eTW@w(F#G)YmmcZ#Y=n8)lWahDrxRnT+6k9eZyO4oUOLPyc!Vj$7vi#if z_WGg&dMZz_- zNFu%)SJy%r=pQQn$lB{7o;1uq7~T;N$J|->YDqlx{_Q6^sT!cyy!l?`ok2+k{lT`v zz>?K9CcXhI&j3N>7`7$KUP=DGueY&K+59%ho2}}bN)04OJAW-EW^cJ!Ehtpd>NdE; zaX0VVGVi;;OodEh0v}9Ll_rcgjL)G8$)!t!J1bI1}NfMju zEBE&(6)~(nQsZL0iVu&=oyi(-p2h`mE$8C{^Iv)ZN&O)lQi}Z5u8;-J%aDrjXXd42 z(FMm-okK;@i+QU~r&0O{0sH4UeeTAstDnnC?fc`BK)wmkmrHt|7XH4d9fkUG|Gy&t z|6k+!%O|VXwg8^jBi<`7E(Ca+QA&g7K$xxzV0WCSr{^)#74g*!dL7bPK(1Z^$?*rn z$Hk*(AKgx3fo%sz$-oKV)&t=R>)gCN+`vk1Fy#jJ``38@_bnRcU1~BZ)|o~lObGEr zNXoU4L??;A&C`z{%1W*8iym zfXkS^C9SWmwfYe~xkvE%pS|G!XJkj8@-N!jM0cQWb++&Se???KL2PW5A9T=B>xjELafY17nrjhXZ61?yn~$N|1Ay(7Ve+V|1VyMOdJaQHsBhJQiG(-qNL2( z%{X)zPNL&6!K5R>qyi2nfs~t0%fa6Ix!SoVHS5rPM!lui5*JO5*s5%s9y2&s1{sE_ z->9^Feszo~{4RmWX4;sGgUGyud!Ms6gCDtWg#-#UVJ-ELs1VSStE3A_GbBfa*_gAW zhq!hDW?x#{4a_q)by&RZ@uh>CG7KU!ellhSO$^die9{DyjSz4C+RlxA8jNXHtdwT0 zl8|aTrkFb3Y(0jHQqY9Vad8MwQ2iYpnV=I^vMK1haWj?yh4RrXpMBPBn;k&zG>yZ( z01HklScO`TPy>q}SyPj9R>L*lMk-He(G9@p=SKf< z6_Y~>O$_Sl?-1|OY0L&Fe;(61ll@Ja^N?hP9ZIkJl4Ffe7`ZkaPA^`qxaf|#w~k#v zbBO7!h4>+_{;=*vU$01Dz~=v^rW8b*7oZzkK>o~MsksFA(h29UE*09=rGcmW3FRECTf48D8IY6@ zVE*@V9Y4#*&+kaDfOKdiX$%oKG3=Of{x<0{n(6OQApEGXXASm>|5Ny3yB3~EURUGS zheAC`B!Bz)ymPb})OSGr1;T~Aq)ron?W9uBQ2?u(6B#tZS8bf8Q>@qfY2gTn(K6fm zG#OfvFuQ*d1mseC8=Gx?_!xjz z1Y}3h#D%*AIfr^De^6iotl-{*C-0#bfjR2Qvx$d;t4;`Jz2`A@(bVzgi~2}Bh!+f? zqPM?x-By&xP9zEa>?bG$3$;kDXvbj!vfDEI*z9@;W8o2_AXq0`7l@*v%(gHrc)`(d z2+BJSe#B0%HQZ0Hnf2_1ElztbPHq_(=|E!Znw2l2q&bH<8!5$a`@R)Bdy67Z54nPq zdm=MuCrYPIa!K#`=?Dlm5@6G(clF^FK?@Z4YvP6iWfD;Of75 zjD3VKK=%IvVX)=HzHGq+oPy`->fm4Y?&-AZRGZiql1n4WBhyEW=pcyfT%5O^dI!81 znp2gYyTJ_)HUYj7qp}Q=I1$oJDg@X~Uayuv)z0t1&_Ck0KX6!62cziEh&u;s>@9uq z!Y1rK`)Xr0h>Q(zd{^R213-}ki|oe!MfYfaj0xGOjO@R)_1<3VOE{dyz;Yc$R|8_f z0}W{~2;w+Mqwu%$__?5oe|Fpn8Y$^MvmsBE8f)akMydMtmZl#DDg7I^S!a-qL zsG;Six05A_Lgb#4EebZ|B4#!+G{vb9-;C)MHY)Mwiaj!!BWm9bp~4oUb-?KrFP{ z0NZ9$uvL=1t9Fu-i%;E0>Hrr0=!MB zK9^vDKj%yec~`NJFVCp{+r#YFfl2?4I3e>9P#W*8Jw+~2oj?aBJ1-&&0d^ciq%L!~ z;k43wCGZqVpBwDlL+#fI=OwE}yyBh5o|(O-OBL^XPYrKn! zwT!_c)m}_vll`y^fD1}!yB{ugg~n=4^GHy0?D&oSXp=TzpOfgvYsbt;x7G-l>E#$! zSk&To)hs*Zvf@R-a6Nrt(_dT*J7eNp=%rjnaZ)^$@$>Masw5ws*!NgOE$FSnDe?y{A5c%57cB&nw8CI@Z<`+3K;5+XV{1BwfQn*VHO?=7SP2{qQ zrAQWV{^>kx8?vY5fu(vnKhseay4mYSFzKwLf!_$$bYmIW?puD}I*E))<&`<&5#T*C zxs^YBzU16q9RNlJ=(OPggg;Aa#|A?z^?s_err}u);Qy`o^2{}b?M|8#YU+G8atQ%Q z?Hf=xjoQ5=GY*Tc3AU*{IWS9E@UgMa7eum^fjH0Z?5I(ISqE0Cys|BeK9*( z8THbyfDYc6G~8JaM^cPGyM>*KjUfuYNWlE#obw%05)Y`+;;0&S7=&SK9%eLengCic zDn+7v#waC5pG9iiv^gY<~O&WS)xxsz@bQ2|*c0+}z_F#?VnuSHj8w(kzN zDJ9l}aV?3Vv-c1c$x*27wLf6rasPIKpC#}Zj3!KKqqf2wpyARzyF|cYiZp6dpirUG z1Tr!A+Tw*tccmOG1OhL*s|JXehU_F$_YbDN0L_v}tTVokv>* zp>|=Cohppnhm%Llr2Q;KuAc&VQc1m?8`|J*3Ekx>wsmw|OrU{tv){mi_Mq&*o4#jE&^NzV&0J(2;d@)3flKer)Odms6Ie2=% zq;NN*=-u+S>u_P1)fI|NPw_nG3;jri+h>!zH1@X&-+kj%lRPd#Jn*7}zh17^!?UQ` z1ac{UOZ|=RMTeEYRt#%FSxaIFdY8OtX1+!xH}4__;92hTIm!@GG}Ei$A3wdS{JGpA zNyn3VSRMQ-<|TCo`>Xh2x&1iN+Y=y<{Ii@>RJ<(4D@R#vQ_T%^gHeeeieolKRQ{MY zq07Fhde447lez@wFW3*a;REW*D>dx8r1^pssu(gHQ$Ksi^_P~~>o;fk<;#G=n0?RY zgUxUNd!>%ormz_5tGLs1*m6iPgS%3R$c#@s6URL}UjQ*waT0bY`VFOW2KULFs;JZT z;TF9Gv?w1Tm>^#s4n-z5wS?i*ai8Z*ioilC2)*)>zxCpWFuAfGpc;x9Dx-Z-8cn3X zz97dvwbL_S6$?TsXr)@xOCUTKYQ`S_IM!TvZg34pOKezZ?6C0p00PI_1PZC{NYjw3 zzU}>G@;a|85_d17P~>$G;67{rwWxj0p3gpIWHo*DWVU5XRKNM|@1-xXA#qgf63MM& zNJDJQS3eisYkjx5_4!~Q%1&%91i6*DzZiYPHao8}%P0Npu}Q0<#6ha>9un}&f*I+m z_2DthVDeOU3LZvoks6}hDcHjPPV<#ly=Z^+|jJbKL4c%DfY%|e|APF zVNhr!>al;ajTB7vy`kV&bsXJH$LYn;(1ujJ75b%ZPU8uKiI9T5FwJ3QhhMUj!jt3W3x^WwT{|O+#jY zvX_F5VEqakhF$nN_9oXD)4AHYLBy8ggp`zpQ%gk`YZlGF&UG5PHaBOh)3KI!$0EtQmW z2q459sq|O%o-koicF!{t!B|DferkP1$NHAwr!xM{d-Rbk$ohz|)*sFHSogA3SGL2v zQ=#=!^K9oMe?t7Zuz0PcTX;Km97g}-{4Kf$UVj&tOyWj3x!ra_&(`z@mRCkw;H3_{0!p)*W0dK`UD>Y6vy!009 zy}M@hImhCdb~{9}g`e(~1m%~MFV3#37Y0X`Q@Rcss!wGc5^b;yz9ZL@D*Y>WR*rzp zv;U0uI>(^5xij^XH_;7(bQfMpZr}0$Dn1ugOY^=vp-W}9HTY4T&#h0Jn-``Th0<=f zc@Y(;e8^yXNTp2HIz*>+Fb{nmy5c+2iv2z&;#?FO!$&w)JPn~(ZjK3_p5la>nNXTW zR>5Ze{Cq;P-lKw&@YngEQNbUdyD1SLREN%m#W35k*0Q?80`$aV`?i)@+kIsk^Egdp z;S9&N;Mt)ohJ_ne!%s-S<}^B~9GvAO;Jmd?)AN=+YO4ZaCu?`kwhQ^n>&+&Rd%;aT@SgoGAKO{X9dLCyz7a-4YuI?r92!ha zOo&BCRLef2r3o^TB)53ObakF`rL(A9HNjg@JiVUc-Gi%xm(9X~P-AA#{dU&7pFcy^ zM&HHnk@K&6BC3{l-1<95(AhtL=;qZXQgHh#@=LN)R-d_R#w9()>RXMnAC_z924^e! z#u(zaJSvksRIQs^lm0xu%uCl;GVtH0z(HU0v=xA-i^jNo@u~G%sc*oID z35N?&fuVl=JbF&qI60g-!zdB%SJpRIwcqZ1czDKrWHEI-`pa0)+b#R`^l=CWHqLgV zm&6R0_V@i{lm709B!`wOi(j6Y1TLcIW7_SR;By(5zy+C0J@o%k8ZhmZ=9GjL`_UnK z+6=Hj)Hu&pkye#1Px`IXW!~1eERuH8>oP8h@aGYI9^T6La28rVH{(R~8|;9&Dx1Po zQ@2b_QaAUGTa~hi65Mju@qS_r&S|RRN{0%JM;Bc)&}!>Bs?WEke|J}zj+yP&cxcS3 zL@+KMguEoWNv6)EI=5dsq8&V&=KfUg%h47%h3%}S%*6FKWjD%*?#v()`Ajl)1%9q^ zHo*yQm9fR??ysj>kD1MN2WPPwqMYQ#m5+F4I}b2YT1sqm!8LEQ?$) z1mRFiJ$xWbHSzNICyq9bw8{{}7NgyqMzBG2H4f-I#(TSuCOy646i+Je22u3p!wXpaHJp~tkID?s z^c=J|`PLTkF%O&sEO$cX8w_VPJH^e|HO=A9PQ=-ou4n^@H)EpM#HQ5Jf;NR~R|YAM6Dv%NxMf^Cu26&-1nR1pTq3*{dS?_nyv zx8OR^kiO`?xN5wwg^PS9c=Hy{U3w#u(Mb>Q%q+>yF~;Bc04Mj>_euY%>}fyv9q&L1 zw&Qs)&^BSk<} zIL!)~;i}oRKks%myD}kB_*eQ|;&bFr9W$n$oX=mhb;s#Vv{9qLkW{GUtdu4!+&^obsUB{y;W*1=AgArMG@K_!3j1*7?S+GnfS zR|YIbPEu8jJ83JMp`aiTl0 zqL;^Zy>skY8p6Z9+DEiC0%2uq=sbPhOy<=RPdv0O$O>j40KU3$_UVvs&i@bAMVp8W;bxlS%aRq4=;c66liT#R?%a zQEJL!Mqo>&K)(IQ?1i%t=*jMgpKa;QS`ms-&y%jNwx9O)*q(kO$|NCrWIMPxh`!0z zk)Q|H+Gc+?j_&b|rz^fj{`uZA^fVL{CTOe!&dkeWO}OGzQ#~XxcqsGv0;rtgqS0Vm zrI4iw-pugv8LG4KbMv&hRNQ_F;Uyf2oSsP-572LI)yBA)A46cIo#W*;_r6bS=#~p) zi)Qr*ymx*-AmmvVd9vlD`#p%X77M(;mY{FV54O4~Z7JS>U;nZynQmJg5R$GyU(^x- zvwrhJhbiY+Stn~7I?!d+VPtg#yw8xv_>r`mxUO+k#tpqn?>x^POAjG2`enG1yJiDp zKWM6p*?0LQ3Ym%VRbQiQ<=B$K{OWzq%U(f&Y){wYll1}-wXbx@GqIfjx=S;s|8=zq zEMmWXKUk_$il0C(W+l)dJ96po&|5{y>QOjtCM+;{>$aqmVv6JSj}Eu*eCzD85}l8Q zw>7$nEfMc5TRDX~TwX7>F)L7Zj2~FsZ#kP~Q9P`w&zOInO;7Qo<5;lT@=DVMV~b+V zUfLyrA(9J=`-6%nQiDYyYyvDo>rGviw`I1qRa%hVb2@tZ5IhPBTH1MAFI$S7{-<*i zDtSCjbw@`fqc_anfn|u2ysot``ogC@r>T4Mmd$}y1v;$Xm4lnR^M=y*zU7d|@mZd2 zj+Aw2>_4g1YN~d(C|-4eT*E>DTEuuaZ&55>r|JS`NsvWD8 zaiQ5r;PP0yI|QLVZx_Ba6+|mmiB)wbTgvLc!5_M^ci<7)VNg!H z{n{PsT{4N7ua@Mhkg`QLxW2DzF&WTd*dtxMN4e6@Mg!{IMDsR4zGX7f$u1vx79q~>i6 zu<3Z0uR0wI$%mi0w+mg!s=J~bqEX0boc#9@vhalct4O|9+9vG@M% zW7oLrm~8aKzTirH^4c&7VDowaKoOb!9w4NM{tOXxQ?AAI$Frd%3&Ld!`yed7OlmZ^ zDCCo(4XogwPpt+uwbbtTCiDsUMEU-68VVi9tj53cj{JXbqcQyVHkzh?@7xLf_W>$& z97xEDe0Y&k%s(If?Eh$~@A9*Zg$IBC95p+6^?&&AzjsRfr`B)LQgmIO8j$gxZfk2R zlkx7lK0G#t6}p^6gC@;gt$EPkO~09xNLcK*xMSHcfK$JB)P;OAQ|Nn!Vsl)&a^Cx1 z!y#dDz+wt6rFnF8)Vrdx(nE0S#X*SqFK-(|akx&o(3i96w zW-;QUNejS!dg~Nog`(Q5JhYxuKC`i$s%7iyDkQMDPS|7R?tVBQ=yGdC46bO#j(jeK ztvziN)~;~G_>P-YhXK((rvW@7JY6vl53o8=f5fGX1FM<$iRRP}ZQn=Sgw`JChvY8G z0=kfHCESTf0r)Hxx-cZ^>FVn4Y_{eDY>a&VL#?U@3^3#WPI%YtTwV8b57+(vt*rl{ z6<>A+OptsRK6@97u?N|a`~m`))WCIE2*_tN_i|f4(Z5pX6yMQ*E`|P|SJ(e&>mU7> zl!5cTb!av{|GN(T^U!}9&Hv6K;W~##wG{mE3jLR-%sREk?l(+A>ecBR-*oClwly3Y z&9*3{(dN=-;6~#F7h*aQn_fN*j%P)bs>^PyARb!@o=7CYX6sr67LMl+<$+bq3Mk-* z+x-F}%yorDbP=@IW{$TKap3O)5(5V*gxiyfE^S{H!>d2(Tj?!c{wgETr2_otTL1a> zhSW!kPu*C5(~txlZ~b~Izj6@zHB)eUn+fV8Z*x^WoDNXjbBXj87mim~me}m4gBhJT z5N4jKi09s`FOfA+F`3+JjKvlwHqPg4i;WRoNY@*v`xz3?O*HG$P5q#97~I$Ez>>wA zqa%DrhjBjDHj9l4cbgd!QCs&9F0OYM=3mK9Vmu&2J%Ay4{OX$zH}n{aW0MF8Os;5p zce@e>7y6TsIb?80BJ95L^Ls?p-^!gXkAI`3H>m$c#oZ{EF{~TPr+t1-=3b@jIOZ#d z1LZ@-4`AhU_yXkW9i)DIjTJ=y8O>kKwpP05({EIJ^xdlV$Z8jW{5}PANL+#Hwk#*t z#Tr@Smiwo>62DINH66@3-K)%x6?J9z@Nn#T8)Vo^$o~!G>)dUxpKnT(y0O_8tH|LI zvewwpY2r&|bGip^{*XPdht<|sWtxT*&xd)G@J9|Zq-dImykZv*H>dO9N{e`Sdh|wc z?}M#Ts7olBdv~7vGB4&I*Md8QZ>xv6dGeXt+dt0Zd0?2&x1^=bWon&mT|EYI2iBh{ zSavOP^|T zx~*<&@mZDI44M~DZLi4PJE&a1TGSw6WNh;KzJ*~vMX~0K-jYSmtJM^O)Y3S(Q1^2x zu4@22+AmhU63QX{nn%Pi(3(zf_~$7ARDf-$SWYrGFBw64kztatIlFz;p5l^{iEWvP zPiCLFZN{(Are}sv2EWo!)TG{D6QGH-mKJW@-g2yKRxEXZy#N;&j21=jQc%EgtdKbB zhClyA|GH5Gg2jMij->ky-Wr_mv*QzjS3MpicnAeS)L`Kq4*%W8jztu>X3+2Wx!qj9 zDb2;|bdoJ&ob@5nV}YU3-Q(%DF}`~y95kXshP=NTtpF8I^lP7tV*gF6}X$D<}S4?x!2oqQY3=YRlX2>u*=SNaJgWhon!y@C5)QwV?ng z)0i>uzN`HCxv+4$7*-7?)>VGkt7mBQXQxFNEUcOTPCNZk zZ|h_WOyi!-HZwfG9>hhyBLZ={1Q{-{zGjS!xLo){UGW;bQPC5x9f{v3TP(0RyDZ^k zmF8q|(!&I|Vz8NVZvFIl$8d8^>gHgc>xr4hMpp*xjyP8I=+n>5S!?ej8m2IDk_*k2 zX`5+(W}h8^_BId{&*IF8kA-3jTRPQW$Ggqe^)&2WiV{=;5j26=8yrT6HXkh-9{u^v zCfm|X+mF7&I4Vm~1>*Zx);Wzz9h%4Uo-+49dq=K&(jSNe(84c!YH&wfAnkT{EfWgc zg+#y5bR0@Q_9p4!EensK<1*1TO7!^B){l(6)$nou7p89KDR5?=AP0KH0=@bHy&8ru zZ=*yvI7U|ed)-aWiYB%SM2PhVT~f){s>b|TE+b{qOSvSUjz4E{4st_dU4Zj zo$UK&L!Bv`iHB zkej`{w#8inDiy%htMuBbf)SB1v23E%0se*M__#FJyv@U;^I(=X=K~~uD{5q?brFOF z3Z#HAq72w(!^6V;RSROzCo0?$ilzpiM{5XRTM7}h`cN4ChF&-^DUnnwDpB?dO;vKL zdv${8j|dt?e%iq2EJjPWFW*GlUH*O0JvAxGKBAZ@ki$k>P&BeCc(i={^>?;}2!*TI z-|H)T$LP&FL${?#RczhfuuEMNR!pz1aY~1Io z@M%#3)q?2aWeX3XkNn?z4=X%YYPJr9;^V;VD?_6J&?x;^c*6c(<$|{-jZ&5IKIpu4 z#_y2r!mXW~J++Ktg%|l9L-+nK!rlX%%kPaJf77xmGD2l`dA;uY9R5S&*G&3j z6W@KC%3=|Fw7)~m#kRQ5ck|qDB_F774#Wvye{2HFuB-yTdI5bT4mp#Q^Zxu~XOc_D z8oZ(xo2k)GKYb-_l{ z{rc+$DlRMCktB$@hEOlfp zd4v(Nq7OD1Q5X@5C;F|&SNE?ib$+>OmiZl^t#{>5p zyBzEE#cS`h^S2itn0(og_iLhv)DrpuN^hq5WolXJKhpDhw)+#ESTRDn4^V?u?J5H3Y_6>G-{w6lq2d`myXT1bbMP8f);$f^7hAB9> ztygZf)2m%~BgZ0C>`|0+kINnjd8?KZLQt+|xcYYTXW^EX;bM5sI7ZgdC%AyIq5g(> z>p^9Dcu8!V55x-~1QT9(1&7kFeg!nFE@R@-#w2IfK6-PGZSr-(rw41I@2Op@iGB7y zNo;!K$2(5{m!XIEZU`K*F3Lsj1hl?Js-2RZ%uG5f2s6*wSXOS4V{fYZEZ|0BW<+G- zA~nsfOwwZ?>sMzdC&<-8CVN`6{7>A2{;N6ra5Z?dB$zA=g5FPNcFxwdKNUCe8_VGr z7VHvW-b>5b@>vkA?Sl`Hlir#ND3Q%aTjst8Pb?xXJY=d(hcrw3Cy@qYB4e z?(|3RDKK++{UJR$PUM`Y9tl@%h$g$WSJ4)2*pMievA7h5AJS&qfJQJbip;|r&^OMN zWZLROoC*HBS1hBqk=Ki-H;Kx~}nBMMh$xBIB4hjlhTei>XGm}1$^2ynxL^-c5hZmUBzX+nfq#m(y8t_YP zMsFdu@+dz|l))nS>JJi_J2osSH+T``PQ4 zwqnTaTU|P5pXXGr0rb|?pjx$QFRcV*$a>ueHZASBgyOZVP;Ky2Kg)qTwe}r5x)D5X z3(h2<1cLIg8#UyPlDulae;l<-DJaiH}d#{N;LeJmZ97{NvO(vX05W8{ws@5MD3a!_eM0w z^FE6A*7rEsEO4cQU`ud%Bh5$ zy>U-)dN8ufI?cwXn0Y5imav?}5$TrjqXSlCY%~j7QM6^ zyqFC>U5IJQ=Q^9^8W0SCdRujQ$sj)yuuYaH{HJ8+&$eTe%BnChqj!eZ*PYH48~;#e zcWq<4+0O&3L*BUD*Nga@ltW6t4o^_sA*b9-*jG2Ez1K`SVMY$wa;VPB)O2VUlF+ME zrn&WrRwqQ|7Dt&|+4zqFBj&Z{X1nVw+AFx;b!Ji{bv%VSdJM8X-6b<{qW1wvk(I`h zX&kQ#Pu^Tnboj`uf161o!|U~|YETOEI7maiEsrzVd_hZWgDjMNwl22V?KS`oqw;Ng z=e>l4C*LGoCo;ney_=y9I54V{j+*Yu33?AUnpr{Ny|B$S?LmL8tl8NoI5y=0Z?hw) zjVjL>e^V_F*^1 zPIht7F4Hz`DmCJbv=4pwZRgh`JVy`f-XmqL727_~QL*F37+;}8zY4e>FX?iU;CGOVFNj)Zu|P}Q zu8yD^{B|x8P9RDsgC!%?GvTyn^Nr%duF**jE=nQes~7l}ekrtX!_X)q?PDi=)=k-Te zR8k$4TqS9lMw^s;s(r}FHZ$;L&b+*Sh|Ji}mPfdyzP^AQC1R4RG>40qbRJRieYQN$ zb}qt@fPZpK)L%kkLv&oWC@5-`jDY?i@9JcaU9Rjf*k?J#BkAabRYx+u3J@nfz!I@n z#7WWuGH-&?wT7wz=n0AQ%0Xbdk@9~5VGOCYO6--`@L%%2(21wGx5x_AFKzc^;?*@c z9>1T%* zK+BY55G^|~nVPUl=(h@cTqP0{KD!MzFM(-p?dFw-GYu~eo(ua+@TtD;w^VHNs4G6L zup4}d<-rjKhXh%RWEqRaej<+|XnBkSD3uGznFJg1{Kh(3z^9asUl~(s_Ny=U34yoP z1Tk~yEOd;*M*_)^X~ors!+GU|{|OSA_2~o@tK84e>$Xh!P7l&`=-P+fSH6;7n|yaq z>z4FE%h`x(Fyt!jtG$ogNFU5If_P);ywe^S7}i5R80-eM*rScD(`RQ}1?F2cx&*rp zqghI%r@prGiPnV>H%3l#r6wOvo|YYhMQWam(l{V_F6??U%i?VYFg8=(ArfxF)1OPQ z%nA7Qn^$JuN^HH`*uPw=-6eiE$fYKUm^+f++im24FUjAHzb?K^i@jUmprz}$P5oRV zamo5m%It!GLK;k*G^Wr8y`~=&uDV`iNM+C(gVNB;SMSi4xTaFh@s{+OQfriLYPNij zc->ReTllg0UrD!XE=O$?d^&f~!lMdKa4XkOt!S>GoazMYvRY)K$)H)iWuIPuE9PPP zgt}(U#)C1PzX>5EMvmC9s7s7bDpaZA#CrYiCK<_=s+MZp^3KwJCb(2Q-$*<7fx1P`%Wht+EfBWgRh2EkY=si(~Nr{Q5Z1ZVhLAk013r_ zO+C+t&jEZ6@$V1;CJRK31{Upm#QQ+)lOD2U@PtR&o6ezU!fedj*3i>=(YY6(Z$4R? zuxw<`+OnIe-o@!B_i-c(NF`dY(ByC^C$;(I#!xr2EQj&0O21**8|ozbf)Haxujf_9onqa|!2Ah%fRls*G9ic9$3-|Q~+ z*haL9E&19?k8glY{0K2_+y*6C&fxLT(qB^FMcN>ADzpViC29 zys?lMPm80q+`b-tTtj!Jq;uL}M#{dxW>zKSwI{Q|DK_u%Iec^H1v3Ae@pa8hwe|S7 zhQh7R0<7Sq#wTMC@l<972zB~~Ocr*$w6#~ljQc+OBC_nW@5U908@v$^f}m*RK)ai{ zR~9J7E|(pJCdHe82!ye0fs6D=b3U@oXJ|t5H;AfvGTHbLS>RRH#TEy@p8@ zS!S3n`@k2jf8C-ize5foAod83FX_IrOm@fkVqaU-$H=EmeZYnn%PL28@9&G}p*=%3 z(^*d6Igv@dxap#|ttX}Usj>I`*z;ambtKT?vmlh zff_}tJ$-#GJ?4Y#vV^9FtO-Q*r3z(#As#1ho~j{Ne@OCbsS3c~VzCj9#34n};ooMp zGQTa4v#v0g?`_#QG90N6ym)F?Z1K%*XrbMF(A_Crsc>lBVX_V_`o&hGM}wVqLU#+- z_;8$FVC+mQwmWRE2CGmqa-E#U-D%lI~qJLb1s$GX8hc(LD-$14|omr zwx?X?5@5KmS&=pn`ISAttg=|(hh2DIyv&&RT>uYZgiUI>E#O}e2@!KKOT_WKQLU=N*LLNV#H#Jxxh(wHGe5_AEhY3Ke1Q371JOtB2wfj*amWp?$O~Ngb z*-WtR{s_BXHGZ`u!eAi1_}K|V75Lpti{LenXca37&0os=O}%T&=Mpz18-eJqe%m?m zeG`ENBFkd4RbJoEXouf6yFo_ygwlaEJmcgGmi69@lU850QMiL zt|EAOl2;|ymWI{zsWgAnC23=sGamZ1nDN=%G@7FCoSfVdfTL}sQ#E(MCs80-o3^=; zz26B`KIPr3bJWQs`v*IxvvvAJY42wqStM57gAr}wUOp|5ImWqs0FZ;07zOoJqO%bu zfxT6=nYW=UrF-Xn&RWWnJrhpC^gvGL<^{mlcHbEc~v}5C>A`)3J`KIn- zq0KXIM`kY~PWMbhf8U{fE1UZQxG#e#g(!aIS*}q2I8bTbH~B3##g4)TCx zwm+PI^DMAzUW@d`*S0bl|HntI+7bZ;_45z~@^Sg(kCW54NY2sH=;6jT%OSY7nb0qx zz@F1~8thEww^T9<$?q}@G9W%qSf!~9gM?cNSnb4jVjNF;G^9*JJwa$N`el_VI=KU> z0F^_~%@Fg@yKo2k+4qTya@1Xm)jbj5OlnpgQGs-+)uS{lh3&xKL7?K}#)%g+_&OoM z72&5+SLN#ZXPvhsZfi(aI~?7FB^kYoUEX|ulEr|Wz-B3{0_M-OLshg^;I^#L*qxzB}RZ(nUm-GK8+1;oIcUPrS_NwLt-Ni zRw^^2UcTz^8l0~^*D-SvXXJhp+D8DY4SQSV6G5ZQE`Q! zurC>76QUqCU0~_-2WTM|IjLx)s?PH_u>h{);x+NdnJShaTkb`8)D`xZtjb9Ub-3uS zE3PlpeOJ#Vh6Dj?mtXV{jNw@J8iWQ>N26vstgQe$)-U_kR}>bLobpHc|CR^`>RV$L~a7=Ha`Uo|WA`88JV$ z*N%budJS=DOSK`hay~Qao%y#{I?ARVv+z|fsj664D%RO2J-zrwJoEa9g8wb67s36; zM+Pg8C8`xN#|3&?K7BIl97{KSF!J#`Y_oxe8}i98%?@@_uisvKOs-!!27O2ghHwsP z|1v8Y8es8;DoL=D(Bfa7#9+Qq{ATps=~ddPI7en3r{PUDp_&&~riKrF>>|tEVil!C zs+yV_mwT{2OcEVae2f7Bb$hQy#(w@P$W1;L4=KQojp>qT2)X zvj6zpQ9ONZ;asj@U+P47!R(vystvN&83F8!_mC7UV= zAY2v%z#~OWhDbMP)9k~)E2T?PZ?%>_tAq4Pq;Qx>zWnoLv9^B^21RiY00D4{uSf19aH z6GQ|YSVc!i9}W3#?T;h>7@yK^0xkZuauncu#5p0n>yOjx`7_Wh>QjR$Hh+NReCAPB zQX)i`BtmqFB;u70IgL2Bb?kbF(3jQJ2(SLVLcOy3>+rCPt{rVoRx5sS-Q~sQ$9$ly zNMR}iNxV&Q`27=#zLye#_>kguk(%!Jzy9BYaEK|yySuuCuU3L%gOvJhHX^i_uO@3H zhVgp{?5LjOec}!o*`iX7xrQsyfVg-a|CrnGn`qjxH50h!eA7FwfoM(ZBSdW>iqC!nOsaO|QBqK+Mj ze%!FYASK2O#%X1(miH_p_(y+(q72+JsJ(SS4!<9&-&lhjRjBy%8qgVPMzbt;z&~6GQ^6;KPiFMGGzlL-F>-qukemfrl^iByS!FI?_&nnM4*#!7&mT6D0?P|EQeF zX1Cl97VU5;vMAUcoY6w1KoFPM>pr6bL8r@Qj0CGJ?u8B9Z6kiZiLmfB+}rkmE_Iee zBQkYgPd-*%UFcNPx9?GVoCK%q(Xo1~p2IA+hnm^@ye-1$V~1$&vmW@t(@VZiZ}5=A z4_3E?tXaF%N@R1#qH-(DHK@!A;9#-pJp8^ks&9j7t`z#{jn9*?s{L8q0K$D**we!< z0lq4@8w^Lo-E>xhGy!=%bnLs-)RGcdhgX<0hPd^XS`X?F z1H=NMM}(p61&221pR2zeirb6(Y)HIio&JNO(8Ezbml~0UB?c#G9kE?_U@@>~wFya$ zTDio)2LT`E@n%I0e5hKb;NRh+*F{C`v@EZdS^yMk3(>?Yn}M}tsCpt$$ugD~!G!|! zO6ztEw#dtC(^FM-D?pxu8*{tyE`+m6K#VEGX+i+}%mFQ$u$m8|2drwVAYLCOpU$mu zz>nUES)7>IH}}}7u^n)za^GALfDi$V$4H1kPeHmXt$vz9$;kKs7Rn17rb!utLYRk4 zc!oR@ez7yvulcM+2nh=T3R^;Af3t|hk-V$C>|H|HqTN**55>ox%c@{A#46HUDLu)4 zB>}TH(chM7mD6dfn^>{1ni+gIcVGgxtx+r2qyXD;*@IvJ`Ytt3PK{G=u-Z(P$wvm~ zUOoOF`Uh;gnnTH~fUv;`%fZzPAw zQlF2|+0L4%ud%rvH+G-oVi?r{F9^(jbKTKpehqgB6^YBmM3ulzb^&N`8`!h+y1f1b z3jn)M8!30SyLYuW3wumEZuIO`i1A0&+X0uM1Aq24Dt#V^-j@)@P=`CtG?EWb9&TSPbWt+xe`Jz{gW@<_EH2G6{q&Mtc;%72 zAg3$BSgtovYHV>TzUjv9Ixj`JHnwIC63_>!Rcze@%f+GFXZAsVeI|!i@iNIWgle2S za-o!Jk#e@$;a8)@guD_kCsmlk7@cnmbkk2@pb;tWd=K^;OJ5ZhNp?fHXu(3EMSr^< zWN}up{GzjogcrbP8{{jN<<`x6LGS**&JB0)j)rk@{c;ByCP#9r8t?`r*_`w?a?J#_+?7 zQW3m~E`OQ+_O(Y4uXb+K6(*(F>Kbpy``^60sy;ENEQqQ5AhHm=tuxCtC*kBh{0rMA z^Ag`m4u_6}I?t|{$U>Y5A|#I}{XR4eG83ZpcUQa~m5m@_1QRf6t@FvlK}kF*W)9iv z(l{aW@CDo+Hcpp#^74(dPIhh)Wwf>?Kp2N;#BJ(lH$n;;N`)$t?#l1~2K9jE+2ZfXn z_43MG_*kvConq|EjF;VGz#IBy@a1AAoVRKsg)@2@vsFEpr{7U5s1H=-K!2r#!SN35 z#O!@-P$Mz7#N2b8UC5Y0EX;j!9t;SsfYDCgOwXKGg<~1>E2^u7F8{KprhB4N*jLcb z`bVg*EPA~`ld9Vmql)vOLoasu)p9F1jm~MXz+88Ze^#ctjN!Eq7aw26R(bXW^%TT? zDqdH>$k2_e#v45mYb*XX!Y3l(3eygZZ~c;7ytwO3&aIrU5q%Kxq(yH!)TxHyMZ=Gc zJ=&Oin3xV4b31o{r9kr34FQq%N=h46bR>*sy^(R0VOLC9T|oY(-f!_Cb*|Z zyDu%>^o%hK9q`FbJUL>NqQ-}#4l$tI@=C*ayQzDnFll8Y43~y1%Sse)`BbpLIh4>z z-4o$aV0KtkhBL;ImN1GX^Phz;aojy&JCLC8Ci2PtjwXD?z#MKI2<ph&Co4=>vA`m4)!yT|m5!uRF1^{{p;+F5@fLy!8{%`leSeu8Kh>UUuNZ~Ieo2=uV~TUM&Re3 z6U2hfbAx%VUm}aKbtg-RSJQ6J{*)8|8Rg`6m7_&OH(ffc4rR>N)sd)S>ibKztdTQBw z1VcVE34aZY77()5`LXx;3H9i;KtjzurPM=qQNl0zV6kAJVfPkFcp~UE|CvzSB;gk= z!oaiGK5&HTSHE4sI~B*ND1Q5B1-W6DBb?3Be%zFAOAmg zNX=>L>!HB3gVCTDqTKW`)4J|R#w(wkRAp!ZmrPk-uK~{?H6D3j$Xe45k~L$?1Q*Ua zXMb$|9D;&dH4;KN;C2m1Fm0P80Tm%x|oaN|PN z^XTg7dECQtPV~fQs(IS;6tJNC+MKk-isImI?g$~8ML&~0-pDKs{r;?Rat+Vh)FI|v zwDkpL6f#})PB3n(E|V4L>HqL-JAMc}$D{y1G@P<}jZE;#ao{w__*EgVsk22)z<4lN z01krNW?~uOO2TtG*8^h-$fh-sbP5#d8w9>O90EB)c1N+|D;Buy0vQp~3{#t->VCEY zdX)mbLet0xJVjv7Nb8cq-MV}#%##Ga)?H$x1|=pX5ho8p2{Y~h(K1>ZN^9D;=dgxQ z;!HJ3XG{3w#r=Gne(+ZxH$sIF*%{?4tnFb8adKQQYT&-t%-rh;z8J49NH<_yOlB~F z&@(c6)zvxP{s0G6|2QV&@%=jxKL5!AJU-ncAMfUg#0hflboknRK*W!xpUObuCu-aZ z0VIw#F9LLAeKt-RK&L#y)Rp7uBsPr<(;199%Pa( z;L1Xvd=sa@iQ8m1dZP&|)zokM4WsQ|K^~pg$aM}4d-CFFi(9CaN{IBo=5Kim=J;J8 z{~UHZM$0d*<-!-aRV}B5%TOa&}C}ZZE!B2VzLaW(-QBBwTQ{6 zhga3*1Eo>JhTND^E(zU#d0;!xe80i0)yR0qW3Xc;2URa|77W}=x3gJMPtjpWqO4gP zQCsY{WzdqZy2xktPK%cUW{418fD0O{|FC|Hh2k&+zneBaEKm5uFfcCDjE?-GNAYcC z@SzkJXz}~K=dI26->gNt+VGx@2Y-(cuBk3qv_Hb_aEyMVo;%ah3TXWxb88|uHc5=iso{52h|uot?zQpAe-7($p__2==&cvoWLrxWnm0%44+36ocpIpBOn zQdmzPAUg=~H~?b>$2~jdHo6!<{o>_Xp#b0HA8atUn8tuzaPZU`l-|0%Cpi>OzJkhs~yo!Z}(ywFOBNZn&Gpm2ZtA>%F zzNv$S)3fMTvBYwQJePU=3o$p%Z20gcZdcMm7&FhJ$E$4h!lB94=Pl0ZE@Q^%mx#Z! zA)Se#Z{q$K9)K@0j%TfK-!Is8fTU4mF<~i*#6zhtD=d4o=;DurH@s9vR#@Hng{to%?3BhR*#a4JNZ|8Erf~s1cbt*sOQK z(8?k11UYD15rWMsgjbav=r4{E?f}D}!uR2xmYyH*VWglRunn<+juarXgDfIv%g8b= z5deRnvnGIa$d1c(;B6Q2_<3XHu%&@pL+u%KSQ|#P0FueR?ZLuv1?Kz#5rI{4tsauj z>BPUKDc=dxbbotp<>&R;jY&h6x=P`lK^PW}Yp^x%QLVHSL)f(;iY8zzzYqBVGG8^! zWP$AYi|`oD-c$zgG8G?~dJKh+gSSvouW& z1gj4K0SjM6<{80wHGeEx!Esk4uT?j4g=T_s-i{GfuX>3mZWw?Q0PK`FL6=8hQzJ;Q z3c13lx8Ff*?( zMA?zF&1-*BOYLTIzn^X3?fw!3go`?ydK~g+cvvAA+WjOJFJCtZ%CC6*cCqz2(JhNkmM>PdjjI8 zx@p1)o9?y|Z+*xI0rLUSe|hZgkDlAvO=;1WcdN(LwP zzP8p;mBCW&CG#OODaL^6@*6z|6q~#EKhKztE02|Oy^c4+HV5^@iJHkd`U2@VvX@M4 zXLFzxK)B!|*2_UO6>TZCSZ$wQv4Es`MmX{y8-FkkFb=L0B$8iMAigFHOGO@k0cw1vph4@bJ2r3YTH=H7dtRX44qstt7Ko(k`PL)FY}21 z*m8wX09sOsxFbQnL%OyD7l+V7dsog0u7CD*%KZVG1(G-7t$x=&^#FtkTO=YJu@^|r zw7;(-0RggEt;B5WtfH;^V2DF89@_~j_v8xHqDZyF)}_%>>inKd11&@R976O=Rk8Tz zn|2w)&xt&p4^?ES_4DO}3M|ZQwzwld%*$`AeQayszB#IJ{KhmtJI5^w4AEzqzYYpMTD?t_db@SWJIB3_)x-NIRzvK?8^?DAU7B7bwQgp~E>AE4eK@8qB4Ij`4!+zPkd3#h=U zMGA-diRU3^am&$s;D8{-faG)j1)G|MKdnbOs+3Vph3EVMK~bX$=x2X1|A1+x;XAn`-g>!fYnAXBH=Nbza-n(8|hcL@U0ZRwm5zPXETZk5JbNmgu?N z`(boPi5c^rk$jgInrpo2WDX zxi0wYDT+6xP5)uaYS1f(0kAm7W!WTxYU(ET%L!svEX;3xhm{-e;0`SJEk#5~{DpX&ko)^jUqa{YeL_Wrl1 zQMq!E66Q?m#@fzx&oNbDR6Cz6e(@uimHL}hCQZw5oXkz5WfPFL=@(n*^npib1svA; zR3C(Xw8I~3wM>s!b*FJ<>s8$je+;{b=v--8+1BFQ+3i^5r}DE~7hEc$Lt1nq^h87r zL~aoN0m(CT!syHf9+b=e6T-|$ypVGQRhmQ6@6L0y^l{hR?R@eOpHrbfTrdy}22x^J zhyUe-s1YZL=k4(c7^PZX?h)f5Uyb2{gWg~`DG1P>diFNPh2qit zD28h?Veb~MZ$Q({8RHvQj65sDckUipZs~88U>|u%{Ghx-IaaQE8Q!Pz7`JkYT2jH2 z4Y{^k6`J^F;Go3Q&TZQz^~7l}@^TscwQmehQC{(1;PSQ8SCY|2G@@c<`RgS=HqX;c z*h^HCgdjpc4QFC%_xvI%V2~Ma8mOOMhI7qk8|NPtP8i`eoc7YLkiqXeL(QSCQH+%w zXvitr>F{fI8lH(~#gMsr7b@_w{ASHJBI?BZy~F-_LJ!Tv9-N;wq(#u0^67&R%$3+@1rsQnfqMifv+n_skIGht?cza;(e)mU-(i_3B05MgXhY(AB9TcJxTr4uY={R zz7a;^3IYwmQr%DVWserR_WAQpAFl%{xuxw0)6b; zBBBB!Zo-2fpx3BKA7(*~lh4yJ*wiV5ZZ!K=Ak&kH{Ii z751wa?QLOjL@i;+Oh2^pJ-CF2l8=!Er=++9*Q4{CZIAuxDouCyIPv?dWM$A0fN>yI zX7aZZOsY@dR{bL>1if+VuM5VL>TYzk!Kd| z8h}?$K_cwVNOM>T6Xl{>C`#V{^~-cMqcn;iV#3B+FFfJCc|bN^=_`5!!Jlha;b2nt zMWMr;MbXJ0`{{b zHhIE*v3kyIQA=&=)1)O_Qnx_v{qFmD6*vulQHwWwMUJ7odY{PZ&Uo`3(IZu@pQ|~` zCyyxW_K8eqS6(IkVvGMK7X;t&Vtxm$b$2*?knA6PakZnZXZ6>{(wAW0iooTuwvfi9 zm68=~l=%|FP3eQ?r%#I;`fNwy>{!pAF7^dYr^;9FKwSp1MppuDf?v%S9NnYT9>7~ZJyzB|8C@oNQO#_5ZtSF3;y~#YRt`z{ zq8^Dc@u7_}>F3;ZIH?q=&%9^X-C>3vs!_V7W&iEGvs4q};La8vu`PJ3%H3+NzCs|H z<$E$E2oW1>M`&{K+em$WG|Cd^_oTqEp$2?5LXhr4PW0rcfo2p`$62|e~{#%D0=AGKpX5&gw*78-qH zXK5(Alclpif39=K!|1rGKq7k!P(wa7sU|#Ihr@z7(%6e%UEyGAr6F1Al*3)j5D>jb z;%k%<++ZJNjI3-t>YwwoH(#s0%SbkdFZb?#YTxVY_)&xMYR4AG%O!~ckwG*{1jLxonD5dxDr>XBSa0dxJ3sJh` zj#0LizaA@a&oJZ}TLW@OCc_poV#P2GM!iPZ94#sr*49)K_J3K7MLU#io%Z4np_9?x zBwZWj#&YI(ZH-bfKn{|oaVQXRaNc1aOX{|?2-iciHC$$YCjg)K>f1a;)zdxRI!v90r>^5oPqaOSe4}5)j~>~%J(>;qm$QIL=~3djTDKmq1UT4DB+Gq>tBa? zl{eL%c-Me9s8-K;;{oyWEIx>9aNVYE^U5KW$(8U#2rH#BOE;g z9{2Ba0Vd1bH$?>!gIHQwwJEiovHQW*i74p4DSc^M>AZWmD&gKD&a>hi#2qo{EKVD@ zuvku*`c@HLqOmIfksD>4Yb&Vk-u{}R;1u;Ksy*UrMov8B(S5r|-TQU(!X zt3EO=S`YpB#~b$;D-`h8j4&r#AFNZbAf`2KtLRR2^Z=fT?3Ais92~xgfbvThVIjydp;Jjg zFXwvWxfY-z=6P?nKF1C?k(FFamF9O~z`qjghv-#6PpOgq9#MmHFe*zqaij=yU;$Tr zCV{@5*f}y4+WK3(LIq1|b6>c1x?SlE$-Vlp#4#mzpM~%*-L6Qbs`@lV0Tne*4wF}* z4>i%!84}%klH(rdQS!3+BSj1{sn4)-Y#%FYKM6uSuX{Nq9Y&Y$f+zWJw^mfCfN$f8If+`BO3LUq6LlpRr}K;5=3rFAKd0} zlXx!14+I#=PN|7gT%m05L;AH^{+|7-G@ab}@DdGbEpo5;^8VT8K+)`eZenlmoljd; z1s81ICj1a51Y3$CO}6l>?aDBkH^iyr#i*Bt-GWM}0-6?QxJXo{yxo*H;V4qw_)$TN*P7spL2ql$$Ny1*Q`EMD6Gl|&_1l@ zB}(bl$ZbNS&U&}G_!yFtBQts*Qd=62$9ZvB+jZx#*n9pmTzwX`vbFTRy1;}R)tOuA zDK1EOfyK_w?F%1&q+tyEth=+jog0%O;@&jpvVR}alN7xY57+HhYKU7#+g(ZdQYvPo zNP-?^ZLL&2g-Csp(a`1OR-Ajj<}G?UlyTK#{Nfe-hXy8_r}|IQ6tZgmh&k?FjIr)Z z(8wDC8T#F>sSgNI>A9|g>M1UqcrROHzfk^us8I)3oFu{XMw#6s%#>^3i9TK9#G_;@ zZ2b7~zWwu@&@+(vK^bDgg#C!HznY>nF0uK& z-JFLuI~1-Dfa{=~^^dnqUf3WuxBe;(M4+=7c{rT2?q7PwxKK=LOV2~ZHen3)HxI`| zHT7cm+71?;4^*?X3Lg)BOu0tQc_D3A)h=oCG%Bsil^RR~UOJgDbzz^I?;f1GJ-}zE ztLv-@|C`eLkf@aJdJzB(Uf6=jDCPU;Xlf=bzRx?=GW)}Hg%}k9`*6~>-qr7VRyIvuBjPkWKAF6rujQ%`O+i;IdCF43JR z_4<&sCBhM$uZY9RdPfz`2t|oG7vt2`(Tz_VvK9^%FN zyh}kgtNR6612Q)=(DLToU4^7Kfd?L1^TT=tx=A~UmGsI2F-{L zu5I57qkVwN8r2o?bNBslxjlz^4>J_5wd!(<`aZm%efO;z3h8vtI*TB~bcr*TghLh2 zv|Dgc07eQ)1~nF3GxND8*LQC5jBrQz-t=C_i+lxSkfKFt;4LbTJ27zbP-aaqUwl`r zho)sV6zj4Z4Yq$*zs-2eDJS6XW9g58yqLuN;^9@D<0icNiuv zYJq-H^Rwfl_BW+cTz3g}9xmM#((cd4_L}E7n=T7>BoN-`jqlptT^({ZC6s>wS34CY zd6v5xVYC)^HzBT<<&3i)27uHJ+cxe3!J%+f;p+6Rh9q%fB@c#@T>no$R&X7njfXc& z_a4IkZBB~H>vn`we+}&k4m!WpafJZt;K<3d?Oo3c@8yx(ikj$<(pOOHv)jsw0fXeM z@A2jy_~Z7`RB7jVzm>e1Y?yT>4!tgU^^VU;g*!A8*(FFqY06O z&3&9_8E~FizNG@{b0il4{B0~Wwp?d5G$@++^xaiDa$0}S#MPJVb!6Pxo;jGvxM#G$ zEI?*LnIt;CR_BOiu+Hi7vDlt?!4lCk$@-A8$`SVd`8d+3g-7JWMYjQ#QVYf)RZwT% z=4QvQc>9(V)Tt?YakGur?QQi1&+WB%xP45MJA1qbGvoq0e8d397%K1B8(uq@-k!;| zn&=&VqkWRL%9U~4ZL=$-CVZtB+0i^^c@{!Yq`L`+A)hsOzJOl8I9T_JeW)(y(X%51 z=jnBt2lY=kTw&LG$Cwy#lk7l)+=vN0nFa1I!2ZzfV0#g6h`aRe4##!$H3faqLl>os z+2KF-TSh0Jpkml%!qN!cc0_|jo2T={1MS+n%OHre zi+w~I0z5TI+bO6RSF^Yv`qNH}7t**D#_41@gRY~vSa2mg%E0%#NH8wds5rJBwZd*n zxF-c68E@=+Uk$!g*U|H+Tqpgz&4(Sk?7*~)gJL7=Krl3d!?k!>0lC`3fp$VtLTKJy z5tcD%_Nsw1eXY|Mzw;OK5#-{fzdGDPAgEi{`>^pKeY$+;Q8WB?_d@Zw(H@cetTv_? zT5Wvpa*Q*G)2bNzP?7gT&)M6z)$_kR;(Y+N1-2ZLi?>*5Xomm8FUBpn$$NDkHN#>6 z?txp<=kb)v;!AF-(}=6fbq#l634oZI)?X}x>q%RMUPcm)ZvciMSo+m6muB{>M-g^T zqZZupT@UF3AM&n9(#oWMt>&9cXwhT*D)M457bTx>hAqBKR4VgZD*=*%EA52+eWRVf z`e+piG)|pNH%{XNg0t z8O>g-?$%EJVdQyQajoTtf<$$Y?*SVJVt+cblO~Olj6L7WQ8eFtuTjLfja8o)69rJj zY5RN05)wuOp@Lr}JTG9hJ!i9?uy;5_mgIU`qsdyp-CTV&(BDPiRlXvBT}m+;w8 zDmo=x^g^qz6Wr%g>Q2A-v+eEs{8Q(ONf^-eZ`$D0OcY1hBg)sVmQyZ0B2AA48U>2H7>3ESwRJ6zY6 z>sl?}J2rh&BH;$kX)kB{A*!@Mj}#&e?X0+Rq?wUb7j)coSCvpHQRM${$e62+XT{AS zRN}bk*bY%meZcA=d$}1_fV=}`G}Hl!9)nV^sURmfC>rUWRjUSHtZi~_f zzHqT&|86a@@7!v8=qRzT-RG2bCYe$voUSdokNd!;wckyz^I|gPXpAd1|EOH+gKO

      Wi*ONIP0pF?xC~_8m$za={3qS6o10aop{`G_9)%ko&SGIOJz+Qe(u}{}GTV|s zcAa`)_7t!ePXb-B%ck_i*=>9IC)XcbyK)TZt1W4&ZNM%Iir7IQE7p#Qd?8n-i_FmP z-=Qqcbp8&2moM|9mff_p#oy9Em2nqP8EKt)^-ooJGXLE*r|$p2C9!z82wc%bEoJpc zmdRD*lu6)PJY*icu66KnJ2<-Cc*JbR_Bq@YJD3*X9oFk;Cx*hBZwUkW5civ$iN2LI zvwpu8e$fmP_^u~TVSBe=b?16C!bY5y>*prhvS0w)Ir}2aNQj-gk#6i{mOoGs(FCja zKOIvN=Z+}}!>thwpl-$^xi6cy)_B7ciN4#`t^G;M%gfrIx(JdfH-<+>_N>47r1bdj zOsqd2dBXlW+7;WmQ*wTK{e-2J@@hGp$pQZ3@4h#iC){jR)-Fa6lz@No;OYi>n4Ul0 zo5OP)8tnq));j<5(!y|2&L2X+%jJ?O{#qBQz6JhY`DR^T`gde%T=U-Z z2(-zQS{(B`bg`QC8#$Yt*D%En0KR_=%i%ArpLBbsD@IET>$GxV{1str2=P#^Kb!)3 z8{?vtVjy_$rNi@}hQT-J(%D)2>`kW5tCN#xpA>ZIFS$TZl9Q<0PuV2)8EpX^YI9-lUd< zTe8BpbH!e1*g&AvUj{_nrH{}l!F$v9OV=S5Q+)ub%EU*e$~`TBl}z;uCBe3xslbXd<~ zFZiWzYBww;ewZxwuDmmUv0GW3T!9tS1eIq)b7Nb!~+0Il6APF69kf zN2xsg+@=k_4xo{Zf7~YhEdV*It=wM$7`nZrJGnl!blA6+f<5YQ$*=%Upra!Nvie1~ z4ZdMNbBaI zhbfxQ+As!S@s)eCtF`tN1Oy#hNA^dWnoM}>(k3Z3;aK9!8lU$_y_h*NZtIt)!yz~hIo4v1$iV2V}sxZ@_uzQpMXV^Ll>lS{&`SXCGZR@-955e!Fp$z z0^U*_h};VnDWiB{ZiKGJc|eICUo6=&eITPlI;Fp-MSuw1e%kQv$gCx2LpPwQ>#*f` z?spE)CE&Akc?MXC7|yTHR)G320g|rvZ$gsn4f*Rc=zean722v@s=E1BNiHDbrcHzpW42|pX&Gh zAEgrBR(4q-D?|vHNsfrhI<_+2*;_Uxm61I{2xT4Q*jvM1m5{BhI7W6H<9og6{dxTU zf#2)>sCVae&b_Yty081XuIF{#x6toBAn4Zgix5w-u0=fyJb_xEfJ%aNu_QoN7aI2T zYubjik3bo*vDEsDUhRmiTrSZmfmXgLmA9oPU-1H<3BU0Rz!(4?bOVp)$kR>rsGB0N zn#tJyS;RBVIC84GjSQ(T*1Wg|`lgFVs|{s6ip0}+84L-tHT3ZFlKa%GRyG<9+xmPCzQBU8NO5MSgywWk8 z7D_ExU0`W;?`;*eNHnLDAg8>#1xIpEreUlMN-igfmnk&p1`P;Xg?xa1h zjrx!-g>#ktWSx6|;+&StzEUr_G^Z#`l4NoG(mI+b|%w;m_*D9}$n*SVH#q3qUk$ zg=9L$XiT4Bo;Z!~cMWdWWR$-VHIu)$=RSrftPslu^C8H%BH^oPz|O-#d?UVpTTSD8 zH6lfadn-Imyx6_rXj;>s&c{xHl($yCn*l5KinM>i7MH?t2K8}29o)5io<_O5$jojQ zI?8oyV_gtc{p&!Y%AtOMsud*Ih_%o&zDdSNnV5_Map_4q;zkMqIRR~LLp;XA0%-{Z ziUW`*F}7{5;W``m9$RQUke36vGcECq46wG=lmZH1fqhci}#v!Q?^MXhD@&xRTGbT>S% zN+`QfPj?p)W9~IZqv_UD3%f>RAAvGM0$PfBe1tj5w$m%BhH!`B(fp(@!lafz)Km!aoa~4o!D@C#p!I42g>R^7 zj5rXqrl!0ce);pR*CI-Qw@3 z0`Exeyz>@?y~S4qc53ij*ZB#Mf(=bXM~B8gzq=udWR<&w+YUIbV)oxiDsU4y7c#HA zq%J&j9!prnM4z!Yhms-yG{y2HI~v;j;IP5yb$gejmQesROq@7N?kw8v_E>g+Xr+MS z;%plo&v#r*Yh!qqBH#wo^U~K8DY1GAq-R5AfmF%3{13$}7j=G*=|zYnV6%2?wjwt# z7}bS*@407a`5M^(Zr>)-Osywrxly1;N?S@S@&Kl%QgXIxRQ|npZ-O3~OUANQ1E5=7 z2)gy<=4OfNkp;4(3hhbsN&Il!Z#I>T5tl*i)Fl<>CWB%Y3*(Kw1^?<=3;=N%F`J|J zN98JuQ72(D*xfPm`$O37Qs?%Bul2Pvx1C4}Py+5Z<%-b>`8ch0EicF%DSR`JRUrG$ zx4p;A$*y3<1er8+P{_%977~AUCbC@@Mmot5YJ(~!I`uJ^?n%#x+|sQ}-@{aTFZ1H` zX~-4dKG4^JWw2`4bG1+IKwNF;u#q7ztpK~q6#1LuRY}4Z3c31hnxqWJs<0q!m)J*I zv-sIFv*Iy)ss!!l1Kh89BFqLfMC&M1uBHdse5XWI26&OgbN&?iMBHC%N|UDK4hbVF zIw$xzxM{-CZQDQx=v|kL|76DX8IOYh{>l;l9nH{Tn5oMTc*$8nP_QZ*#-=re$E_sKoTM=g zepyOhQTgsOJ`w}5XaUJJ*Tusq9Gkun9f(*hx&dfE#^8gRibWQfCJBk+%cFve%#$5 zp(~AKHiA-5GgTAVVBsH%eXWO-5C$m+JQfnw1)e z2<#^}`VRE!)1)u4AJloe00y@PsQN@Ow(MJ(x}!qtJxPV2Js?}0T4#w39^GZxOC?3@ zyi|aM-4(5+kvCc99xe)U63rL&)+&Lme&9v-j6u^Nf@ip<{H?y8o`BSe`-K-r4ceuJ zN*pdyxV&}HX!g@-P|l3ns;~5quH>GbBO+^d9-rE_J2tET zo@vx8wkrM=sb>Mx_q|M$_j}jiBA%DIsw%Uv?$5Jl5yPf?6boQ8?z}i={F&dca9YiL zeCk{fSh|h>V&gFbFDf^s=(xB8;Et}GCCQMLY4zLG1kg= zuJLzJtg@6N(z>!HW#iu^CR;&TOqceh_sf0Xx}Th#c)uF5W^M%m0;s_O0WQR$!6Vri z);3gWcvHmlUF}cXRt(>|!a>F|r$Yv?NMqO3D_nW?s-~E^4sB8ii`(T>h8}kQmS882 zEq57uRbrF2-t7t47j#;-SSWQsux+eO(N-YfHh@hk_v;L-Vw-oy_+e?;7dV7#zw?yU zMP9O$vpm6#$T?#Q3a7bT79=&|YZA+7bP;U3@hSrD!oE!t1$M8C5ji^24T9E+@)!ZS zX=u+x0kKQ@FC+*ebtD^LJhT zBaHl!7cv{Y8xHX*ni&G#3;G?4kE|PCUcdo=reCdI+wA~DxON%O<{c5hf_7XSTD}30 zK;Ox=f%vlY)9aiV4L=-5&D| z0*pAusol>Dv^h@eHj6&^>4d!Ap^rd>($x80>R~shENqJE(}w}V%Lw}N@;8(gX5akv=N;w|Wui37;e) z&2Jaj%o$nzAVM@pMI6;8!zz+p_VF9Du$eak)*ns;Cn5*DhWV6rc&~)c){viyT(DRs z7S0feVwWiPqoZN0zQHqHnj)ily{j_rT#(P*4`N!1L|Qzg^&8p*ek+dUbbiOWPhDwE zu0&+F)A%&4g3tD^{FnL>07ho$M4g2C`!gL$(f54qIX((-^;4XZPgVM4Qfg)~Vp)97 zh7*;+M4hhhKWFsE?Sf>9u#=5=G6MBI2NZ8i=;b?*fLfdep)!l3j~5DWkapin;RAx6 zAf`;!5#>uHM^iQkMVak|wg4tND1*bY@lHN)|3)3wC!nJkY^@StZ9&=q+IPc-5R1kJ zV-!WaZU~)E6fv9ncVHhr5n;-z{XVUe)PTo+O29P{s*n^5(`l^zM4&-+_&qfB6W3xw zPt2AkeWRS^$R||UWlf1pQ4_Z9=J@-us8@$*>}|H9^58JMr+YBeOhDTXTsfPhZo_8Q zTG>D&AF$H;XxqOHH-CkVD!m_mr`;by$9+%dF$8`PTCjznxx0J=!*Ow3IF_^PWT#-06Tpfsd9> zzYf#W!<}@vg1d(C8;v75CJ`+3j-$S zS_1Y2NXTOCb@u)HTBsQ`WjOEmtuC8%CnwV?2>mBP|M5Z^Z(cZrV0g!= z1aN-}zro6XU|oAbI_LGS*4a)*YAHZ5^GjtpixE)a^Mr!Z?j>ev%_wk;?Jexz{!;@= z&)nKxCcZ}I`5vaMuj~-k#0>cC4!M|Xd2?5jFf zf2(BvMjFUj0#P=Dh<%I0NK0cflZbs5w*TV3Yj<5UH~u}0GVk>|V=1qL`eRg{w!y*S z9-3MM;p2E;|1%I%txsR@Z-rcQc1H)|e_a*(8=4N=9XLv-vYtr{?tL7L(8>z|4RN|2 zhe&4W)}7CC=gR-!5Tv`q$@e{61dtiyo-w-raB&5R6^JP{5Jn@DXohxlkTZzzlf#q@ zH4vRXa0ZVMM}~vU5CqP*J~~}|*@r_HFX zTxemowsGw`0@&eE(XmdsivSWdJ~Tw{E?T0^&yTm%#8esxE_t0(`eb8g5A>M1R?+m+ z4nP67Ukn48-j7ZS%XmfO;2@*H9Dk2aJlX*(-dmJ{Z`FQ~q?7dd&UG$xbP5TH@DprP z{}}k^Vgy2MXSr!A4WwGRt&2+35dD2dt;cH}O&6B3?zO%VXRe34op?t#8aGgaSN71b zi9W|nfi3SUDlz5xc1s(QPRyp;?n79oa1*vk*MqQfj3)AI%kia+xaUb^pePe??3c0l z(o$Ut4ETU5rz>N5!Wg@EYuUp89stRM$2Utj1%<`(PAkT*{z&3%yVdvREm*qZf!?q7 zJX0;+$hKkc43zk@UXGG(mG%CIO2RW@4g)Py0g*m_imA~D)_iSsT3PC- zbgJRDsI447Hj5aK78N+uEY(z@qgYwn3+Q<=`Rmy0!^yVP2-N!K9SgBN|4nPhWz)jT zeAV`7yA#T?N`J9chKM{!K8b_-A`mnO9#wH%T;zXIcmBi4cu;*nHa_-&o7JE} zBTr{t-r;_8_`$Tf^Cl&j!&J!m5AdYlftF&t79oBdRkx|G%>+GJ&-xUP`HyCrkMh7? z{AAsV=sSl+2xf+^60?KidiY76WS$Z_a1M9tKeRTWDn4uOQ+NGYI|3AzhDt8pwaTIQ z^2PjBEFGM-R(}3o-Mo`90=t5L@CpBTWA$p(kQ{&COZYlSxpVz2q}d+!Hzo< z5GGZ6U;T&%8mj-JVL|)j$d_3CEF`M&hzf}YZX}BFeJAw{?)-i6uKGAT5^Qj2Bp<#@ z*NVAHfqgMg9W)}eqW>RBg%u7)PF_ZTP=u3 zvd~p5ynwDkcrT5ffL9L4|<;5Qo{vSlsJ}ghb5TSp9T@=q{qVZp@WV0!7 zESaOGv#BN}b^2TIJ;J*8ZLHg2g^Thf1)<6zBMpYIYkq5jhi3%fIah2$Vv%xE@*)J{V=|H~q&Y^u5vsO$> zN&AlX2U?@kGx-bcE0i znaN_f46vz%qTv94sHY*>cK8v?62|Z>1Wm;r)NgH%KYd6=RRi^ULweu|c4#T`0;%lM zi=nDz0C`G>S>+DAFDxoSwwCV_*aZMlLu{Spfv+5>#P4omtm2}CurlP{Lz>6BAJ^J5 z!6r--betg*iKGztE^#?G3zAY6H5LIBZvt2?s_4tBPbTDd#F@e?8%ob^C3PYH3s*K< zIq(v%w}NhPFCv4rLYMsrHAD3QP#|kq`tv^AvL&bwu4UzVWe>Sb+Jc3sUOhd%FbMQT zBK<(iR`B0L(S`ZML!#l0df~<(62(pLc+R$+()C{AoP12z9L2{+5rBtOB&AdI0zL)l zb7K`89Akc|l@QmrQ3l z1oc*qpQw~H(^P18zj7tQf1EczbA93pfR|?jD zdV5C)w5KE1<(*RWK1y!2RB4drU6`AivRPpDf$LHltbtYGhzEuI5C(VwY3Y zVj(n4F7!2&9-*dPd2P> zukOz{zM|Owx6??E_ZcexqB-`n z3TA)bKm4-iSi}5h`|bT@p{1m)1t*x(?c%&D=Dy`VxmZ(i?EZsb z49~k0mVun+^>7U1%{QkSxx$BYSbZf!w(2}q>bDAGrzYR^nokXTIJ9U}?pK#r@877_ zr-N#9a?AQQ;^2?m*WDJXYze8jIG?lb&Z`^aS8ghZ+zAv@`%+rXmaj~WP2{5rAXR1F z7!MVnhapaiH@%T6VDlOG)^&aMM^c;O#W{H-T1e}z)Y`$zdj!XJ_WWv>>-ZOyv)5AS ze3uuOX?@#@Z1Qmp{O?Y*Yd+zevP8*k&FbV&KW3ab`+x>-WxUcnr|jEVmMuE@tK;_W z#*d;ava;RY+bE^o0pxrsf}8}`{sRXoJO#Xe(IC4k7IVGt<0ge{vkB|Iu9oSZ0IgoA z$F&U^L2tr@OKYfBfetUK+s%<(akcIA1j@U&H^O&vV`7R{{I-m~Y+(4s7obi}R;$zb zH@bJy{-OA}w`3a@Tb0wR*O^F!lY861m(%dTxGBj3&A5%_Pu}JVf6oY zN@UjZqaq?A_)T~dXM2Ard~ze7fmyt+EPaVm=3)$D|-@s=mZ37e3+k{ocy)7_qvu={e9y1E}BWp0G!qvPIL6DNGdA13jKmOQlE43b_E56 zMN>w)*op>S*Mb`b>-fFzd*3^T-^m#m82RgW=RtJ=p(<8efs&E;yLZY&EyKEtcxR3Y zJ87#b+Q;v%X$KLgOWe7rH{eNfz1i-2b^~G^O;9U$FRAllWVI<-)cxZji6Ffj0HioX%XH&*R|w8}i4ui<<3 zyHNEsYJu5OFW#a6JK#tghf?IW=zh{!7fhMXu3XKwMIk%GHOchaRBx}<6v>A&!0OR1(k_L!9=V5i7^_67u z+x^$h%{R*coT~&p8dCXkl6cI7;Wz_6?Q{)$Q+?p`Jbmg_^+8fiDx%AuTMYw1oP{$C z$#(C*Z>epuJlr>J871@SQ|6fOt2tFwo=CYyKQ*m&kp%620Rq2!;5^t7p>&j=ES~kk z1(W*@{zbKYozF6bZg>tPT-67nM;{Rk-SH`&d{U7P ztPZ1`2Q^<@kecOd=G9z#6@+p-TBLp;GqwBJpv}(0KnDBwlJ&*1tmkHA=(d8-7Mj&D zp<46taIa*6;RKE#&z9}AyN{1;iR{$xwbtOhD1{{`r&;JK&dO8RE%{MGAC^5Z-j9}$ z{J`f?LABL!&;H!ypw7v!ts;54P3s)4^E>tXJ5ZwJtCYKEelle`kHy5Dp&_C+iqzF@ z;+)FI(mOWOtg#0&F%M!`eYXv}RV}}-tT+K*{%8RqgESUc;;ArTGD++DG{i>!jyn;y zeT^+MDC3eb3zfQI}VJ3gc-FfHd6Nkrqk5zVu->Rz^u(xVZ$@ zWcoOLFT*UZ4K+4-qt}GQQKxTKK^cDiKrl;*vvGql*Ikstxi*EoCPd+|1C1|t|Kuk& zUm5+@82f8g8uHZ4OAw2DnxXOs%5c(0vT4>5PI4`1N2x$32dvWX zy_?Ygr@OUk==!0m-F}I3!0{Yz+qwDoGaG{O0xUu-@$(tFn^FWx@=X)lhKq>YC_y=G zY`=(TAd?fZJfW)f)HCw-G^cq6GqveyEwa=eZ^Vp8eiK*ug7%NSwuWL7DI(h85q&#< z*!1CtZ%q*vLZ)~4UIlex7vRtfUhi`RpY3&bbot=xZ}Fn|xb=2pg3H?NzI}@4WPRPP z5gC@;nzbCfndte7`4#q%TvLvgawfMHvc*x9=~vxU6nBJ=C)NfEt}(l~Oc!KvIOp-X z%_DOvd7^ttioIvR{<#f7jP6qn6BvJtgyVGc$Vm9A&K)Yr<)|)1Pl&xgp(a@9RDa7V z%ixS$NSBCGp zbnUccNnZX6KAwn)vaU$6k-TzEn+tx}$vsf~LRUCATjx1t`dPQ_^*%cyVTEC7gY!-2 z)+A;-&}zi44sL5K_)&y>2)CR`8RdPkeK-+u`0r8c7$BY+HMQyL z30VWu>V2jHQ~O8xNI-YG(wwcVp5bC}HsPZCur@~c{QMOPk@?slH6M8*wSJYrr;Mk^ z6O*C41fq6Rsya=5<0I|MWPJVSSNwf_Y??dZdY(-QK$s8G3PQF>KpPoh>2$O&V5>3`tW;FVul-j-)O61 zwI0ZicJQDdSv^A)O6sv%t=Kg1`nX$Nh}eAm|K@*~!OyX#hH}xp!0(CS(AS@%wJql% z9}*ZLFREnc=Z9C$IwI#GAWLAZd-0U-r9GgjPRl8*HF*Gu>f;fUR9|t+lA%*D;cI!^ z<1lM=od?JYO(&bf$bKeAI~F>GH4TS)HcboNB|Bw$kCUCS@)`Zd5_aeplXPcYI_c{6 zx-PN^4I5iu)=k)&c!Mw6p;oPD@s{QbDU&89H#Nn#{AezYL)zpg43H0gGFz&sAq5j9 znAO34+14(;gagW(rnp+NUa9ba88$auym(P??$PnLZ{1O?N^A_KtKzHG?8&8O%dS$z zrIoQ;(bkBtNwhm}we_ayO|9bO(EQl^JQT<)JmgH8_-c0M8Z(EA;bI>vt|I;4ae{~) zo@h6>AWJF1;?iIA3Zig5mIY?()nud#~K1T%6o-nHK&G>^p8jLS)o@gIUo$8vxW!i&oyaGKzDeH0@DopX?TXZz zr6t!;Z6&bAR%Fxs9LYs@eeb}2mc}c?K{vRiy zr*y-q(rs_(I z>q@3~9H82qNY{8)Mv0P2#|sX|_ogojO^+>{p<3zK?C!_a(g)xMg7Wn;yMdsN3=>RQ zlBdu2Im0}Z<(*v^o2TQ_F{v&@R$?CStGOG^o)_U@m(d_C)XmVF*Pr*zf_CXxFrON9 znVxzg>5xtEDMp5sG#xiP;?k(9ph=yju@3LKGRece&Qs_sqkm)jsP}aV4s9g*5TZ6R z8d^He^b)pgCazV%_LjzAI*%*x870ZZSi_}hHs~lXhxfW zP6;&IogMQJnfa$P%SC*i`mkH)C#j>diTkxxm5^g?e(ZnD(Cfsldk`lk&HFque%0?B zVB>1}QTEO{gHa$c)GhB6IB?m~0o#79s! zJ=Mz9Wu};3^%WqrXz>Gt+kw+4Sg)QP$gKjlcyosmNM|ibt9PKKW$FuJ0=^4_1@p2p z!_9t488cGnVfH zrYF*q^_|Lyp2sw!7U)(8FIGP8`A0`tz#UD+wWVi~29^r!za(sM{A@XsNoc8bsQC)p zg{vsO-PC!m62OZZNW9uz(>(PJp`qhJnz{Lw&ejl3G{~TXMIS@MO3|g+y5idi$!AnA z1+v}^GXZ{ke(Y)zbtPxc#}hAi4Bsh6*EGy zQw!HWk+$7{@lBLAdfB<{p+jsiWd2>SE9(KbgA~pfUv8Pr^#959G>xJGKMS$Om0|}P zhWFK4CecNA4Kv@doi!awiVcD^)`o<-Qvq*L$>XfOZ}$eCJ=9t+gQiN0jbDJKd1Yr+ zl7jmPbM(XncjLuB3JvHC_^UH-ji}>uZ)LA72jf!)!^bBr z@~C5xQz;3Y-&DR21@kuiOz)qK+T|)Gr*&NqEqccemAMC`hE$~{txPNNaEOK~mKk(%V{i;u#%-4vVU4l*SS z3i>UTu3q%MNT421?axeATwGpVA8olQtM1?Iu44Sfa%auj+lt>K2T8nayq@*_L+G<- z(5D;MoWfB!@+Pq}caOz=$pPIMVPxsAGuswBG+=3DGjH9XTA2z#wRMRmyE3~%=?VQT zh%S7+(M~)0x$c|UHaCZOg-7|w(#byeq6OmETql&uXe2?&MZ3b2;@^=f(gv@mjTUK0 z%u&C9;_{5aqy>{{iOuaVIdB(<$E#n~H-r_7HHx)kF<*+?o4MFyW~o;EErf^yLM2N` zDj0#A3HZ))acfL6Rx`auGmR26QhfoTVbO|R=o+*}j5c@@W8ZoxDBq2VY*QZm>#t9D zXGy3FK!5(unPrb`VeoKYoK9ydDh5F=flwBnm<&>p2d>n2{-4jQ$;ublX| z=#BB0Wl3_Gk9^nx%W)Ryq6M+e=w|R*(Nq0kMSw$i5{`7XPOzUGUOxD*0%5}F$Z=cH zA`gEvA>yhq6BDH7iUcLY(`c@u_|MR@XA&^r!xsUA*`(M+&MC%R=G6r3-xS40+n9i} z8r04z@fJmp<1&al{3(H{5VaY9OksI+A%v39R3vNeW2z!5BObTNsK`&Ho{~brl zG*f?QRFHtF?B9`Fq2!HIIHwq9{`Q2DIx>Nq#PgrEzvuH!ICHWKHTxV=vvu+13emew6(Eq_`n)Y4w^A}#wR;m^B zrs_P?AXj-hjf-pzt;phiJ9v9f!9qpW*oeOaiyhEpEYPvx;_wT7W@W|VV2R$m`@G{X zAzuEJ>c82F#Uxk^u90dXRwq32_fd+*3h@>~UsGC*2md0S0qFB2p4ZXwbs)hwIY1qS x$29Wg-;p;e&Haaya;yG_LnQK`{_=!r#y)F;ia|jh#oRnn5Op0+URE@H^nY6dK6wBD literal 0 HcmV?d00001 diff --git a/docs/html/userhtml.css b/docs/html/userhtml.css index 31d7eb99..9dc17886 100644 --- a/docs/html/userhtml.css +++ b/docs/html/userhtml.css @@ -33,21 +33,26 @@ p.indent{text-indent:0;} p + p{margin-top:1em;} p + div, p + pre {margin-top:1em;} div + p, pre + p {margin-top:1em;} +a { overflow-wrap: break-word; word-wrap: break-word; word-break: break-word; hyphens: auto; } @media print {div.crosslinks {visibility:hidden;}} +table.tabular{border-collapse: collapse; border-spacing: 0;} a img { border-top: 0; border-left: 0; border-right: 0; } center { margin-top:1em; margin-bottom:1em; } td center { margin-top:0em; margin-bottom:0em; } .Canvas { position:relative; } img.math{vertical-align:middle;} +div.par-math-display, div.math-display{text-align:center;} li p.indent { text-indent: 0em } li p:first-child{ margin-top:0em; } li p:last-child, li div:last-child { margin-bottom:0.5em; } +li p:first-child{ margin-bottom:0; } li p~ul:last-child, li p~ol:last-child{ margin-bottom:0.5em; } .enumerate1 {list-style-type:decimal;} .enumerate2 {list-style-type:lower-alpha;} .enumerate3 {list-style-type:lower-roman;} .enumerate4 {list-style-type:upper-alpha;} div.newtheorem { margin-bottom: 2em; margin-top: 2em;} +div.newtheorem .head{font-weight: bold;} .obeylines-h,.obeylines-v {white-space: nowrap; } div.obeylines-v p { margin-top:0; margin-bottom:0; } .overline{ text-decoration:overline; } @@ -91,6 +96,9 @@ table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } .hline hr, .cline hr{ height : 0px; margin:0px; } .hline td, .cline td{ padding: 0; } .hline hr, .cline hr{border:none;border-top:1px solid black;} +.hline {border-top: 1px solid black;} +.hline + .vspace:last-child{display:none;} +.hline:first-child{border-bottom:1px solid black;border-top:none;} .tabbing-right {text-align:right;} div.float, div.figure {margin-left: auto; margin-right: auto;} div.float img {text-align:center;} @@ -115,15 +123,16 @@ table.pmatrix {width:100%;} span.bar-css {text-decoration:overline;} img.cdots{vertical-align:middle;} .partToc a, .partToc, .likepartToc a, .likepartToc {line-height: 200%; font-weight:bold; font-size:110%;} +.chapterToc a, .chapterToc, .likechapterToc a, .likechapterToc, .appendixToc a, .appendixToc {line-height: 200%; font-weight:bold;} .index-item, .index-subitem, .index-subsubitem {display:block} div.caption {text-indent:-2em; margin-left:3em; margin-right:1em; text-align:left;} div.caption span.id{font-weight: bold; white-space: nowrap; } h1.partHead{text-align: center} p.bibitem { text-indent: -2em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; } p.bibitem-p { text-indent: 0em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; } +.subsubsectionHead, .likesubsubsectionHead { font-size: 1em; } .paragraphHead, .likeparagraphHead { margin-top:2em; font-weight: bold;} .subparagraphHead, .likesubparagraphHead { font-weight: bold;} -.quote {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; margin-right:1em; text-align:justify;} .verse{white-space:nowrap; margin-left:2em} div.maketitle {text-align:center;} h2.titleHead{text-align:center;} @@ -131,19 +140,23 @@ div.maketitle{ margin-bottom: 2em; } div.author, div.date {text-align:center;} div.thanks{text-align:left; margin-left:10%; font-size:85%; font-style:italic; } div.author{white-space: nowrap;} -.quotation {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; } -.abstract p {margin-left:5%; margin-right:5%;} +div.abstract p {margin-left:5%; margin-right:5%;} div.abstract {width:100%;} +.abstracttitle{text-align:center;margin-bottom:1em;} .subsectionToc, .likesubsectionToc {margin-left:2em;} .subsubsectionToc, .likesubsubsectionToc {margin-left:4em;} +.paragraphToc, .likeparagraphToc {margin-left:6em;} +.subparagraphToc, .likesubparagraphToc {margin-left:8em;} .ovalbox { padding-left:3pt; padding-right:3pt; border:solid thin; } .Ovalbox-thick { padding-left:3pt; padding-right:3pt; border:solid thick; } .shadowbox { padding-left:3pt; padding-right:3pt; border:solid thin; border-right:solid thick; border-bottom:solid thick; } .doublebox { padding-left:3pt; padding-right:3pt; border-style:double; border:solid thick; } .rotatebox{display: inline-block;} +code.lstinline{font-family:monospace,monospace;} +pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; } .lstlisting .label{margin-right:0.5em; } -div.lstlisting{font-family: monospace,monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; } -div.lstinputlisting{ font-family: monospace,monospace; white-space: nowrap; } +pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; } +pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; } .lstinputlisting .label{margin-right:0.5em;} /* end css.sty */ diff --git a/docs/html/userhtml.html b/docs/html/userhtml.html index c4f777e4..083bd90a 100644 --- a/docs/html/userhtml.html +++ b/docs/html/userhtml.html @@ -10,7 +10,7 @@ -

      PSBLAS
      User’s and Reference Guide
      Salvatore Filippone
      Alfredo Buttari
      Software version: 3.8.0
      May 1st, 2022 +class="newline" />Software version: 3.9.0
      Aug 1st, 2024 @@ -29,219 +29,39 @@ class="newline" />May 1st, 2022

      - Contents -
      1  1 Introduction -
      2  2 General overview -
       2.1 Basic Nomenclature -
       2.2 Library contents -
       2.3 Application structure -
       2.4 Programming model -
      3 Data Structures and Classes -
       3.1 Descriptor data structure -
       3.2 Sparse Matrix class -
       3.3 Dense Vector Data Structure -
       3.4 Preconditioner data structure -
       3.5 Heap data structure -
      4 Computational routines -
       4.1 psb_geaxpby — General Dense Matrix Sum -
       4.2 psb_gedot — Dot Product -
       4.3 psb_gedots — Generalized Dot Product -
       4.4 psb_normi — Infinity-Norm of Vector -
       4.5 psb_geamaxs — Generalized Infinity Norm -
       4.6 psb_norm1 — 1-Norm of Vector -
       4.7 psb_geasums — Generalized 1-Norm of Vector -
       4.8 psb_norm2 — 2-Norm of Vector -
       4.9 psb_genrm2s — Generalized 2-Norm of Vector -
       4.10 psb_norm1 — 1-Norm of Sparse Matrix -
       4.11 psb_normi — Infinity Norm of Sparse Matrix -
       4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
       4.13 psb_spsm — Triangular System Solve -
       4.14 psb_gemlt — Entrywise Product -
       4.15 psb_gediv — Entrywise Division -
       4.16 psb_geinv — Entrywise Inversion -
      5 Communication routines -
       5.1 psb_halo — Halo Data Communication -
       5.2 psb_ovrl — Overlap Update -
       5.3 psb_gather — Gather Global Dense Matrix -
       5.4 psb_scatter — Scatter Global Dense Matrix -
      6 Data management routines -
       6.1 psb_cdall — Allocates a communication descriptor -
       6.2 psb_cdins — Communication descriptor insert routine -
       6.3 psb_cdasb — Communication descriptor assembly routine -
       6.4 psb_cdcpy — Copies a communication descriptor -
       6.5 psb_cdfree — Frees a communication descriptor -
       6.6 psb_cdbldext — Build an extended communication descriptor -
       6.7 psb_spall — Allocates a sparse matrix -
       6.8 psb_spins — Insert a set of coefficients into a sparse matrix -
       6.9 psb_spasb — Sparse matrix assembly routine -
       6.10 psb_spfree — Frees a sparse matrix -
       6.11 psb_sprn — Reinit sparse matrix structure for psblas routines. -
       6.12 psb_geall — Allocates a dense matrix - +
       3 Data Structures and Classes +
       4 Computational routines +
       5 Communication routines +
       6 Data management routines +
       7 Parallel environment routines +
       8 Error handling +
       9 Utilities +
       10 Preconditioner routines +
       11 Iterative Methods +
       12 Extensions +
       13 CUDA Environment Routines +
       References +
      - -
       6.13 psb_geins — Dense matrix insertion routine -
       6.14 psb_geasb — Assembly a dense matrix -
       6.15 psb_gefree — Frees a dense matrix -
       6.16 psb_gelp — Applies a left permutation to a dense matrix -
       6.17 psb_glob_to_loc — Global to local indices convertion -
       6.18 psb_loc_to_glob — Local to global indices conversion -
       6.19 psb_is_owned — -
       6.20 psb_owned_index — -
       6.21 psb_is_local — -
       6.22 psb_local_index — -
       6.23 psb_get_boundary — Extract list of boundary elements -
       6.24 psb_get_overlap — Extract list of overlap elements -
       6.25 psb_sp_getrow — Extract row(s) from a sparse matrix -
       6.26 psb_sizeof — Memory occupation -
       6.27 Sorting utilities — -
      7 Parallel environment routines -
       7.1 psb_init — Initializes PSBLAS parallel environment -
       7.2 psb_info — Return information about PSBLAS parallel environment -
       7.3 psb_exit — Exit from PSBLAS parallel environment -
       7.4 psb_get_mpi_comm — Get the MPI communicator -
       7.5 psb_get_mpi_rank — Get the MPI rank -
       7.6 psb_wtime — Wall clock timing -
       7.7 psb_barrier — Sinchronization point parallel environment -
       7.8 psb_abort — Abort a computation -
       7.9 psb_bcast — Broadcast data -
       7.10 psb_sum — Global sum -
       7.11 psb_max — Global maximum -
       7.12 psb_min — Global minimum -
       7.13 psb_amx — Global maximum absolute value -
       7.14 psb_amn — Global minimum absolute value -
       7.15 psb_nrm2 — Global 2-norm reduction -
       7.16 psb_snd — Send data -
       7.17 psb_rcv — Receive data -
      8 Error handling -
       8.1 psb_errpush — Pushes an error code onto the error stack -
       8.2 psb_error — Prints the error stack content and aborts execution -
       8.3 psb_set_errverbosity — Sets the verbosity of error messages -
       8.4 psb_set_erraction — Set the type of action to be taken upon error condition -
      9 Utilities -
       9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing format -
       9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing format -
       9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket format -
       9.4 mm_array_read — Read a dense array from a file in the MatrixMarket format -
       9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket format -
       9.6 mm_array_write — Write a dense array from a file in the MatrixMarket format -
      10 Preconditioner routines -
       10.1 init — Initialize a preconditioner -
       10.2 build — Builds a preconditioner -
       10.3 apply — Preconditioner application routine -
       10.4 descr — Prints a description of current preconditioner - - -
       10.5 clone — clone current preconditioner -
       10.6 free — Free a preconditioner -
      11 Iterative Methods -
       11.1 psb_krylov — Krylov Methods Driver Routine -
      References - diff --git a/docs/html/userhtml0x.png b/docs/html/userhtml0x.png index bed5b30411c06478e013e4a446657647971a0591..3e324fc864b1e0d36888ad4e96e38b1bffd729c7 100644 GIT binary patch delta 1461 zcmV;m1xosu4ciNlIDZ9=Nklqmw5_O6h9| z;lnPL4%}zlgb<$JX9Z7*lPAW3??@@VNhv-0{^mf9`^*u00yY|4&qA0+hA$xmmH=1m zLHuQPZbO*r(1vdcn@`~oT_=B=3}Wq_oOxxts7BW&06tY7yH8`k)goJEcrvAJlw zOE%S!zZD-}MtD&jPyQ4X`atQ(n?_CxuGYv*=dBqdCsieHMyl_3>D~ev2S#QQOU)!nX|SHJ9plt*kqr1%@9vT~;a!8ChFS*NoAXx29bE-GV6xjg|t9ddwEn zvcz;&snMxe+L$AYplis~^xw7Jx9d6G^eYAq+JCw>Sw|NfMPS;JDi0LC@3s3;UrNr7 z#OU(raUd2pD87hgi{xkMzI>q95u4M zt+1nD2-mS8!T-J?2YueBl;XlwlMG0pMeMMB4d{ULYCN$QEgesWAGL?UDaOc(VF_J6 z?*RXr%3 zIBk&i-j`1A+4SrW6Vzs8nJ{X*WKz0iQA-ak>xyqSz!Y{lKI>C5f^Y9qijg$?OnSQ| zR_$X>&2nG1`7iWzGLTy=`}D0YdmK@B)ONqDURsvbefOU0&e5bkMamm;7aTYQ$A2>7 z5ujveUf(%my7GF8Eq!ycNsfGKRM)qS7kWaMWY=!zMb*I+(qD<2@%)lMItbTl`hXZRuaX+hUK|gl_POF4!~m?d6(I zqkOJ%w8eIJP&DyOTd&*Jdl{|GvVVSezzI05$a-@O!G$v*hI)|2bD@Z}hd?=dCU2{< zs&7G0|J{JFfTQ1;p5{c}x2|Gjrr%N9or79(i&GPmOG3gS_P|wPlM7(Uo07eucfr57 zS}I5FQF{wg`En^Kq+?wce2#Lo#dfo&?w4nD%`=_sSu0tCPwPb!@HC~QwSU@y17`vL zj>`s>m+XI~v>gaBdoXY_yAAI*>dp6o1Lp_^_K!X(04HxY&@(IR4_k-uzb8hF@rF}` z&PB!_7{D*gC99kZ{^h|=+a@q$*zqli!K|V8t1%a?0h>R8Ec6~5xC7hiz;s|wD#$n; zm~;@+0WQI~1MCiLI>6^sbnZxd|8$?h55m=qMt`fL-J{jt<*~WBx%uTb!)CL2C%jh~ z^QrJZ-6sA+!V(v6rSEfbaP*M${+YVoSC01XfP^=~e3o$!?|%S)h1>xHa^PddgB=XL zlOZ>^ePUomCh(iG&xHSBCzh>)-YNr{Q?3@Ln$NRa1-_?XcX49eQ&U&9RtAG zfcRYTLXqA6O7S%(N*Ps$%%vhS^g)jDE6XX*kxSca#YsH(nM#Gho5ebk9wV>7xURp9 z7;0mD%aC&4Pk;6(f2`=fq$fFAU+g=vd>*gG_9p=s?1K`g%&U!e2plpVV?PP)<@~@y zsb6n?%D!`XVNKeT+e?mOek(q)Q88lmIFigF+cPfwfDNbSmU7QOrudP3a$=mqilPi< zoQk^l2FUf)F`ylZe5xztyfCOA#j|`%DjYjEZ*e@;fkEsR&45gk?PWjVG5J0HkWm=Z5y!7M zR@w7&5%2-(crV-L^6Iz99?3V4m1U71<;rNTLPXy(+8L{3TQbZ*ha2%I zXPhLbJWe%ed)Zeqqm!;S^s6?GJdXQxI!eyYQ7r=1vqM%JMLGUdUj3Tp(ioD*_G)~$ z#Wc%K^65U3wY@z*LPnvY>i$u@ z(tj6=OT{Fkav3J^O2Nkbzmh9I8~&j(y2OAkoOUegGFtX9c2Ifp>)95z4-8RNp(A|8 z;JP6`-R3@DomraRfjv=$Nj{@+LwU}2<~ef4pA9}XVE07!LJg}khHe7o{H$&sq-TmR z6j>zv@d~Bc0b`iNi^6kdjmJXc4~8$ZEPuQ9dCr#!`kYIi&(T9Ml1yKS9SvL9K8lqa z&;dSU_$^lq=@@0VF8`=K>VKdJ|6^g2&#_KFP>&y=(}m)RBB#3F8p~njICR2+$;3M> zhEwl120Je0?wHU0Sv*}m?YSKBsy7RLmqy`o9byrp+tpD2+}7!B8P%gF6BhMHc7O7k zp!O!)Jr=3L=rw`y6cMVf5GrF^WtYn4Gkzt*mTj+6i|o-j)wPXr6)Qb$yXt58EM!MH z${aS?t}*c~bo8SiZhBXFk2{y{t8*DxX%NN(5QSNFoDT749bBDg@%0=It3nK=(J$5oqtO|+R*cFr8pY|qciPEPqmdh_L0j2_R`H^)^_8l zgLi*l|E`6dP6DN8k~@_Qc>G#`dQy0Ls+fY>(7VL>3`QQ~P;KbV;@b9vnYHIXP10PS zYAk#KotSgxq6=9kmnZowWJ}w??KqOm65EmGYWcgkxV92orvjlbpk&wuaDO85)t9C$ zM>~rK$7FRP$I7G}cVXJoV_E4S9+=xi;h_Wg)ovTH)LxEX>8UpFROl9CwKFfVU067F z#B;g&YLanTtmuY4a9`P^GTgd9Y1#=x9rYFek&9TCeDhppJ@FCy2D-j0F;#BnQ?gAQ zF@|E3?IYu(Cw{#6%0lKuWx-O%n>hJW3s3v%u>z_dMjXkYZLbM~nE&I-$I9hP%_*BGSJN0i z$cLz-&715jPdi^2GRD8ejRvjdg$z7mF4;H_IXT3^(EEZfr(JyuPi*^usgtiVJQCC6 zZsDA);wu9?V?d71sqj$>d{O$BQa%vy5a+`XCj~c>?t_N%vw<^)2M&)^2NuTqk2Pjv xO&<&cS`_#(3xNf}hR;dD9~$4vrx#tvzXNInU=M`Qp1002ovPDHLkV1fe=HUj_v diff --git a/docs/html/userhtml10x.png b/docs/html/userhtml10x.png index a0d4d0d4f3b6d7ee9febce6cc9589c6ac95f3481..5db77543c69d274ad7afc3ec02925852d8ebc413 100644 GIT binary patch delta 1828 zcmV+<2iy4F5XTOXIDZEDNklWN^k2_{ae42z+D!&4nE=#YK$qhrUK)j1N1Wvp}BIi(tn$!;tO>N!fR;wwGGMh0QFw zHZikTyJGtDIkCE|(TtjEn!Xn zGPBzZD#plth6wnF+DOpm!9jl?{HBE7{ngBVGP5^58-LX+vt&8mmhX=M)@D^mhGaBo zn>_@U47nm7u1{i&SH*tgE(2)oPl`GKJ-jGo5JI>xGdvxA(GQ#X&nm~$kx_ZqV*!sK zos#_mcNv#>KnTKE8NX+0I(1iou|4(sVotYfuJmCwE-1bDSXYRPvL5K3jRZnY^;CwC zsdvlE^?weFq^!rgWgQf7?se2gzV~vj+MseJm3Ub`E59=~uUnS)eJ|(NS8wh!&8wa( z^FU^}``k7>cUgP6=EUMoW!lhD@65~miV-lgFD_>h|3L0u*ROh~))kvZHkRHe?J@@z zD9`94kG)TKw1L6Y+|rmObJ~GBU%a0&S0C40E`RuZZ^#mP97Wm*a=L}2oCznWF87a$ z)eR7ww?REtGwmb`P^`*aM=7p<)7u5~?xl)wf8{$hXx}$IgHYQD-{*H7U`+Q)3#>UX z&U(IhB<5Hn&q*?*-ce>s7MnKTOU4+fUtI1J00Fc06_^A4!U;l<*Z!bOcLkDXC_EU( z5`Vcs_LoJyUaZN56uO{zxh@8hiQRC0p3*xL=>e|!AZ!uHrD7TWlpC!#o%EF!LWPeA zMT%p=w20s!w8H8PLaq}R-`Cz+_*gaM^Mp7hYT_gFOlZj%(j%*`+&3W%R{ZaoBnKS8 z`4|B*O~yb5tWF4Y>CTZ1lw|(W+pa>_F@IQ%3pcfD0iKC`nV|h}bm>%M8IWI_RU*tI29| zh(>79r@Ky6-k5&*V4&vWo>^M##P-jSf7!=atjUEky~rvYvr8t{9E2>BZ)k9-e1F(n zmcTM}0n3)$9Lg^yU89o(4hYIgoyNd|8+>O&un@0G&l5uE98Tf+_)dtBJ;F>J89cmw zLuO6g$qVtU+n`4QzwZcRH@_TEPtmWd*L9QWL|tO6xWW1boWLw2au``z3Ro&sk=sG zfS1XOqA~tnti^>y^kNkbS#%U|5aVhfxF&a-xUC%L3%pa4o%&tzHRT*56Xy~7bh%Ob zYNjY#kX2$K62@E?P-<4}03dKCaRC;2g5*ocbJV7ONAiJrX(1zHNPASb2Y(niKF$M3 zp2wuHuU>e4x@$!S$rx!7W{xqp5JDXIJP&x=QeHxqYQ3yc%4PjC$(%+TPDCfD? zX{E+^meXkdzF%c|sdLc2Kj*3xR!(7-&-%WT9_xEP=b08Q-hzRVj$^SyIOK)vrW}vS zoFH;pm+xsF+S5><_tX5<4u2sdWxPw9+|y=G|4JW}@r*hLX+-9wesj6n_P$aIQSYKd zKDn;TMdNDfPIEm-8>`R632#GVK|YMNmwQXN;PcIcF9-0gzE91mmgCZb>ddj1b?fga zsxJFp?p^Uq=etCAb>A$gx@^jQ9~XSSWsJTBUh6b(4u~7ET9XlD9Dkk5-O-69x?3BA zT-ciE#FX_$XAp{8A#9HKc6@Wp(p?Sda&{JMEEhHAOu{)iE$fcn2rdEwN-23dbp zNE^db#{%8eU=OzBq7J==b?M=3#S-z9R#Vw(mKv3FS`!c5Dj{TGFYl4#_yQ z;dkNnLEDZQ*e&|$0E_-n=~hl2iKFug>5gUCWAqb%;&V+kZJqa>4A~ZWp!rQ7Lky zSzeSRIogQsogU}n+oiuiZx8H?W3M+?hM_5T!tbkHsoklaXILje73M;)clJb=#b;9Z zO_Swv)QuWd+ynFR-#^l1Ri7v(2M!Qx#N0i!;21nvN z@v|E&FoOrWVt=P`Dk)Hl-A~lue+TEn&fqDhVSKHm{j*J!#$Ppzvp`PNrgUV(R0}%e zfv(t5g!t~7>|1I~F3eWZwc_~z9h{^x8r~zx>3vp@)hH9Y1y$mK!q{M-afN1!TG+@3j z*748C`?qk<)GneSp2Xh2>V?X2X^6rnJd=WS#)o`2m~W_L;P4O66WQ@qboRaRR5<*$ z_I)Cn*BY_~4Bd_B-a`+>adx^;HT8UjkA&wai2Rk>XW^Q5Lk-tKyy#8w0iL3laP+~W z1NtKuXn%~|&*AVdx}92Ivdr!$l2gkSqscwRhabl0f+j`&aeR`vIO}EX^C5B0yEXL* z0(83$2M>$H)4}U$IBFajjoy(_^$pKcz()>GoO9*J{JX}G8E8Ge?HzixU|$>E&@sur zQ%h~d4(w&S_ddeNnl8|&k=ULN-Vmb%e7yFkX!_B~To|9iHGfVKFXJSAE)k4}W0C1$xa?E%a5(q%ZSu0b z1CZAVkJ(A$ct{hUSda)I8KMsk)wkGqUVA4-T){%)j~6yx^5l4GIzo?6LopDZMVjUT zvGMfk)*uw9==1j`TTg#WL7saCF^n;Qqb>rz%!ecvi=vShSp~3G@H9v5Xc}eMyfu-cr5G~iKfWn0>5g9G>*$Qm zg>5$h2HJVZp=+A@mq3nm#JGGH^?eMs`CXq8m*EepW|)e`?=+o8tUxr#yb#O^{3e*+{Hi^c;u4~?-C zQMypD!|UHyze#do8|`JJt-`K~ek$@>#fT2V*%lbKElW_u1MsBD4n6bc06ae<-%375 zpN}~r;h5`7!`K=cc|x)fdw)55O;9_6pjLrxCk+cdx7hji@Bod`RkCFVehx)NwypF| zvU60xf2BZT(ku^1a+?bry9;sm&=xylo#qj`A&YscG4=V5yH`u|1#$yy8J`Ps5CTc2 z+LadgToWCJ)2J*YOnNg3F4KU;$2Ggx_?vkbBdRMH{P*(+Vov~)tzuU_Ph6Gdo9t2+ABGOW0%e;S!=%$ zq%>`P@i*pz*&B{7>dXb7v~96chXChBDo<~a)51q?Z^nhedtglLyqqcCJ9A;~MCaU& zlD&Yl9Slm?>b%!Wz(PEQ2L{D1ojAsF@GV$rF3g(fN>n~lxPSU<`xr@mLm&Eoa|4eR zCm$TE@gEh~h95kPh+S$M-(cL#4TYjym^IO{ELLUyH%d`}NeHo}e}XX7Ne_lke&9zR zKF-psy{UDidj6O~Xy4f7=8SgdhB|SDxe)B#(nOsC7lO@uKtbcy=0C+_%nvJ@SM-t2 zFndLRq<-6k+JEe5%Qwd+JS&+y84Bx9 z_jK3>$*JhqN^-7S`Q?#w^uF2x6L?@bqfg+(u_EI6oL>~Vec+Fw24A<0)#Z2PUn=^; zh^(E@Ii_s-Z*d-m#lKjKCDDJe!yBdx=kr*}Ij28|PBoRDtJDQ1@W7Jj6F6}K5qx;s me^m?5cvfHjRAB<=BI18VNN~qavz+Y!0000OK+OO$K+FI#0LcI{z%l|0G7uvW z$N)1C$Urg>$)K%os(YE8n|)Dms}z6L)!ppQvtNJroSK=;4OQJ0k$W@yF|-#|Jra=% zGrJw?yKJcHwur2m*#&E>>Vb${nc2;A?4@hHmFHM&3VdTbV}BG1%P7x)Je`F~x@G*xxoz2ujOyhA!>hSxN3VSn~H5@tn9~nQ`Ag6GdBi9XNYi9TUemcCQIr49m1kdey`x?&UcYH*IG@X8= z?lGP*7)0KWv3VcD<#zP(IhuAn=i8GfzA0ZW^LP1g=YL}ot=99tc7DFTevf&F+>lR< z_5HWbh)m*q^uuWV95@#5{^WDST($46^;XsVJ+tnfP-TO57zc{&0`!1|55mhL z>fyfmmH{qK@^iEQL?7-L3yERKajY?}X15&(kXxO;+YdHyQgGt};6PP9^-!H|i&S;5 zhZ`GY2gagD-7*cq6C*-7`+yc60}QifypvEwG=HKD9}K?_aBLpsdXbkjSn!x^6h0k< zUnoQZ0O#@{@yUU=c7Y293P*p-Yd%(B`$1Dj8jzq*P_gA)ukY7Hq zKyX3V#p<3)c$6TtK?$?K<#g`n z1AiFqRk4}oc8!b}+CZj7pI#)TxAMh1`_bZZGYeAxT}l}p!^|2Y{ASm7FDfXB;GV&x$E~)!ru jmRGbFu0o#>)%E@Yiw4SdvTOb300000NkvXXu0mjf`D-Y` delta 1291 zcmV+m1@!ud39|~2Ie+CzL_t(&1?`xPQB*MyfQ6v|#SX9o#15bX&<>yj94cU-1F!;t z4xj_Y4x|H-4)FUj$s;fKes7L@IJz^ryiImDoBd02E02$lucn5Tm6cs|ctDq%K3$+a zgyss`yFS`w&!gKmYraOeE7u0!r~3~6LwwQDw%+QZ7aA)sDu1>!#G%TD?v(BfzSm>d zS^~W~t!L|$cs8|=&r*KCCwD4aJHb&0ZCk{7w=}ae2kF1$1OLCDEHJ54an=c{oIR=2 z_+^=`m1Tk;lwSBPcmJWVz*>PX8{Nl2Z@Ly`ey?TTVE9a{IlP7TpBkPlQ^_(fM{_#_ zXUn^d9M6$a5PvJgD4xHGIAkiD+(hY5v<)*O8vFzJFWSsallcwC&61zAw=^#>;uS!n zv5#oV2mpn3JS$ZCExcoY2aJhp z*LSqfj6KlL2QbTc7<--MIky|}TfWcyP7T~^x^!OM!+$v6@)!^~oxC}X<#IdG@U4sr zPviE0mnVBKNZ0vX{>_{+18qLu^({Ru?5~S%o;l^xJW`Tf)W5bzaD%Sz;>m5XA26dKB} zn`FZpN8?vcWW2njq-Vq?^ag`q`K^`SFp9CEF$%xZZje=8h+Ipjl;C6i0bb_~4jtPD zmiDAa8%-ETo>}92xJzEAj0H`4oaOsg{}5!18-Kv1%}tVEN9G3w;~IqN-wqAO9t-n;f!P$i zqhukYsyygoBH0Z5F7S-4___$43z-^;jNl;#^epTJl<*INbOF$)HZMLon2l{9zky+- zR)3|ufw*_GIs+M&bagPedU+bwd5)o(MAFfvekICwZ0wX|8vCIR-Poi5}4)WN|0 zUMS;O0O)pUhqPpeG}YM$r^2K|%$mMTJ%5I#b|%5A1H8?pGQI^~Dj`qlSm{F`(3jx& zoe_$B%XeFOa;o|Udb16VMHi1J?U{m1JB7+aZvd*R2@;MW`e6Wb^|oC0 zHyZw)cTIRTOzRuAOD=qa-ho{*n&yyeRjFk2t+Q(H)F!^JG5CG3*Zu=Y^o%#I7s@Y_ zQ^5t1m@;~-4ps-t)o8#d<060b3V(}Xz&1L~Terv1;Jr@mR7-VUx(8W<4SHNyVMdkS zLBd}akw=!#Ff%@!wL{cPXX+5zh11{^752H|Su)1Co-;F#^o1)pu|P9)o_$X0RBdLq z;Z(2so;|&p2c1z{{!OWD7Y4~quMb*6~AVE_OC07*qoLhu5Qe8I0>E|#M+O)&fDFK702$zN1TM(H z=LmRYaAaV(415_FUk2m)rn*(5-qC2J)t)3TU)9#0o;&)xr)RoHCo#sbHX`yogm4>U z{5;kc5qT3r_!MKj89R3kMC54*;X#b?6TdGauR;i4VvN_PZhy}d9X&0&uK8?p*kXIP zY@>T~L|zg#*t3>E05QgQA%s6f_(Sd57~_k9;{=gkeV{l~ zMs=LP)^xqlwCi@lGt8pmYfG?s8?81o&_YUY#GiuJ7@p12G?Egchj`e(x{a z$kA(m2_gI(LikuIE>fYVdY>xy6X2TFn4))s)`L2NUe9U`JfuFO_Qi$|v15$UkGgYp z-5oVw(=?R6@kILte#16AQ|+#`rbR!XK#8CG8rVK;q)Ur$^qbC29{*gozmn7Qw|zng zER}H{%YSEfJ=YLO^7FVhKZoh(Q#(iR(QVs)zO|^xH_M1u`LXFuv==g(I= zZ|*U)LnFIhpP~1h?Blp@(|K~93!FUvi;hPCK$`^VG))4q!A~JzTG*R2Y%CAcW5-e(j7$K{2CAUXM}6Q zBE7u>um-Jqw`x74qe9^R-8PiR`pyHkCQbWb2Qnt<>cQTOmLk&m&K9ZpnjU+v?Z(G$ zoPXai6CkKwnxAK+8A1kHANabm85^ZEfrr%TBoEOUuuxF3<-AAgtbi!J6hXNV5uz!A zel8RW`7J*^1zym()d0e@jDRo%X^c(Vs_0aX_Ze#UyAvAqZq<57N8shGTw}CKDU}BA zq2g?_t{(M^$C`rV?^|q5UEQ!3XD1fomfOjJ!r#G z4zwUGdN*mUdFL1h!3XEdozXF>>yi|TZV8=TQsg{kduqOhoq7XgHjDQCrFNg1uN$k{ zC|_EnA(pET;iF?hbHp^ABEoCOOMhsdt>PXVL=`3vGGU11V-VIxZuc#!L{ zsTj2aoJbuhEiY=P%|{qIJq1Q`e&8IICoNbQ%tCsB#n1xVOTEXp!nFp5^oFVuW4sg* z+Ri^bt;ML`jatKyj_ATlL4OJe`hEenvp|SVin9+5(vx)ED8QHoLAvRFN9wvuhJ0HFcIrPHJ*>Qx9o}+cO_YC}v&z(KkXw_f< zT6p30Ro>kmtu<%?Y%j~Bg|A_-ee!AUR2@Byx~}qUN3f~(Zrw)z0g8?5i@c(xvj6}9 M07*qoM6N<$f`X)Cj{pDw delta 2005 zcmV;`2P*j54fhX_IDZGJNkl0UXys(gBxrKsrc52cZLaselI^ zz*TS|9Y6<3=-}w!xb1-Z|AfXT&&syE>%}&S-cziP(P-vBqtVBb?Z?N*t+CVg_V#

      |qnD!u*D-bT@|M)6 zDDRYQv{M4=CTXI<-jxIb@ z>s~%ze^&XXZllqIfvj;ZM?7w_iU9nf%xNG^(Eph>{3^HOScuP0jcbFi6+%};*6Qm; z9JaPLW}{Jqfq%e17XX(XAaEV1T^|%Y!WBW|IgC^&M4?TL5w)k3a zUYP-tx6x=P0~we{!<84Z49D0*@!=lWE5#_?Ys^%M*MG&)>$MK9p4YGX69RLwQ8*b$ z*aL}ur2LBL7=?eU`ajA`FZb%_=*F)6xAO1GJnT`gl4w|@pOxVel4yARv&4Mgtdf4? z*pF)aL-{&7Xi8#bUg)_hgqM43EclLky^wyzco$&Zh$A?}@dRGdYhih;HtN{$wKBWz zLfJXfYP71rzxaVE@;;DUrl04x@w#Lv!aLG+Vl9EQI7USXCTC8mH`1S- zpK)BZzPNs>y>Rv8CHtQ8iE`yTo@NSl$%mh2eJ%F=73IXo?w#Lo62O*M=I4=?5B{E} zS=eY7iBVF@qcBQT22{I$ytAM44nz+SB!57uT@ga77O7~6rT}^?UjCtwxo|q4_ye7V z#?p_m2nbDyD~QR_L=)P@j|k4+Gqmw*nPie=A_F)WFWf(UQgef%b~1p&%M_UdAF%=a>doRkP;iYCteR4vu!K zBm!D4==I_y9;J8}Vcj;l=BiFr(i=x5+iur3bmhLNpWUPPOBqQ%HX(wDPsn_ue)Eg8 zAD^mpOgl=-8f!e4d@bo?U2rNeT7P4Wtll4julJEDr5)Qw;baiCW5+C6c-((Fla%BU z#U86zjAR}J)3ldXT$%9%6s{fBmBZ0c?Q{moaw;Ai2a#bd2V%m3QVHHgSWBKZ98KJ% zV2|^q&S-@3+li~AU#lAZWr>Gm$X!rCF z&SLkcq`6jH)M~LJaJVC~ld*95Q{jq_jLfmrM^H#{w(QQK@qkUXE6ZnSja>!AAjk*UC-1|yK_enc#*Z}$??e+v5jYp4+zkQ0AfVRmv)Y3 z_u|jd%K%PDLXo+?`NpO?{M1LV;>s{uD#W`4YjkMB5r-ucpuv+suuq+&Hcm;*kxRXB z%`uo!TcfW<+)%}@i`=P>%`059HO>c+S=eYd8VyD6;$n$S%OCY$;D1EXkvPH{q<5#249H-#xN} zhimM_TXL3QjYLg2f(ySCgn%G^T-mUl1ww3+J$-0QZ(KEImPr`2`C5~mXs>N<;=?-` z8NbJ8!$WNZWfnFH!G9nMo(kV+Skcx%@PvS(w$24AIu<3nQ#Pr;>7_f+Yb;LAqh6Ck zXZgm)#(CQY4b>-^!MYP~(Y0cYJfg|s7;#C1G_;-k*2Hxvh@O>?aLM|b+Ou|2yMOx% z^0x4qg^hNe74_xGzQjnn+|%8Hqp!56Wwh`x**(QuWY0Z4D@dp)E_o5PmN_?9BYGB nDa3UY-Mp+l>XVXp>NffhUOw&1+kmZG00000NkvXXu0mjf-bMPH diff --git a/docs/html/userhtml13x.png b/docs/html/userhtml13x.png index 4c70ecb36862a92f4fbccb9b30d3c31122b04814..108c35cd4a3b7941e27517c64f019977818deba4 100644 GIT binary patch delta 1160 zcmV;31b6$13Zw~;7YY>!1^@s6hkiyuks%m=R!KxbRA>e5T8nKIF%W%&C;&tUq=P^z z06GYw1JVJA3Q*`Ebbv?)&;cYJBvb&QgF(-FW4vDP<-FN&7jaKo-P$`lGq&fuiMcy= z*c{Aks}8_+2C+Gq*=qoM?*4XjS`4u>hz-KbE&x1n_um^-V}N^u(GLF%W_AeRbeoKS zf!IZe4Z+OL06gn82s3*F;1z(cQhgu5V;Yynyx({Cy*W#}Kj)<#ZU#k%@4dz#9Ju?F z7?{~-0Ow^LqPbbvdV?@CT6`2;PXT=Ac;;qq3*Iq0^eUsh_*A|3yx5<>x47xWRvSbZ zgS#Ka!W3$M0C*U4cL`?31DqQj;O>!s8p!Jb0A6&G5}BqOY&*C00`R%kjrkD(d-vn| z*f|=2nf4lqTLW;jAipxc??FPZa zZ}sS$V{Op|Gdu2jp#wZ@GrQ8qrYTT<)I&>vPQi$?7*Y+PyhiI~1^coX6SP8q=JD{% zuynRQ41J_$=sE!KQ@!-j79%O zzoe(tCwkHWIMK6|<5%$%?+3wuTeZRS7`J`_@N+MfX0P3(Y0Z~B=Qi%>>Nh5=B~WB_~~itiWk}r2$TDKsm3?m^_SFf)PLVgB-}JYKe-@^N_dZKP&cu=%(g6(7aB&rr_ zW$PDhR)c5jV+!T)gALoz+*_+v;%1#z-w-H!ZLrldvlF(s?*3uz7iU89xL1s9FRx+4 z^`>@Md&bYu{F_a4g4X>$#xu~zpB5#>M;@ek8U*uu4&zhwrFBlx$OA?pRNlmvGS@4L zLR7GIjiN>=zje>F)SADIIYqY7p4PI8w8FKgXsX(Wd(P`dsjip*fA#+m{EF!AqmM>A a{KX$_6-f&)gHT=o0000e5n2T)`F%X7xhynt103GnC z0O$Zj1&|IvRDe=KpaKLP02T1)fKUM<9fa>Qw&mUR-mWi~+#;PNpY8GQd3ikc-s=4P z{9s>p=YiE~wWtkP?E_+`!SDj!|1oj`76bzcX&k&cm>FZb%5Z{@9*cuS5d&#3B3a_4$#1K$KI#^&2g(EccA*H$e10zZD_*-*4<0Y;q`(34`|6p2a%Q_P3@us zA_W7+9wn=wd9r>{ZzS4(*uF;Rhpl>lM}NHUt$V~c2Sc3u+QZBqF8vL0Tn0n5>N)Kp z&N-fK<${3d?FMd|9S-nEx*E&`hx$T(JrgsgN1q%n`VE1>exsk_gwA~!>88n?Iwvh% z2mswbp~?}upI%fYc=<5>0I$_Wd8PGB&k#RQwDw6R&6CkoaNtg}3i5pbo^2z47^2~i zY18Af$roeb@P=i>-K#40v(WlQ=iY6Rf-s6L0z|@hFXJ zZCoSHU<^9WX~Abb=ipl~@b)mddHUmLZEeJS8ksY|gTI`oxCd^ue4YD4eNnEBbisaN zuJuF2&f*XDAY_NAhywD$NORM&k@SP{aZW`;0K~C|I6uYH%~4hkhC+US4n}EZ1@CEb zUB@{krq&0qKGn^r;WwqJ7ghZRK@BbVrpBAO{=pC%4k4{JJMQ~Rn_$=VQEd!vHh-}H z1kt*YwN3I76;HkNU`V!8m3c4FQIJM9`4Sy1@)-pfF zvz7wIj`Cy96xtdLDHDZFV!cjmt(}Znv$LeBBUPW}gF@dEKFAv7y3_|Zde9Q)BJC9a zoRR@{GJ!%V-ivf|cF1W9x<=;AkuYV;e2r%^cr3zYGYC=sP`cI&%g`8TErx-d7BKjc3*1 zh)vf~K9$fzimdR?nEqUU43DNw>921cBhPH6?FNVa(cq03{Tw!(*5K`2FN|l~+Siq} z>M)!CHNgJk3_=UNKd)N&&TlT8Sx70;{(v2H9TwI&dNEHKkE#DAT#pNp||KC z7gd__XBGJRC(sG&P3Kopys9kksP~aS>L=#4*P49hzm7K@TeO+xO&DzU)5afV731k2 zq_s%b`8qPE@kk%w<(29f2D&F;aAKet#b%HEUZHOmQo&kTZF)~cI|py1r0=#{bk-jA z&i2$~FrTtWz80Yw1p3^1q3eS6P5c-A*EPm$wQ$Hq)$07*qoM6N<$f!1^@s6Y)Sn>ks%m=b4f%&RA>e5nvZQ2F$~6S!~jKX02}aU z1K0q>1i%JVOn_j6vI5-+LZH zusv;$-2JPWJ=>lIeY)Ep+S5&wyT38Bvk=1bo7Qz5@;!2FkKFx-nLQ36Tx`#ZKHcem z5AEqD$=zR=+3~Ih2DA^3?a_NP`>=_60*o_@yPv!JM|XdKyk7rO+HWgcAfd+d5W?F{ z)w}x_WP}i2ma=;NOXyZ02N~h+N4idRKUgx+9V1ZVa3!0lbf|b*>h;g2ov&5|m4{`P zx%;V^{W7zAvuC;^u+g}E{x?p_HOQcv7K((b#yeue7ztvq^tp*T8=HwQH{uBd!BBi2sN=YU~{O$v%93S;1be(^dUxT(U{5 zD|MlSOo~R!;rS&AA~@LdQyXJbFko;Q3E}|Je_{{&rq-b-|bvvgl)rMGgHW zDJT}73Bx~{4NK)BUrykG?P@S7@sLe9AbWgeznn3-I3@ok&(c--MJO;plTuGRXQ6nN zs3rd<&vSKUC7M)5h@@P90Wip9LFjqn>)j{hRxru?;_b?m&ZU8o5DI-{+jXwli$qe$ zXEC7^@D-Bue3kn3P8DEjZ0-A$uB=R7*)l?4M7ysgG|(6@SMiS--jSP7r$UG_TS{Gp zE!86RP^+~=TM9u@>1+AcYJ+SEh4nm%8m->3#Jefmr*vh*G?{#V;K5+QVafmwKzR$% zepm6glaveuReU{zWJ5Z$T*+tO&Fo3y)x03CyS39<5TWB}%>6i{7 z2YzPm5oJ|&<9Dvrw!e5nTu@{F%X71L;(?W03Gn? z06IuS1waQxRDe=Kpake3p#w+<&;cSHgzs}~H_I6|DOhJ$q(AE4sUkt0RrPKGOkF_SG$(A3IElha%|$ZW!4ZJKjHL$ zj6JwshU2na?k~&kz-_Jv2#jN}U+-#QXvZNq=I-Y5xDT-WxD8tajDA7?LqDTGmLJb5 zeO+05ajZn6Por_qd48v#-sP+t+Xy$0_pi|Z-574pJ9vE&2d|S7-=de8$uUf2d}lib zm<{Kpj?911|CF?S8b|$)me5&0(BJldd9~<$+}L~{j+HYE=x5j)j6Whf&Uo%sLuWr* zzhl$Zmr!__I7jr$(g`hkiJ3Frhfi70&!0H+NivA>6*Q-J^7nzHXXEMd*bnvwbdvlu zBHrPT-v#;w<7@o%tDvnt+=t+}YFK6FnKf%QoML(c)9Q1_!)2dcHjR zuE16Kf{}l|oH16Bm!-V^IIaO~{V{m;X>Lv*zsX#k=;}5IZfMyzHQv1MCyDrw zlsvWBdG-}!gArB(|5WKEj&-wSfx8vR(@n2!mj4*~l9-EgOiP>8iQEDg3=vPCLN~rNuUR^3F1woso1LdxB3DDr_2aow{!C zRMQE0+Mv&J(l9pb2i2s-2kdC#wbpGyACpxEN2fw4h}J1l%*(QWrop+Qv=jqVS8c7_bl!%J4DSazFZf28^iCz2;tzh1c=DNAx5@i4S!Hl& zP~Z7E0C;Dhl-V%I{F-vCsqw8EAF=7!%GnCt%^g$>Xc<$JUDCj=y-DTzb;iiD!fCgG z;&?QOBTiq30N`nVAa{--=Cf@b(-l7IG)vZffc~S#dx5S0X{bZZe4C%6;d3_P71GBC zVEQw{H+-KbTzo3)S{bYhmq+Q5hYo(k*ncjiE=?U~HTe2@ea?Ez{;r!xne8+EIkHFn z1V(%9$zK0=^w7CYn|XW;lkNY^mxbbrvnmWqU!=PUIQmS})l|8Pn@ij9B*#ymq zwyNYi4r42nO4j?}? AjQ{`u diff --git a/docs/html/userhtml15x.png b/docs/html/userhtml15x.png index 00e0d73d21ed80c1dea52f8a5de0d638556e0cdc..feaec7612d624612134afc1110c39749b8285ae2 100644 GIT binary patch delta 1321 zcmV+^1=jk%4ABaZIe!C5L_t(&1?8HJan?o3K-}h zNd>`lKvF?~4sZv6JJ`%~v#U9sb<*j+AI0#@Xa;@gq`Oyp`zNcJ$=0aq&mwYbW`8fu zMOB}P$Qv`eS-N%$RP{nc9+=r1p0BExBJ$46KI~&YYqaX6MStU6*ZE!V6GI!M^6OXPDQ+?4IR$H*2-(rBUN@UFG>Kn;V{pyfm|`+ALM|z3;#ruKzRt z;TvIAt6sLycvHhu)t7j$1;GfZqYxZn);+*CL8Df^99`p+-kI)+OEdd^?p|j0r1Jk5 z>~Ez}e#I4c+r#Vx%ap??5b5SyJ);b@_4FxE+S7wZ{4s(Cmg&vo6=_6&W^?AHH(NvrpyEQP+a9DhOMS;FI8xZ#|KH}G>7TpASZ zfwwNqt1vhhGUp!=d4U)%$XSt-VeGXQ!*etHLnImY==lZjMA8On#@F|vdB6eimwuIL z^H5bu9mus5jv}olc_2pgPoneN~r3SnOtokdm)T%`kJwXhgZIMFc0#f z9@son8*DX2bifmEutAT)AvCUVNeYqK6i$t(ca^tO@B}SYCI!VJ-V~06nTK&uEab40 zYk$xr#(0y<&crGiLVOJjMpX&l%z+KCIOd4l+3-Z4i8;<`)yv*Au7K9LBOBHb#RVWh zThyRCGKqrVTAw9aILPh zVvLthk(9%4IaRHCIfTX~@x3~OVl_FFWPf;WG+5)3$0H#g3KbH6$=}LrJyvv6P_dbI1A7NY^LN3ONf#f?B1EcsLM+2_O<9vX39T{=p#edGz zY_eh0C5X4QSw_&NQB~&79~*2zRIJJPypl!J*bHb@dhx1O4j!=_L&t#M*_?Mcq{by) zMUM+R{w!*%LqxvxK08u_p79htJ>y!&cri8HUnCVZ^huk>*RJc;is#qvx%B&-#;xo0 zsK#bW&tn|_&U3sic%6C1wcuqb7JpXb&FTBbE?V`{tuc5SMt+V*C-f7cQk2`;@CeI! zm~O%sgaNGzv-h0UTDeYy+tutzV_H|V(@1SNI8XWPriSOa46Tl+UR&Y%Ut2H!8tBxD zu(tL)cCfR?d^9jKEsoS2-_-EHz1CY#Ko{{OobQ}vGW~yndF7u1%kys5Y9i&|$XC;- fak;MYe3t(J#F`^9VG4QL00000NkvXXu0mjfvOS?9 delta 1561 zcmV+!2Il$E3cn1HIe+mc>J=nE~bmskNXxQygtcGb{N%zD!_bczO6t>(=9(c0p zLXIw5Y%`X?{-Q>fiwXHZW5!Q%zY(ii45q+%(!i6$7Xs_502oJVas(?-?U>bkj7lk5L?L?&nlq*Fwpi8Lz758uH7(=Gu_Fnds2vdf!O_?1e^*<3gVgwe_+*=J zYQLya^wV!hG6ckXHFOwBh8F`T*8AH#@iol-UA$k_u73jnv54RErpR$?Z;;OHah?2# z4j8iephNk%6pudG9;%&*_oypW!s?4;t}=XWV%N_eFz#B~_@42OWi)|vB!j5CJdki0 zenT4P^T4edz+8rTd^P~EnEq1dJjf7mhmwgOIKkfaiQ&EGeb6|8ovg2ZOrO50 z-+Au|BYz0W1~%m~!e${Jr31AVezWOlPMfeo#)H-6`&>uExXiJuqt{t1K3NWelIwQUdbKr+`F`>1v&UHMA{YY{j(@v-$W7)<~Z zd|kvb?sjy^7+?C{>4punN#8~tI~=5ZiG9ZhD;LCPqvg1FVO49=C>XaGlYIWsGvL!0 zC4a$kt{*?{c*y*F;Qbao_K-PV<63P^PUCl4fFpyQZU0KB{}%q5aK?~~KM>|(PjODB zVy&yS$+QF>0ZlzrLu{V54K`Sp^fXyq_K*o^Y|y>X7#MdLek%V6G><8g8qZ>B881vC z6ZCn=)Lli=VtnNFf|GB+Jg>}#)s$81OV{bDq<(Mo{bVFfuUwcp050z)#%Z} z93gF)f``9I6oroFdlv9x-|K{2oVUMLn{q+f!KUH}lV z>clD*8gf4d#`*f;*~jb?bu5`!;K>MrhG;BhymQGUHBxYZv)m*5m1I0%bZf+%Jou2e zVEs=d`Grp*dN2JP)gP15Rfn}&Kgp~_R(I%AkBz~xI~B3PX=HNzBpVSh})u+Z@<3~zI!8p>N+<(dzN~Z@s1B-)s7$w7e4G$)n5O*|wDFqia@i}q{ zRstm6p(y6%gM}bu;&a}Z`XMVOLF@z4$IH%1(1EcY)Gp#zz$OeUM&k-&m(6 zlXly|Ufx(O_`pu~fT?c=u*$P|Csx5UHdH?6K|J_u&Nb2+qhXvcCg1QieSfi&0ld{t z(qzm(wQFlG&fM0_*F64y8pUVnCPO#+bx%O?oph`9q@xNe)~{U~yVY7T7QdIDk|Q~= znaqFYaUa)s;_aP2X*YOg9J)AJmdrbtP!Ew9Fy1@&w>NvY4t=p6k7 zp`CJ52Oh_Ad`zobrr;1eXn)W#Th{!)mKcmxz=`8_FzpCqK3$O+DQ#%<7i!aHJOvJQ z37 + + + + + + + + + +

      +

      4The string is case-insensitive

      + + diff --git a/docs/html/userhtml16x.png b/docs/html/userhtml16x.png index 95794ed065cd03520d2697dbc350ead02eaff06e..3f5b67d75c1613945ba8539a88590579ed382db8 100644 GIT binary patch delta 926 zcmV;P17ZBp2*?MJIDZ3rNklXf=m4+-=|IK`9P9wB zfRPTM1BDKx106bu^X(*OSnrOPt8o$ERL$~ipZ9kU0_XB z--*aKGrM_=byeMByqVqA&n+^?Mr~}WsOr9|UQYG>2(pMgOMmZIU(d?5h#U?Ku5H%W zgKQ4|X9B3|3lTZ?S0Q6o8m|JZ`DN>cPCC$Lb}J%BELv4hMOHW1Ua}87z+`Gu{+rVM z1o#k}TBKxkQ=U|yRrRE)_Zs0XDk>NvEcf~*l+x{LZd*iNo7q9ShCTXZR*$O6*thIF zAHUDDBJx8-E`KPw$;PNnRedia&t1&0*ix#O#8mtI34Zi7|6E2!M~~h!vtw2LuBu-| zJn$@)(gkdRjOI{(=K+Lz*HgOh9r|&Y_Z5PI8ZJcnVWJ0Y z5@bqz_fa7FR6WlT*Ka0g)4#~(+Bo;4e-O+F zI`{9=b$>^)F=Ue$iSuzl3Bz72bYa|)XaF`cZDk&=XiM`vIEH5|{vrE1|B4jfK&h@{ zHu~^*g!cIz=rs6`%=P}k!|_?KWU4>I_=kFFpG_7(C8(dJua^=Q6(PObT)1wa|B*EZ zEjLjEc&9fhDGKg$bW+nkosU=St}F@wWgS5#1vS=>~slt8EcTxS{1kKl7mDrMWp_hn_rh|RuAz&xc5xD#T|3}d=d z^cXyo5s8%Xe~NEvj#F8HWulTb{R|ozjT3KrN*ls##~qYK+R*sS&=~G_0uDUuuKZgs z?SG+cI9HDZh-{><)6XtZlW8N(ggbVbXY^4Bu>UB7R@lypIPiN>c@BM@_-kQGkMA{x<4 zyP%3)t^>Vd#5uZN);iGb2(=t=%?}hUnHPJGU#ppvY-<7ARR91007*qoM6N<$g8t&n AJOBUy delta 1064 zcmV+@1lRk>2hj+SIDZ5KNkl`crffowwNNQY0RmhmF2H8s$z~;B54efNGRlXi%@Hs}y3DR(F%xC#e+b*kDq*n-RtYCAo^>=*{DWP;r z5GCYYQ}?L%(0Yo}UE z#i6UE{|Y}wYajJ2p3x!DVtgV&mvZ_~jc>vy!oMI`IFGLyhfE?IJ9@I@d&KD&X(R~N zTAt!P5#5dUa*C0)K)&!Pcu5dn^!!|sX9-@?izM-16Myt2#REN%AWk_B2N3WeM~830 zu=4aLH3y<6n^6X!#H4RW%~`+RfF~IYpA=-x4U#NdX#0%%94xV`E$b5d&!yLmV`PWE zu^(jbp2md+4Qv21nveRq7I8d~pL$AnD%%j?Fwt`;P$n_cdMH17NAOqiL>47J7^OMc zY^S&)i+^`*Kzz9xs4m#iDUfbd8HNX+M9Dv%VAWha@$EYeD(k+{4xO{fY_d6L&br+n z1labR>c1&~dFafvI%Lb#20@vPb%mB>0Ey zOa2v(Z-7u;pQfAtjq#X?CwBh{v}5sC=872Shkp#_iE?6O2LbwZcj+>oRtfUG-0Pu) zWpWVR2ndjPasQDuW8uXPQ6q(_o6zaK^JjP)eNLZb6|TCgAvSRPI9qRIC5PX7xgNp+ z63+cafXGJfF7Ug!W`|SD>mfRp>)};EI)9JQUBc-V4zM9tB(7QIIRn?gU#(#X2RVWA zoPSb1d);fZf~Wr;Pg_Lq8wU9 z<#Nj{@A$tdx8D0NV4zApR~k(;4IkG9RqQU5ZcTQuB_2@KqG7h*I+9hpKqptUo*`Jb iY>dDk4Jq_&EaNw;nUrjxTJ!Y)0000U=n5;_3V!O=kw6-1;1 zNCgBs2&o`I2hc$f9XR?%_R72L<@T1ly&v6^P}s?=XVxA+KM!hVvOzzn>SqzzF|%td zsOm^WZq4j|8jGsB!+bM)ZoW6GHQKP%Hn>4&5VSXGcJsL_?tet&Xw2lM&E|7&tW@w- z^-fj4_Ws%>gtNsv5qUrK_EswRBJwCAH$yvVUU0fUbo*rVpx7RCwO7K-!cpsjQxQ29 zkq42I{exoFGQSs8^{m&6bA+lw^&Du}4562`!yqV`?}~C)L_U&T=^5;c$VpbPs><9i zoK~NIz`G*ySARsV{ewE&^*a6*<;;R+Mm7{UH6n)gMC8iM`p`bAeA#f8nCxj0nIE*j zZ)PW|`k<;;B64YF-|?2J^3M-Z1^Lu% zBlJ$Dt@)ny-(us-0M(K#ztUdF{=x)9V#~9f001`d7=L|hLM28#>)9-Ts;WNM1=%^j zbg)aN!yr}Q6UusSBb)>S#k!pk4A$~Ji^j2;U8GHv{yzHsp+M$yDe>$<19tD> zoar@-AeCT)L}Or*T>?*us4h054yN_O2J1lG?4=XDl)yZl-x&hVL`@A6hni%n{4sBA zC+Fi_vVXq0IOCPdPPL!$=k#1dvNO)^2dD82v;#JaGm;H%&KU1h@7=+a2@W--s1I%^ zH+^0l2hVK$v7@S$PeqC^Kw;i-@6Q@h^TCS@_u51d;`$PbkHf+IF2QKC%SV#U&G*p? zg${94T5H^RD_kQ`F37OMI7~|b_VX!ySAqK(t$)?K?k8bf{Mt-PXZA?M63^V~@gGrN z11S0kJXx7H*EYWmZSxqbG8{PNV?Xo0ep!C8Xr!l?^~>yNu;;2n|e1a4}$ z?-1p54J7Ap7w!eBitk`r5{vGtl$~hxsK20@i3B5@P8%2pf%$EjTSL+jO}lNZ!CIw^ zG=GF1Yf+9KJwxJfOoYF4PW_MWWfAb;(Av?iwjtQG?-lPs)?2FjQ$)B5Cz-K*o=PUq zbatc2d>nt@LXU{QJr_?|2#%ZI#JG8k$Du0l&E5E2Hd) zz65%*>USD*YvtnN&RfV={4Yd*ru9TW@}}sZu{-%V@qZ>jj?^3v`9csc^!v7uutNgr zM6&!0BFX;Fo!^%tWCacYP~cw1w)a-UJpNcl(iJ^fkFo%zD!=Ed5B^lmv%ZQinGUb) zc-AE_c}X`n5*5hhI$h;6t>2uppUQO|Odd$5vmnO|>xpQ`df#Rqf0*#|)zRGO{kt`R5S5!p@9W}o9v!%x;SftX zcz@F_7z=m@j(9&_49{E@X!8UX*@aR6J7|CG`L^izq?iXMj(avF+nsCSgW5#xmvDLC z>SH^q3yC$>*p8in1{j#A0E(H2Gu-IB26`YsZxP0#GN|A?uWBHRfIztzFv#LKx)+`R z^x!BHoAGJQsn#8;Z*MPovcrz#vo~Jt;(r@c)BuM8O+WGOig!LnB=3iK@hO~ShWDxF z-M5UXjHA9f0LqO;;1Os@{c!o)5+Ubjya<-j`#OJ?(Si4%GTNlvHA)G=@nXvANx%^h z;5mY3${DyS5Ixb(vC%n5=M6O+z`bKr{h2+ZP{MhZ?lV@yKFjw8Bqt9E! zM$Z#Hd3EJz=MFGlJ71Pq1*ocSI4xRx + + + + + + + + + +
      +

      5Note: the implementation is for FCG(1).

      + diff --git a/docs/html/userhtml18x.png b/docs/html/userhtml18x.png index ebcb935dfd1bd9e40303f5a28b54f8f0086877dc..d33d825c3b43bb1883af57650d877c62074343e0 100644 GIT binary patch delta 1134 zcmV-!1d;og3YZCyIe$t?L_t(&1?`xPjTAW$g{u$&tRRCU10aMY1Go%;3>Faqf(+OU z_{m_&0N4y5gJU;?I(_Dq<>_(vbk7VUVD?F)?s41Yay{3N?Ga(e40k_a{Sk3}Y<9SH zYzyw-tC{`VY5(ov?x*hl+TG9G{e!zdUZV8vC7u5^xcf6R`+vvG?)_WSkQE4*_}VPV>3@eKY$J5uXM+e=)PuLE9*fmV7gNX=cx5 z8NB~cV6HnmPIej*A3HK2ez+k<@k_{OcaP5a8^GNQ*$R&odb^TwFi7{cuGjRmJHGoxtx+WbGu_udoT7?a<$zJJL7`P8N=cA*WYzz))ZoPE{G z$#I*Wl2i5H)PMNB;R+pCd;Y2o7}N9{<&;i%DLI69h{p5lq#S|gk||j$o2Qkv%e%evY(C|2mQwEg< zG)62N0e|5lbImZriPd*)hxPhx+a{x3^~Q_5^ng*XQdFwzlioxs*0?gR^Ph+_xv_e> z4cm4hUz=`a^QU|{ZbtA~oZfD7B{~1P&G%*{r*M{>%%>{wAphFk*@4^OQ+sCZfZhDQ z(+<#&dIkN@dwrAxk#XYgneV|v^(SxELaHsClz;pA{;nsx2X`l|rtB8Txp4QMO`o*$ zb}(gwB9}1@U(=j6JBKhf4EYdAAyNB{y(>!4f5n5w`Y&nMj>kJ|u5QOG|9)%RhVRPP zYhhY7z^H8GD6g(Woo&~-qvv-cPZPCE>dUoju2D|gqaMZ7XGS!jt?Z^vP{hEa*hG6p z<9`iQ=40rcdkb`8Az$~rExo;7ybvXHU5YmW?gO|$X_0pXhXkJA1E-&l5pg;AILT>C zJ_9*A6*On{*_Q?)=`DB7DQAy>T#hysB8i+`i4oBi3fXu}HytQtfcAV~mbOggA~JHN4(nKOpriwW~N;%Ed-b zOU_q5!PChl;O7I z=m0wq=|DRW=m5VjWY+9%c5m+veCRpv<&sQh{!C_)N%m&9x3?SXOtaZ+)Ai$rn|1TU zlXY8gWxe0v?N#`Xp2f!9p&U~7DW{Y-ix~ZM5#fJL7H7R&9)MOu?@f#LH^PKx}d+);SF}zHu!kt zBjtVe%*p{7^l*Q`G`;G4I|Tl$51j(v-4GSAJJy2)pT!*x&iDuh!udHp`q0~Y1ZGV# zA=}+sJ3_PciS`SFN)fzEq)_T~FdSae=gRwm~^&UU<}tu2%LQ-2Y&`_A8p@?{(iJ;HL^PIU5wFo zeRSLFgXgA7esK&sr5sVT!Le&fgVWU?>Z2o0WB-uc5Fg!$l{UEcD2)%)E#wAcs*tDD zJH)n*@Z>jvrtiR{U?L}0Yx9&?tF*xlhqr9Wd~Gy4QT+~BZ9c|Z_R%*ODsGnY^mSUp zqpe(WFn_ft&3-dp>W;Ou-pax6&H=8UtP#gaF-87v0oD|qkN^a?oq2N?iCk}obq!pl z55CU6u3=;w;{*&k7CEz0v%b7Wc`ml4SLvH^noLl$V+s!p*U_45ie9$WP1oA|p$p%G z+hm0-KN{1g2;HD0?NM&%m#;jAVV>rjwF2Nq{9NJ7 z(mvlS+J7vgOZg5IW0Q8=_lmvBvO2bTNP*L|k$22;l4a*s=y*j0b}28;IpZ~-?w0eG zO1VAegM_Ob7=b+=Qv_3rN5>u6P5B0L(tVGdhAF?)t`0kDwU`u7XZ4PS@BRC|rW-uQ z4Sz;2gTeC+Fo>-_WL9Zq9r{bH6TMYVel_!exk=H`boCW$9*yY}6oP!ne2q@7<(W?A z5)+y+JW)<~uLO3SLSN=XRQv8tvYfKCF~NxwQDOwP4orl+*}=a{-E@-w2lxj{N%0>+ z$$RLw>!^`mKWD7(vNJdu(=zxf`stL2g@2pMrSclM;|%=fARVFW6>#5VaJ_~aVjhE} zolRiqw;V*@0D5Ao*8yX(Ap=0hD2ei%2hsl%bza8>b$5OGS{vV1>==#d z45uWW3~p+Slb*gI884ku!SA!cG6gTJ*GFn|z|}q_EWdeEfZ?0Dd^=ng>wody z^;93jZ=KeR=eW)j(>H~i$qZwi-Y(jm Z{sPjGBnc~`0lWYJ002ovPDHLkV1m|6eIEb- diff --git a/docs/html/userhtml19x.png b/docs/html/userhtml19x.png index 5ac0302ae507ddb5a29b282b80dd87b12bedd3d9..a44a8a1adf02dc3af352f32da24b2e604b27b817 100644 GIT binary patch delta 1271 zcmVjd2t?5QV!b2S7mvkO2o7m<$XT!5tYu5dj4m z;0*Z5z+_->29UvFXP~Q3^)&QqH8YysJ)iOJtJ<28X4LxWR`*lSdP>O-Sckj6G_ya= z?2Dd0GP4I}_E}}_AElHYuD8COIJ%wF+6kVdl>R7^-2Ic8U4NvMUR8B_gFAaO(*yL_aUAj9w ztlk_=?5$@}=zn>pRapx|64iun-T_{;bey{Y<2lth;9iV{-}{t)>9ofN z*f9)<#p6oW@{IfPQ*HslL*3Lpvdrvc>YYuWVYFuUoyis%W(G<97RoQnee25DXj7hP zeyNlW;n-X?6KGahs)U$Rz=Ey~*FUdmv>SH_}x#z^X&@}RI*e~rz6QfYX%^?q-iHrgOVO3n(= zPNtvX`2xAL+uB&_{**E(dhfCU8s!OuMb82dE`Q~BEo93p)dSq7ti``<$&j+kju~B8 zdA(tNnj@Z&IgYk#^;y@|?d1U{4Y!ujDsSUiWTY#btMnnqP`pW(4IG774`;cGe8nus zIL_vid_eJ>5Jqe#$+Zc-#2i2aSpUaHJz#wBEyjP^dPgxC8Dwk=UjdoGYGK!iU@sz3 z6n}St7u25&M&a(b`7)LWSS(Zzipx`hK9*G@`(d`sr_kVD58N=vz8l7z553SuKM z{-*wpRB}4k2|P;+oF+dv(gBk;5RHZJ9vnq8LawAcX(Oe+*MC8F4UB=K-vB8I&M_!P zhA}`PfgRQiPYPjRF6BQ3l#&&rY$@Xz5@}k?H&X(34bA&&V?Ce?7{yHifL**uW`X`kvnY86lviP+!$XSK#2Jj{io%+;ZJ@-f`YJX* znX{n delta 1437 zcmV;O1!DT23d0MKIDZ9oNkl>u=xVfH!Sr}Pg2cKjUsJqz&Ge;#lB4wrB6Hyq*D$d2=zF@L@U_pZQ2aO=O1wtj~T zHi%;wHBJlG1K{%lcTvDw4eq?N;28V%@|1}nG2QHid1*csbcv?GZzZ_%&cdmFroYZl z7f1P;!;Fn8d*Cl~e1$HbzIS7s{(yc&Kct`0U(Qi-r)w6B^#)^^a;A#Z-=T?x?i~Bx z8``kfuQ+&KbAMfdw+<^WM^*2gjVu_+wDKxgWi1UA)~eMj1F}o2E^4+cu(QsBasHye4MrL~kl<9#mxd0kcm6SL4C>do`T>qotFLsF=6&6Xv#`0N z#!dVbvS|Deey>sD$%rB^oP)MkuZ+;&tEsTpwKN0Xe}8uGf(@E0V}WoplVP0EQy<1=XAf)f^RT{KruST=61e-iYt_EH!7G5V!5Wq#5JyfN_YX*|Ea zTAn+OU4NLa;Dj3+L#t6=j?pZgL$0~^3;k)7!Pp}8a|R#AaqSDO#(NXz7`s2fw*3v! zAwy$#c9e$ZBi=Dxot?X)r*((qlz4cQllEKAPr>lUvsDLe6??LoGBUbBACSl>U`}(a z;fyTFF@T+DI(+Dq)pNF7)+S@*MuYcs33zoh`hW5H@4#y7G2hb<*u#)y|lluM{#a;kgv98i7WYmjesfAl=Q7IG8Q|FI?;QVE3OOPz>uPj zcz@cc4?KqGS}NLW`ZaK>SAflN{x^>_fa{HXr@?g=bGsh9qn-&^5mB~~la=9%ZLC)$dl6_g>{=lh<|aR^n|P70Ec@vsV<3RXlofT!*_3n6eC-Tg zm?IQ!(i`x}i~@{Hsj5u3ydh;H?L~ zha^eljWklH;a8cvd8?Agk*MF&P zzTf!ZmACk&ljH7s?7nNwf0G}uNBJBu>I1p6$mn0uuCZ}bi!Dz}KQIHo;ZTp^H6`74 z(-};Qsj-fSq@L+ufO`f4_5i(M)%3(%bfBpx|Fmvp;F{gKtf#@%=SidsI=+wa|M3m` z{=~Pmhi9XXe^&Gu^??;e?0AB3Lw|LQB;9{<=4TrbS*C>9d==$;DCDZ2g=0KmQ>bqU zokM^(GFtmwX8W-YFlwBD@7?Yo&v}ifOlu+?D@Gf@>4qHT>$F|H7IYH+7P!5|vt$N7 z-)@Z5vmVdnh0pZrMYj%7Kbyf|@T~|Q?KtcMY$E+l&c`r$D5Gx{Iek8cuYX!w6!P~b5T^9 zW3?6^=$r+iI>)KPVTvKuzh#EzhxQXKz!{W9=%hB`4CkKRqz`m83&PEBMiL#xT4Q$J rhUcN(6AN&j9=z{k%@L`^2NwPXjK&7$TKJ4000000NkvXXu0mjfv8LYG diff --git a/docs/html/userhtml1x.png b/docs/html/userhtml1x.png index 465c255b054ba1835240a036b8a44519a7c900b9..933f19a7a5113d5f204d6e993f82de2e6dbdb81e 100644 GIT binary patch delta 983 zcmV;|11S9D2!RNYIDZ4PNklW9@>;UXQ=m25`20MTb z5IcYlKs$gA1UeY=P0noQEwi)p-ad@J>`6AeJO4B1{*H$bY{q6`YqPeq8F2SKGy7;} zzs>A}nO%esezfk+XHEUL;O>WJb{|5xPAhZwM>BgDLby+5HGjOSRnBOKyPvxIc6*QK zaQ9s^d&mhDZp{qsZNg0I`ta7wUQZluZ@|n#Xs5Yjco9;O+OdgeEW+Ipif884-LK5- zq}B9-7%0hXe8O&<*=4T&a@xBggl~(>sG`|aEDNpmi-L$ZW`@aQ(@(8q=>YfDekX)* z9wBCb8XEy2C^;@ya;=X& zrKj7Q>QlQxt{4QZ!j|(5foN(sv6)fPM3Tl@5Q==PDSzMEEC;@@7L~I`tO@@eVWs)Z z;$dv3I*}0%PMQJ9U43dd_FfmM{do}BjGrd1=oB}Q9Zn>e!9@}uqnp%Wy$GRN3y1yK z-?ZvjW0LHuw&Yr_zG!|`5K43Q%chVjVOkK@G`xqmy7wF<672q%?}Na^>1de%og!1~Vz+C=05hkPr3>!Q{thr46m-sfck<6OCW z>8uX&c?+oREsjf{jE6c`nM9xoebWm;IO?w5(6_Xj_Ek+8>R&6DBu8y5wUZybE7jG? zq}>bJzv}uYYHHC^WY#mfmb4emHON(-(h!8)(0?NXWl>7TAPqmzglk16p&oBNU-p&XBP(eJA-|6H5{wBp$XKnt({ z%U%)y<)tVNellG%qFgrvw@dVi`RgE8$xip*$_J}CU%yZKUnS2=egD_@sy>yhVd|dy zTt)MX21wE(WC}1#)kty(V-e_v0)Jog`A};?6IV!yn2vdpOaSJo%U7pL(%iqCJ?N~9 z5&h7awQv=Es_Z(NiA~2B5@N69TV}LF7Lj{^=Cf|$>JO&4#d?x=*y;cP002ovPDHLk FV1jQ7@$3Kq delta 1083 zcmV-B1jPG+2;>NmIDZ5dNklb8ehF|#i0%~IvLwrrCHrOg{QNvv!eV(~b+NXy7`qt` zhg>-+zR5A8BpvO55aeF5d$< zel4H7o)aA#%si$64V*W$W^WcOgPP^0ET-dE2<#RCtt+zzw9MQtYu+TNZ0@*g;pP0K z2f~4vDzDHl;XKSOy=pBUN(heV?}D4)d7A#rc}g~*{ie-@>{5}dFy#yO{(|O1;wz~A zZhTDJ3CLS2w|@iUI&%Ba97|sZvvma>uhF>vX|TgblD3E^sqe$yTVpjq0J*_&Xb#GagZsVC}4&B~6ci>voL*K{6Z8AQH{b~!J2cX!mbHF)x0;<NL zxvnuX>M3pN;;+sl8)Kji?k8foFJ%(G##qT@R$0sK8-GAEwG1F*C$fSBpSmUvMXGDw zKDwjRC+rDy?X;rM{0~}nP3CmQDAMU0jKXoWW`^T(KJs<7L2mA#R@XuAEg0jI6)X5( zHed_tkwSsW?FU-tCgGbpIaLGa9kYfGf2z|H?2MlUVqSIT6^C?_b$BavyQryOJx|(WIuBA zWNK}tK<5Z_&&9P8?F?%*#%cwI2tE|9;~=!sO#e<`K{I>@baSr`=#Lrp*rL4d8?R(1 zi$>r%=m8P-K|kzMkoMkSLezmT{0sK}w=&rpt2C$z`gu?Q8;{?bX*rJ5wQX?x39^>r zw|@YG=9N$aYZ)kQErY|j$w|_SIe2u8GGD*~f}W!|evg(lJzzIgn;GjvZTwkdM{BIg z|9*pV?~9n3#kUfH^1H1UpvyAYTP|l2bZeuJn|Qx?efWK+a}2K4#{0Q?x(D!B&g3cX z{UGP~0Tj%NSP@<*nZM|^LiPHSK-I|keSd1=cu#7dU~*k!i-&j$!d{zWgC#r~X67M68PPOXMw?%DcXUg(E+slbnyi*P0naUB0CCX=bS>pOE{26)i)l|9f@S diff --git a/docs/html/userhtml20x.png b/docs/html/userhtml20x.png index f9f0de44b50028ee4e79c996284393b44b044168..170b8f1792408d91019ce6b7b64175e28fb6176c 100644 GIT binary patch delta 1268 zcmVjd2q(5QTRc3SgiE=zyONpaXs?06Ku70w#1o zIsno^=zt_0KnD;y5c6rCjvXG?cD4JoXHO~F9(}m`V>5ecX7|nP z(9GVal)kmv`_ld&_}+QO;O?L3nNoTZpJ=u30G0H9~el}+#>~z{v zswxDynYPTDynlv>^yN=EjJqH7((4G^9i4xql$M?TMDrZRPCJj8z0naI5Peo4V{va` zv0Lqz4D`6U;TR7a>4`M9Fvp8vyKt+(@;1L0F!?^>sp$GtwA+%wBO<13Rca?A|Udm4UY=;k~QUVR`UIt^Yc0+K7`p89p0HyPYwo z`y*p)%XyZ^V&fGb;NGZD&@oRS7$7@R5Hhr=wSTC!tmLM@wRq<>IWm6PNzL7<$T-bW z^Ms=@&*pQrF?(&xvdnU{>^gN%p7g}w*3wZ9yRhlXcnTVmZlgYNJ@5oL4+XEtxXFvu z1G-#71eufvH^7V6!Ac#a0;1%{N;^PDI2&`|<>d;L$B{urxg5~}FFXPHe}z!4gGfHb zeSeVsVMLa@lM;$f5y&`p_nJMP#4#)AtzDK|o(cBu`$X_Qym~oh;Rr)~N1=}XYwe7p zN9$XLB|21jf;hz2*7LNBbF8EaaiFH`p3ISZ;uZ@LbDeeCByF`)dB`|9<;VGGyrxgY zqk662Mcd?&Jt$(an{mReBkU?pMlCeU>wh{m!Z~WYwnUp=p>P;PII7R39H3efMYst5 z;_)Xzrtf?0lGdBorgLG$KH%ktBGl>3mN#mjD6?mlm~jRDCPh|Wn-zLi*8eKPiy;jj=)Wz`JYtQ1*(CAJa5SF`o#2q9t}XQEq{?F z*8x&WvlrCeNguPH8-e%$NA)R!;1tC{^tu54bQb0We;}$HQfrCh1b)FHA`?!9qm^hh z8{$^zlKG$X<9B(f%1)mxU+>ap7xPrtAqc=>N^~Vqd}&mhGs61SeIn+#c&*bxw_Xza z$Lkg^iGB9hzsn(`Yq2ip;UqjdMs2U(vQOl+#qKNrhyx05gxOY@@1m0)_Sg*Yy&mV= z8b*Aga31n=K1PO+%h7L24SW23kaFkRMD|?Aju-kw<~+)YL>lrTTxFx2zsJ>qvIw=h eSsZJ`Cw>8_T~?A-C{|$r0000jd4^r5P&(x1$amY&;cPGkPd)U5a=LGDqx%r z><*0Sz;s}62haidI&i*Ekrcgm()0V_GMDr1jCj&&ceUDGt@dPAx3{+s4lrzi)oS$u ziZ`@pv@_Z%cKh1GGk-5&bqJnuekM>Zq{Ln?OO6)4q}kX0!+(9<*cSHn9GRCDxh;H! zbiYuC;K83{k7HZQwah(c&iRieO$%Sr>~sHUpEoyQZ=az(p;^z=;Aup@Az;201i#Q< zTmKZe|3#O5m@$8)xgYm&&OScy9S!$BcrfB?tYv0-GGH~0{1M~6EotDj4(ep1a(-9) zanE1N{eZinU4KtG2>1Hh!dPBKsQ_+XThO%X8Z*+>e-$L?YmhGjWT^9Zf}|hxpTN7% z(QvPW2P^$a^C!4abxaH}=Q_r2;e({@{d4jI0_Zt`Q@frFoX#Ha+QwFbhYeak>@*}N z#5BD|*+&h9!)b)>x;Nj#_sDi<827wUpMm#RKiQy#?=K9U zGJRkap1^Ox?O41u)>NHgSp=?q7Imkl_^euWn7h@gW)E8Qf_&U+hcaIF)JX7b8r}2Yb#_3j4~>G8xh)9a_N{gH26(T4tVuU zC#h**Xv~axkI`7-BLk(hb)J+lxsU5BL{k~w`UbvWn3~}^T*3_%?j_3?SkCI zQ}}y0CLjz~6pTi7|H?8DM%6peshsq)PJc$yqxDrTEo7G(4Rh8IUuts+$GlDnrz+w6 z0@grH*(L3qKC;b-os7wNvYp^rl6i7wDffWPW14#pSAK79FeVYAOyxML{BI?i+)nS z-y1p%#zg-AJaGZjddf869RPz*Hh(CxygnxLH8#*eBU>FUK9j?}c73}D0RrCaY5jCv z?8*AUta1jvbK;{s=QW}-t*H#Ol#Lip17y0E0UheUsGs9+QIaQpvV7#4)h9pZ!RNJX z60n9*H^D@4g&rQrD!`{r+c5!i{#jo3VbVc^?7Mj9)ue# z=Pm+MO1b{@GmRf$M=XFpYDMj!cF~`6{CH4L+XH()Gran=0TQf~{P61>tsV1hC-!!-$J2R8?$#`Xgdwh zMZh>|Md)1Xv=^AhV}k9I@>#`5+OD6c+#0(gchda?O|s1(w$`_MzWRerKyG9 zo6jJUTVVKX*5+#c@-?pyMc$t{>G1HfNzy!ma7^XI%$@5V&q2yr;XG zwqyR-_fx_nt=$(^+I$n4zRkTxypu*~Gj*Pk_c6_iYJ(+NjKMjXU!gGDw*6vV{CXx2 z3Sz}ubr1-Ec^T0BI#=lxIOf;kJEHe|!7%b%vOz>P6v(=~INZSYzS2+@@}0GPQ!ej9 zFGzC6!YJrBmo=#xhFp-ocY?M)c5AukVW!~ z_oWx=Vd>~s9h3^-_5FtSI~X+Lxnyx zQXscCZy`zZcTNA8JIk?N^6u5cdh>f-JnTC;xevA^9?29k=mBz#W2qsM(_9q5nLzSc zCxY^*mnf`{nj^P`Y{hKpRfR)0z`R=TROSbV*l0u5*y6M)@(wg*&sOB;et0`+dZ#53 z4b!#5{!sxnK!#3NYfl?5XdU(o2urMZB^CA?Z75OdJY~TZ85sA@Fcxc;>Qpziai%Dj zh{~)sZg~bitf7s_cPi|hcD=pr4(L?6o)g1WpP%{TY&5VTZJPqnH7*eYhzAH`>o`a+ zzG<$_f!Rt6ZG|EEE?1#=H*7Pq((hPr*jF03v>nc@ zVq+qW7G;#;512RDW_IWWwy(Mxz24A|J`Qy7p1+KedD`Om#mQ)9Q&hz}cSmxECYkPO zBx@zOcydH>9%uNij-SvRm09;v?1ib%H$s2fc=*Z5PbFf6#^T2T$17%y-I8HJw@-h+ ze{V??>SHn8h?6;Fu6=62LRCEVUNI`@iEYA|dE0s{eK`Tz(`Y8hSoZ--802*hJapTZ z`=hF;5Z)6vVqJwC!4QH{65P?oOsz`q434wb=Yh9;oyIMPG(9T>9A9mg$|1wQ5;*Q6 zvjQW&a~}}0x#hz405MWqG!a9)4;*A3d6~KN2vAA#dybPrJvNzpj~87!WMl%x^0;3+ zvboEw5pGj=%Vi4P3r#<)o6p2<=i(u56z1zb%fVigz>Yd;eEqQ6CzR}OUCSJu5(z{E zFf0Cmz0FA|=4F!pC;`i7$x`8`7J>>hBV-QaT1bV*n*>fnp}IQs1NJ|KNDwyZUxQm{ z!4&J$zj;%dti00ZkGfMRz1p{?NvA5BU6H3PwB5YF!aYb#Wu3`z!BoGXeW$s2!O@X} zu6qQowPG9seQo4Dyk@GTq7-KGRdF;z0a3jvJUy;NPADbV`joBrV6(_fqFSq#4Y;+gh224$!cgkifSv0CM7 ze#Ghz`uWm3sfLMf*;EVoBmRgWlrOfGB298ox@#Y@o2(4K^qvUXGH0?vyY%j9H{*l( z6GLr*xgC#p0sHFuh5O|@KAdKw_wlaCjQ$M4(RtlGH_~5(&mcJ_A6f@uE6p1p{^C&zYZ1&|NJy!3aJ-&$T3CORj1ayhTu4$QB~>b42Jr zWORMg!hTscq5js>u=se+AXUnPg11bzy&H;py+cO~g>Ub$$LqZbQoxxF)y>)Dim!>l zZPX5yGaiDU%B)ke3J8$%Sfh}*hM>EB{6_odDcm_1tXn&ez~4Pxr3JoY>i z&th|u;0s>qL{0lVv08md z&8`Lq;^d@a9dI19t%VtobrGK(+Tx|~x;GVhO2K6}b~}~0TLFAS5`JL^LbHa_3@KO@ zYHIyGBSU@Jl?sxqhDy$m zgh=zUD&!XUr&A2UaYn|zfjzk_uIdO=c1h6hKP~8~G^KHXAjQS}cMT+I^50)xQj-6S zjiIEgUh4YM-XiF0!&>K|139eG24(S`gB-&BA$vQ=jQLNimh=VW?!X&hnb0YNX z9sztQ?oXZ5N!_A!s?-*1Z-x#;EYz9W!mA9PZs-&u=^-sz8znsy(Nr&T+-gGV1aMp>>0|GaM1D>_(06|-|56NZ0LN|=G7 zn2(DrBl;Ugv>-F}QE*gfSy%C(y&GSx_CW>EWTt6lw&qimeI(DHF?X5mu8}=S(S6lP z0$va#G}mX-YQ33K?&R$<{-4-kD9Xm-Yb7ai3(4@waF)X{wc)B06RtpSdvRNBg!p-? z%@Nws1nQjarE$yNhLjO=_bcbR-)yymBXVE|oL+wuQr_U;Yj^|&3<9y;tsQ5ms-IDAnl>;5sF!Iqy^cm!7 z=JpeF^^vU7N$kJm_x}jW|E^7WqUH=CCG`utH)y=HA`wo00e(8IDpZdeM0A zljI`nZVB?=L0tg70V@BbhSaAo7hAZ}t}+SNst;HqykucjK-BcCqJwIS+jD*OvAmK* z8n4vgQy7Q9kOm>Z@T5l@!S*}E%B}tHaAgmS;Fz!~&Q3MadIa@=!phuZ`DufHyPWAr z8k5p}uj^T30nB#N;4T6B8G{pqP+=9_x3FK~*XJ39Ky|5r+>8La&>`;L7hUo-%$B8a zZa|meFW9Qk>ZLyq=eJR8u34QPvG2I6fWiAv=jPtmA>3jJ9*}1cBi^x>NIfjtxj8yV z7DEc|VNUK+uOe2JCMkGzmzI;5Ou!;8QLrN5SiLD$9QH4hsh_975Ls(_&de& zMl;G$(j~7yK z^;St))$yuJYZi6a3^S3&Ys3Ve=qeTFg~P=0oose+RX++Yxuv@or*3cZVOsIllBoXj z2PPb-wFr5(9a=wu58HkXk-nku0#ld`(+eyL_MQGWO!yyhD&3OR+s33*1n8GHEmwUL z&{IJzKg5D6Y#rYCVbB-nBe1OUnZ?ryb1Qn=VLz;Q-6~bz0Ul`+M3|z*$dT|s`N(9m zI9EriVJEa?!Z5WXl#PP>Zqy}LiWbj6DtmM0eT;+5DT$0{6O00BChFE|bp1ic`Z{K} zL**`MRTOyfqr)F^sANd=J^DNY3YAV}6gY8PnC#4KI#k#DkR{n)6^y`hHOkjIVX5$- zyV?oyTRgat=i&BB`3A>FNf@+7f9obwJgkd}RLSw-p0J!%Jl}rtEYHz58u&7o2FSKn zs%<;DTgVenKf`HY7upAcTDS^7yLX14CaWwu!hj|L)1Kw)ao~agN6Y`5KPGG=FyVt` zl>ZKde#W*^e(r-8Lx9ocw?(Qnif|y^$9L&A6b1~6{Q($Q$@-+4-)GkD@2y5KsG|Bv zC9O>dLOK(wENloe#%IO(I^I*y*Zw3*({H!7iI;1SnK>#OfWEVVpXxvW?-y@>o<&w#%Z6jK~Xf+ zo2FyDO6toi^HCP2`nnl1znWjy|3eL*5-s-#1I?>*e0x5bKc;+|ko#|dKikZcWvAg; zO9JcJGYI`4A{RsKhCb|U$h6!)YBaJPKXaJwh4`sCM2gU=8MAsBlPP%`k&>*uKg?FA zJbv4bV_nFj$5k*SA|G!C@&pU(f{IAjl0b+Nxuya^-(jUN0gRZdA)xS6=EWLlTQj!; z=fuSpYhQ|(X_H0d?tf#6{|45vY7jB89@ClrPb|%1RIO52p@w2|La`@P-&9|~b)}7+ za;Gb*;ucf1j|KGCuZ_+V0#-Biql*hA@yA%*d_^)6>>|5!Xi(4ca9=3X;RKdzws%UT zT@Hw#OQ8Xif0IkMGXq}__Q^VhYR4Lc^uYVvgZC9l+Q+QR$y<$E6xI4=ch)B!Pjxq* zyu-l>ywA0kSYN`J3bvG1dIyWpp-I;$sN_(gb<-Akeelh4W!1~=`BO>x@9F0`$CN{p zT8roRO}?YvdCVdul={FA(98@&%|m4`9iM2VZ*^ovgYE7~;MUY)O}$+U7$$h$ImR1v zo7vkg?FgKr<;kD;DNI-Xvpq)!bT2J7?rY7-9HsO+>_7%1-#NCz zB|sAtWs6fg@`tUqx{IVi{|e=A%h(LVzg9beC)-nbhkgabhqb3VxDjGZ{drmrYi!#; z(K)6)ua?unYr!q5-ces>+)eo~gZ0lxfGx}5d-La8EGp|8yqyW0Q>0sh&B%75qNzJb z36rz-0Ow~_*1_71dl7@5`meX1H60wE<>X2>cKpNidI?NSe&z&-r+YZG(;35SnWDB! zeX=b$?sMCo&zt^DM@BIaFcKNnU9PVpDWVU@!KiDnqi$L~zlnwS(6lNqmUDI>a?cO| zw(+!CqAG>&HI752>dZgHv|sqCfDTWx_%ZsBKzvze9tm4TcA zv#`I+5Cl`esSqPBUiDIk}$oGZy(8p$9Uiwh^0DIkAJH$V~6Jy(m0?dhEaA3^^=W z-e9NzzKB1Lkdemmj2)Xg^k++g!h0Z-tKcN)G6v+SwY0{{&En6#A>U_B9!5ysxeJKL zl<aEhStiPn5&X+~!*5f6=MSqsH6-jPOm&|!k0 zG#=2^7jFKXIkuCBuFjA-WuyrYLf^AJ{}A`cer)GeKfwma;1zwF)CaIcF4&e;T!mw} zB6!gH<7GV}<+{3CQA+7CVKTq=N>3dnrV9A+2?JUoRnRtpS2^aH^(+Y|Oe6adXlil1 z4|ZOiE9nU|X3I^B+MZ^xkpD|L{4dX;9{oNXbADJ?nDVN~Q>lGnL{d@>#cH+uB!8Mm z+-i5hp~y@fCCTsGUUy#X*qRy^wTZ9cF{RJG*tj&@6EHW1P51+AzpM%NP{Y#P+WxSO zRVW%9AEmj+(8CC{Oe_z7Dh}f+Mp&+qddgbnjyd3n(V z$>_o)oK|8Jv=ODk<3F{o8V-qZ7YJl{%)u6yA|FQ;Cn=U7`5;8x4YQ=oY)t3FtJd0E z5>Da$pbxf?wHY!B_vS5?5_taltS~V&ELnQSN(0m(Ya->0U|e4hGjOt-5W2;>6oXD5|oE=nX>ZVjoj%k#D}s z(gfWh5;1?2rCFSWSZnrk_OvrRd)6vN7`33n$m8dBeq9TNccuy6qJ6%~me z1TJ_pT{wc!X8)WfG!TAdYsK*r3Q8@gY3IPBuFO1Y6dB6{0yJY7l#gEI6Lp46D>{d0 zH(c~WQ5Ro!KRpi&^9J@*G6xxAs#tnwCCvBf%?P>_cDZMRrEF6lOrP0iC6xW^F}h@W zH);;CH*46$!}L1+|K%YJxnXlpTx>W|O#CUnoNc@XRdEP#sj34j0LXaLfTD@ox+dc`1hFzilXZC8rfBUFfs6RUn$m z@d4|9;HSDzmKx&zl9K+x$JLZbBnsBxk*xXdJB{b~CQz_llAU=s?M&4p);-u(Enh@s zbKN=odIYCdk!RGl30j7a>)H2^|8QDQ{i6<95Vq5aog2_1B{Z z76KdR>H1sH&&j^(`@5`{I~M;D?tj=ibf8aib+yM5P9K2e$^T>cKC1IqaFL``;t6IJWCgv!LN%G zOA9(LFG)a`4cC$cT|+GEI`iH+RoV9bRiDmZ*0J9s=c1BAACu(fU-L7iY?4(MA|F1K z>8_oT8Ax}P?-|<~&0B~q!6uIQRc!RLDg8$N{cfuMuP20GLbGzAHUOn1=H0<-!|fgg zQ~hS06UgebL(UfBK-V|>52R#$ntBrmzJw~wpLyc_>XCFdjp)!HP#}n!wO(Dyl69fS z-Se|yr5fhz!7?c?zy^^yhwFqJL8Ne~ja6r-+9M87kA3A6-|4qXaeTq{h+bWD;QKdp zr;&Od&3wQdP7%SE4{?w%A%2?w3)Atxi?`IHB0l0wzg1$%n#}fvT^$qtqU_XX*=sKJ1{;8@ zc*k#l7gsDS?Dkz4k+ox0e_NAv_ZjVqxfSCQ)jEd-8lKG)n&X2{%1)g(=|$jx4xI*y ztW&gGLiEm`%X#n~b#L?rn>sn^W}Fh_6aIpQu}js|oEAd~;%iO5qz!GT##tXrO<4HU z306ZY*5(IkF7xYrH4+`*^()fk#<<<$qgtKW&VH8vG)A#-van$SI(to{fBb~s9bqes^ETFq{F%m_l^%w~>jm*AEGpH^95Z zhpm2RvEiytnZVF~S*u$mRaZN3%-_~Ix_0u=g5TaUNZ2Q?FfWXieecE`z}{jgLm!+fleFW&Le1Ngk8Gyh_ft`42c^oTec!+$?zpdiBnVC8h!blpS% zF~iBuSQ2!lI1AJ{1x0!1$TH~mCnR;4ctkpqEgN4h30S?qGUzb|dB##d|Ep-Yq9VWo z*ezKphO+`gjZ3u-jmf#pk|Z8*hT^At4v|=KxW}V{a<8AlqpsPFhIJq&%39w;B9ko* zPNP!5J*!BYBYsL7W=jGYQ-!Vq=Pef>0?;VgE&ee1kevIxmh83Z}Y6|x*~3K6F#q?IU{n5 zOxT`E*)|J~;}<_!n&*z6xvk1FkEl7_ fdD{P#}@NQ0tugLFzwQlycP7~KqzM!Inb(g=ZtZ z5_>K|&Ii|k3oGqw_}5NT9&AcuJB)!R@B2Yj5)%Z((dcy1C^DVlgj;%}V_ ze^19k|J|+t8HQs!lo^B1@Ere%(=(O|{0mu5r30S+*!Z=TNzD8W6BUiR)9LL6EE6Fv z&;0RQa)E>D^q83CbnQD==1q42IKj|gVAAB$i?VtrQJlpkV`6v~Q=^=l_qol}TgDuV zbx|n!TG|4VGVn2~ZTx*$^I4^QdN8!-pK9+vKMTLFiWZO<1wYnySh-y&m2 z!reW(t^ChISoJ=s_cJjmuPd$a0k@c5h;4FX;IVvl$nzHc?Z1s=0~)FhMEG%XJ;xJ# z$SEE(*nE)7CH;1!y!YmulyTGeu%Z&(!mYd~1+LD?B>+r)Ti%Sv$j0L%zkjwJqhk7a z@z*%jXEEP6FViZ|urJ%6*Jx`mOFv;FK0yA}jZ5J6Suxl%x^D2VwwHwYr2Z7&JhsI`Y2R>$p4yXsB95mDPv7F?zl2lVgrOx-|56s=c3q zBiBBeaZwH5Or)Q9jbJeIZ!)hu zi`uLQMf2@uuWr||>iRhxY!8xWkThmkg3`WpTD2BZG55MZ$`}t# zPC)ubX9zdN2Tid)g2bL&DRWC^&J$-L)r2|JWOnxB>B9Ux)pryd&ql2-l4_XU(|{84 zuU~Sz#-Dt|Wi_SE99k0;I@Geqi0~QcB8! zn7<##30KwegKyPa-M_VFJ8^DiJ{%rfPe4X9txqk(x{k0hLerC|N1_;tZ!Wa0WDBb8 zeq4Sw5qMnqJKfvT4~Hn1aTldckK-Rn40S+LsqVem)7;5|&t+0WgSL>H3OtPTu3?d^8^?RZv(FPnE9A zUz{{SaoA7whXq@#7YE+Z)3=xD1pycCRC=hrV&EI0^YC!1%BQ}K6p%XJ#AJr{L$mFq z>G8*=v&S6IzOR`d8U9wIlxJH|WEOj}Bt zG*;(%6!%ZhV#xT@cg_%;1tRbpH4)BS9dorgilGz^(?_HUa)g+3J+}2i^V51+P0cR2 zzUuIe7uBAEa_e*!HPLF7gZcu{^4;^~iw>8u|ZhvOY*>h~kT`4s(^VPwqk3rgX+#;={$;5$1N`(E7l!ns6 zgF?ryX8UkI4u`%o?#CRg-%goB+5}sWzT6`#TEGS*Xc}bYd%v$y^(YmZxMiYVqJ)VE z|0Xxi`~KdtgL`EEvUK=Z8{vQD)kG)3F<_9+BmsYHiqi!8#5M3KNcpx<>{7TkZVV`# z9wBD;{ZhI=J=ZWbs+bY@^M+V4t{0+K#$L7YTiIQ4iip$p6Q)j4#w&w2j5p6fH>=$I zqZDdCf;@$EG&n`Gd4tcc-NA3Zt@T=P#UXid$=dY|XXA5nF{y<}|16m9RZQwXN1qFn^DU1)KJY+>*fn(J=O0e_jlQb?P z;Ll#>koEf2ly|JY%oe8Q=Z=&XlreHMT&+Za^!$sPuTQ{KKZfRAXH(?M2%urdry6yU ziC3j90Sy9wC^zj9Txo7s;ZcSN7t%3FU3HmArJqo;`l90*TVljOOqowV2A}FaLrjK@ zU=JYqNd z?9r@fnjoO@w$gvgqyBb9$RioEj(E-nu1G(dxZkC5REByTz$B=n0fLNNkSW}fgB(Lh zk*3V+?|ay$Gwi<+$0iq>%+vMySbeMg;WibZhdaX?Drj|`^uZ5wQ|XaN1Itd!2@C(~ zA#@9P;oYHcgy>8d-F`op(jwqy^y*(`nQxBdg}Pi?l=b(}9IIG2Qy(l31raL_>t(xa z#cRws#HAo}mb~m4r+%Q4mQU`}YVrzA;Y>3jvBg0lwlFjWTl9@$R(LC8kbVxJo1i4Y zjS~kE984S7B0v2TVw74LO6pw&l%*@RGn^3RIKxDcb9~EayA+#~+ev32j2tSuHt(gS zXMXdFOZ+^wrQgVV{rf?WMqVG%y4x$eoNmzE`Ik?yqh>&Ms=m43TC+&wYN(C2004DQ z)YHBEaizdmK$KhNlS9zvke!fK)vtpdO#$iAaIurW)I6-tH;bo%BTND@LUE>rAE?2^S%H3q&DO7 zyN2(-sn>uCHGDfI^avq)c{wmRjnf}=*gSj#l|3dfe69x3eu%~&nX z_PrmzE~o7*)j1;UNYwz>;5MS3!jTbrLY~@M1^*jv>DGp-6ZOANa!ey#D57OlKl%7A zm<+_|G1qJPQ{+mXDkI_9q}Y5nnm-w$vs9nR3ayANSUJ_LgIJH`Q#OEP>aDNlGdG41F{5#{D8@ZQs4%DAvnWZFwshD_l0?a2Y8&dlTQr#Y6{TMU zKWfs?vDs96ac3SDa3}7kyCv=U#xba$+BRueHv%PbHP-)5iSoV})e0wc z9Of1viv%&)89jTcyM{aJUrvk=?jKN77l>Q6r$?ZqBlUCPw->BMN7Nc=!&Ax!-N%ow z;oD^>kDlLDG$Of&LZ<84`fb(8O3s2z2{rIdk+$`t9^AY=@>{=W1Hp4*OR!(xA;o3( z`>nQ>Ecza_$?}r)`V|T%8oTncInV#i;Gp3cgB>2tVsAAFYu||sro$QMQ|f6T5W3!k zd76jRJ0U;nM#M^5ax z<3pdOJ&TFrk*Vqzu@)B&F?9Bj5sWTb`b_`o{ZoEX?R3X$^7#LQlBxPAf#Z6K zJ*a9CMziZ~aP5FGC^*c`*x9E^ge7u8Mo%1NblHl~%^sS3)_tApeB8+JUEQ1V%V99@ zWercw+w1g~j;FLZ6i@L;(4QKeMJCG0@|Ct20!d7d+&mo28B`I!)z zUqBql90qC0%}rY|#t*(Dhyeo2dm$OXQ`4-*W!={xrl8qxCc@v-eYx|@BaQ~MEvc&af`in ztGo4yS)X#B)CX-7L-OlU7jM*_WWyCx2H04O+FQ^q-TLafEv&QSQgX#$J=D}ONv;m# zRAqxltviPsU-mPnU-%e5TVY}9Qe#m}kIois#)E$*hmKm| z!-}7X@bVMLa<=W{KHkc~RX=*dhxpAjxATM{_U>kS%Q5s3=c71^WDEB>6%J45LvIPI zi%0k{8w(L;j+l35r`T+P)VQ>cX&mjtE`j^!dlIyBkrXS}ipmV><-F2~7U%OIMXu#n zgivE7zSTJ4??wVEF}^LNPJha|^4f67PCUtlsN_MC>m}hht9H-soiv`P@! z8+x;OEfB14h>GH9+5Mb3yfdGG5+6C;2b~=$e6bh_KlV2G9B*ga#l4d=nnddp048A?wH{jUX! z2J2^}0l3mrxEyk4_ArKz>khh&C5a^q*?KPymjiQXuaLRH+<*|H5`Un{+ap&+yAyF2 zM2FyP9Dm3)bi8kv6S)>Km$SpGPF5#XV#6fGi++A^(9S|=Qx@}~YoCIuy@G`)aE)tq z)1pV;6^PpVg~I;Qxs+V6(ybJ^-vmP1P`lp8!Z7_~Q))dA%(tzJ%`ot@Ki&ms0_Sog zNX?B}$8RI*6+T;mxy)nsZvRGI$05`MS`6os}z?fYO5i`%mPra4` z-Ej<*nF4s5lPyLZ-)!gFCI|{_;G2?TeyE4ein*n*RuZ6bVGus{$@IZlfQM#o3)~O5 zc%<~)(+-Ub(?;J`m(mAZP*R!oLD0_PiC8|bEC1Lr!Trv(&yjqTG@!+0Z}3c7dIwyD zmyM_D3AzbS@bPuR_{Q_XiPFFH;*;-1wGYbmYcRjn7^2tj**`LSMmEy1p{4lAR{_yE zO5jM}ONsGb+bUZ)y5wo%`g=GX8O&3-f&BsmYOCCE0m|t9{r20OW6I9*qgyZonZ1A3 znE%{Y6pk(Vez{3pCM9N1rXpisiKe-iHz=TE5FBRq25%E_d-7zOO4L*)$aJ#9*n>)( zvEC}(js~0eOWzl$TnPXBlc2G=B%yP2&);b!x&!ol?em!3Pg=SjlP)P7=oJUUPp6>4 z8~mGMBxP(7sRIQh;rh8FMWJ@jW`c90Ncj3;Z&S+!<=wwNF?3;zlE3y=+4;LMc(ycK zql@EC;}X&;^Xzr@i18zrdgo%10rmhRGn*ZvOr;f0sBY>wO{uz(&yG35W5KZaO1UUL z;Iaw0RO2@7><0K1Pl{K$?NPTX!+lk;8fFFC< zb0mijdp7djFqG!szsnf-hp7>Qv2vlv4-j9@DC>#*Dl>nZ#80%dxs0R{kA5rb%Ho)2{oU8V z|J=c1bnx|$5jzNJk&p1Y=5r;K5bta6PYvv!+Buv!Eevd7>bRk**1nFQ0DH($M!X%- zU^2`Q!S$oCN;nPHHa-Wgnm>b*sVPsl51=j^(iq~2D-Q*R9<*e4Exd!lQ5gN7`?(6O zB?v~_{T!Y*rbC8ffK#Fxt0`M#rdV%PPR+T$GdnlfDOL7oWkH`Lok;L(gEFzv@PKBA z9K0btI41?8ox8L}I&9}d?Y`=A8ny%4NwBXEzr?wwfzVEpAM-y>q6HErKjq)x4G=Z( zM{7t46s;j|h4>l1#xY|O-I629;qUHT!}Z_xDhN$%gCnR%j1UW3Jr%2oe3KL$I^QMARz)1&ljX^eeYtC)*Rrjs9P_!VY^zO8jVc|k_8J@)vwn7oK}yI z(PE~d_5Wa0#>VJ%tkMR+NC}JQFM19b>xr`Zt_3do^VCK^8wOA7xVi`7_igzlOI)XU zKo-7c$TXPz`eTF;-wSvyg&D3O{2(EWrx`sC9kZSQkb-3XeeM+bjl8fPDcT)GRmtj(Lj-W z3v4X7eBFW9-P?eBJK)~ zPDl0=QvmisU@}CItb^(r>U?~bhXtj5oR@&OV6%$5>(^!E?vA@OduYFBFaos?R-eRzdlxOvS&}0 zd59+a#ZFHb3Zc!29tb%Avv$Pbd zHH9@3|2TrBeR(iD#2)2dM7GfV8>_4LeeVioue7i~S-4KyPK*72HFx-d^5xD1(@PzI z3$@$P`_HIXrBV`J^j&!3F~rWsQ178FL66hjCZ~)Sj7*oF_dZjtb}e4QO94wYKSKjL zz6HkdN=J-}^3#%%bVR>=XmCpL;$2gN0j0I!rmfYFu{O$1VKH9^M1zd;IO%^gv01k- zxZKcwKA42=swI0aZFhO?c}-MbAZ|n&)UW{dkQG0r4f-PUi*vrcqZGc@bar%m!kShM z7>}6%&MZ6X)SE9tqNJ^D*e8RHb!=#j>?YvC_4LV@)9CWVPDfLDFJE<%)K_@t$LZ$3 zf>m^j;=)wfh2V|z_X%>n@E}vFXW;A6$G>FF`*e!(ZBBKyxnwP)A)XGb6?I7{q_dSy zbcsV(&{GUJOua@3b*4vG=<39g`#;wKOj=BoKe$dj+I14pu5*Ox0AVA33Yssif`i{4 zUwg|esZYJdSdOBuKtnmH$W` zILbpqaCOY%iXQ{Zm@AMtO+Vp8JCFHy6K~4s;4pPPo7NvW`Q`9l1wui;LROsaaP|K` z5V>Z;;BMGQ5+%DhM3F;Wfk|X9fBG|MZCgHV(3RynOmkq{S2%7IVu1^e1&w;3O?k?t zJu2S~YSOlX7NXXabMpgMdTR6v5EP8*Nfc~ak@~sZZWWdV+@z!oW7&wl2&6 zs?pchZ8Zzr@R>BnXz@SfvkwIbE#{te#RGo!&Wh`8S<(1$#wm+!B|*aiI+3=V9FHzC zs>v()F8~iEiKaz>y4VA?lsXQ%3}k->=Yta=ArmL@3H6f|A4Eg6mY~{c6WQkZ&)u*; zreR;p6v@Z^2!9vft^)IQjI3a5?r2eGZ^_ju#NCZqQ7qxnTL*7qwNfqdn4dWXG0jkh zhI1UvNBl5z3Jil6Sk1?gSbc2GhvOOI1Wr&Ek_6&4s;Cj!K+G&(;)>Foz@+84JzV-MS&5 zp!xgH^Em2+ZkOMubUa2yPE=2z;AJ31ew$~9A#Ij@zr7OMEWs1k9cS+#SDZ3@i>N<` zvgp6OO%ZK)Sc`E`hFfi5ZPbN;uyw!Ok$1*gT8%dGZqemgjpOW3#+}7y@|o-DV~^}j za@U`KX)2L&rQP%({xuH0$|P~>DKVc1#99uYXZXJcK@IWEpf9UX{5X-w zz)Ep%Fw9hEZF zr~q|g5Q{<*bn2?w2p%Qh^+&ToOysUS{`7D4J|~qH_o`TfRpSh%6MV~+I$+14ktc9+ z2^i(o6>q(flNw+KtJ0urZ#wiO@0(d2KPYvoc!~V;C*4%qY=fgJG{3yaEQUJdF|%1#)NEDDJE z#xC<@{z^YvWi&iA-cCw*81u}2e-S!<;#G#4)~tqI8_xfyJO3QRX5TILSGN|u76^2~dD#y+N+K4Mm^byb!y{Z2-Uq?Z@|4`t{u|zj zVI|vyyPJ4^KC7dgbFN|0w2LM>W*By)HJhlA`>-c3c=;g1!=4AzZ zgI?K`fl~`L+z%G?Y2uR^`GW=R>$^?-daxi$&F4Of4;EAfY<*kGg0>)ii4mSS#;@KI zBEiLk_q~4zI1_1#ZO&8#W^xyj{Z#b=$%b={Rh?tWvbL1iG%oD2rRQzlFC7de5OfC$kZ zJbSI!7R~>pM@XCI{+k4@A$VQ+a#PuWFINCSEBt*?9dyNA%UkGo!NqqE4^nPPTXv5P z&fWVSsB%f;rRE=SFb||)uJf>DXTEcPb^jMgk5wv_r~2=g$RNa@0Z?5PZXlCl$D={^ z41puA>Phw0A}Zogc349e0e+1s6?kz{{d=}wGb8$uzOQ+)Nb~>r4__0o+Z+DQ*v1=K zTA`t9WGZIBY5%?AY{Cii!_RO3>pJaK)X){5VD4 z;5z*WqDOC$9(Cs0B4QLD@4)3W5{QflXBXF+X#h)MbeORnj1HbzqBG;|FgK}&H1qIF zSswL9jo+kq(c?n+uURgdmm=Gp;eqzoWkKGbe)iG8(ROsxAJFI*r$D$GBR zyjEL*ah}G=;#4t?8i8b+^?$U{84y5%DUSdrgDP}_3j2h^~biMU| zJKuo;RUbYj@ou_%Z6PVz&{0Y=Yh%`Y{!Xe+6VfRf`uOJ^qMu+#!!|dXjhlDlvjv|@bZIplOrIZQ?G-y^K^-pL ze=DpwEub;o9DriB9y^H}JPVs~x~&lLJ1HN~WO8-j7<@C>9LIX~9zCYvvXy<8WBXE0 zko&*Uju%Tl_g-qvkp(i85)=@&S9||3+$kQi7aKSj7ZT=39p$Nl!y!D+gdd>qcxB8B zTHuI5{|-ZMw1-M^hO`o5RZHi(LJ}A8_6!=k2ha50&4;+xKSh?Iz-Ft|K5s0ag97G7 zdM;eFKDu4g5zYyteM*&_G@;`c{-6iD-Tv&s0FffDi!9o}!hGEZEqn~FsZ|2M+_Esy z_RPKznkYP>S~yxvS*-6jZ)SC;!x>xxBq%|nS=|NEP@*eSFwNOO4`4R^g4b#2iO}T2 z+am*R`#tEEd0vAOo&p+goq#`6k_>3PCCt2lj&(kaX(~xF3Q1%?Bi)s*Bv<1(H)J^y zFpxw>FID`wl8%5h5k6)o_6`#|UT2U^PU$LBQuBez5jsytvlD;xh+n<~PnA@{UUmHa zi9r+Uqs0bxBlHSZv&R`AL$COybcdXH0^}vrRV}W2jUQ{tsBSe$CC)!SPP(9R!K>tl z97NlcM4!CHZNq?hfd1rhl(j9uSbM?YIV0I??yHOh`?Z`8{5qDUVN@{Su$%3R&xCgNL{F{p*U-ohys@Bm0ZP45X*MAq$jlib57{eu2OE3&c z%8qd3{ZoHcbABt&oGSl1h|=gWwpGlR`p;ued*k?jE*SE(RW(hb&bl8G4o?S*k{{$~ za%>lvD_<==(7Z|KouQ~{f3BwFq{xQua8my3aHuhS5oK0xVt(!fJYhr~7l24JU`uhD zJt__6nh`bEs>A`@YTlFP`SR4E{7sBKd*0o_-IH=1D-f%LQ?@>X95=Id1kYqYw}65R zwb8!du}iVe+wO*F21+*C_XX{{erv|nZN-*ADaYC*uuMID7qDv6ZT zDuV(d97pN8;50Y0m6~#?mR5aVDJ|V-bPx&9)dKB-F8VD$<^)M|@ zrvA9DA(Ez_OOw%0g)s$*sKLgWsPdXie&2=p`^Dw~8q}B{*MtLcE)G0FMtkNc6P{V) z>NNNl*TDpI`BRcr4It^pwXmk}cJ?EKNbOV7@)R7>(poN|VJ2rN?GT9-g!?$e+Y&ol zr(bJ#vI6v#RyX3mdD6hD6D$$T*>tWV1by8=f4!fT)_z;-^#`cjn}4qodvET7#vCV} z(TELg@&B16`D3Fu?-+WKf$csUti&964io+S->o~@m`Z-Wujo+~$l2}pW8x74Za;P{ zHYG}fC^IQp>}&^XfHn}kjMcrppjpV9rXlP08JzVp6jmoF(F`ELzXY^~*e4x3oH#fi z??CXp%AhROn2VOC1vIzPJj#Zjz9znvZnte8BY$0!7dn5>U_TX|bzyFALjU&pe0YKx z5A(s%T_OQ8Fxe0@EZq&x+t*Vz0MSO>$p1HtHoRxK&oDhcIjKcbUGQnVWb|6hAvOoh z9$D{X?4=(y3;N8D_J>sXBqzkJ#{4?CZ!l%m8_rzv#fF6BiRq^2Q8$qeh4ia(4mZIM0ZZ&%nDBcIpeE@q&uzBuSyy zx<8LEQF=H3xv9m8Icxz?tC+dtQDOc)RX5~9VB~A8usb#FKON=<6SY5SBC=nl@hfNs sQ7`nT!K{_9u&S-%f}a>%|HG>xraLa^CPkvRLSm>YYQBNKw*2`20oholVE_OC diff --git a/docs/html/userhtml22x.png b/docs/html/userhtml22x.png index 8c882bc24aab03bb3f9215e794b1c956837ef04a..700fc6283eb4da87a5226511dca252cc2a2ab4d8 100644 GIT binary patch delta 1256 zcmVjd2u55QV!b0>CB%lYxvgfDFJyV3YxrBXHpi zLCJN6j3elXzg4xd$D%T7URt_lkG+6vcaWmgR79?HoFAe{m9IY z&Fquf&UmJjKE@c}?sv@WPc!>Z?f1;=c}nR_^!Hc}1fID2L$hqmhB9ZcH-%2Y-KkSZ zQ{Y}o=~z`a?L`muKQXfp`rJS^_}wh0OB`iNH*AMKr+=})++5j@p!-|nH<6)1M34-_ z;b*VqsZ`L=8LS{DDYpgEMPIn6ZFNxy3m{9#b;zf5DlCIzD&M11==(JGl%5abd;JWy zlAeu?x^dJKWl#Fz`)F>TLbsBkmN6HGiVh5NOf<7|JS9|yJovq$zPFu3ym)vbA7O!u?oeO>j_wo}+jPOg9eJ!@O^z@D&7#$ax+P5{=3wZXw< z*nvC~2l!Nbha#h?2NXFw(JgjhY>67NNQtU??a}8^RE);$)5c-c^A%-Hly?2-)XbqD z+r(2jzvx}h*BDr`FvZ+lr42SHHH6kbs0_V~QGXMVBVV#(1@ave>Usb#ts^B6Is6|Q zR9j%63o(JNKp2aff|pa5!Ml{w^SOy}Z3AL0BJ|bVoDnLPgjp{17BNuG#lHRA%qrEG zNmOH2%h2O?Md39CfridCj)NT?62;5MO0fGMmI6QY}f9iq9^fcu19%{yj!^XzVK>(L25cUI3#y}Q#m~cn3<~rZ8fk7Ie76INqj&@S*e@Nie z{Dn+7KsjJJYF{*>69+Q_?NB#1BArg973@0FpSADC!d0O3Ot6Uz3X0_JCr*(duZD_4 z*j()^B|w=a*D^^36+j8z2sSLDgJ(S) zYMRpf;4(BAdB#oK|LmyuC9^*zk74Yk7evF~9TfHXt!S)Yzhmjwi!pus()T$T>bbcl zxWPOWsT9Bc?O_tS@-k=VW}rfn$mi-u4N%{{hB&kgHEx{Xg4pa|yH;Q;8NLBPLp=ri SAuY-P0000jdc_^5P*I6T!7siecNL0jJq{tX=t*(3vH=Y+{A?Mp~ZktI1e3kM+|Cd2B#q#%z zxd9}+0QZDmXI3RMtoHxxTtfTB9vq;HdiimMhv;+)&VRZ1Y=?`$3);sp>ioc?51!LJ zE{AN>ciMAF3oyjXnzSa}?~!~K9ir@;j?F?fV5TGd_#tZTX&wbtIb&|1NS9v=jC~SS z7ng{eTuR~d44zy)k{v@maG%tJbK@PpPS3c^{DL?P& zTAI_Z@qdD=j!@>a=uqmoWRsK~I^>kamAKS1RH*&rK=QGUXD%6{q1{;`=a zy?Uf9$XOE>_V?pU=eLfyN7!Pv9@EtKZLuzH?P44COn%YM(ia7L80ydLmmc2W?}AAm zB-x_(u0yfc%@7ATFaW+dPK~*l;bOiIFqUpDyMLvjNt?4qI$F-EB)8Q=yOtF#+HLXd zk*k+70JE&h(o!~S5>-5rudJ4p-6emdt?WWxlb1CEKzaEN5$Ai=p{wH|n+P5SZ^G!G zpXUx?DDML*S!PP27BBqi57BN=7(}Bg0rVaLI?FqS@7-nUKWyJe^78Vz zQ0x(N4a`nw-UR5*knjEm!?Uq`O@ZSU3|w{0L9UHGduwwGyhMlOf%gdB6dYapK0j3c zjSTA47f^HQ5Vl?No6fwhqAC;u0|PCYX@5P@2LY^^^|oIE;^wC^Q2t`AcZhtA_f_GM zvv=?#(JpA|eD(u|%2jM8t^{p9E)i)QBhKMuo+AS%Rj|*ho@$G0@J7{cD2VW`72? zEge?|K>on&X`ps5joPICeIWiyo#FL3Z5=E6wfnJ^r|cgc2yuUh-AXWv4(w-AT+_}M z7@-12v`$FM`mr_AjJ9OvvsNZ>UZ@0(@GpX?N(3Y{@+!{&`O3QTMg#m_Qq7>f0Wu9{ zbqdEB^51}So&&tc4$s>s;H^+=0Dr&snYm3Hq4A>k12U>=06tZbl==GF%B$F!E+d)MJ+=lDA&O8?I^xG9v)sXlgBildT4UN<5@3Hbr+CN$%d>*SMhdb#-U{d)ePWLp=qg12$a%0000< KMNUMnLSTZe1-{Jy diff --git a/docs/html/userhtml23x.png b/docs/html/userhtml23x.png index 30c84d3dccc6c0c3648e846fde3e50b2137e63e9..eeeb8317cc8553955e4a3b576d18d38d0695efd4 100644 GIT binary patch delta 1034 zcmV+l1oivG39|^0IDZ4>Nkl3~lMNe3k9V3}uDW=|){I-hjpcr!O=OS1I5tNqDPRpfpEu5*36(hlGRz)=lf z0OtVqYsSW7TF9yw*JqqL1ra#|@GaH1D-e+*0LQ9&Q^Q$RKYszZ6p_7}@c}Jl)s<_! zBf~v_uK@PK{Vx6k_)+4`ep;^|LrY%QDqF~^C)cDUhn=jdk6e8hAtD#F$_egw0G6y= zRoNCSYZ5JF)t#%~#o^jw(^iOOZy~GRT$2i%avi0Y2XIm9-$tJ$ zFdHR}0JJ7tZGR#w%3>Q_SKPIT8~}qGn+sY&bBM^kh@9JRaKj>U67GZBG`!6vjU2~B zNksTeiR(|Z=qz%wDy_ja$=OZXpKEMcw8YwyBlGM@+`j2OEy{ZcCPo%-xgpv20Q?Ov z0B&>d5auyAn{~-Hf6*)DWK~+jlC;YplWlNav1GcB+JBP6!>r;!RTsX!5m?TBjOIF2 zRa)CiOP;>Cezx7a`vJz0RReK6;jx%>GSe<$MOKM5;^cLR_iobuT*HpyzqzO4?)Brt z^^{eFadZFU0}9*yS)LY8R}M8oj5X`$3ug~Wrrs$htI`_w%u=l12G{3Y#o=ATVv4Zz zpE2f9M1NR!&cThUzRx`v${K>P*(~gHZhQ`|B&26}oa3<^lCpAZyh*Gvcn>!w3pZwC zuCbE2`{@F!LFI@D-4A2=saHrR(A`l%BTp(mv2N+qiVC_SKPE{|pfQG;IfE%#QK3sU zxLI=CMs(#m#$;xy!1~rfm_?jgLevzNsa~4}6Mt0D_h=Eec~Ciwbt`b5ARLr@SQ+#O zueq60eU-9eQWzf#v(3k2qAOQoRI&+g*xhqrmc+8emj^9C%NXKL~O}jxSDb3P%1JRVuPL6(#3295xG@WT0s*S zH-CnLA>OO%l?m*ZEwG4PwS6$)hY5O@6^IB+Fj~}ot{M;#CP*)0S6(YC^BD9N!Ez~E zy7(p8^x|qfDYFHIMj_DpnjB-cnrCh%9-ixUR5({wRuL?H4C&2voSzq)9?gZxp3=V^ zX3W~SmRlsts~TC^+T*dRg&Q8&B){tQ;(r?SL9G30Ej;{`|1D!JerindFUh0S=jSr3 z9ZvoN4=UP^@z-IDZ>zapgU|e9%o-y9a*SEfc!XHP#bRjA;;N2(?V|(hTZTc#Wya#- z`mMS8^@!1}_>TYO+611pBLAMIl&nH}%c><;|6acVZbRDRsrS;a00000NkvXXt^-0~ Ef?ufjMgRZ+ delta 1180 zcmV;N1Y`TN2*e4HIDZ6oNklG;Gb~5 zlf)Q41N>qH{5}1REN9HNt?JAgQLp5xte+FB;#kvF9LkK$v*{$%ckv1S$8B+=>o$Kg zMIX7@u_}!EPBeGc7>1F8y0J4`x<=_+83)T?UU)UJxcT3O4iuPU~ z`h?B?K;^M!R$DzJi@-gMVz_BOH$x%6D*f4768vcpB*5 zEtx*;N1wSd*bjI71U;S|AGCQ%y9o9u7suMkWSlxljmrpaqvQ%j=n&oOQQ^%6e4RUQ zslCCM+4d3^@3*Aiwuhu%6>E$JS$YQ#?O0Frd($4GPOLIMw3Cu?3K(oJk2S0)pO63s zS8pmG%75r|R&3~7;)(sn2MX%0{$8B)IhclyIH2l`^$FY1$-;n_zQHl63wJQk*6X9z z(QiEKDHq53949G#q|(lqw7*kk<+^1VkSEH%cWy(s1ox~@k-nI5_%OTVuYK};?br!f z<<9U#ed8S~v7L=o;^TeBN40w-GKslnF_D>#wSPUyu70<`$%OnFfp`0HvX80937KqU zGeI-R35qzK_>5n(wU3;jOFPjutHR{B<%h;H7Aq&Ty@BceI*5}@Ij(LyHl;d~d@XV7UXon>a(HENg5w0@&mH94{)EhO1DYe-@5MVZ zS#OYQPg1ZqC01g16%=ZmR@<3e1S|ebjx~I-$6%{13+H}3aEqBm7`?-eS+EER7DG;C zb>3t9k~ZtVYX{e~HMUjyJ;(ep;qKBQ`hVSV%%}%svTp;(cp-X0efQBT{P+inbgLVGI|~faWVDj=)m)> z7eJrlF0?x?IDSH$f*E5W=?#tzZljO#}yxLL)zm^bZNH$0000n+f6Q+9Nc{~nY+twcK7}NUQX_=+9m0#HnwT^r;^qcrxCFtZ~wyBTtZsbn#;14)POet)~n{_cJu>C()$d*|1z zB?)1-Yva#(AFo27xb7w057~bzStR}J+Lv*nAfFRw@69a5h)5Q6_a|0*X}J4?yMN@8 zw3Ig{OeKq%orfcI;h}v#&dvi7O;xJAJD?2`5%0+G{q@!%{qpVWv;q`vo(-Uc(PrOsclElJn0=Nw8kf=vmfL5Ljs zWza*bM^Tng9yRA8n9YuvAv5{>tJEkKVlFks!h@7) zW;88uYYwtD=#k|*OFICMZ{!Y^(=NL70VM``$2t*VO^?x`fZwrC5p#x+1U+nfN)?fg zFa(EzLw}FKt-E)H^jK77Nt6;qt-w>i4c#a+CK2{DG?V0L8-jMcmJ1hv0;9`?V;|=N zt^t;n1y4qaGr2KNL6)xW7^71sc$Ox(6g{BnDN2~th+qt3yaJX$D6dW5(IpFtGPA3Q z1b`ALq&c{k0|ini7LY|$A~l@X{4x>3^NZc zfC%71iPQ#+Fk7#}{)iUlmcV^F|A#IIkhI^Xz~K}QV{1}V6q`Kf4F(j>%%E_*=$aI9 zKGGiwLm<2wwt}Z#w$e(ILORj|5ey@0j1m3AdFDi{*Y;fYDi&gxqM4nOHrMr`4WWYPK*xKLSes1?8htEZu(J>D z_VFLN6L7PS><0v+u(q~#1mkCv`G!RHwGZA7*}_=9U}u+g_kRfNU7z)lyF_k^-g>8e zDqAS(?0Sm-nsoND2~JCV;PYX~`pA#s3wD0x)|C&Q)Ma>f@130`dLlvwG5Cy;HwX6d z2p)@UrdKtFO^@XZb}q1KG34M8=G^|=y9(%gnv(D_!Lcn}CwQwy9?O>_g8C`vJp6+M z-vpk5b4i+$MSr<9tP#i_!I+LT;3CBvi_RK4)w?wzyO1yS{bIub-PvC1I?eCs8=z#X zZ=DE^&^8LiapH3*WRKraV=tA@`2x@0ZGeR>0(T6}XX#tuDYVd<4|$GWT+ne4e+55- zCM#9-zu_C78?Xhj8R4Q`y|8774cSA_Ta33QM13iP=YPz(6xreiG<~)JuRsGU2Cz5{ zWDL%*p^ldb+fNGaVSGt3V~N<$WHo9BC(QIVdeLl0m0pLK^2NE)ESeqmZ?HE&tNI5R zK-yDy2%n|j;VQicJ=aM*U7ofm%(bS{@StF#<4cQoYd7+4pu?9N_8Z!cd$!^Wzo8q< z=FqkM)PLo9EZ|-{tR>%K%*EgjeAkJ>Tq2P0df2R#sG>wRU^_AqZpJ(GB5;Q@xg))Z zO?8sKl)*0SvTv(Y{9X_S{4oioxF_tH=hS@YwAxIBuj8p+*fNIfDUNtFO~?Vyxue? zuffo-4Wfgz4$|si0G$&pDD7w~TyV}>LjMWg#8L^GREa>S%4)(;l3Z4gzp2=!mJK=` z4N7g{oG+dL(dxBln}o_9G*9srx4 zjO-1%hoE!fslFGU>tMoSGejSri??dNxCvk3TW{GzXk*wIvR+JfEMJ-&tZ zNx#j>heW*iXMo%yugSyVrTBtfXTUzib5F@lJzy>e^ zWrMN-DjOWS$axCT*CgV-`+0HfXP+I%p(M-!l8=(sbNH^|JR6W)0Jsf#Dd~=+W$!Nm ztR;PA`v%-J*mR?XC*q_o0JySQegd!oa0MjY0l+HjYhU|vfq!atz53u%<(ZSX_YXM; zkdoS2j+1hdgFf3G`vU2NUXJPhy9c!K%tj*kaLXPVKm+{AVGK>dA z&pN4VN$-QA0uP2r%J=T0Vm^k~*5PuJXPJ2ESaa z%(00lqXNDUZX1)@Mk1fV=sAdXf#hD2-?vFXGCeyjJpT-esX)^2+IA{`J@^KOZO9A< SV7&VP0000-m|#? delta 392 zcmV;30eAkS1GfW^Ie%$ML_t(Y4YiexZ2~b2g-O)`Dn@_}+7&tiGtdpn2o)p13hf3k z0)_SWiqJ1L%!C z>DnK|Gxknq_lQ@IQ>~8&=fH02iR3!jq<^9>;AdF2BRoy^l7GZ^4o^ycwRUp4aHb~S z6_ARsR#RK)kZwx)*`EB`B!0S~Rh#YJJ@UG6rlv-DL4x=2`Q~!lt+f^Hr6JXYTsVU{ zD3LpRMU7S#E%h$wd^tQ0e zKaZ_lQ{yU5Y=80N(>D`=AO-4TgwWlis65J13$o--PCXJXoDVqabmQ~OmFl(Iz(Y8a zjnzleOA>Gzy#;X^#V*N>Gf*`?lSti=MsaD4d`pME4Ct@s{3XYpIQ_qMLgaJf{P$3F mf`BA|(Er-9t9&(iegKAT$P9_D=+6KE002ovPDHLkU;%>V_P&V# diff --git a/docs/html/userhtml26x.png b/docs/html/userhtml26x.png index ac0518e5334d02c729b33141dca5a241c615ce63..37257ff876a646ed8e8a1b0aa43aa04ea93f4500 100644 GIT binary patch delta 665 zcmV;K0%raD1;z!CIe&ObL_t(o3Eh~1Q5-Q0MZYpBfJq5Q2cZQ@fD#BDKnWS>0H`2C zI)DzCO9vzsB+!8}&m-@MW3y`?cM0Ur+^oHpWl5I5yl3vNw+u7;DCt!OU>{D$F`^_aP_|sHK@5y8CUf>F;_cyat%rLVwZ^Nq6p!<3m7zyML4P zS<-a`uM2N3DB%xfg_*{?3$Y=mEa+JQDs~GtU)+oLYmCtOZt&1C1vaQ)H95^nUYy| zEH=km=EtVuz|T`Lw>&+ z;XIbxadZgClO4%2FKJ7rXy{>z}^e+mj;%-mxL#*_O=FMb2QX%gbGaT4{f@`)Rd0GhMA#laJh{)9E4ppVOB`9ABWX ziS19ZR!#0#OTIHfz%cM1(XCSe$e=`^7K|sX-}iNWudl*;kbfuG8GihwKM=HmXF&9i z`EQ0R`fUWy&6{-U%bVH!Jc-QO7-NR zL+%wj*OZu&$5G;dQ#fbMxv8q*%gr>6vfGBEZH%Go_+wGk?!3a6uW=v!&#fj-@1F`H zZ?#TyPveHJ6Myoa>1T=GH6m-49Cyj}$Rk3p_6(AEtD z$+e`(HvFw|!V5;M86glxgIc=+%&V`GcC^vt$gXSHO~o_$YIX`j04CC2{a1iQUR9jP z27}%|gMab0u|VaZpu@&}0z$`Axagx(D;xds;Pf@WaL@)UAdRo2vh=DrrwE2;iZRpy zoSVn`@Q*dyFV=F+d0FcIiw)Df_lHxr<%xCAjPXaH?Sbw-u6<)q>rRk?&cA!msgD!I z@B+Q{h+mB-_SX9(d+fJaYpSn&vXRC;Q5~bP!AE=yyDc_nWP?}AEb%H?ln1rZKpEuC z?n?f0+kA{cAmCw)Qr558&c%GSZ92><{907*qoM6N<$g6s-+ A5C8xG diff --git a/docs/html/userhtml27x.png b/docs/html/userhtml27x.png index 57beeb012fe8eb0aad9bbcff35b056e48c37cc33..bd9e8f4f7876c017e24b2521f5ac25168e7fefba 100644 GIT binary patch delta 1698 zcmV;T23`4$55^6UIDZCsNkl9fmr&)+PCs-CIpm8$M+X{!1_Revv3^_!}mFP-y8M~m}T z`!S)aCn9n>G?v>S+f9*DdMzUVptnBOeSfnQDWzAcN_U9b2`Qy($i-kp8{?j(lzth0cMRE%gqI@nuZTQjzMW4P1IDt~@oO7^#eVd`HY`Gm z^H%#>Al&*oHGdeV2p)LqJHw%6J2*TXvy*ghgL!g;69N*6cP|gV+Pj4o=dJd&aN2Q> zFH%ZxIujjQwqpUO@QTTHK4IL_Zg4`d0k`IKF)w+m(c-+-zA|udqwv`h{brPLB-xGv z%g;{8WkxxSRR$bh9k}$Pi5x^EeN*?ivKnj<%rd{dJRaR5CbB07P@@&_Trq~0bX zFW^~3o_~h6Pa-+dE6-VJn&X#+Jqmp(%aJg8A2xvTAl}=UrqHC|07_;|v2COe+G$QSxFIkRV5oVVIDr_AYVv9-o|e7}0~ zv47uujQDDMeG=Q^d-i=_Jz<={O;f%&Uwpr|o`0A;=-FABNF|4wy*Iy@G&$F zIEo4|%Ng>Rlc_jwwFf?{{sSj9axV59N>+?aF9Yl-dteoE%R%bcDplF#QF zzJIQ%CqA}0ob%jZ3t?x9_okzDDn>MAK-LdMtqj51@nQ^=H_uM6N=Y#?>$AjQsy^oB zDw%zOy;mc&lB3su*Y9kdsrQ`qS;841BOF$aptBcvcKUNV_#Y=};ckh1$lg1#dwbTA zZ5KW1@;Ae6S*xvs)3&Q8#z|$!aSqoR&wpWlrA0*kvsVY36%)HwTuQ&U{!oDD2cG2f z`S0*OAcsM?*&HVJu^5#TY$$UVmgJ*I*Q$zg?SuEjd|&|9*{dFbrJS`mZ?)s3UJrAn z0cPb>RmQ>7$NgW5mTXJcJS$%hd+G_f_N)%N!9WfZ4NJYW$3&5ZR0M|KRi>1W#D9>2 zffop-Hh>eR8gn6#r7BM&%iN;v2Q@+syzaBP{<~s^kBpfT#LKZ^qUy&j^HB8v!)G&; zd8`ko1iz+sK{bi%G)5@k89%ukvw!0NtMpI>B1BjfEj}Eqxz(q^*@$yPtG)0WVf;1c zc9;x+Q=&LB-|qkn>8J2HVkFzrHMjaXf_#(MRZpm}Qn6Ll-Vd;V3hcET%sV@NGpy9f z0P%EiT$<3aHqY}pv;y`#%7_P#rxqK4fjGQR&x<+m6#Owh8-lTv4YLg$hJSla*v4>A zi12J}zy}IffT7!__*$282BtV~wZp+^Zj0f4FP6v$vaH7Uu!PK5>_ZDvGA~{8Qu(H^ zSWnFPqC@EA?^A;XaN>SYONw}y&h+%|-Yb z(~k;v+4x~Gm?5mg=oac-gR0J5@7T})muNeeGQS&`uJa>!<=`Ebo_|mjIK77_F$*g| zi7eqOG3;H%%w4DreY`bY`EKD?d@z=YJk3G0l}~MQ7_LU-+yGcy$Csm{cEg4N2w@f# zT5d5&38>By`i^p7FEaU+ApWrhV`0&r)Dh3~BhTdVv|&Sp-#g<@Uu?fC;2$2Xc$IUh sW)8Dq!%|pSL(+VwLK}EHb*f_0|0(x`IM!17UjP6A07*qoM6N<$g20_oGynhq delta 1898 zcmV-w2bK874UG?wIDZE}Nkl0C<-HX29KL02$yJftrClBe;hd zpb?yy0cL=b0c3z|2HgMGbT!qpT{E-GEGN44UOU~@)%Dd?{l&U@dwaXFhP4MC;(^W0 z%|nf!DVDxGgwKyQR(8FWPM>AhmFc!+m9bwjIaaw;Svb=)W`9TJOyx%9bgax@old@L z`aHrvroW`azT_QrljTRy?V)~%fI=P3j=vX>%W`0ER-mQ@PF^&-Fd(&@urZoSeThjmgSG+aHbNOwD8g&&B(V zaJ0d<~qA z%2D*I5Uf@3sci|8oTD}qSxtc2I0l}@f!=su7Y#akT}YUekc?NMUHbjlZe4WPcGksD z(SJq*sb6Ag4#kgCnLiLfyV zQ_*M)t6ZhC8QAyL7#v#Z(MP{%pLk=ZhJVF7U8Wi1v&iQWo|PScVxyJr?lPz!x(f)< z613hP;lnvoU!>b2ankEQuS>Ts`3uL1#-Ll3$eyqIQp{|t0{nUR+4whq?StWVW(60W zOQHJ&K?lGCcFCvxPr}^)!ySg`8oa}JntVoOSLy0}&$&_4ID!Y`XQ~ri%fJ`uQh(hC zI!v&$@AU}su|A7@9^qM}-b=sgu7p4bgZjxg@L?hK^Y2*nRwLcU+B~RVpY$c;guC`& z3~pz^IESF^KJ|k4fq@~F19RZw*jDgyG4u}vtqEX@*oyTUOI|=IYV2APIC1oeU%3{LWDQaZCNsQeHvlpvcKI>=OJnwHHLo z&uUIppa&RZJ6A$voYljxoI`^03dA-%DLBVh{xBif9MRAxx~|V?S3b|YB7c1GugT;q z-uN$+ZM0bjn(Pu~Arn5$x6$cKBK_vtzB;GdSeqj{WA5}bRP|N=LUAJcLzTBGTTMH4|K@ABXt2dB&%s6%=_q!N>~kcP%Es0v=MM>08C116V02 z@JBw$QYK6|CbWCp6RHh)Vt@FZ5XZBr0e=v11te!z1)rL&V|@mXmd_(RKA4j&9CtmN zi!u7aKj|vHO9Ft*059+nv~(V8^H}{_vPZ`Wb0P-B5OMi;ib5g?wc@mu z`nbcqQ%QFNs5FRk9i4MqE&Xd)d%$^sBMiaxlz}S&e`4T>%VYhof}9ULZl%qG|JXvZ zU^Mp%fb%>T#OC&tDAxb1J&--1fFgc)+9oSX}$b6W$FaQ!f9*>vYY4~O|JQ5(V*nPVI7L>&p!-frO!OWh7 z5bk1(uh(kxlwfARhYnhj4xJb@DL8-fXqn$#cGOx!Rqa+6S-F% z+l&8iC}Q3>K%U+bW~yuzD4Ec6;@Mb%g9MDD&O#3D`!3iUed`a|5V9X)ZCN}^&?f46 z2;q~;9^r*;-J;$H)q9|g*iL9FX6XSQv9^|Rn4(AfTYm^hbu<(@a9yErHlrCuovxnD zjEKzI+v~7jz3^x+!d-=s^aGxn88-Ay^=MhS>6gxlru6YWjc8xu{XeDs7~_>}@v}Hk zWvA#ngzzRwF#%<^9G=~RbN>5K4r1FEML)<3Prdyv7V?i7?*gTzzYxN^5YomlKU4Y} zW1?h~>3`2i^S$%?rru-Svo<9-oJeRRc;^9QeN!1tkxqPavURjD5VMxMW{Uy)UM>blazu9^! zblchjT_$3evMEqy;>;s`RCHb*MWmDCuo;7k1bhBP4ldkxY?Rp>w zaetU9m=xp86F@1K|6a;J1(#Rd0coHptP)JmiKf{IiZaoK+V=`^%6AkY^UPG1`ET`E zIk%U!S1D-_#oKy^-WS9`Z_Fl{AhbP4PpZ=rjvz$?1EPw+qqB6cOc9^o+K{ zTAryA(Jx1#qElW*xHMoHYf#VY?{nS5aMi$jeaKkI%2-&kF8Qkn(Cdrmn&SE4edcIC zM^AdM(xoqb+J2`Z?T#{J+c_rnJ0S3g9JDuTkHR@BN0c4%IAy0pIatlqZu?f=sek*q z{G$yI&VDo44;76KqMQ>^4YBd3#X-F(VS8m`hU*!q*GBtXhGdMRhHmSxabN6fX6vcY zc{RgocB-N*GLGrWW#3UY%U2RQ2j2*RPmXHvLk?uRGX&Er{H9X z$iA4_?1MdJq{vpW%sL8?htE#D@fai7v~-RDX6ngx-DZ<=n%eCnkj=`eadO-2quT2o=1bv zipB{OAGSX9^)MAV2a)4AS=uuR?~9Y^lAyD|N47CWBFL&xbcIMh{zSkg*tqOddP-#W zm4O_Ob|HN=W)Ypb5T?~P?~Im#V%9wLbAyEEKz88FyMV#{z;eTS-~-qg zBLQ(IWWU8U*vBH8&c^0IN5kKmkusPm+r6vmFr&^{CZ00b zYcbP*{dVq*z!ZI_>1IFel{TCNSemt)cs>q5yTr^_MHnn3Fd*|ESGZ|P80q; zpZ`yh?r|(nc(7ODd}{Ay_$fhISjL(5znc?nSQT1s--qSeD6|5ql3o7-)@ZtW#pT57 P00000NkvXXu0mjf_XBA} delta 2013 zcmV<32O{{94*w63IDZGRNkl7*=zt_0m<}LR5a<97734S_ z7&?gQ06HMhf!%?@9gus4o+8a>cXoEx*k*N(tj~LTe?4imKKT0e>z#esTVN>*92^`x zhSnu<^?oUu*NG1Ly+EF~=yx7+-Hi^wW9k`ozpKJ1`*tG>5Pv&IEO<0`18oz7tUaW@ zqkf^DZY1~y$msSr=qK`vWn~iqe97cn>Zbti+nZql9D3j28IpDtYzTkLxntt!`CJ;? z#2{-u{T^lB@8-%dah*E83EZ-Y@`Rb>RvyEl@GWh!5+)?-$kY`(I$ zVs;FgFrelM@qcfR>Vgw&)^wGFpR(TRZlZ_G?Rpn4sLq~LwtyLUEcva1Eos1K1lQ{Y z2yPRD2DK;BRotdb7~bCg-$b#inA_CE-d;)k`&84o0bY4*}GJos=?aMB1g?E#h}_*scaCoy3@5mg)?d=aLmZwy#vSZ)gx%7Y!6&Lo=~-q zFh82t(&^Iej-lbb{c(4G$uab`qsb#|r5*-< zjKiLzc2Y*FHcxbHsoLOO(215geeos6b-M=k41ajx;W~=;Nq(k%k>|uIcB8L;CmlZG z6dh|=^noXzR%7|gwD^Pm>=M06v`J~@<+#9C(HeuX-YnG%!ZG|g^O;9+nle+h z!GB~siAx>wg>+J2*RnBZqAGzi1qQyp8RC)XWO(v9efgG6LtkGsKFv7ITt^Q5TkIl? zqCe~uzvl7W7<5xM$%Jv}Sq^|&uIPg|EB;t#aygCc*i$Ggs=*q;@_Y!F6i$RNNC>rQ z%lbx1#wq))qLXJqBamj4wUy616^s5+7i(p?iEge5BR*FyPFG?B%8*j3U3HC;hP9kjVr|)Dj6Hum5bWQ8@P1~PKvA8L@Lx=&$6OT777A+vBD*VX$SEFf0GtkQyteRK7Ymo zdg`XE%4qP+scQ`y{Gsm=FTdbfGAnay?UdJ*%r9L^y7~l*0aSLm_*m=l==1o zIQT~kp7eS$2&cs6Xf`7A`l0Aae~B1uPU=h(F~*Zt7si@N@t1XLHjxT7My-hTvK+7! zh?!6@?csF=oHB;?(6Kz|pegjSjDI>(nYW@dm0#u=-!=8M)TkP@lk!o;X--(io=#1c z%{+Rx3K_v)1zRacMMJ)WSH-}|a!9Y)BmrmMKx5bjvUAir!OV`qF&UumiPE!MSC)DV ze$Q^q4aN=g>UMIMKNd6E2Rpn{m=V-EUNo8?@qmkK{c{zbFiXb&HW>H|+<$-9IHeju z7GI>g&y?j?aPS^eUr^2XlM1@{BIRq9L9vsfJk;{c3{PLv(o2^N$LtxxuvVLft;6;K zHVGu!;3fZLfUjBgYui-VWe~1gXzL5*h*J2*#7L{WKaaqiZur4Nx|Z6{@|_ifiPE<} zg-_PQm@2VT;gxRId2;-fg@4~Vz8YV~CUWXiW?TY+IRn9G!c=5P$3T2SP}DzYiKjzL z0oQ*;G0?jKQE+lcgC?V9{u;J8ggFD_FL9-rVFu10&P^>jhz)eJ$1cim5bbVkZBjN8 z{grk?MuR}RN-LF0Uj3u}FES{aWM5z<2m?Sn2GgEBe=+B6B>6SdlYd{?r1Zy%Yveyv z)LA;IWF55A(9bE`#GrCUB9>Rg>0;&E=q_MP8x1WWff3FTF$OK|G|b@|%DT0dsZAd430 zng|S}&_4wryy|2VvVUvO!0Xz{Kv4gbm1(u1bPf7OXLvdhOlD-Sn?JetCE9)Y{uc0RW?>m)vltLosjuU>ea z`&kb^JyIgLkyh8a>ulg3SvG>p4dbIz+Dv(DKfdq$R`Ky-<7@bJvMYM|_@w|1=G(qLjM3sT6UVj&n&lyb{(kxXjzlg~D z+{U%jEL9%2MdY8nri~`5JnlA2l}{5@9ydb>AM=_vny4DZX`)&{Rq?0(U1-)wXrfw% z$nV0Nsp9pp{lP%yHNSVvX; zk~RtRP!v> zP+5&t>VIFo{Eq$*zRuhEOPYl!c2tzAdS4_)rHhen6V(K3YN%|gV)}84`B%A^knwj$ zMUe@@xg<(+)bXw2;IZ__B(a>>c4Dq%;q;O1gg@~~MBWVNXuANMRKn_?IQq~eCixAH z5iuX+s<5ht3g_BJGfwhtRedq+lakmy!=dLMK!2Qq2Tt`^JA9=3=7aG)LP9L=+&@-T ztkG)*i`{GTRL>LP^8L`)`UZIg&$HMGk}-a0+gYCI(hyTWf{Ja-YBArZOB|O_RgR5?SOaF+ z&%Y#CYVU_EI1g3DdU&6`R@I-qA4*`>g6Xxp2OM8DKG4!lUAgTNM*XP}0!vD(xot|t z{Gr15c}$6xO{ER0_7_4KS}9CwRJ4tj&VOQ!9ruXarZVX{=gLU7>!%NT)+1;Cxi)s} z4`~~-RK0tb+s=<;7U_w9KILz>vrTK4%wKwN>nMDM-uAK2Hd?t*UYO=xrh>8GF0t)I zQTcSVhp0H_m`f7U>$ZMhY{x#f-pHGtaY&E1=6(9y->CT;!5za{EGvxjO6~(|#(%4~ zFgxTZ)1OWAn`@|SI{M2768%~Po6?}0=8WFG%Wao{%kj0C8{rDfF@IBS%>4Q^ia*ga z`8EZPF3kGxAL*}I?afnxTYd9Si(T8;G)KSLeTj?OHh8g=e9?1dhV8u1ZK~{?Z;s*L z`Cn@j132F{^*83JY-Hx-IR3yv{Kf;9S-BESeCtW8yI>Q>OY%ECO`tOx= z(7}#JKX<{5J&q3q3Eaa^3+>)}ySUdkVz6Uc7<+Q)`N)z@1P$1lW4!ONp2}-^hIK<@ zkbkl5F{HKnnl4JRPGxz+qr)iHM0AH|tcEHF8(^eiew(@hr{aS+d1Q>fr+>d0xJ+Sj z@aDDy6B5THz+f4syPr9jNV#r)|4u8?8)1v6l8(hWW}*2kFoQ#8;FkHxlVCNu_Nta? z%!u;V7JTpNdCUTc7{mj--rBo#<98fd$b4=DyP2nXkzBX2ZjPu+bqCs2E zA)%YDZC)ZBjMa!L@%1_#iqrlSKF!&lmWEg$1YDASp zt+$4~F#v&N<)}QsNtaAc#Viz&)+yBn#jtB6&?36@Ang+Av|oj&k}w|-%^Zt0RA4us zXsOuRqgg$os8!L|&XB@jZINX;Qc8=Y8TGTqMp`e&GsGge$;wB^+JE7gKRv~w*l}=% zDLZ0{A^ieNQ_cT|W!cjr#6_wQRdVB*)~I#rDkuivZ+Q}T4y*|Rp!RH4hg&qozOf=? zf{efzX(j3N0ZKXC*xdoWANQ*U8CDR7yN)?WJ zf=6{z)e^m8!^SqM5`UE~w6Z4Y(^q2$suGofwYro5wT!S^vc?J2C@TD_>lV(Z;H0}F z-#CF9MdiX7S}UCNW~mBLqo`asL%#}3<dr@pkU0cL=<*D6L z0j0cqS*j|jSw>VI0pg^ORe?4(QnRRBNP9(n)l#jfk`#iAt5Jw*MP*>6d>U*NqIOYX fr4I2V@Hz1xbPVq4f<66n00000NkvXXu0mjf0JiCa delta 2379 zcmV-R3AFa_59$(-IDZKoNklhg5QcZR2!Pv*;Fb({L;z&4Km-RFKoP+e zmjPsO2N}Rcz#{|5fMqk-`#+7^NVaUtGcwE|b(O|?wAxa4tGgw6j<2q+jvCB!;Q09X zsjyzmW7dZu9XbwYeJw1s{3HkNtMxZA@NyEXH6EQb9%OUiw0|j79I_C61_VKt@7(}6oeYnCdHSIzxp^65-L`j<@H z>d~5|s$K=kGoUHe3{SQ5BcA#&gVve@4m70-m@$%N=l5}>tyL#D(3C2{lwQ>>l{-r% z%k^35YQ^-p_kaF_D>kK?7ujtE>EX^Vy#M3MhO3+7C5b*tDC+~IS4!a~;F-z?4KU-N zC$RS5WsyFLM<_rCs(Y=%xvzMqcJ2jt0?~xtb0uQ9`=a@7CxV4KYh2tWFfQOs>5IlT zt@uJXYAMYF&&1a${;2J<5-XDQG=;NGooHbR-)_@)jekBG%gd0i%EXsZUD!^(xj%7e z0;W#Vaa{&4w4%Y?86*^}z!%ZOD(nM1?#=C`2K8`iU`?r}xN~$gdXg^T6K92P0GC(< zMEi*dEmA589^rpfViU2b@64bd__-Py?r<8^p}D^i9*A>qm99_m$m5tBOuEXc{w3Qq z7IuyzxqqEZ)fZ*_Hq~G4xW5-uIL%UtLlcnm4Ha&XT_A9d#n@zj$}by=T>^YI3m0P} z_j9|DLWLH3fZmj9z>-ac7>Jo4Ww3P*KL7aY*HyTr+aE+ zS30}XZAvxHeooTE^QloxF&aT|-egcLzA}PyPk*n-EQF*|%=P!}D9%7BMVn0sv6Wjq zD%){NlC1ki-FJE<3~u8e_)Y|=UzOgdt~;Dl8-5q_$CmncC7gI<4u!+yINPAV1L6E_ z`2%@Q?feE)(EOfg-UtQ$+*F>pBjU@T(+(A)&-e)FkpqCmL>AFPNZ`E@+CAa9Mc~Jb zvVSmc;F0REM#P8jl&2n9KJ$5GOFMMZppG=Sa~3@rAm=3KzCJpWP(ASiI9>3nJoqSM z7x1f0zYvFQC==;$_C2rhVr`;%&5ap+B@5blt}FRL;Uf>Y(ip?@v_FUKs$=2uu1op)L2Ku2+_p){v#XXZ7_g>iVIlghh$QrZTd#&3_gPI1A6_ zC43a73%->fcqG+jTd@-%TM*7brp`G2QhT)O{EA=zwv9#O5l9I1S#4__zNlcn{|!|( zY4L=Z5+2+YUoNvdW*~Zc@ESfl(EEsQ0X7N9@aqiTC&P2SzW+EYr8ZpQ`|s78oM=v2 zu`~7K&e6%6Y}&13Kl zRnDl@kIfEl^UshkIel|+{5z0?2_F8#a~3Zy%q`$tJoPc70!=Iy*D#?>(GBKxL-m9; z1JJvWGxO&Cx4J-GgJcX zBai<4)t-I)AzT>Fc0PN%-G5)dELtdNbYkAe`H^Mjri3MC@njv5gomH%^Xq~H!Ags^ z*f!N6*46@W1eH=wBWtOHC=|(5uKvhcR z=Z&?({Y4(WHzw_ zLafzNxhooPp%7Q`cuKTUCaz+*cYLT+4b-YsW|hnZvhz#b1#YbfxICrBUrlcP!aeF( zt5R9~gnxws_{)>kZEDK>&VDsn?p6CisY<1i0-061<6R(udvWkFRnAwF#ozri30A38 zp5a)gGoS42sd1rYIR>V)NZNJAila> z%Tkro&;z1Wvr^d(kRaV+tOF9={tmTfr7}fYYed(#zfxa$WYsEFaD-sWwKA*gAz!Uh xc@bl!%njBf^_6E@yHa7L?C)DEv$`Jg{{x4Dmr_1t(IDZ4GNkljNi;I9uc-eH{AW$%nr=#LKfR* zwrOTR^~~Sf5wX=hLO0%YH?JVwX+E9Hgmg~adNxL&$6TK zzHA%%x0$iIy`lDF>{Rxh<~|UkVu5|-Js;-xV}G-D`iPU7J8Jw{CI}B>xm!*m(vNdn zv8WayiV*sjj$i^jJ!;{sp?37xG5YL9MhYzKM0yGs?E@z?@B7B8-)ZfML*>EN{>AZL zIq^+7h<^xXRopF7OimCTM8q;07Nl_BIp7CMABmG{To z#pF7kf%#-w_cGcLsDBR3g93z>%07*qoM6N<$g8P-_rT_o{ delta 1088 zcmV-G1i$1JeOaD#(xy zOb3K?03A4{1Ka`N4xINXT8&P3wsq&jA%is|B(3&G`?Ok}On0$Zbatg0fo`{ZshM-t z2hVY+c3<_EAJab+h6jF})%3HtJ}%R-0R3X_YZLC&e-!O1p?}#htLOz&B|1d>>&k|>e z@{Z__F};E>p04b#1YHVmBsE5|iE=hd^|jLS|400)CEJC%(dt)%?lp6fG&MGq?8`Y48=6wGwon$1&K{SmWrB?d!ry z|Lx!i^JzYBx}<(AC`bADzfgVh^f_&6%OM?puNVl+`i;+ZkWqohfjL(>)K@0=fT6r0)Cm# zBb%?=N{7>}7XMT1NYi5joE-+~lh}yHfPQH-^=(lFKtwnBcq1^tPKS-n=}U%aC@eNd zE|*8T0e`~<7BVV;?gBBt3=R!+3_ZmLGzMUp_-!HPD@89n zo;Pfo#%x%vh!KA&I?Q%kt!#wAK&J|)Hbnj=cBCiH5-5-HU_+n{`uam5weU?Imoo-h zMEn>)@Mp~A`_L)wwWi!RW#`+N!Ll~PJ0ej?;^@z~{nM7lYaTGBKWjGBB7IO1hl;^p&-our_cBNOr@MDZYtJ~re<^Gm;L zHD6hzgNwb^KL;umMMUW&fLIq1PO7^Rjd2L7)ED^b7U={}aI%*?IC8q2KMwS|O1HtU zg>!eLm-{wMw{Ane5Tpd>50t-K&|CL>O9}-r5xysYd6jeONoSn+&!UmaJ3apFN;#_t z0P-yw1-PX%?mP#$qiaCBD=i@)pn3CfIRiL3oUft6o%aXO93z|HQV;b20000|6aI)Dy%RB+G%a1|VM07C^gKOF!S z@aOu+&el!2%x1-pW<9%wSXi7wfipZ6e(mQ&g zs*gnEQ%dPet6m-B9KrGV?f9n*8goTNt`^6vw_8vjv)&HQkP!YL=#&utAn243{vha- z5dI+Ol;1n;cUP$DO%eH1M6Rcl9;oVl5xFBG|BA@Fl+r(4-H;#?mcI|~rj(wF$W2wf zpv$(6LSTQ;AtC%L$E?0vc7_CpngEf!|HaC0L9m4o{tlo3NL8;R_#!`qoe-cd22$0x z78V36Cu07UQwTSefZvPlZ9I(}Kp%wV?}vLL@?l?hL-6|{EPp>>`L`zb%+)6}xRFwN z+|O<4m$3XDU`apX{ZY$r`2doEy#KI+TM~$b@OJ_h{mV9&p8YAMmm-42=QZenaQi#J zru*h8zk6Uh@Qpoff|U?%e~_`5{{vot7Xk(ACWJq5Sp4I=sy-2szkO#gA95Lls3Prqbufn z)i<{PtM<+180T0GCP&PnAkm7@UwY_tk1R55i#*;bCTcNSb4x_Nrj(wSH&ay^>3L=o zxiYy8YxvnZek6MkdTPIYU+#dt|HNKt!~XlW{h5-{scmQ%e{m1i_zak^zZJZJWYf2OL08%%_*t&a3!s+c zLeUPYie|u#C@a8l`?2U*_&I@f63c(9I$^%(eQknZO_(5I=>cneu+TdbWHAeJA&XRG z4#rPakxyzma{k6vUPf7<3|>>n<%%G?288x@@q?@2E<^ch?#Q=tU;Y1-HuYzg-nRbH z$M}nTF!>qS_gva6Nd#-d_1^cnUGLA8Mf-dX(fvF&lhgZ6Sdc@KGU12D*vFK~Aye9n zvXtAOQqs{A2Eh!>g5EP%{X$g#Ms&yQLB_rA?5$v_f?{O?6R z6m?*+2Rw=4vAt=xt&>AwN}C{9E5zOwYZGr{X+K-ceHo_6B8fg^aVW($tB&XKS8+1x z3+hBWa-MU0IpGII^${)y2c{!KJ-8kGSxSkq#8IZSITh?$S=?f9ky`yMl_i3B7|M~8 z%+zyCZP$uppKlv2uVc!wtfFy|MNHj*7UTWHv$l%~vdFMJ$3@AwBDm%?Iga(8M7}?V z*?7A_A7cLcgEdnohfHa63fN6yShFs^;O3w`oqBsJjGpl2jLnd%k6CX!_5OOhHTCi9 z?PF1&x8COaV;@;|EE^B=w#vl%v-LL5>~mFpuc{0gyO1=&vhb{yoQr)_m1nLTQ5$7K z8w@bN$GB6O*!x@C48q?Tgfm+`m^s)FW%6YYE?Ll)BGE?X+Q+=-fDw3Qmb5vh1ZqPw z2gdGecmC3N%-;>>Qq8_`W0*y9D}AhhPg=ECKb?uHpHDdWF81;nNae{iZBPIu#7UUYGn=VtJ~esA#UT>dNi6n>Sly zj$s-(TM<4M&bA6LBf zTz5bK%VE82X_roV**uDR)t%cmj!Rz&ao()(N%nZ(JuN_RSutmh$Hn(H=ksSpZ^Dl? zZt4Qt=mBH;CGL7I9rsh5QuYVWCg(J8e+fgX_qpCR{2bRk2@IbmEXigd5n#JRMOxxVW+_-pP$3!fygTk3oF*z;PU*MkC9#3}H z%X<>+rbpp#soAmke~egH5^xO3w6I1~aaVZAX`m2lVW_v~Fj66z!MkXXeA@-zVLfWY zJ+Jp2!eYw$)acFa=ZkIPirW|~v!lkWn?FD;70m{C9p=Kjf}y^k9{Tc{1KQswGj7&; z4Gj0)CX{g>hW;K1qa(v{*G_N3k5wcto`t#C4DFRkz7l_cmMrnb((~sJKY~9fHp@k1 z*VwMt6M$>T{$t@?=>eWbi*SN)ZKv~#m{kB-^&E+Y%GlXvA$gg^yc=%8=HC8 z6<*qm0-K`%Cqga767>{If+DSlY*$T>!q4Q54U5?cxWz~LIWD{(4>`^O3+K4*7NZ4c zLsxD;@KCBM3oHB-udf*kS(>`aD}r1|clV|{;Rl<+MZ%6Hj7x3rS$c=<*6!=nb4+d5 qiesN|8!fM6@3CxR&{=sZt@s})ZER3Efa{_F0000`E!wbOTvE+**=a^ zAiY9?9HuS#jle7Y zp+f7zkjlMRV2e;;;~c0yfQ=6@#~@g#tKG6+i)l>`6qxzxA2-q$|8u8CHIt6$;=$-2xOa$dD-E=2Kz! zx$pqVd`f(CpiIr)2nVVKu#thxL5BAMe8L*gySpru18>D~xE!bo;Bd!gjh=BI;6lRx zOwY45I*}yOHd0>F$0Vuc7N@MN1}-9UiT{k!=0^9?#@s$0Onom)Y@?h4kQ|LZ zRvnN>j&za9yTWZBcX5hyTj9IXUh3I^aB%|kLV=IJ9Jn(2gOHPe^9RA_p17qs0(|~6 z|D+7*4}AQq-d_cElFC{6`n=?I<-<1kAXz}*-zy*&e1PR-Y3x&Vhve}boW7lpdfu`& z^!`i1$!Cy8e?r;jI`}4+C#XJsz*HIo z+-0}Ejj315Rt{w2faWfOAFlY5O?@v%yX2A8AE5_m_pM|g{GQ{B(jF_=kHk_QnRT)) z{8$6r^*DD5Kre3;06rB(*a5Zx&8;z2?){j=mG*y9%Y}99U3EvBNmjQ$D+ku#fEOlK ze1KJXvfy8$aDRv7kzahqS&Q%^%<=Iq>7haZDNacG`Bs6Gp1(;nQy*KxnH0+CHCa>! zv*g#VGUTO}8^V(_UwxuuHB=sqngc^~P;&|aWHlxwKA)`$;kH=-XoM%GE$pc@v|FRq^xj#{%G)!w zoRzQ7eygkU=D2vHxsUlh%L&*gKT02zN0q@6d2jPZ?<}exEzVPblK9H|DKP<;KF}r& zNK-%vW{BHMD2|4s11-(cRPZmU<>GoZUOBJ@4oF`3rh>Z^1XkRy3OM1$K#DvfpU7i# zZCmXi*jOO{Dj48YM+t<0tY04QqXhs}#v}mTQJ|g>(l{49@)t_uoUs>D%k?7IV}|eR zg=lJ&n$pZ;Y93Vl9;5%wmhdn*SXdV^NvUn5mW%OEiSM(mq5kFUOA0QP;pXEaKa48e zZXp$Zp2=GS)fxea*%HIsQRx<%`Iy5Vo{%AVSRA{Bs`!-+?iX`afrUFBXV(=`3!La?hN zE9Y?}wlw{$g7|RaOD|JNXHgh5&!-~KQBOR@(brr&QXC#khT^sWo`JJ9XBWq9!n2y_ z9TPS5C3ID(Y4bRMH+%N#Z(Y#1ebTt$3uvy*THc{N)yE`XTxx%dQvhPjV};MD3>sMu zzk-?nKUH_>70`9hD;`z`!2O`7(umVOeh9qJD5R|ctKVzUiWeWas{pNYz74^e0Pmo~ zTV;+OPLG~~@dR)rlFMXlTK57MI-{6`i~9i7E^R#3q<=}!`!%Iu%F;gD96tIWYzjmp z$_?82V9jWU|KS)+p&5$;MN$Nmr~oj_S)FqOLKylL+=}Z3GhSA!j>xF*cPz#ll<>&Yf2Y4P^KF)Lx zd14LLXEz5*RRB3PJS+b{9jh!(IF4%&gna=*(K;4GDZ~O1$LDlTxa`QYg+}c{dp1SW zLVg)x4CB)rcpj&9R2@uelcTJkzD3W&hI9-0vTM!6{s>*P#JGxeA}Fv)3-jQ)zQSkCgE>+BYIGLC|QI*$MG)sa%>}n=J267 z1)xcS5X2t)m6c<|C9RT?R7@-GX%C&Q(+~x< zG%rx3d|7DLTadfW*Alk?jO05FHiA$Th-T*`(*)ngzy3O-og-r~%5?JLaAdT)?v|;^J}tf5zZ#mPp%^2&tZ-yw>P(-c;fsC>US~bKRhdL3jom?WQ5?k z!XNv0^0x|pMb2tAZ-`g;MvCK2kqUrGi>8+Xz=_AtBZpc)-v1H*Nu7+4+D2-*99`RI uTSNWL*_T0FngduwT;-0-4)ycs^Zy5N2w*6*UL;un00001uy#H4buxDiBL{Q4GJ0x0000DNk~Le0001&0000l2nGNE0PY4@VUZyhe+7j} zL_t(|0qvZNkrX!&K)YQI07C|l0cSF}%K%IUM+Qd`f$N+Bb_CRs0fYc#02vrAgLXa9 z8tb&0ozL#v-R|_O+M1O{qxq<}yVb2(H8WWgs(LCS56tX6_pPeuBJ#=1zU=Ec+{QT$ zfnN^cBKOP#5qVhjS(o=g-Dh21e+6q4uHcpk;R+TbgezE#5UyY`eqF5R14dP!ipWb5 zd2D7cRQ0)tJQIh{&JwyV)Ile?V1zd*6gS%;HSkAA2$L@V(#@3ud&xGqW|b<^<%fxFKA{ z#eNl$M_iQW)Yvze%pKF_u5Ps8y7&&1zgKm;vgkQ{oXB}H@j$31he8I%nlKFG{j5mb zGdn&Jk*{X$9Gv$lzFC`gk(79}u*BE)n^W$G)heT`4!@Ur&?fa*ad9nVFTp zIs_|gq^xzC$z1PaYnvp}J>WCAdY=NhzI@K3rJ<>2o4*lbjn+b-0L}QWN`Zay2EqAj0oF10?S#@l4`@X@{8|OT~>MJ*462cvabF< zrcHg!(q+Ft?(trIhVI)}WiW6zOw-VD+xG7|)_#0)k@9(bf8|JiwwTRivP*{^i5y}~ zGIvay4VE;EW~Lh_JOEq+oVh8xR8?G(eas4HjdiiK=!ORM3B@blk3Y`mBuSq;Y3ErI z^VWYkwKKQTF1Kgf*H_slzHhe>ea~kf4t?MFj2Ua!pjY|&Tw{I6Wwto`OgPeauOJZE z9prU5b7>ane`S0!j!v^j>gtdCZCl&a$3%JaISxIOU4%?z)L1k2jANQLay?mS+44^hm}hErpGyc=8-BTCUd}iw+|+kLTlcAE6K1guzQ*db)`UfpqX^B_ z2F*pff9t|Oyqk!r2qqAktD^9?m@TE5m;d`9&V4LtoOQ+hAAU=hFP4P3Mq554*&19nwMxp zb8Ui#Dt#=1iaRf-_8I5*aOl?!v&4T~V-b3&P1|HV(#`t`leOO=6K1i(6W3)JVkcZk z5zy2pDVB!0JW9eWE{F$Yd7qCw9dR=Jx7BE`&|J%sR&M$#&P=xe^VkXDS`ah)ZNcvE qCOvBtt}evjP@elL|33RVO8x`ySH}R`2wDmN00007 zCnS$-BwMzh6Z?5JBV?^tYkBwguC(@J^Z5Aq+uF)zvpLc|ztp|oaPR7$skv4Ep8hV% z&$z}e<~g;D7gJwgrt)8EUaz9Le>MBjslkU$na%!S!AlyrR)8(ZMvI=S0c_FcoB!ku zVDq=pqGxCTTeSJ+Kl$J1KgC}&L+t_&bmddkH`-mEsQ*F7hpPXn-iY?ES}PqIFb|ej z(2+p&Nv974u1nZ_AJ9R!QlJ3?SVBqOdj$_gI-|;&=@|M|VwjN<8Zdw*f0P1A-s(m` zd!s}0R~4W{KT)kEWW{%_N(i;m(15MM8VaEs0f>zMHtHw19qE&Jg2&78tdedXtljL# zI{mGk#R1a5l~Iy;u#93n84s3S(EG4R7=0@b_OV^mI?tL1%h+AuAx**hE9t>>9$>Kn z;QXJeRuUR8fF+dTNdV|Rf48=RuKJI7@LaT35*n}&nNSKOfrn3d+~+(QmO$oHpSPi7 zZCwNA!Rq?x)$?nxZ+J5uyJT!H0V`lOfQ>UU`LM0Pb*}cWaiYeJW=*JiH8-GJh#X;8 zJeP)_j1Vz;D8P~scDnCrHK*jJWb}mw3a9}rn(TMUdyPy@*lp8wf3E1|Zz{e0nD;smjx`{D^7vGR1uRGo*=kUn zeED0-r}4?6@yr2N->2|yFwCvbxJB_wHvlX4Sc5~Mw^N(F@m_$6!gaq$$MO5uThUHc z!;fa*2NMoEl8-kdH+bm)ri0I_6h2J>OcX+7mBrTJ?8{%rf9?O2^2KAqvBvgx^I`11 zw%%BHEDBy_FtDCPxW8TY<`-6MUH}(hX{KwS$ACj9n+)dpMU|cP9}q(*#1}f3s1sS2 zD<0hHk2W68mGX7T+2wi7iHXgwcHN`qjDhhc)GADMjcYqEIiu|D$GfFaxQ=Le#Gqe7 zs0-i0gQ@S>NusM3zgnEKzo`TVyTW zRi4>V*AJg1)jX-@N%Z;X&lAe2?}7&dac2RIp%sm%r+R91oa77qp)KVL{MP29MpHM^ zE#Ya@B0pyz(L3!e9z5xFn3s-m3SUE?Rs|5=xnoZte^FQs@-%KF-W~D;F`9*cTNo|k zYm;H;^7;+B!49f}fTnRJ_C|g!l&bH5qc+|xvU_aqC;^=dksBsVk!!=HQCzA*EiZGc z#hrJ37ow2|$(B;StbmD6XntYG6LvtXD+m4CfL+QOdfS=o5MZVhvQ~-0QR`5p(;VFD21-LID)aPmDtBGn&AsR}; zJG@s#S;=gDi7bu(DMfQ}0Mo^`nr{sRLna`3cp;({>#)HkUgQrJvTp;WRGvl_)~q>M zbIiVI&g24jndZSW0kTo$#i-GHJlJ~#JlX9Yf8S>E*8IKW5i75rWGo;W!17As2`1b$ z6N>o}dvHeJVhO?WfI?aXJq#`uBJ-+a?Xf!Vr3Z@5kJ|v{Ra1V(JXl^=Jh}IJg3o>B zw~vGkD?Kz|9xS0O4y-7=tu2Y-;ECt!CDHbC0 zrrw5A-UAULd%rf3$7^o+PbFtHKh!l3e-=S%5)xOmd40)#j0000!?+TK%!^cQZS)vqH^G){d&4iO4-O zd&jd-)e8~%Xl9?6dNzl+#;Nkdsowa)+!K-e#X0NsC8*C?uYa$W5Mgk1LWIH52@wWI zCqx(=oga^W2PcB6J{FN*MdYEGJy+FdBJxy3{t=OPX7=}q{3*ANu!QZLnb|85d913J z9Oml?lkGt#A;JdS3T8`3Drb{Z6iM6PG@K?FAw<{+iELGUhyX?Y7agv-PHL;_mAmVp zf#?%9P>76?m4EAEeT-MpM{Fl7;XKYoUi4Xu>=V$A(4Swgp7Y&bWw@*9f)>13n!$pQSMG&puzY}Z>K0$YlqQ&sNDzhqxqh9#N_o}dd_Z8teZD2BZ)QKGt!_@S zsmZiv(>b(s^ZadSU%9QdU3tv+{GG1tss6M)7Z(9mY0D+H96Q#=pI5gg{P@&0%h&Mg zqksHa;I30wAzMPAuQx^006uPgYm3k-gA}cm8{To(yHgB}c?elW%e0hH?`vQg5=e9&(GWHp^Xu>V> zRlZ-hfba7ifkybOH*PzN3$f<=_UJifQ;KaVGG<-*2Z}6uO4ASx`*>;Ik8)p|89AU)+|*9C zI|he-7002kH%4#N+@J~4dbm`A(;T2Y$hqZ=KL?Ch&b**60+STDY(%&Q{0nm5tAAZs z#{*Cqv(g-XI9gzFOt?oq>Fa%w= zRmBDG?L|U_!EC&nu}O}5pRb#6$H8o76!EGdBP4zOnYvS zdPJa*vnyHcRmWITiiR`w%rdn*O^C3QrGkgew8s^*HWAi+HrBg?i3SdRy_4i$A~QeO Tw}b-%00000NkvXXu0mjfdN|x| delta 1771 zcmVT4Ek>%oWUeceFz4^sn6gH`fnhd!6Z(72+yZJfNk|(*6pF%dN2PFtB`0Zz7n1Dy#{ompUSrmwZ(TW&vI?9KmxWWED%xzGHzCuQM#}6 zC-DT$*~epaF9Fkp>p|f}?B8-l41hTawew9Aw&jsap$U795WfVoWWN>=KjbR2{09k` zCM-mP0LBpw`@v34sC>YO(L(+^d2@vlFc20BN^F4aM}OZ^3tj$S@t^?-%@s<(mg7P} z31B?)a!TexkpxT=M#2~x^?Sl^Xhp@{f%eck_o2LjaNJquyl^Q1epL8* z+^UDy#=cel@~M46Y&jl2w@nRbKqXEa!V`%AK6M4F{vSg1v(vcRRd-t;j5_;M9ePQp z)M0ldp?|#W1%3(7t_GTEcbVjqzDsCBliO&c)ki;{YzrI+HJ5828)Y!TkrlXjqq@VR z4o(P;*&HS={Jhi7SNX%xmVe^EMvg_ybF-)KZ z0UnNauQU(w9r}0C-phv{kHHUa9KKXPT9NbAi-m~%x(tK@wD>lFv)bC_^|iZH<~QKJk*2Pd4Xx;?Ve0knYf zN}f}M2Zg5y?egJNE9pKj9@O*izIQpHQx| zu9(N-{j1Fwve6t2Hca~FQS>Y(#Dhk@4r5UqVNfV zzGalgSm|7{m6zq|D{J&_nGTT)b0uG8C97NsCa51wG|RT)4qOUjE13OdYAFLuw;Vge zL-zz+i_g| z04&TeVevApayd*OKX@=cs2kk86n{oy0~c%|cNEl89tR(Wi8F2+5W1|He@Ga7OAj=7 z_sT_SzU^3Wt-Z@wV%TAGqIUG>79#qYf>wnCeU)gcn&>Wkno?Cftsu~(?%-Pv;9L=sKK0Hr57MTOx zkdC-q6fGDZO6)+@M<{a=mlEScASO5*@g*YB^RvlKWEU%8yq_Or( zjTL3j*BJ;a=T@K_#duIXe^+*4HpTW0gz27*^xXz$bKC80a44Ir`VRqVQ}>57L{k6& N002ovPDHLkV1h+&P#ORL diff --git a/docs/html/userhtml3x.png b/docs/html/userhtml3x.png index 3676aa5341e9977553aa40e9c4a338e5a2093a5d..294044725df17fc1ba9d04eab26f7525d2370d06 100644 GIT binary patch delta 986 zcmV<0110>A34{ocIDZ4SNkl>V7q`!9V5Z_Mm=8K3lWz7Aj(5ne1j2#Su2 zhFa8|xPNkVb!TPQrpgDo)agkUgGj8U zKE`g!v7z79N$w7nA1bGk@i@!l6f+~C5_EaL`}z4YpX`D8X2}m-AJQ?;(gHelBzMXM z<$O*(%Vh&wjw$As2f_lJ_^3@_L-)YP>Zr-f5h=Kkk@Hi~Xb&7l^Sq{~Z(MswzEr+! z`2jLX!GB&Daak%=Bp|e%$7SrjXjbI34o?R+DvIVsNnU%3&+@jSPxRW{gelgL-jY4K zej&IRThE*i=bd0p0r(1KWwK%7*Fq*E^Yae|lsmX3gE?La7OH|{FS6p1IFE><+MKc> z68FRNUuyCX1-Vai@ga>^=?C)PRhGd3PNGXCTz|oas|sql7tQc}$@ykRXMFF5^hK7g zE4mu`>YMY>=ksiwdR;L^mvY!2O=~JOa}(5kb&$9h4Mb%THCaAEQBK18a5s|Bs_%!E zjfxN|rSukYN>F{!l`7%^Rb9HN1sIzW^BOV6l3&*p{!q*7ysFA6aHWzdmz3Z1P9s6j zaDUWZhn~`0e(7)qcY-?PSz*IeAea(8$|fAYk(DbzKIOkB0)BmwbrnIFXehngQ8duh zWGCTdPKXGGvl<@NyN=ukpqMMvx;5Gy`Z$>Dh(O;cxH|;^b@wDtpkPzVClPTXXEcG; z-N&+!Unyl%q|2XBYT8%ti?zoBFZ6KGV^gjFCqO_cSnnD(h<}CYA%@t<|NT-|3lqG| zmCnLYXATM delta 1126 zcmV-s1eyDU2#*PnIDZ5|Nkl_Ou6F=2iRNtJZfP*9C$CMq)Hrwugp8kI-bMDeHcsxw1;rWT@ zcpXd@=KOpCMmQNHj$2jp=(wftn^qnELM7%fVptxspVQ&NuRRA+okjIFY3Bn*N?UG` z(M0Zz;4x|ci+|OoyzY*;S!Y6Rz(2r2-C@(luhFvV!nLvgqB=ykx!#;#nnM$R*}=>O zA16AVIc%12(iAldP2k%*^=p&k_lo(uGLR;IMQwg_GZiyow~`0!UCtl-t~$?G{wtz* z0Buu0MQ@DEfq5Tx;iN-&?tn|=r3ebr=0})dvv@TLV}Eg6rbEd~ga@f@`RP1PXdhCX z(+PUtse8+*K(;)qv>c)#kdY{x>|1yV13>E*f( z$b&W2llPSJ+Sci3)bF*`Z>xt*0Xd8M)IBskP@R)9=X(_8>%6fO{O}fsDu$-jI zt1jZZseeP-4#riFdU1dyV9<&E(*@CiD_$%}n>u}riqc@)^lPJoU4aO}r9LG1A!v9z z2it|bjmDrCsVG8!CQ733 zF5cKLf6VKo{Z;w+8Rvb)a7;VJ!SnAv>co^$ zO(jNovHht>y_sl6CRjzzr|y^W%nrff+!RC~7g1ZK2wl`~6*)d;#@MxA?1^D1_0o2J zet)!P8&u*)P!SdIB!LL`d!rKND3AI<37JuD`4SkDN9lfV|3ubU#fK5%P02xbN{nGh zxuh6GlDE3BQ$uU=kk=)N%T2|gynyQl6LyQ3T0TZ42I%&e?j1DmhtSHhpS=C25h#}l z)IN67^rSp27CLnU2>t6G`67>!G1RH8!hbWELVC={B=B#eA$SJJx7hCJEhg`xt=xyr zkcPs)9hgRg&f_WxvY;8k=By%*qU+Txeg|mSEfjYJ%_8S(&xLtJuDzuQKVAU5cXvYA zGl(3M+urnd4$pQGSk-x`w+>-nT*WIyI&bkbrG|Me{IQ1es}KYCY^h!XUS%v@IbUT? z2Ccz5`Cy~cmF?b6f&f4p@o&M`Gt3*HDbGys9$vT&dEoZ+G6)Fvg{A>MraaHa0DR#0 skuMD06M*~e*<4gdfE07*qoM6N<$f(jWjs{jB1 diff --git a/docs/html/userhtml4x.png b/docs/html/userhtml4x.png index 1ce118eb81d18d4591d48b1b3d21a3345bf3977b..e17fca31197f49818c3b7b75c6572b8d6a45fdaa 100644 GIT binary patch delta 1592 zcmV-82FLlC4&DrqIe!pIL_t(|0qvWMaTG@ohI?EJ0PGAP10ETO3?w@PoB&ZWQVO1rc3`g?nNx_eehRplP{04b$cBJ!Jv zd^CwX5s^nC^1<%8exj<6?f&F&c5*Ym$C`Mqs!x4@l+ulee1BHe*G1XM;-{0H`8^gz zN(t-zXVK}oi2PNQeJbux3P+*vDW!{)62ct?+pfKw{|)BwZ^4-CRMlM<>OK$=et#EZ zc6C~C7o4kn@!_}piDWBsJ!cMPbso%VbF6g_;orh#s_aDMLWH4~*N?m8T=H!a-V(wP zk$;Bz;kk&sihp%1kh+Owr+PiczlB$e>_p@mWoa3|ukBi#r|{j>rAaAWnP7+THDnDk zRFyK?PjZ8f6%+Zlphb2y|GgP2a~|cJScO|5rSx4@A09HkDf??xeQ`+pV?Yc47L3Wx z{P*u7@>Eqnw~SR{3(k@6W)_~HrL^30psM7G&+X;Rf`3_=P0`A~1rucVjQ&?p_NI4! zC?emShfgVSz`%jSBUQb$m+@Rg{^iiYT?mI|NyyE95s_agkGWc4iulI*$WYa{Kdv$u zubSerw!dOwM6$2$2O%Y}K5V^nGY6}*!O@Z;NFRa{!p6glJORLvq zNknoR7E7#0+gOJn=-lgAAKkr|+Zb2&my^HVz{&joTsO8Ky0*Dyena)de(cd$(^30K4>V0@#I{ezwKGeZ__zxv6tvh6?hm~WC?<{@ji|0G-Y#zC;Sw-qpcLA$A zXvKW2xpiZ^sn;bJvu2&eFZ(h1?C~$_&AE3uFD<*;9EQd+H-*TDAtw(!gLK#-79ddC)S3XeZ{X@9}PAbeM_w#=u`f1eEVMOhro16S)1d@SRX zh+>DscQ}`Yvg<<_i&(3CPkCn!byh*>A3ZDQP(}QBj7d&WE&|n2u$7C52W{*E z{JIa54EU;jpgoHQh3(v<3Wm0^3=}n`gqy@1Sa_pt8nI_Q2xu$g4)JfnVzQfnnt$HF zzfYIe;5?acOiMCJ_nrGt`e_*`r1|$;Uz>7eScG5mnNd1?x;1wWT1 z%HAzzbE(RJwwJ#xAjNpZNh0f%_R9;bBD(>=Bd4Z8IOC}4(i)s+@y#c#rc`g@x6pkE zgm7(GBq*k5b$HH;XA*9C^Fj&>A|s`Q-$k~kjO6_}mKZDXE%qP5%NLP{!GD*Vgp|^k zCI5*51vPgm#|NtV&g8^72B@87^zyg<=!qGQ}} z-}e4VWaym-ZnHVh;mga}>_=|;(DBWp_U7n^`i=SUp?YgSJCuLxxF4KlT3B|tuQ|R7ZXH=LcV}z3Q_g#Q<94&fwQV;$ zN6=&QFn@99P-Cpku|A*6e*t#}UwaYR*%=G52uT3^_(=z8cgeZbl`%~}4|Z74Jn7)T qGAw&R;3;Dc{}vpR?CyedjqiUI&eJM6wfrdn0000bQ2Mb_<)oOLF z$7_{a+JYmMLzT}o1k{h@GtXRW4}aEsDD#8J^(^p0&ivfiYkvdWsQz7j7g3)z_wM<4 z?<9T@xQYej+(hr|i1~@yU!uORQG2hNJK^v$_NfZS?FHeT`^D+S-4}!s{io>EXFEl5 zOI=8B4m20#TQp~b6ZIE0x1n=VUn%)$$s#M^=vO5&2%o5PP(rh-xMTF6qRUX*3GY-z zQ_b%m+tfMMcYjf5$C!fuyNt@X%6W*RHplNx*tf5B^s6O1yj4coPIOl)UuwvPT($W2 z>APr3qa)|NH>@66_^A=QHE>8W7JVGMgVG>Jw1Ynb^`9d76x*fp_bIYC+R8fj>bqj6 zEd^5kkLG_eW`6JN7ovYThJF`lrszLK{yy7joZS77%71g&_xqH&V%jBi4)t9u!b=Ep z#8%{vjnmcELO^@s7pDt#V@x!hEQ`Wa{ig^&z;?&*ui)&H)_knap}uRF$oPC<;Df`V z2G9Iryiog>%4hYt6hd_e8YhS5ZuW=De{>3`xvQlu9HPXf3OpPL=Y!StWWMATm#2ND zaqfi4)qisvgXqK2TY7U)B^B~^VNro+lKZXtXk#xv)^H--=?V1dPyWrzd}uRA*!45T zcA~{D<_mXf6BxcqhNOMF<}zN9{Il!aB|~JVf4(At=yxwRdg8z+dFuPk=*`Qm#@=|m z=9a|MuvtPpXhR%cf=&#_^Jv=E@8^B*pM3etzJEAr44w`*r0=SOuj2IU!YdsUPd0cX zi|Tx|K|UPFQiM0gSAEzzr$6{COwsKXyam;4XR_O~Yc7p(lh%@7?b&tilELKkc5%M4 z)(&AoEP_8(9VfLNO+SbZJTC2*!m&uj7|w1y>}3)@4ky@!VJg8~zab7mxI`KI*g<~Y z@P7^Qd>#Ho&q3gM^J%oF6JrKw@NoGigyuY?n?lYcvz z1DqV2N&Nx;!M?H%W<<)O!5qf;yHkxP2*mG1Lb1;#?j&d zYm4?mPDq-B3~an!8lxTg+z1|Qg2cRC`j4F2kRZXVy6e6c~e0DPa>lDlM>}p1Llg2oqyuz`rxMu z8sr z+n)GM-r;DRNKBOY9rAaB{(na}ycj>xxXl#~XX~Q-A)49#i9!6(k%e~e2jcxveYAgh zAyVHmvif^|@Pe)h6hS{Zb9SwSSIu@_pXQ;tIE~qu7SRcAq;DJc6>b%sOZ45iiIJdN z>yu&l(}Gd+iPoiHW9UWO+=q{$pNnR!{-SIBL0G9gQ*75amv|Ehvw!kwY}A@}@f;vi z<11kfFP>Y6Pkz^rnV6i|j?W@`8OXg17ZKG#o}&L0iALKloK$N^ddO+wKy@68RIbau zOX%F_yR^e|Tq8^o$gmuvoD8Wvv542g$i@15-FX)o1vA?Y2fs!T@wK#$Td=MabE`UU z=-YQ3)E<|uFk0&qbZy_9!Y5xm=)>EG@lySTpG_Br=sTzF#EW-7@^UT#$m0inNE2Ss zn!|sHZYdnmwtQvLKM$5L4zvp~T>SCEkG3De#kdH8NI W&eJLsOI1t&0000

      1 1In our prototype implementation we provide sample scatter/gather routines.

      diff --git a/docs/html/userhtml5x.png b/docs/html/userhtml5x.png index cabc66d4402811368306a1893ef1156d6b112853..324d7f453a3b14051f930972e7d0eee37ff5f3b5 100644 GIT binary patch delta 1280 zcmV+b1^@cq3z`a$7YZ8)1^@s6br{X(ks%j<1k6c9K~!i3?V62k6fqEmXAlKM&_U<` zK?l$Qp@h%@hzbyN5U2o!4w4QabU-=)(t*)uy|KJ|yLY?CM%n26q-bybGc$fOw#VLh zO34}y2k!pS%s!-)Zk8T($bPW2;D*)X*vuZ?l9gF@3#x|2(SoXBakQXnSR4l#R7xm+ z8*T{?Yq{?J)Xd(P*)NrUGqV>drO(mM-Cvj)?VhKUPO5kC(#+nalzzJVTQhrXW=B+{ zl#tuO6n((mNdRA|^AlvbJMWY~k}G!9Nx$Yq?@;)wCXRm};&<}ps-E%4mT z&QeO3QQ+>UW_BW7=y{n^x>os_nZd(U0VfemM|vk1{+iiEv{Cs- zJ!yN6v!mY3AmmEVEJtm*w;(w8(T5#br(~fM@^Dtzcv{Qe_8VEX&D+|}trLIPR>mqW z;HqOIG1a$2<2nz$8XxVRMp-?X_#wxCWAxiF zK(iwcP^1)R(?$;CM`vJt0;lSeOaisYli^pYW2GXGx#hU4qH+BcLdShsR4IsJ)}12%Fhj4yeU@R}$2Q500Suq?IL z)qCI>`9r)2$i89Uj{=dkQy-}NY2PPEzgDt#LWw!E@y6Y8{5~L0>5pxH2xGb3pP|Mz z+&ynYTFF_p;J)Uq1Ldxtw6zvL&BdM^lP3<^_E3%j%$yn|-6cWnfx6BhBnMLZeMnq$ z0Ex7?&7Gq!Krj=aMK^lK1{WY0+#P*5>||H_I|Kp$Go=K-8SWlGHy}RYgx{=s8;7Ai zFp1-B9CD=)GpHYR2+xjxA2@=Sx(Z=Tku$Keyq}{F(%k*L3VcZWQRxJeh|~#+6A?@F z>Yx)OtMG@OaWM&L!)jaOi%x=!>r^N4S(wW)r*7;Zh{qH?&=$tbXY0zSjOS{A)#rCr z$6gQmE7H))21$8k(SXTm*dC_pBT)InCiD_eHOZ_zeJ6}Uv+JyX8sE}@EB>Z{ag?!C zfU^uuCMNxcOgZ&CW733SYi0s&wB3Qp9LpYT#`ifr8iW!v2Eq(xCh*5Qxe(FVN!V20 zIiW}fOLsX+M+^v+PPV#r^6PegR|9m9Z7FO&GJ#(J@L&==JLv58XGaj;Bl4fki zxdc7DP|m8)ID+ecXiL9*dJS&XKXWEI98Qo)Uu5!5|B~p-ZEl7YZB*1^@s6XdV4uks%j<1&~QZK~#7F?V5{G+%OP^?`|l7Ass*m z45=Wb14uf64q#FNLpp#|kdO|bgg^(-L6Yl${GZs>$U2`Nhke|{-WefjwQsG~vbpK= z^Ydhc;Y46MonFe@3+?;7Yp-4QtZTueyp4~*t}NW?$;u#mOsbxuJ7lqAQVn_cd-8pM zOsbv&J7lqgOe(uj4td{u^?fUqu3+zLeya9p_@C+z^`>xb^NDBD^W&QYV$ZqSlNvF? zfBG~gng*Mf#pFcmA!bH*FW=Jmvwp09h7b6Bto1wDz*`+L2X`kNG>xIt7)v?Mp`1-3 znYL|?V}I3doyPE&)N9czePLto4kZ6UlO44?wI9Av)tmkMnax!!ZP%z!*oI9G;Mnj#j<6p4sq8uI%FY-o zEh6i4P9z@6(sc z;$mAG45xpqUN({(*vMln_$XZ2BQlt%HR1{UQX>JE(n8b_C>XUdu8QGI0zAX>Kzr%s zE`_5h#I*$J#zZ;d4?ssJT{ff=5>Xm~2RS#i`#m z$1$GcKwgo`DjNwEvR-^45nJkmhvs@oD6e#$?D-`g@=}m)68UwYQj(SeMEEBqA$^jbkj?>?iiH_u+a}Dp>}BhnN#gGSQfG z@4@iGrP%O=*ItvsT_wIsgEKsGIbrcqr*R{Fq9cgd0DpKTS5MW9cDkN6z4%KnzHMZr zw34^4hMxq}KPzB6wh|ZcBmjQ6?@kNfHu3~)u3nT}QG@c}l3-kGWmy>qFL + + + + + + + + + +
      +

      2This is the normal situation when the pattern of the sparse matrix is symmetric, which is + equivalent to say that the interaction between two variables is reciprocal. If the matrix pattern is + non-symmetric we may have one-way interactions, and these could cause a situation in which a + boundary point is not a halo point for its neighbour.

      + diff --git a/docs/html/userhtml6x.png b/docs/html/userhtml6x.png index a0ef30ce0aa38d7ecf6312173d27dd0639874e97..72a36e5d6f9c0fbc77d77c8f10ca674ced9fd3ee 100644 GIT binary patch delta 2172 zcmV-?2!r?U6OR#)7YZ8)1^@s6)uQqsks%j<2ti3iK~#7F?VO8|6gLotC5iyBodINk zAp##67&`((26j0D7i0jAfJX-42;j)TI0D;bAlIksmV30C*%|G;?pA-*v9~iLb?a|+ zt6Q4Gg%Di(IX@1ZbKg7XF0AdHbI+V}ckH#LelN+NLzZ*yXXo7Ap}y7}r`2OCxtR`s z2X?pjYksfgywvU%R~ya?-unyZ+@lb}`)uTQopWDj*Jj6n{F4zvcKyq zG4g=-o>d8F0{7@kNap%IkvVq7VncUU$T%p4=$o3D)EkZ%KcAKpfX;oK*gb@Ylvj51 z*?a%QImb$hEab2`{aLQF(!j&}p4Ik8=iG-WgO{<Pd9UUCs+}J5iNiCI;<2v; zl7F3Z@AMfyIl7cjo_#|*y)Vajm-3m8kC~Se*lOev(4tI{i%c93LvarsNk>!PeZR}G zYtpyxx!UXEi0GD%(}fGT>gXt(baat%F)5=CGCsJwuh;ZbxM%EYFD5U4+$Y}kS^X6K zl5TokKc&ykkvaNvU)mCMqs}6WU>VypK6xg|7rs+}L-J*;v)I|_ge_8^<~nmI>801l ze3m#G(~f6KZB;*+CrkMn^RH<4xp|j4JehNC7fUu-nK07R5Uw%}{Ujjq50Gr=m3$>T z`5Z#9tB+;`FHL(zI))v8D^I{g|4qLL9*~8}5j7>qYrWA2&EztD=LXpcy4kXbEC#Or z$%LUl8D$QhjSvobj;yN#$$;qd4_Rdi7T*Y)Jhsz-#J)hdo>4+(&OW9-b{TrWSTz%;y~F z_0D@=vh(99`WXKqJA~h~_?vsD`r06@zI)*Pw#i^LbyN_o6G0ic*O{DV70ugFZp2_G z4yUXA6nTmR$3U(nH3xyjdyg!#vPlzW0}wDj*-XxR->Zi32`Bu{e$GK@R{*Q!R1D1p z1^X0ZFh>4=C_4|?-JmL9^>Ha#+1!D!<0B>*fofmN`dO3rD$ZGa*+bHmJMV-^5ranI zs65>>&`Ff~DMZ{~r^v(Mm;fedVz|5{I_iY7@=)H8;_nisM9Wii4d6UuPv8Kh6650M ziR?QJ0o$bhCZJ?*i&D0r_S2FB6y;B~Gbep;Az93S5ID^&>b4gcAJK9cAF(KLKi1b2 zYw}*pd2Dw}qA1x;N>9zi=lGt;N_`h$_6YJ!sI-cb-o{j4H}EFPgLr;aF2N`{lmyD$ zfs2sUl)rJxoqA5d)tHqCFpe}912~J2$;yIp(5E=#2gW4jBc~&7>HnxaaYS^WeLOao z(ktni>v8NmN6*KenR;KGobhnB%TYW!#F-4dJ{1F`Z>kf`ufbT56RD;9r`X4xD!0>^O9M2-s{o(r;Kx}wyJyW=Ni1%a&Ft5 z-Jgzx@c&!qXNuSIR`*soS-zAuoP1K|kd>9d-eTXqQ$Pu=`>xjsYw%vnd1`ld0jVMG zH61+&SP<-WohqL_5nkhD*44^Io7i0hzfq@Kg20Svr85}qR?*C-9&}g)?T6tc0r7AHash@vKJ<`mY zgP%Wt9<;iD7#tiNJW+iV$6oDMswZ*ZRQEakW9X9H7pixMWNCahKVv(V(wo@Oi+enO zUUOfId5-T$ovrK{0*8l(kL2=WO}>l7^}A}{N%}@_4(PwJF8TCYLf%Nn$$+p~{LJWI zO0P3zD%@)^&+)Z{l$kYq(Dh1_;rW7oPOTr`{$0?&D%DCDUsT!QO;x7Ey%zHhUrR_+ zg?^=SHQ_m~yjjY;IrJ;pUgDp0u@fABwMZtroT&xEpQQV80zY3(m6Q~hQ`N5%*o??IaIeKY@ahV!{(uG;D)^p?j$Q z)2I#0W7U`H|Ehk@!hfi`386mZc1npp7(~Z5nZ-o8hb;BcrvIKQIiCp)4z+!Z;N!~`@do#Ys{g5feo&t~9_ilVg#XMRe8)%HRtEJ;_Ly--C%X-BVji!u z$Y)~xRP|5Q_rfbYFf=ySTALI1TFkHc1c3YOFEE&BOeFW&m#gXz)sIHQRfQL0T%moh z`ozX{&w}{kui^8t+AyS_m`n9xhtuixqLVEi3*BsFJG%Af3&>Fo_LlH}>_Q{i;Dv3$ zSKWgNSaobv8%#R3K%+_m3cjcf?~AyPdlhi_X8pQnF@t@;-Nx(+y=t({`t7PQWOhE8 ztL=2_3U9zq_vm^P@nSnPLBH;y5AytYRzG~uNAjxU9LXRSL1xt7^V7z*T_tZ+Ub9bE z=E+oRZDZV}b5z_VuL^U2<2%xgJ=p;IxdL)UpIvqPkv7F=?DOlETL71$i=RHWBX}F} zdvfQvyZ2ARt@=D86aCDD`0E47C!^j`<%#mn_iwz|(2P9e+J%ydfU+Q=9b3Ofkz$>+Xqfg~~B%7pfJ0N&T zHWqLCnS9I(=(rE_hkg~3k@3?W(mzU^Q4$rf0Q*%I;l>Wg))#XjVCBbdyw<$qbK+i$ zx#K(b1~aKiE@2@FkQ4P$L!a(HGvWONRI`^Tn3!zi5$=j;Z9=dBt(5A6z9Uagt|hC`7JJbn0o-Ayh51iF!KJ|}Vvi`aiidWk{yaWr$sGmrlRwZV@#EncR>0Fx=_gc(<9iJs6j!)q|1s7iZq*726|PlR^BzH$42K zv74^7)c#I8ws#G1;XuNQ3omThx#8Objk6ORD#XSf2t#z~ zST5c%O}v0Yzw9A??4GP-^?SPFIE(&?>I>Cygfs=om9Lap+Y-9Pn)1Qci21@AiAxUq z(zRxC=ES`gbH{h&vnUH7Y)=Wc_k<$>3xe~1ohxeF1WR{Mxu+;#yRE+X9I!j;gNJG_ zPDw^#K}jZw@{I9qMS2Mef8%7nZDXk!2Pa6J9^W2$F#(tvpbU=#GZs1C)Z8-i{tF&f(iCKRUInK=}$uhD;Ohx z9=!gzlyAJsI5(T3SfRQ>zugPVt&mhAh4Us!|s8_N~H}rO#04eQ{UhDH0@>Z zex~%K^z=vWBY0KfZet}96>*op34s(8bv@?H3bXeE8O zTfv2Xk_Ud6??wyX3G!7~m=*`{H=p5urJr$Ray){8->o?Rbq`LlOpZ7mY&4Gkz>^tA zM~6>s?C8Zi!;4*1bP%5qZ=>~NJ^kw6InLtg%}vu|SJ8ln!`Aw^s`UJf$A|8x;Po=W zt%DhD)gJcrsTQp@?{7Blk}*2&l39hh@pUHbP#|v0m>lo?9_Y6CRqBWQ|FMaGR%6u* zb(74re@K5yJJgmOCVnS*#`GE^d3S!-W8>P~SODcC3KU1Y@ zFQwO+G8OK%nCJKoWn>1o*0mG@3KB1BnYgVq1ojSrmXP)?>UCvA+0FnN=ST4Vc^KtUTUu>F3ptr5NJoX`LG_;
      -

      2This is the normal situation when the pattern of the sparse matrix is symmetric, which is +

      3The subroutine style psb_precinit and psb_precbl are still supported for backward equivalent to say that the interaction between two variables is reciprocal. If the matrix pattern is - non-symmetric we may have one-way interactions, and these could cause a situation in which a - boundary point is not a halo point for its neighbour.

      +class="cmr-8">compatibility diff --git a/docs/html/userhtml7x.png b/docs/html/userhtml7x.png index a0d4d0d4f3b6d7ee9febce6cc9589c6ac95f3481..5db77543c69d274ad7afc3ec02925852d8ebc413 100644 GIT binary patch delta 1828 zcmV+<2iy4F5XTOXIDZEDNklWN^k2_{ae42z+D!&4nE=#YK$qhrUK)j1N1Wvp}BIi(tn$!;tO>N!fR;wwGGMh0QFw zHZikTyJGtDIkCE|(TtjEn!Xn zGPBzZD#plth6wnF+DOpm!9jl?{HBE7{ngBVGP5^58-LX+vt&8mmhX=M)@D^mhGaBo zn>_@U47nm7u1{i&SH*tgE(2)oPl`GKJ-jGo5JI>xGdvxA(GQ#X&nm~$kx_ZqV*!sK zos#_mcNv#>KnTKE8NX+0I(1iou|4(sVotYfuJmCwE-1bDSXYRPvL5K3jRZnY^;CwC zsdvlE^?weFq^!rgWgQf7?se2gzV~vj+MseJm3Ub`E59=~uUnS)eJ|(NS8wh!&8wa( z^FU^}``k7>cUgP6=EUMoW!lhD@65~miV-lgFD_>h|3L0u*ROh~))kvZHkRHe?J@@z zD9`94kG)TKw1L6Y+|rmObJ~GBU%a0&S0C40E`RuZZ^#mP97Wm*a=L}2oCznWF87a$ z)eR7ww?REtGwmb`P^`*aM=7p<)7u5~?xl)wf8{$hXx}$IgHYQD-{*H7U`+Q)3#>UX z&U(IhB<5Hn&q*?*-ce>s7MnKTOU4+fUtI1J00Fc06_^A4!U;l<*Z!bOcLkDXC_EU( z5`Vcs_LoJyUaZN56uO{zxh@8hiQRC0p3*xL=>e|!AZ!uHrD7TWlpC!#o%EF!LWPeA zMT%p=w20s!w8H8PLaq}R-`Cz+_*gaM^Mp7hYT_gFOlZj%(j%*`+&3W%R{ZaoBnKS8 z`4|B*O~yb5tWF4Y>CTZ1lw|(W+pa>_F@IQ%3pcfD0iKC`nV|h}bm>%M8IWI_RU*tI29| zh(>79r@Ky6-k5&*V4&vWo>^M##P-jSf7!=atjUEky~rvYvr8t{9E2>BZ)k9-e1F(n zmcTM}0n3)$9Lg^yU89o(4hYIgoyNd|8+>O&un@0G&l5uE98Tf+_)dtBJ;F>J89cmw zLuO6g$qVtU+n`4QzwZcRH@_TEPtmWd*L9QWL|tO6xWW1boWLw2au``z3Ro&sk=sG zfS1XOqA~tnti^>y^kNkbS#%U|5aVhfxF&a-xUC%L3%pa4o%&tzHRT*56Xy~7bh%Ob zYNjY#kX2$K62@E?P-<4}03dKCaRC;2g5*ocbJV7ONAiJrX(1zHNPASb2Y(niKF$M3 zp2wuHuU>e4x@$!S$rx!7W{xqp5JDXIJP&x=QeHxqYQ3yc%4PjC$(%+TPDCfD? zX{E+^meXkdzF%c|sdLc2Kj*3xR!(7-&-%WT9_xEP=b08Q-hzRVj$^SyIOK)vrW}vS zoFH;pm+xsF+S5><_tX5<4u2sdWxPw9+|y=G|4JW}@r*hLX+-9wesj6n_P$aIQSYKd zKDn;TMdNDfPIEm-8>`R632#GVK|YMNmwQXN;PcIcF9-0gzE91mmgCZb>ddj1b?fga zsxJFp?p^Uq=etCAb>A$gx@^jQ9~XSSWsJTBUh6b(4u~7ET9XlD9Dkk5-O-69x?3BA zT-ciE#FX_$XAp{8A#9HKc6@Wp(p?Sda&{JMEEhHAOu{)iE$fcn2rdEwN-23dbp zNE^db#{%8eU=OzBq7J==b?M=3#S-z9R#Vw(mKv3FS`!c5Dj{TGFYl4#_yQ z;dkNnLEDZQ*e&|$0E_-n=~hl2iKFug>5gUCWAqb%;&V+kZJqa>4A~ZWp!rQ7Lky zSzeSRIogQsogU}n+oiuiZx8H?W3M+?hM_5T!tbkHsoklaXILje73M;)clJb=#b;9Z zO_Swv)QuWd+ynFR-#^l1Ri7v(2M!Qx#N0i!;21nvN z@v|E&FoOrWVt=P`Dk)Hl-A~lue+TEn&fqDhVSKHm{j*J!#$Ppzvp`PNrgUV(R0}%e zfv(t5g!t~7>|1I~F3eWZwc_~z9h{^x8r~zx>3vp@)hH9Y1y$mK!q{M-afN1!TG+@3j z*748C`?qk<)GneSp2Xh2>V?X2X^6rnJd=WS#)o`2m~W_L;P4O66WQ@qboRaRR5<*$ z_I)Cn*BY_~4Bd_B-a`+>adx^;HT8UjkA&wai2Rk>XW^Q5Lk-tKyy#8w0iL3laP+~W z1NtKuXn%~|&*AVdx}92Ivdr!$l2gkSqscwRhabl0f+j`&aeR`vIO}EX^C5B0yEXL* z0(83$2M>$H)4}U$IBFajjoy(_^$pKcz()>GoO9*J{JX}G8E8Ge?HzixU|$>E&@sur zQ%h~d4(w&S_ddeNnl8|&k=ULN-Vmb%e7yFkX!_B~To|9iHGfVKFXJSAE)k4}W0C1$xa?E%a5(q%ZSu0b z1CZAVkJ(A$ct{hUSda)I8KMsk)wkGqUVA4-T){%)j~6yx^5l4GIzo?6LopDZMVjUT zvGMfk)*uw9==1j`TTg#WL7saCF^n;Qqb>rz%!ecvi=vShSp~3G@H9v5Xc}eMyfu-cr5G~iKfWn0>5g9G>*$Qm zg>5$h2HJVZp=+A@mq3nm#JGGH^?eMs`CXq8m*EepW|)e`?=+o8tUxr#yb#O^{3e*+{Hi^c;u4~?-C zQMypD!|UHyze#do8|`JJt-`K~ek$@>#fT2V*%lbKElW_u1MsBD4n6bc06ae<-%375 zpN}~r;h5`7!`K=cc|x)fdw)55O;9_6pjLrxCk+cdx7hji@Bod`RkCFVehx)NwypF| zvU60xf2BZT(ku^1a+?bry9;sm&=xylo#qj`A&YscG4=V5yH`u|1#$yy8J`Ps5CTc2 z+LadgToWCJ)2J*YOnNg3F4KU;$2Ggx_?vkbBdRMH{P*(+Vov~)tzuU_Ph6Gdo9t2+ABGOW0%e;S!=%$ zq%>`P@i*pz*&B{7>dXb7v~96chXChBDo<~a)51q?Z^nhedtglLyqqcCJ9A;~MCaU& zlD&Yl9Slm?>b%!Wz(PEQ2L{D1ojAsF@GV$rF3g(fN>n~lxPSU<`xr@mLm&Eoa|4eR zCm$TE@gEh~h95kPh+S$M-(cL#4TYjym^IO{ELLUyH%d`}NeHo}e}XX7Ne_lke&9zR zKF-psy{UDidj6O~Xy4f7=8SgdhB|SDxe)B#(nOsC7lO@uKtbcy=0C+_%nvJ@SM-t2 zFndLRq<-6k+JEe5%Qwd+JS&+y84Bx9 z_jK3>$*JhqN^-7S`Q?#w^uF2x6L?@bqfg+(u_EI6oL>~Vec+Fw24A<0)#Z2PUn=^; zh^(E@Ii_s-Z*d-m#lKjKCDDJe!yBdx=kr*}Ij28|PBoRDtJDQ1@W7Jj6F6}K5qx;s me^m?5cvfHjRAB<=BI18VNN~qavz+Y!0000$yc$cSPh!M6SJ^jBU@%PO}_PRf6y*5&0E;X7(lZy%&);B64PC7a`->*-nxrJ7nIs zW_A$$F264#AITI|^@E7KCX=Mk%vk4R!CVW4s&0wMJ72^$`%qQhY2Wt!-p##8$`&tO z7!*U2dNUr1`+qGW-$N$xFK7GgDh7nrd^DZJMF;LuN$C+R09=egM6%h_w4OjYsbdhQ zz8ZTGpf>$nO}y*1DkTq|>|53UD1^&}y(lxgMij==v{l$rRY-tAF3ju|{i=G4^4-W7 z52F^iO9wF~g*UR{V(ZP)ex8l#nQQ z42rw0<9{S4j)^*o!|b$q5Ey$D))V8yO$K0eoTLssDT#p23FjdR3iN&7 z*9K^B5y#in=OIXj3i>;z}xTSJ1~`*!SaJszL9VesXI<#fF=8^{|>Odl2p+aAkytg#q&6KGOk$ zn3vUpn#`TdmvDi;E15y8Im2NI?ybP%4xn7%lr*J@%hi)`!fK;p2$GrtzpMr%PK~is z{C}*yLEmZSyTa-!`5J_?93tX=7X=8)6MM#(&E2nuhmu3Mej-UKp>HfAhQi=uHy$8R zV-VU&D6+Bmzmp7v!6`!;*Wo}elt|2E12B;1I_glbzpi>*3&zXymeZ!HJj8C2uaZ!V z0kyQMQW}+7c>#d1WU$4kUVozVAQ?K0+5=<}hz;^aL_!oPvu`ThVxDTN2}cO(P=0000?;2+5>vyeah2?F^_pQwTa-J=9GA!lWe4WrvVt>0lLe>dgo?LWd$vUB( zgmHONV?BXRaBbUB_5omJp^I2Cf)RlFS&@ojou$lQ}%e5>-w!%6nFSf6V; zaB8ZTVCF|70iz|H(?++gKv4uFWF+2qEC~0Y@uPZpNghcx5RT*kA(IbpXaiq53;qTg zf>R$rf+-xMiC(w~d=RVOQ#;G%3{9VS3BZdWn;zE zD}Vb%5I+UMQO41=Fp%w=YS-=qc2JnNg+bs9L^o7BRpZfA4ShMx@kf<#ycrn2lh!hPDP=T~N(c3qx#uz{Lplg@_e6E)}h`XDHZXRFQSp}lK5+9#Np|^^! zLn4yH3)Yes(=c zZgC{r-CAQ`+@UqJYnnHVfFH9{e>Ak2uuf=OgzDGrL6ea)k59r3-Py2F-)}-Gg$J-A RZ_WS!002ovPDHLkV1hSAG)e#f diff --git a/docs/html/userhtml9x.png b/docs/html/userhtml9x.png index dcf8438f3889affb89f4070915c4a26a9ca4782d..72555397876e8e580ed18ed351158f61082df235 100644 GIT binary patch delta 1725 zcmV;u215Dd58(}vIDZC{NklE|#A_EyR5E+2U05Y)45x5`& z;|Pq%05UL;0m{Ib494}1yT`qKySm-Gm2UTRf7R9LcIVme?ipz*MNZfss(K+J4@xQT z7)@1Qh{(rM%IEz@-wsuMDk9%XDId1C?m|_67Lj+Ql&kKW|9^4l{Y}9V*Sci>+?zt& z6RyPr5&1r}$iHt4CiG8sF#w>H@>)cGS5?+LguI{pdr8P^%fD|GOXttY2pk7ql~P{r zxfS~VvF8}8gsOhBXy~S}X#Sjxz{;VjukhmgZo^Xn^I8W7``Qwj%n8z<$!XLWtbsrG z$86+;MdL*&<$t@GR){hA<)##3BF-$m`ZlrOWRy~#P3|-vi|5asiv-Lv9|f$~G@A#r zn!?2`df=nRTKLn61XT50q~?ZSj5}1iFG?wIGPg`neif0w{hbw<{M}C?@}r1c#)6fF zo9Ce|Rdwclj79eMHs%vN6BTyL*?yn`&$~TxVCFjrjDNn*MdZx(N=5&%(QunOp~dI3l!T zLWBij z!*^YmjepDFbuMo^=9{_V_+$EvZNs=CHT0`}Y|WiD@{I9}&TTEW$3_!t!x3ZShdrYY z2E>2!zB5O72mdM`tS_{MM+mp`mQc0zv39g+{rvry{dwCtzD*X|UZ;#XX05)*0U34q z-Ag8oe~af&O$W?lAS5qK`Oh=>w<&E0=B-i6>VHLoRlTxzC_2ECXHSEdsqd}rpVk)d zbmiyl2D~3a`^M)As1dS>^m0stS!pe_+daPdm5FL<#NkOiVPRb~B)M30O%n$9eftKg5P19%t|u;rJg*eF_i zmVa=jsu&KG_=6XxRP`$gYyDAp3_*nnH}EmJOha1_|Fh8YoG`f=5;Is}^d%fiB{)ZK zYdHikK(>D9+LWZHFy%*e=YcjD#Fz>I#%Z)k=+MoKX$|qskPH>WNbj5wL*04#En7q!X->)M6K~ue^>)2y}!7tbc>90Vv^Xubb?GNFOsi zcz2JwZUnX`uiuO^0v0jA=M2{SURC>y9J1ED_f!PALw-9l8s2qeHDGU<1abX`=6}!q z;8{I{ci=VMV{gC;b6#j7lHfxj(9zew?-}5EM=65G%xmCMOEp+T8-Z!PKzUV_y|KQ% zgl8rU(IA6-bPVt|+puAHpl=k!gq;;^DuxQY7G+WtzR9&*Crfk5jS3-z4IRvf-HGjmSw4rPg7-b75uqpo)j}Ng5rjH>{63FKzZy!X;?7o>M}NV9ks2HX zwpZwx_nJX%g#9-d&8S+(h>5?WQE(uve0)v}KrSoVR*(RprCn6d)&^Wq)(uK~)K}`G=W9U}&!$KpPa*l$G^DV+1yPZ=$D;u`pm{ zsQgz$VD|$U&kZ2_8uIoi$2KFidRUS6d61b ziH(^BG!nYP6uEDnSYK%9siaRuOKol&LwoH*GQ7PoqqeahQ}B+#7=JUKMOU%7d(;?` zr6E3*GEFAe!k;^LZMg6DEY4+1GjO-ZJL6y}HPT@Jut*& TPyuhy00000NkvXXu0mjfpte*~ delta 1986 zcmV;z2R-=V4df4yIDZG0NklY8Dm?PWLc8sk3F^?9v)t7QhyC>ZEfv~ z%S`&bhMU?SRs2-@miDTX#yij$bB%Y?b}Z2zi{@IicZ+D(J%7JNa+Tm?ly%JZ*)Az@ z1M8%LnW$bSlzBVp8n$Xq|GrBAKoZfNtGEy!WL}i=`Q+_IbnAfUvgPe3m1*qu*#v0mD!(&ublTxLtXS8&aVQtYxQN< zn_hD4_GZ$Y0e{z^*4L;e?5AL#O%P~e$#LU+)B?U7NJN6k$w0jNvN?OIKp_1cDzED8 zX7u*?>w+i#&Mz#%5Rjf%(()Z`gld(6O6|waDPKv)pvp&CT+UkE1w8{D>!$y zSIWUb1?zeTX1ui|%>n%$Deu}`S;X&@(P8a%Uich|*_p8ib4SPNP7SXgz)Q*K8H4m1 zoe4%8zOR+h?ObJ=*b&iD_{x!XJg2(!u+&F2-@ZOM4g1Ima4hVLbKq=z=Aw;~{!{+$ zIB6;PFMn_xraDJS0)~ngJcLlPa4Yn*l`j%H$7Sp*<%6UTdd8tl$AWnn%W<2OqT&7= z`8T0yiUVI|j()CBWUk1l_A&iR%r(lQI&+LVF&sWIMtc|x8Ul!qL4Re8q!0TA-5M>r z#YO}ow{4U+p#vw-4GDds{knQiMj!hq>A1I z54xmZraQMJ9#bKbM-Ee4Y=Yg$;NY{{K+8hrg_U67P6_=Sm`YD5oM01hycDbgcVv|~RPOj;Jb%%~ z=$+u#s*;NUM-V_pj@?}n^UCFD#Ygn6N9KxNyj|jh1D*>fyqlB|mcc|D^`m(cILW@l zEhF#LZ*%hZn?^xEvJniibDog*yZYUq(tdm`&M)m~QWvfB#D6P3n)I#ACy_Cgf8F-k zF#(XvkMPtO=#ti{WI2@_n zpaR#E`cH(%+>o!A^VGR_f-KTO&W!tHlZ1|zrCQ)Yzo{c;3_35cjL*`h`B4o|@^Q-K z;c}I-JmyP7`Q+&Bvn@s6n+0-Z<2ZhIX90yI|l1TpJ4}VqsTjZ(U z2#=V5U_|LC7mllb@>j$AoZv64iU)y&<1dc2?o^skhEY5p^1Kc+&1ZfZNO;a!=& zz{A;pQb`i9QsO9I9KVD-Rha1pU-D~N?n@;*0c+!vld#XW76=As7mi0VDTw1UiK88n zjC-Od@LX{YBWmV>00o54T7Mvk4@WN&kB2cpsP0ak@A#!16FQ85TqPrg0ZM-?m=M5x zZY8vp!fdYSGoNcd?x_IGKPJ&KcV4a26BBjUU&K`f-H_2c{RDs*ECeH{Ge#+4-#mDi z@TLw!FI-LW#{<=UV{x4^zuZo3eR2}^*(%RP#>2r^^y0EwK9lkhJ^%Q4%> zZcIB6;-a}$*h|iH$%#E!tjLRwaaeShCQf9PU}2p&FE9Tg5I|@AJOE-ZY*I=rj@&m`=#{cJ4Sa3UC{A?j4nzS(XzguDcfT2L8+aeoIMr?ORkI_$t^?IfeO z&tDe=*6tF+3&4q?vg>ylLV<*%Zn$!Fp@&91xF!-rj+FTy1gi6RrazQrJe6mKxAv;r9=G2IZdLCm8V9TRa{&70DLvb6LDir~N426zsDJ0*{)o zRcHRtM*F@MhdYkQax@-G`FzO#5iEVL$%hdg4~F?qlykJT%`X?EL3>oPj@>^00|Z^g U*bv0@&;S4c07*qoM6N<$f+h;l - -

      10 Preconditioner routines

      + id="x15-13500010">Preconditioner routines

      The base PSBLAS library contains the implementation of two simple preconditioning techniques:

        -
      • Diagonal Scaling +
      • +

        Diagonal Scaling

      • -
      • Block Jacobi with ILU(0) factorization
      +
    1. +

      Block Jacobi with ILU(0) factorization

    2. The supporting data type and subroutine interfaces are defined in the module psb_prec_mod. The old interfaces -  10.1 init — Initialize a preconditioner -
       10.2 build — Builds a preconditioner -
       10.3 apply — Preconditioner application routine -
       10.4 descr — Prints a description of current preconditioner -
       10.5 clone — clone current preconditioner -
       10.6 free — Free a preconditioner - +

      10.1 init — Initialize a preconditioner

      + + + +
      +call prec%init(icontxt,ptype, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +icontxt

      +

      the communication context.
      Scope:global.
      Type:required.
      Intent: in.
      Specified as: an integer value. +

      +

      +ptype

      +

      the type of preconditioner. Scope: global
      Type: required
      Intent: in.
      Specified as: a character string, see usage notes. +

      +

      +On Exit

      +

      +

      +

      +prec

      +

      Scope: local
      Type: required
      Intent: inout.
      Specified as: a preconditioner data structure psb_prec_type. + + + +

      +

      +info

      +

      Scope: global
      Type: required
      Intent: out.
      Error code: if no error, 0 is returned.

      +

      Notes Legal inputs to this subroutine are interpreted depending on the ptype string as +follows4 : +

      +

      +NONE

      +

      No preconditioning, i.e. the preconditioner is just a copy operator. +

      +

      +DIAG

      +

      Diagonal scaling; each entry of the input vector is multiplied by the + reciprocal of the sum of the absolute values of the coefficients in the + corresponding row of matrix A; +

      +

      +BJAC

      +

      Precondition by a factorization of the block-diagonal of matrix A, where + block boundaries are determined by the data allocation boundaries + for each process; requires no communication. Only the incomplete + factorization ILU(0) is currently implemented.

      + + + +

      10.2 build — Builds a preconditioner

      + + + +
      +call prec%build(a, desc_a, info[,amold,vmold,imold])
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      the system sparse matrix. Scope: local
      Type: required
      Intent: in, target.
      Specified as: a sparse matrix data structure psb_Tspmat_type. +

      +

      +prec

      +

      the preconditioner.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: an already initialized precondtioner data structure + psb_prec_type
      +

      +

      +desc_a

      +

      the problem communication descriptor. Scope: local
      Type: required
      Intent: in, target.
      Specified as: a communication descriptor data structure psb_desc_type. +

      +

      +amold

      + + + +

      The desired dynamic type for the internal matrix storage.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: an object of a class derived from psb_T_base_sparse_mat. +

      +

      +vmold

      +

      The desired dynamic type for the internal vector storage.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: an object of a class derived from psb_T_base_vect_type. +

      +

      +imold

      +

      The desired dynamic type for the internal integer vector storage.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: an object of a class derived from (integer) + psb_T_base_vect_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +prec

      +

      the preconditioner.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a precondtioner data structure psb_prec_type
      +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      The amold, vmold and imold arguments may be employed to interface with special +devices, such as GPUs and other accelerators. + + + +

      10.3 apply — Preconditioner application routine

      + + + +
      +call prec%apply(x,y,desc_a,info,trans,work)
      +call prec%apply(x,desc_a,info,trans)
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +prec

      +

      the preconditioner. Scope: local
      Type: required
      Intent: in.
      Specified as: a preconditioner data structure psb_prec_type. +

      +

      +x

      +

      the source vector. Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one array or an object of type psb_T_vect_type. +

      +

      +desc_a

      +

      the problem communication descriptor. Scope: local
      Type: required
      Intent: in.
      Specified as: a communication data structure psb_desc_type. +

      +

      +trans

      +

      Scope:
      Type: optional
      Intent: in.
      Specified as: a character. + + + +

      +

      +work

      +

      an optional work space Scope: local
      Type: optional
      Intent: inout.
      Specified as: a double precision array.

      +

      +

      +

      +On Return

      +

      +

      +

      +y

      +

      the destination vector. Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one array or an object of type psb_T_vect_type. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      10.4 descr — Prints a description of current preconditioner

      + + + +
      +call prec%descr(info)
      +call prec%descr(info,iout, root)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +prec

      +

      the preconditioner. Scope: local
      Type: required
      Intent: in.
      Specified as: a preconditioner data structure psb_prec_type. +

      +

      +iout

      +

      output unit. Scope: local
      Type: optional
      Intent: in.
      Specified as: an integer number. Default: default output unit. +

      +

      +root

      +

      Process from which to print Scope: local
      Type: optional
      Intent: in.
      Specified as: an integer number between 0 and np - 1, in which case + the specified process will print the description, or -1, in which case all + processes will print. Default: 0. +

      +

      +On Return

      +

      + + + +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      10.5 clone — clone current preconditioner

      + + +
      +call  prec%clone(precout,info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +prec

      +

      the preconditioner.
      Scope: local.

      +

      +

      +

      +On Return

      +

      +

      +

      +precout

      +

      A copy of the input object. +

      +

      +info

      +

      Return code.

      + + +

      10.6 free — Free a preconditioner

      + + +
      +call prec%free(info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +prec

      +

      the preconditioner.
      Scope: local.
      Type: required
      Intent: inout.
      Specified as: a preconditioner data structure psb_prec_type. +

      +

      +On Exit

      +

      +

      +

      +prec

      +

      Scope: local
      Type: required
      Intent: inout.
      Specified as: a preconditioner data structure psb_prec_type. +

      +

      +info

      +

      Scope: global
      Type: required
      Intent: out.
      Error code: if no error, 0 is returned.

      +

      Notes Releases all internal storage. + + + + + + +

      +href="userhtml.html#userhtmlse13.html" >up]

      + id="tailuserhtmlse10.html"> diff --git a/docs/html/userhtmlse11.html b/docs/html/userhtmlse11.html index a9cc2529..41a4cd65 100644 --- a/docs/html/userhtmlse11.html +++ b/docs/html/userhtmlse11.html @@ -13,10 +13,10 @@

      11 Iterative Methods

      + id="x17-14200011">Iterative Methods

      In this chapter we provide routines for preconditioners and iterative methods. The interfaces for Krylov subspace methods are available in the module psb_krylov_mod. -

      +

      11.1 psb_krylov — Krylov Methods Driver Routine

      +

      This subroutine is a driver that provides a general interface for all the Krylov-Subspace +family methods implemented in PSBLAS version 2. +

      The stopping criterion can take the following values: +

      +

      +1

      +

      normwise backward error in the infinity norm; the iteration is stopped + when +

      +      -----∥ri∥------
+err = (∥A∥∥xi∥+ ∥b∥) < eps
+
      +

      +

      +

      +2

      +

      Relative residual in the 2-norm; the iteration is stopped when +

      +      ∥ri∥-
+err = ∥b∥2 < eps
+
      +

      +

      +

      +3

      +

      Relative residual reduction in the 2-norm; the iteration is stopped when +

      +      ∥ri∥
+err = ∥r0∥2 < eps
+
      +

      +

      The behaviour is controlled by the istop argument (see later). In the above formulae, xi +is the tentative solution and ri = b - Axi the corresponding residual at the i-th +iteration. + -

      4 Computational routines

      + id="x9-550004">Computational routines -
      -  4.1 psb_geaxpby — General Dense Matrix Sum -
       4.2 psb_gedot — Dot Product -
       4.3 psb_gedots — Generalized Dot Product -
       4.4 psb_normi — Infinity-Norm of Vector -
       4.5 psb_geamaxs — Generalized Infinity Norm -
       4.6 psb_norm1 — 1-Norm of Vector -
       4.7 psb_geasums — Generalized 1-Norm of Vector -
       4.8 psb_norm2 — 2-Norm of Vector -
       4.9 psb_genrm2s — Generalized 2-Norm of Vector -
       4.10 psb_norm1 — 1-Norm of Sparse Matrix -
       4.11 psb_normi — Infinity Norm of Sparse Matrix -
       4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
       4.13 psb_spsm — Triangular System Solve -
       4.14 psb_gemlt — Entrywise Product -
       4.15 psb_gediv — Entrywise Division -
       4.16 psb_geinv — Entrywise Inversion +

      4.1 psb_geaxpby — General Dense Matrix Sum

      +

      This subroutine is an interface to the computational kernel for dense matrix +sum: +

      +y ← α x+ βy
+
      +

      +

      call psb_geaxpby(alpha, x, beta, y, desc_a, info) +

      + + + +


      + + + +
      +

      +

      + + + + +


      x, y, α, β Subroutine


      Short Precision Real psb_geaxpby
      Long Precision Real psb_geaxpby
      Short Precision Complexpsb_geaxpby
      Long Precision Complex psb_geaxpby


      +
      Table 1: Data types
      + + + +

      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +alpha

      +

      the scalar α.
      Scope: global
      Type: required
      Intent: in.
      Specified as: a number of the data type indicated in Table 1. +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 1. The + rank of x must be the same of y. +

      +

      +beta

      +

      the scalar β.
      Scope: global
      Type: required
      Intent: in.
      Specified as: a number of the data type indicated in Table 1. +

      +

      +y

      +

      the local portion of the global dense matrix y.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 1. + The rank of y must be the same of x. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +
      +

      +On Return

      +

      +

      +

      +y

      +

      the local portion of result submatrix y.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 1. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + +

      4.2 psb_gedot — Dot Product

      +

      This function computes dot product between two vectors x and y.
      If x and y are real vectors it computes dot-product as: +

      +dot ← xTy
+
      +

      Else if x and y are complex vectors then it computes dot-product as: +

      +dot ← xHy
+
      +

      +

      psb_gedot(x, y, desc_a, info [,global])

      + + +


      + + +
      +

      +

      + + + + +


      dot, x, y Function


      Short Precision Real psb_gedot
      Long Precision Real psb_gedot
      Short Precision Complexpsb_gedot
      Long Precision Complex psb_gedot


      +
      Table 2: Data types
      + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 2. The + rank of x must be the same of y. +

      +

      +y

      +

      the local portion of global dense matrix y.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 2. The + rank of y must be the same of x. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +global

      + + +

      Specifies whether the computation should include the global reduction + across all processes.
      Scope: global
      Type: optional.
      Intent: in.
      Specified as: a logical scalar. Default: global=.true.
      +

      +

      +On Return

      +

      +

      +

      +Function value

      +

      is the dot product of vectors x and y.
      Scope: global unless the optional variable global=.false. has been + specified
      Specified as: a number of the data type indicated in Table 2. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to + compute multiple dot products at the same time; in this case, it is + possible to improve the runtime efficiency by using the following scheme: + +

           vres(1) = psb_gedot(x1,y1,desc_a,info,global=.false.) 
        +   vres(2) = psb_gedot(x2,y2,desc_a,info,global=.false.) 
        +   vres(3) = psb_gedot(x3,y3,desc_a,info,global=.false.) 
        +   call psb_sum(ctxt,vres(1:3))
        + +

        In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

      + + +

      4.3 psb_gedots — Generalized Dot Product

      +

      This subroutine computes a series of dot products among the columns of two dense +matrices x and y: +

      +res(i) ← x(:,i)Ty(:,i)
+
      +

      If the matrices are complex, then the usual convention applies, i.e. the conjugate +transpose of x is used. If x and y are of rank one, then res is a scalar, else it is a rank +one array. +

      call psb_gedots(res, x, y, desc_a, info) +

      + + +


      + + +
      +

      +

      + + + + +


      res, x, y Subroutine


      Short Precision Real psb_gedots
      Long Precision Real psb_gedots
      Short Precision Complexpsb_gedots
      Long Precision Complex psb_gedots


      +
      Table 3: Data types
      + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 3. The + rank of x must be the same of y. +

      +

      +y

      +

      the local portion of global dense matrix y.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 3. The + rank of y must be the same of x. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +On Return

      +

      + + +

      +

      +res

      +

      is the dot product of vectors x and y.
      Scope: global
      Intent: out.
      Specified as: a number or a rank-one array of the data type indicated in + Table 2. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + +

      4.4 psb_normi — Infinity-Norm of Vector

      +

      This function computes the infinity-norm of a vector x.
      If x is a real vector it computes infinity norm as: +

      +amax  ← max |xi|
+          i
+
      +

      else if x is a complex vector then it computes the infinity-norm as: +

      +amax ← maxi (|re(xi)|+ |im(xi)|)
+
      +

      +

      psb_geamax(x, desc_a, info [,global])
      psb_normi(x, desc_a, info [,global]) +

      + + +


      + + +
      +

      +

      + + + +



      amax x Function



      Short Precision RealShort Precision Real psb_geamax
      Long Precision Real Long Precision Real psb_geamax
      Short Precision RealShort Precision Complexpsb_geamax
      Long Precision Real Long Precision Complex psb_geamax



      +
      Table 4: Data types
      + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 4. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +global

      +

      Specifies whether the computation should include the global reduction + across all processes.
      Scope: global
      Type: optional.
      Intent: in.
      Specified as: a logical scalar. Default: global=.true.
      +

      +

      +On Return

      +

      + + + +

      +

      +Function value

      +

      is the infinity norm of vector x.
      Scope: global unless the optional variable global=.false. has been + specified
      Specified as: a long precision real number. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to compute + multiple norms at the same time; in this case, it is possible to improve the + runtime efficiency by using the following scheme: +

           vres(1) = psb_geamax(x1,desc_a,info,global=.false.) 
        +   vres(2) = psb_geamax(x2,desc_a,info,global=.false.) 
        +   vres(3) = psb_geamax(x3,desc_a,info,global=.false.) 
        +   call psb_amx(ctxt,vres(1:3))
        + +

        In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

      + + + +

      4.5 psb_geamaxs — Generalized Infinity Norm

      +

      This subroutine computes a series of infinity norms on the columns of a dense matrix +x: +

      +res(i) ← max|x(k,i)|
+         k
+
      +

      +

      call psb_geamaxs(res, x, desc_a, info) +

      + + + +


      + + + +
      +

      +

      + + + +



      res x Subroutine



      Short Precision RealShort Precision Real psb_geamaxs
      Long Precision Real Long Precision Real psb_geamaxs
      Short Precision RealShort Precision Complexpsb_geamaxs
      Long Precision Real Long Precision Complex psb_geamaxs



      +
      Table 5: Data types
      + + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 5. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +On Return

      +

      +

      +

      +res

      +

      is the infinity norm of the columns of x.
      Scope: global
      Intent: out.
      Specified as: a number or a rank-one array of long precision real numbers. +

      +

      +info

      + + + +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.6 psb_norm1 — 1-Norm of Vector

      +

      This function computes the 1-norm of a vector x.
      If x is a real vector it computes 1-norm as: +

      +asum ←  ∥xi∥
+
      +

      else if x is a complex vector then it computes 1-norm as: +

      +asum ←  ∥re(x)∥1 + ∥im (x)∥1
+
      +

      +

      psb_geasum(x, desc_a, info [,global]) psb_norm1(x, desc_a, info [,global]) +

      + + + +


      + + + +
      +

      +

      + + + +



      asum x Function



      Short Precision RealShort Precision Real psb_geasum
      Long Precision Real Long Precision Real psb_geasum
      Short Precision RealShort Precision Complexpsb_geasum
      Long Precision Real Long Precision Complex psb_geasum



      +
      Table 6: Data types
      + + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 6. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +global

      +

      Specifies whether the computation should include the global reduction + across all processes.
      Scope: global
      Type: optional.
      Intent: in.
      Specified as: a logical scalar. Default: global=.true.
      +

      +

      +On Return

      +

      + + + +

      +

      +Function value

      +

      is the 1-norm of vector x.
      Scope: global unless the optional variable global=.false. has been + specified
      Specified as: a long precision real number. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to compute + multiple norms at the same time; in this case, it is possible to improve the + runtime efficiency by using the following scheme: +

           vres(1) = psb_geasum(x1,desc_a,info,global=.false.) 
        +   vres(2) = psb_geasum(x2,desc_a,info,global=.false.) 
        +   vres(3) = psb_geasum(x3,desc_a,info,global=.false.) 
        +   call psb_sum(ctxt,vres(1:3))
        + +

        In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

      + + + +

      4.7 psb_geasums — Generalized 1-Norm of Vector

      +

      This subroutine computes a series of 1-norms on the columns of a dense matrix +x: +

      +res(i) ← max|x(k,i)|
+         k
+
      +

      This function computes the 1-norm of a vector x.
      If x is a real vector it computes 1-norm as: +

      +res(i) ← ∥xi∥
+
      +

      else if x is a complex vector then it computes 1-norm as: +

      +res(i) ← ∥re(x )∥1 +∥im (x )∥1
+
      +

      +

      call psb_geasums(res, x, desc_a, info) +

      + + + +


      + + + +
      +

      +

      + + + +



      res x Subroutine



      Short Precision RealShort Precision Real psb_geasums
      Long Precision Real Long Precision Real psb_geasums
      Short Precision RealShort Precision Complexpsb_geasums
      Long Precision Real Long Precision Complex psb_geasums



      +
      Table 7: Data types
      + + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 7. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +On Return

      +

      +

      +

      +res

      +

      contains the 1-norm of (the columns of) x.
      Scope: global
      Intent: out.
      Short as: a long precision real number. Specified as: a long precision real + number. + + + +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.8 psb_norm2 — 2-Norm of Vector

      +

      This function computes the 2-norm of a vector x.
      If x is a real vector it computes 2-norm as: +

      +nrm2  ← √xT-x-
+
      +

      else if x is a complex vector then it computes 2-norm as: +

      +        √----
+nrm2 ←   xHx
+
      +

      +

      + + + +


      + + + +
      +

      +

      + + + +



      nrm2 x Function



      Short Precision RealShort Precision Real psb_genrm2
      Long Precision Real Long Precision Real psb_genrm2
      Short Precision RealShort Precision Complexpsb_genrm2
      Long Precision Real Long Precision Complex psb_genrm2



      +
      Table 8: Data types
      + + + +

      +
      +

      psb_genrm2(x, desc_a, info [,global])
      psb_norm2(x, desc_a, info [,global])
      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 8. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +global

      +

      Specifies whether the computation should include the global reduction + across all processes.
      Scope: global
      Type: optional.
      Intent: in.
      Specified as: a logical scalar. Default: global=.true.
      +

      +

      +On Return

      + + + +

      +

      +

      +Function Value

      +

      is the 2-norm of vector x.
      Scope: global unless the optional variable global=.false. has been + specified
      Type: required
      Specified as: a long precision real number. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to compute + multiple norms at the same time; in this case, it is possible to improve the + runtime efficiency by using the following scheme: +

           vres(1) = psb_genrm2(x1,desc_a,info,global=.false.) 
        +   vres(2) = psb_genrm2(x2,desc_a,info,global=.false.) 
        +   vres(3) = psb_genrm2(x3,desc_a,info,global=.false.) 
        +   call psb_nrm2(ctxt,vres(1:3))
        + +

        In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

      + + + +

      4.9 psb_genrm2s — Generalized 2-Norm of Vector

      +

      This subroutine computes a series of 2-norms on the columns of a dense matrix +x: +

      +res(i) ← ∥x(:,i)∥2
+
      +

      +

      call psb_genrm2s(res, x, desc_a, info) +

      + + + +


      + + + +
      +

      +

      + + + +



      res x Subroutine



      Short Precision RealShort Precision Real psb_genrm2s
      Long Precision Real Long Precision Real psb_genrm2s
      Short Precision RealShort Precision Complexpsb_genrm2s
      Long Precision Real Long Precision Complex psb_genrm2s



      +
      Table 9: Data types
      + + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 9. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +On Return

      +

      +

      +

      +res

      +

      contains the 1-norm of (the columns of) x.
      Scope: global
      Intent: out.
      Specified as: a long precision real number. +

      +

      +info

      + + + +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.10 psb_norm1 — 1-Norm of Sparse Matrix

      +

      This function computes the 1-norm of a matrix A:
      +

      +nrm1 ← ∥A ∥1
+
      +

      where: +

      +

      +A

      +

      represents the global matrix A

      +
      + + + +


      + +
      +

      +

      + + + +


      A Function


      Short Precision Real psb_spnrm1
      Long Precision Real psb_spnrm1
      Short Precision Complexpsb_spnrm1
      Long Precision Complex psb_spnrm1


      +
      Table 10: Data types
      + + + +

      +
      + + + +
      +psb_spnrm1(A, desc_a, info)
      +psb_norm1(A, desc_a, info)
      +
      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      the local portion of the global sparse matrix A.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_Tspmat_type. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +On Return

      +

      +

      +

      +Function value

      +

      is the 1-norm of sparse submatrix A.
      Scope: global
      Specified as: a long precision real number. + + + +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.11 psb_normi — Infinity Norm of Sparse Matrix

      +

      This function computes the infinity-norm of a matrix A:
      +

      +nrmi ←  ∥A∥∞
+
      +

      where: +

      +

      +A

      +

      represents the global matrix A

      +
      + + + +


      + + + +
      +

      +

      + + + +


      A Function


      Short Precision Real psb_spnrmi
      Long Precision Real psb_spnrmi
      Short Precision Complexpsb_spnrmi
      Long Precision Complex psb_spnrmi


      +
      Table 11: Data types
      + + + +

      +
      + + + +
      +psb_spnrmi(A, desc_a, info)
      +psb_normi(A, desc_a, info)
      +
      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      the local portion of the global sparse matrix A.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_Tspmat_type. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +On Return

      +

      +

      +

      +Function value

      +

      is the infinity-norm of sparse submatrix A.
      Scope: global
      Specified as: a long precision real number. + + + +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.12 psb_spmm — Sparse Matrix by Dense Matrix Product

      +

      This subroutine computes the Sparse Matrix by Dense Matrix Product: +
      +
      +y ← αAx + βy
+
      +
      (1)
      +

      +
      +
      +       T
+y ← αA  x+ βy
+
      +
      (2)
      +

      +
      +
      +y ← αAHx + βy
+
      +
      (3)
      +

      +

      where: + + + +

      +

      +x

      +

      is the global dense matrix x:,: +

      +

      +y

      +

      is the global dense matrix y:,: +

      +

      +A

      +

      is the global sparse matrix A

      +
      + + + +


      + + + +
      +

      +

      + + + + +


      A, x, y, α, β Subroutine


      Short Precision Real psb_spmm
      Long Precision Real psb_spmm
      Short Precision Complexpsb_spmm
      Long Precision Complex psb_spmm


      +
      Table 12: Data types
      + + + +

      +
      +

      call psb_spmm(alpha, a, x, beta, y, desc_a, info)
      call psb_spmm(alpha, a, x, beta, y,desc_a, info, trans, work) +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +alpha

      +

      the scalar α.
      Scope: global
      Type: required
      Intent: in.
      Specified as: a number of the data type indicated in Table 12. +

      +

      +a

      +

      the local portion of the sparse matrix A.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_Tspmat_type. +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 12. The + rank of x must be the same of y. +

      +

      +beta

      + + + +

      the scalar β.
      Scope: global
      Type: required
      Intent: in.
      Specified as: a number of the data type indicated in Table 12. +

      +

      +y

      +

      the local portion of global dense matrix y.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 12. The + rank of y must be the same of x. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +trans

      +

      indicates what kind of operation to perform. +

      +

      + trans = N

      +

      the operation is specified by equation 1 +

      +

      + trans = T

      +

      the operation is specified by equation 2 +

      +

      + trans = C

      +

      the operation is specified by equation 3

      +

      Scope: global
      Type: optional
      Intent: in.
      Default: trans = N
      Specified as: a character variable. + + + +

      +

      +work

      +

      work array.
      Scope: local
      Type: optional
      Intent: inout.
      Specified as: a rank one array of the same type of x and y with the TARGET + attribute. +

      +

      +On Return

      +

      +

      +

      +y

      +

      the local portion of result matrix y.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: an array of rank one or two containing numbers of type specified + in Table 12. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.13 psb_spsm — Triangular System Solve

      +

      This subroutine computes the Triangular System Solve: +

      +
      +y  ←   αT- 1x + βy
+y  ←   αDT -1x +βy
+         - 1
+y  ←   αT  Dx  +βy
+y  ←   αT- Tx+ βy
+y  ←   αDT -Tx + βy
+y  ←   αT- TDx + βy
+         - H
+y  ←   αT   x +βy
+y  ←   αDT -Hx + βy
+y  ←   αT- HDx + βy
+
      +
      +

      where: +

      +

      +x

      +

      is the global dense matrix x:,: +

      +

      +y

      +

      is the global dense matrix y:,: +

      +

      +T

      +

      is the global sparse block triangular submatrix T +

      +

      +D

      +

      is the scaling diagonal matrix.

      + + + +

      call psb_spsm(alpha, t, x, beta, y, desc_a, info)
      call psb_spsm(alpha, t, x, beta, y, desc_a, info, trans, unit, choice, diag, work)
      +

      + + + +


      + + + +
      +

      +

      + + + + +


      T, x, y, D, α, β Subroutine


      Short Precision Real psb_spsm
      Long Precision Real psb_spsm
      Short Precision Complexpsb_spsm
      Long Precision Complex psb_spsm


      +
      Table 13: Data types
      + + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +alpha

      +

      the scalar α.
      Scope: global
      Type: required
      Intent: in.
      Specified as: a number of the data type indicated in Table 13. +

      +

      +t

      +

      the global portion of the sparse matrix T.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object type specified in § 3. +

      +

      +x

      +

      the local portion of global dense matrix x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 13. The + rank of x must be the same of y. +

      +

      +beta

      +

      the scalar β.
      Scope: global
      Type: required
      Intent: in.
      Specified as: a number of the data type indicated in Table 13. + + + +

      +

      +y

      +

      the local portion of global dense matrix y.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 13. The + rank of y must be the same of x. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +trans

      +

      specify with unitd the operation to perform. +

      +

      + trans = ’N’

      +

      the operation is with no transposed matrix +

      +

      + trans = ’T’

      +

      the operation is with transposed matrix. +

      +

      + trans = ’C’

      +

      the operation is with conjugate transposed matrix.

      +

      Scope: global
      Type: optional
      Intent: in.
      Default: trans = N
      Specified as: a character variable. +

      +

      +unitd

      +

      specify with trans the operation to perform. + + + +

      +

      + unitd = ’U’

      +

      the operation is with no scaling +

      +

      + unitd = ’L’

      +

      the operation is with left scaling +

      +

      + unitd = ’R’

      +

      the operation is with right scaling.

      +

      Scope: global
      Type: optional
      Intent: in.
      Default: unitd = U
      Specified as: a character variable. +

      +

      +choice

      +

      specifies the update of overlap elements to be performed on exit: +

      +

      +

      +

      psb_none_ +

      +

      +

      +

      psb_sum_ +

      +

      +

      +

      psb_avg_ +

      +

      +

      +

      psb_square_root_

      +

      Scope: global
      Type: optional
      Intent: in.
      Default: psb_avg_
      Specified as: an integer variable. + + + +

      +

      +diag

      +

      the diagonal scaling matrix.
      Scope: local
      Type: optional
      Intent: in.
      Default: diag(1) = 1(noscaling)
      Specified as: a rank one array containing numbers of the type indicated in + Table 13. +

      +

      +work

      +

      a work array.
      Scope: local
      Type: optional
      Intent: inout.
      Specified as: a rank one array of the same type of x with the TARGET + attribute. +

      +

      +On Return

      +

      +

      +

      +y

      +

      the local portion of global dense matrix y.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: an array of rank one or two containing numbers of type specified + in Table 13. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.14 psb_gemlt — Entrywise Product

      +

      This function computes the entrywise product between two vectors x and +y +

      +dot ← x(i)y(i).
+
      +

      +

      psb_gemlt(x, y, desc_a, info) +

      + + + +


      + + + +
      +

      +

      + + + + +


      dot, x, y Function


      Short Precision Real psb_gemlt
      Long Precision Real psb_gemlt
      Short Precision Complexpsb_gemlt
      Long Precision Complex psb_gemlt


      +
      Table 14: Data types
      + + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense vector x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

      +

      +y

      +

      the local portion of global dense vector y.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +On Return

      +

      + + + +

      +

      +y

      +

      the local portion of result submatrix y.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 14. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.15 psb_gediv — Entrywise Division

      +

      This function computes the entrywise division between two vectors x and +y +

      +∕ ← x(i)∕y(i).
+
      +

      +

      psb_gediv(x, y, desc_a, info, [flag) +

      + + + +


      + + + +
      +

      +

      + + + + +


      , x, y Function


      Short Precision Real psb_gediv
      Long Precision Real psb_gediv
      Short Precision Complexpsb_gediv
      Long Precision Complex psb_gediv


      +
      Table 15: Data types
      + + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense vector x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

      +

      +y

      +

      the local portion of global dense vector y.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +flag

      +

      check if any of the y(i) = 0, and in case returns error halting the + computation.
      Scope: local
      Type: optional Intent: in.
      Specified as: the logical value flag=.true. + + + +

      +

      +On Return

      +

      +

      +

      +x

      +

      the local portion of result submatrix x.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 14. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      4.16 psb_geinv — Entrywise Inversion

      +

      This function computes the entrywise inverse of a vector x and puts it into +y +

      +∕ ← 1∕x(i).
+
      +

      +

      psb_geinv(x, y, desc_a, info, [flag) +

      + + + +


      + + + +
      +

      +

      + + + + +


      , x, y Function


      Short Precision Real psb_geinv
      Long Precision Real psb_geinv
      Short Precision Complexpsb_geinv
      Long Precision Complex psb_geinv


      +
      Table 16: Data types
      + + + +

      +
      +
      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      the local portion of global dense vector x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an object of type psb_desc_type. +

      +

      +flag

      +

      check if any of the x(i) = 0, and in case returns error halting the + computation.
      Scope: local
      Type: optional Intent: in.
      Specified as: the logical value flag=.true. +

      +

      +On Return

      +

      + + + +

      +

      +y

      +

      the local portion of result submatrix x.
      Scope: local
      Type: required
      Intent: out.
      Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 16. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + + + + + + + + +href="userhtml.html#userhtmlse7.html" >up]

      + id="tailuserhtmlse4.html"> diff --git a/docs/html/userhtmlse5.html b/docs/html/userhtmlse5.html index c897c0eb..6b87e4b3 100644 --- a/docs/html/userhtmlse5.html +++ b/docs/html/userhtmlse5.html @@ -11,42 +11,2309 @@

      +href="#tailuserhtmlse5.html">tail] [up]

      5 Communication routines

      + id="x10-720005">Communication routines

      The routines in this chapter implement various global communication operators on vectors associated with a discretization mesh. For auxiliary communication routines not tied to a discretization space see 6. +href="userhtmlse6.html#x11-770006">6. -

      -  5.1 psb_halo — Halo Data Communication -
       5.2 psb_ovrl — Overlap Update -
       5.3 psb_gather — Gather Global Dense Matrix -
       5.4 psb_scatter — Scatter Global Dense Matrix +

      5.1 psb_halo — Halo Data Communication

      +

      These subroutines gathers the values of the halo elements: +

      +x ← x
+
      +

      where: +

      +

      +x

      +

      is a global dense submatrix.

      +
      + + + +


      + + + +
      +

      +

      + + + + + +


      α, x Subroutine


      Integer psb_halo
      Short Precision Real psb_halo
      Long Precision Real psb_halo
      Short Precision Complexpsb_halo
      Long Precision Complex psb_halo


      +
      Table 17: Data types
      + + + +

      +

      call psb_halo(x, desc_a, info)
      call psb_halo(x, desc_a, info, work, data) +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      global dense matrix x.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 17. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +work

      +

      the work array.
      Scope: local
      Type: optional
      Intent: inout.
      Specified as: a rank one array of the same type of x. +

      +

      +data

      + + +

      index list selector.
      Scope: global
      Type: optional
      Specified + as: an integer. Values:psb_comm_halo_,psb_comm_mov_, psb_comm_ext_, + default: psb_comm_halo_. Chooses the index list on which to base the data + exchange. +

      +

      +On Return

      +

      +

      +

      +x

      +

      global dense result matrix x.
      Scope: local
      Type: required
      Intent: inout.
      Returned as: a rank one or two array containing numbers of type specified + in Table 17. +

      +

      +info

      +

      the local portion of result submatrix y.
      Scope: local
      Type: required
      Intent: out.
      An integer value that contains an error code.

      +


      +
      +

      +

      PIC

      +
      Figure 3: Sample discretization mesh.
      +


      +

      Usage Example Consider the discretization mesh depicted in fig. 3, partitioned +among two processes as shown by the dashed line; the data distribution is such that +each process will own 32 entries in the index space, with a halo made of 8 entries +placed at local indices 33 through 40. If process 0 assigns an initial value of 1 to +its entries in the x vector, and process 1 assigns a value of 2, then after +a call to psb_halo the contents of the local vectors will be the following: +

      + + +


      + + + +
      +

      +

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Process 0
      Process 1
      +
      IGLOB(I)X(I) IGLOB(I)X(I)
      1 1 1.0 1 33 2.0
      2 2 1.0 2 34 2.0
      3 3 1.0 3 35 2.0
      4 4 1.0 4 36 2.0
      5 5 1.0 5 37 2.0
      6 6 1.0 6 38 2.0
      7 7 1.0 7 39 2.0
      8 8 1.0 8 40 2.0
      9 9 1.0 9 41 2.0
      10 10 1.010 42 2.0
      11 11 1.011 43 2.0
      12 12 1.012 44 2.0
      13 13 1.013 45 2.0
      14 14 1.014 46 2.0
      15 15 1.015 47 2.0
      16 16 1.016 48 2.0
      17 17 1.017 49 2.0
      18 18 1.018 50 2.0
      19 19 1.019 51 2.0
      20 20 1.020 52 2.0
      21 21 1.021 53 2.0
      22 22 1.022 54 2.0
      23 23 1.023 55 2.0
      24 24 1.024 56 2.0
      25 25 1.025 57 2.0
      26 26 1.026 58 2.0
      27 27 1.027 59 2.0
      28 28 1.028 60 2.0
      29 29 1.029 61 2.0
      30 30 1.030 62 2.0
      31 31 1.031 63 2.0
      32 32 1.032 64 2.0
      33 33 2.033 25 1.0
      34 34 2.034 26 1.0
      35 35 2.035 27 1.0
      36 36 2.036 28 1.0
      37 37 2.037 29 1.0
      38 38 2.038 30 1.0
      39 39 2.039 31 1.0
      40 40 2.040 32 1.0
      + + + +

      +
      + + + +

      5.2 psb_ovrl — Overlap Update

      +

      These subroutines applies an overlap operator to the input vector: +

      +x ← Qx
+
      +

      where: +

      +

      +x

      +

      is the global dense submatrix x +

      +

      +Q

      +

      is the overlap operator; it is the composition of two operators Pa and PT.

      +
      + +


      + + + +
      +

      +

      + + + +


      x Subroutine


      Short Precision Real psb_ovrl
      Long Precision Real psb_ovrl
      Short Precision Complexpsb_ovrl
      Long Precision Complex psb_ovrl


      +
      Table 18: Data types
      + + + +

      +
      +

      call psb_ovrl(x, desc_a, info)
      call psb_ovrl(x, desc_a, info, update=update_type, work=work) +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      global dense matrix x.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 18. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +update

      +

      Update operator.
      +

      +

      + update = psb_none_

      +

      Do nothing; +

      +

      + update = psb_add_

      +

      Sum overlap entries, i.e. apply PT; + + + +

      +

      + update = psb_avg_

      +

      Average overlap entries, i.e. apply PaPT;

      +

      Scope: global
      Intent: in.
      Default: update_type = psb_avg_
      Scope: global
      Specified as: a integer variable. +

      +

      +work

      +

      the work array.
      Scope: local
      Type: optional
      Intent: inout.
      Specified as: a one dimensional array of the same type of x. +

      +

      +On Return

      +

      +

      +

      +x

      +

      global dense result matrix x.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: an array of rank one or two containing numbers of type specified + in Table 18. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. + + + +

        If there is no overlap in the data distribution associated with the + descriptor, no operations are performed; +

      2. +
      3. +

        The operator PT performs the reduction sum of overlap elements; it is a + “prolongation” operator PT that replicates overlap elements, accounting + for the physical replication of data; +

      4. +
      5. +

        The operator Pa performs a scaling on the overlap elements by the + amount of replication; thus, when combined with the reduction operator, + it implements the average of replicated elements over all of their instances.

      +


      + + + + + + + +
      +

      +

      PIC

      +
      Figure 4: Sample discretization mesh.
      + + + +


      +

      Example of use Consider the discretization mesh depicted in fig. 4, partitioned +among two processes as shown by the dashed lines, with an overlap of 1 extra layer +with respect to the partition of fig. 3; the data distribution is such that +each process will own 40 entries in the index space, with an overlap of 16 +entries placed at local indices 25 through 40; the halo will run from local +index 41 through local index 48.. If process 0 assigns an initial value of 1 to +its entries in the x vector, and process 1 assigns a value of 2, then after a +call to psb_ovrl with psb_avg_ and a call to psb_halo_ the contents of +the local vectors will be the following (showing a transition among the two +subdomains) +

      + + + +


      + + + +
      +

      +

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Process 0
      Process 1
      +
      IGLOB(I)X(I) IGLOB(I)X(I)
      1 1 1.0 1 33 1.5
      2 2 1.0 2 34 1.5
      3 3 1.0 3 35 1.5
      4 4 1.0 4 36 1.5
      5 5 1.0 5 37 1.5
      6 6 1.0 6 38 1.5
      7 7 1.0 7 39 1.5
      8 8 1.0 8 40 1.5
      9 9 1.0 9 41 2.0
      10 10 1.010 42 2.0
      11 11 1.011 43 2.0
      12 12 1.012 44 2.0
      13 13 1.013 45 2.0
      14 14 1.014 46 2.0
      15 15 1.015 47 2.0
      16 16 1.016 48 2.0
      17 17 1.017 49 2.0
      18 18 1.018 50 2.0
      19 19 1.019 51 2.0
      20 20 1.020 52 2.0
      21 21 1.021 53 2.0
      22 22 1.022 54 2.0
      23 23 1.023 55 2.0
      24 24 1.024 56 2.0
      25 25 1.525 57 2.0
      26 26 1.526 58 2.0
      27 27 1.527 59 2.0
      28 28 1.528 60 2.0
      29 29 1.529 61 2.0
      30 30 1.530 62 2.0
      31 31 1.531 63 2.0
      32 32 1.532 64 2.0
      33 33 1.533 25 1.5
      34 34 1.534 26 1.5
      35 35 1.535 27 1.5
      36 36 1.536 28 1.5
      37 37 1.537 29 1.5
      38 38 1.538 30 1.5
      39 39 1.539 31 1.5
      40 40 1.540 32 1.5
      41 41 2.041 17 1.0
      42 42 2.042 18 1.0
      43 43 2.043 19 1.0
      44 44 2.044 20 1.0
      45 45 2.045 21 1.0
      46 46 2.046 22 1.0
      47 47 2.047 23 1.0
      48 48 2.048 24 1.0
      + + + +

      +
      + + + +

      5.3 psb_gather — Gather Global Dense Matrix

      +

      These subroutines collect the portions of global dense matrix distributed over all +process into one single array stored on one process. +

      +glob-x ← collect(loc-xi)
+
      +

      where: +

      +

      +glob_x

      +

      is the global submatrix glob_x1:m,1:n +

      +

      +loc_xi

      +

      is the local portion of global dense matrix on process i. +

      +

      +collect

      +

      is the collect function.

      +
      + + + +


      + + + +
      +

      +

      + + + + + +


      xi,y Subroutine


      Integer psb_gather
      Short Precision Real psb_gather
      Long Precision Real psb_gather
      Short Precision Complexpsb_gather
      Long Precision Complex psb_gather


      +
      Table 19: Data types
      + + + +

      +
      +

      call psb_gather(glob_x, loc_x, desc_a, info, root) +call psb_gather(glob_x, loc_x, desc_a, info, root) +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +loc_x

      +

      the local portion of global dense matrix glob_x.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type indicated in Table 19. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +root

      +

      The process that holds the global copy. If root = -1 all the processes will + have a copy of the global vector.
      Scope: global
      Type: optional
      Intent: in.
      Specified as: an integer variable -1 root np - 1, default -1. +

      +

      +On Return

      +

      + + + +

      +

      +glob_x

      +

      The array where the local parts must be gathered.
      Scope: global
      Type: required
      Intent: out.
      Specified as: a rank one or two array with the ALLOCATABLE attribute. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      5.4 psb_scatter — Scatter Global Dense Matrix

      +

      These subroutines scatters the portions of global dense matrix owned by a process to +all the processes in the processes grid. +

      +loc-xi ← scatter(glob-x)
+
      +

      where: +

      +

      +glob_x

      +

      is the global matrix glob_x1:m,1:n +

      +

      +loc_xi

      +

      is the local portion of global dense matrix on process i. +

      +

      +scatter

      +

      is the scatter function.

      +
      + + + +


      + + + +
      +

      +

      + + + + + +


      xi,y Subroutine


      Integer psb_scatter
      Short Precision Real psb_scatter
      Long Precision Real psb_scatter
      Short Precision Complexpsb_scatter
      Long Precision Complex psb_scatter


      +
      Table 20: Data types
      + + + +

      +
      +

      call psb_scatter(glob_x, loc_x, desc_a, info, root, mold) +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +glob_x

      +

      The array that must be scattered into local pieces.
      Scope: global
      Type: required
      Intent: in.
      Specified as: a rank one or two array. +

      +

      +desc_a

      +

      contains data structures for communications.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +root

      +

      The process that holds the global copy. If root = -1 all the processes have + a copy of the global vector.
      Scope: global
      Type: optional
      Intent: in.
      Specified as: an integer variable -1 root np - 1, default psb_root_, + i.e. process 0. +

      +

      +mold

      + + + +

      The desired dynamic type for the internal vector storage.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: an object of a class derived from psb_T_base_vect_type; + this is only allowed when loc_x is of type psb_T_vect_type. +

      +

      +On Return

      +

      +

      +

      +loc_x

      +

      the local portion of global dense matrix glob_x.
      Scope: local
      Type: required
      Intent: out.
      Specified as: a rank one or two ALLOCATABLE array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 20. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + + + + + + + + +href="userhtml.html#userhtmlse8.html" >up]

      + id="tailuserhtmlse5.html"> diff --git a/docs/html/userhtmlse6.html b/docs/html/userhtmlse6.html index 78d2fa1e..19fb0f5d 100644 --- a/docs/html/userhtmlse6.html +++ b/docs/html/userhtmlse6.html @@ -11,105 +11,4060 @@

      +href="userhtmlse3.html#tailuserhtmlse6.html">tail] [up]

      6 Data management routines

      + id="x11-770006">Data management routines

      -

      +

      6.1 psb_cdall — Allocates a communication descriptor

      + + + +
      +call psb_cdall(icontxt, desc_a, info,mg=mg,parts=parts)
      +call psb_cdall(icontxt, desc_a, info,vg=vg,[mg=mg,flag=flag])
      +call psb_cdall(icontxt, desc_a, info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx])
      +call psb_cdall(icontxt, desc_a, info,nl=nl)
      +call psb_cdall(icontxt, desc_a, info,mg=mg,repl=.true.)
      +
      +

      +

      This subroutine initializes the communication descriptor associated with an index +space. One of the optional arguments parts, vg, vl, nl or repl must be specified, +thereby choosing the specific initialization strategy. +

      +

      +On Entry

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +icontxt

      +

      the communication context.
      Scope:global.
      Type:required.
      Intent: in.
      Specified as: an integer value. +

      +

      +vg

      +

      Data allocation: each index i ∈{1mg} is allocated to process vg(i).
      Scope:global.
      Type:optional.
      Intent: in.
      Specified as: an integer array. +

      +

      +flag

      +

      Specifies whether entries in vg are zero- or one-based.
      Scope:global.
      Type:optional.
      Intent: in.
      Specified as: an integer value 0,1, default 0. + + + +

      +

      +mg

      +

      the (global) number of rows of the problem.
      Scope:global.
      Type:optional.
      Intent: in.
      Specified as: an integer value. It is required if parts or repl is specified, + it is optional if vg is specified. +

      +

      +parts

      +

      the subroutine that defines the partitioning scheme.
      Scope:global.
      Type:required.
      Specified as: a subroutine. +

      +

      +vl

      +

      Data allocation: the set of global indices vl(1 : nl) belonging to the calling + process.
      Scope:local.
      Type:optional.
      Intent: in.
      Specified as: an integer array. +

      +

      +nl

      +

      Data allocation: in a generalized block-row distribution the number of + indices belonging to the current process.
      Scope:local.
      Type:optional.
      Intent: in.
      Specified as: an integer value. May be specified together with vl. +

      +

      +repl

      +

      Data allocation: build a replicated index space (i.e. all processes own all + indices).
      Scope:global.
      Type:optional.
      Intent: in.
      Specified as: the logical value .true. + + + +

      +

      +globalcheck

      +

      Data allocation: do global checks on the local index lists vl
      Scope:global.
      Type:optional.
      Intent: in.
      Specified as: a logical value, default: .false. +

      +

      +lidx

      +

      Data allocation: the set of local indices lidx(1 : nl) to be assigned to the + global indices vl.
      Scope:local.
      Type:optional.
      Intent: in.
      Specified as: an integer array.

      +

      +

      +

      +On Return

      +

      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: out.
      Specified as: a structured data of type psb_desc_type. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. + + + +

        One of the optional arguments parts, vg, vl, nl or repl must be specified, + thereby choosing the initialization strategy as follows: +

        +

        + parts

        +

        In this case we have a subroutine specifying the mapping between global + indices and process/local index pairs. If this optional argument is + specified, then it is mandatory to specify the argument mg as well. The + subroutine must conform to the following interface: + + + +

        +           interface
        +              subroutine psb_parts(glob_index,mg,np,pv,nv)
        +                integer, intent (in)  :: glob_index,np,mg
        +                integer, intent (out) :: nv, pv(*)
        +              end subroutine psb_parts
        +           end interface
        +
        +

        The input arguments are: +

        +

        + glob_index

        +

        The global index to be mapped; +

        +

        + np

        +

        The number of processes in the mapping; +

        +

        + mg

        +

        The total number of global rows in the mapping;

        +

        The output arguments are: +

        +

        + nv

        +

        The number of entries in pv; +

        +

        + pv

        +

        A vector containing the indices of the processes to which the + global index should be assigend; each entry must satisfy 0 + pv(i) < np; if nv > 1 we have an index assigned to multiple + processes, i.e. we have an overlap among the subdomains.

        +
        +

        + vg

        +

        In this case the association between an index and a process is specified via + an integer vector vg(1:mg); each index i ∈{1mg} is assigned to process + vg(i). The vector vg must be identical on all calling processes; its + entries may have the ranges (0np - 1) or (1np) according to the + value of flag. The size mg may be specified via the optional + argument mg; the default is to use the entire vector vg, thus having + mg=size(vg). +

        +

        + vl

        + + + +

        In this case we are specifying the list of indices vl(1:nl) assigned to the + current process; thus, the global problem size mg is given by the range of + the aggregate of the individual vectors vl specified in the calling + processes. The size may be specified via the optional argument nl; the + default is to use the entire vector vl, thus having nl=size(vl). If + globalcheck=.true. the subroutine will check how many times each + entry in the global index space (1mg) is specified in the input lists vl, + thus allowing for the presence of overlap in the input, and checking for + “orphan” indices. If globalcheck=.false., the subroutine will not + check for overlap, and may be significantly faster, but the user is + implicitly guaranteeing that there are neither orphan nor overlap + indices. +

        +

        + lidx

        +

        The optional argument lidx is available for those cases in which the user + has already established a global-to-local mapping; if it is specified, each + index in vl(i) will be mapped to the corresponding local index lidx(i). + When specifying the argument lidx the user would also likely employ + lidx in calls to psb_cdins and local in calls to psb_spins and + psb_geins; see also sec. 2.3.1. +

        +

        + nl

        +

        If this argument is specified alone (i.e. without vl) the result is a + generalized row-block distribution in which each process I gets assigned a + consecutive chunk of NI = nl global indices. +

        +

        + repl

        +

        This arguments specifies to replicate all indices on all processes. This is a + special purpose data allocation that is useful in the construction of some + multilevel preconditioners.

        +
      2. +
      3. +

        On exit from this routine the descriptor is in the build state. +

      4. +
      5. +

        Calling the routine with vg or parts implies that every process will scan the + entire index space to figure out the local indices. +

      6. +
      7. +

        Overlapped indices are possible with both parts and vl invocations. + + + +

      8. +
      9. +

        When the subroutine is invoked with vl in conjunction with globalcheck=.true., + it will perform a scan of the index space to search for overlap or orphan + indices. +

      10. +
      11. +

        When the subroutine is invoked with vl in conjunction with globalcheck=.false., + no index space scan will take place. Thus it is the responsibility of the user to + make sure that the indices specified in vl have neither orphans nor overlaps; if + this assumption fails, results will be unpredictable. +

      12. +
      13. +

        Orphan and overlap indices are impossible by construction when the subroutine + is invoked with nl (alone), or vg.

      + + + +

      6.2 psb_cdins — Communication descriptor insert routine

      + + + +
      +call psb_cdins(nz, ia, ja, desc_a, info [,ila,jla])
      +call psb_cdins(nz,ja,desc,info[,jla,mask,lidx])
      +
      +

      +

      This subroutine examines the edges of the graph associated with the +discretization mesh (and isomorphic to the sparsity pattern of a linear system +coefficient matrix), storing them as necessary into the communication descriptor. In +the first form the edges are specified as pairs of indices ia(i),ja(i); the starting index +ia(i) should belong to the current process. In the second form only the remote indices +ja(i) are specified. +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +nz

      +

      the number of points being inserted.
      Scope: local.
      Type: required.
      Intent: in.
      Specified as: an integer value. +

      +

      +ia

      +

      the indices of the starting vertex of the edges being inserted.
      Scope: local.
      Type: required.
      Intent: in.
      Specified as: an integer array of length nz. +

      +

      +ja

      + + + +

      the indices of the end vertex of the edges being inserted.
      Scope: local.
      Type: required.
      Intent: in.
      Specified as: an integer array of length nz. +

      +

      +mask

      +

      Mask entries in ja, they are inserted only when the corresponding mask + entries are .true.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: a logical array of length nz, default .true.. +

      +

      +lidx

      +

      User defined local indices for ja.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: an integer array of length nz.

      +

      +

      +

      +On Return

      +

      +

      +

      +desc_a

      +

      the updated communication descriptor.
      Scope:local.
      Type:required.
      Intent: inout.
      Specified as: a structured data of type psb_desc_type. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected. + + + +

      +

      +ila

      +

      the local indices of the starting vertex of the edges being inserted.
      Scope: local.
      Type: optional.
      Intent: out.
      Specified as: an integer array of length nz. +

      +

      +jla

      +

      the local indices of the end vertex of the edges being inserted.
      Scope: local.
      Type: optional.
      Intent: out.
      Specified as: an integer array of length nz. +

      +

      Notes +

        +
      1. +

        This routine may only be called if the descriptor is in the build state; +

      2. +
      3. +

        This routine automatically ignores edges that do not insist on the current + process, i.e. edges for which neither the starting nor the end vertex belong + to the current process. +

      4. +
      5. +

        The second form of this routine will be useful when dealing with + user-specified index mappings; see also 2.3.1.

      + + + +

      6.3 psb_cdasb — Communication descriptor assembly routine

      + + + +
      +call psb_cdasb(desc_a, info [, mold])
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: inout.
      Specified as: a structured data of type psb_desc_type. +

      +

      +mold

      +

      The desired dynamic type for the internal index storage.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: a object of type derived from (integer) + psb_T_base_vect_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +desc_a

      + + + +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: inout.
      Specified as: a structured data of type psb_desc_type. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        On exit from this routine the descriptor is in the assembled state.

      +

      This call will set up all the necessary information for the halo data exchanges. In doing +so, the library will need to identify the set of processes owning the halo indices +through the use of the desc%fnd_owner() method; the owning processes +are the topological neighbours of the calling process. If the user has some +background information on the processes that are neighbours of the current one, +it is possible to specify explicitly the list of adjacent processes with a call +to desc%set_p_adjcncy(list); this will speed up the subsequent call to +psb_cdasb. + + + +

      6.4 psb_cdcpy — Copies a communication descriptor

      + + + +
      +call psb_cdcpy(desc_in, desc_out, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +desc_in

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +

      +

      +On Return

      +

      +

      +

      +desc_out

      +

      the communication descriptor copy.
      Scope:local.
      Type:required.
      Intent: out.
      Specified as: a structured data of type psb_desc_type. +

      +

      +info

      + + + +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      6.5 psb_cdfree — Frees a communication descriptor

      + + + +
      +call psb_cdfree(desc_a, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +desc_a

      +

      the communication descriptor to be freed.
      Scope:local.
      Type:required.
      Intent: inout.
      Specified as: a structured data of type psb_desc_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      6.6 psb_cdbldext — Build an extended communication descriptor

      + + + +
      +call psb_cdbldext(a,desc_a,nl,desc_out, info, extype)
      +
      +

      +

      This subroutine builds an extended communication descriptor, based on the input +descriptor desc_a and on the stencil specified through the input sparse matrix +a. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      A sparse matrix Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data type. +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +nl

      +

      the number of additional layers desired.
      Scope:global.
      Type:required.
      Intent: in.
      Specified as: an integer value nl 0. + + + +

      +

      +extype

      +

      the kind of estension required.
      Scope:global.
      Type:optional .
      Intent: in.
      Specified as: an integer value psb_ovt_xhal_, psb_ovt_asov_, default: + psb_ovt_xhal_ +

      +

      +

      +

      +On Return

      +

      +

      +

      +desc_out

      +

      the extended communication descriptor.
      Scope:local.
      Type:required.
      Intent: inout.
      Specified as: a structured data of type psb_desc_type. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        Specifying psb_ovt_xhal_ for the extype argument the user will obtain a + descriptor for a domain partition in which the additional layers are fetched + as part of an (extended) halo; however the index-to-process mapping is + identical to that of the base descriptor; +

      2. +
      3. + + + +

        Specifying psb_ovt_asov_ for the extype argument the user will obtain + a descriptor with an overlapped decomposition: the additional layer is + aggregated to the local subdomain (and thus is an overlap), and a new + halo extending beyond the last additional layer is formed.

      + + + +

      6.7 psb_spall — Allocates a sparse matrix

      + + + +
      +call psb_spall(a, desc_a, info [, nnz, dupl, bldmode])
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +nnz

      +

      An estimate of the number of nonzeroes in the local part of the assembled + matrix.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value. +

      +

      +dupl

      +

      How to handle duplicate coefficients.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: integer, possible values: psb_dupl_ovwrt_, psb_dupl_add_, + psb_dupl_err_. + + + +

      +

      +bldmode

      +

      Whether to keep track of matrix entries that do not belong to the current + process.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: + an integer value psb_matbld_noremote_, psb_matbld_remote_. Default: + psb_matbld_noremote_.

      +

      +

      +

      +On Return

      +

      +

      +

      +a

      +

      the matrix to be allocated.
      Scope:local
      Type:required
      Intent: out.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        On exit from this routine the sparse matrix is in the build state. +

      2. +
      3. +

        The descriptor may be in either the build or assembled state. + + + +

      4. +
      5. +

        Providing a good estimate for the number of nonzeroes nnz in the + assembled matrix may substantially improve performance in the matrix + build phase, as it will reduce or eliminate the need for (potentially + multiple) data reallocations; +

      6. +
      7. +

        Using psb_matbld_remote_ is likely to cause a runtime overhead at + assembly time;

      + + + +

      6.8 psb_spins — Insert a set of coefficients into a sparse matrix

      + + + +
      +call psb_spins(nz, ia, ja, val, a, desc_a, info [,local])
      +call psb_spins(nr, irw, irp, ja, val, a, desc_a, info [,local])
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +nz

      +

      the number of coefficients to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: an integer scalar. +

      +

      +nr

      +

      the number of rows to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: an integer scalar. +

      +

      +irw

      +

      the first row to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: an integer scalar. + + + +

      +

      +ia

      +

      the row indices of the coefficients to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: an integer array of size nz. +

      +

      +irp

      +

      the row pointers of the coefficients to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: an integer array of size nr + 1. +

      +

      +ja

      +

      the column indices of the coefficients to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: an integer array of size nz. +

      +

      +val

      +

      the coefficients to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: an array of size nz. Must be of the same type and kind of + the coefficients of the sparse matrix a. +

      +

      +desc_a

      +

      The communication descriptor.
      Scope: local.
      Type: required.
      Intent: inout.
      Specified as: a variable of type psb_desc_type.
      + + + +

      +

      +local

      +

      Whether the entries in the indices vectors ia, ja are already in local + numbering.
      Scope:local.
      Type:optional.
      Specified as: a logical value; default: .false.. +

      +

      +

      +

      +On Return

      +

      +

      +

      +a

      +

      the matrix into which coefficients will be inserted.
      Scope:local
      Type:required
      Intent: inout.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +desc_a

      +

      The communication descriptor.
      Scope: local.
      Type: required.
      Intent: inout.
      Specified as: a variable of type psb_desc_type.
      +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes + + + +

        +
      1. +

        On entry to this routine the descriptor may be in either the build or + assembled state. +

      2. +
      3. +

        On entry to this routine the sparse matrix may be in either the build or + update state. +

      4. +
      5. +

        If the descriptor is in the build state, then the sparse matrix must also be + in the build state; the action of the routine is to (implicitly) call psb_cdins + to add entries to the sparsity pattern; each sparse matrix entry implicitly + defines a graph edge, that is passed to the descriptor routine for the + appropriate processing; +

      6. +
      7. +

        The input data can be passed in either COO or CSR formats; +

      8. +
      9. +

        In COO format the coefficients to be inserted are represented by the + ordered triples ia(i),ja(i),val(i), for i = 1,,nz; these triples are + arbitrary; +

      10. +
      11. +

        In CSR format the coefficients to be inserted for each input row i = 1,nr + are represented by the ordered triples (i + irw - 1),ja(j),val(j), for + j = irp(i),,irp(i + 1) - 1; these triples should belong to the current + process, i.e. i+irw-1 should be one of the local indices, but are otherwise + arbitrary; +

      12. +
      13. +

        There is no requirement that a given row must be passed in its entirety + to a single call to this routine: the buildup of a row may be split into as + many calls as desired (even in the CSR format); + + + +

      14. +
      15. +

        Coefficients from different rows may also be mixed up freely in a single + call, according to the application needs; +

      16. +
      17. +

        Coefficients from matrix rows not owned by the calling process are treated + according to the value of bldmode specified at allocation time; if bldmode + was chosen as psb_matbld_remote_ the library will keep track of them, + otherwise they are silently ignored; +

      18. +
      19. +

        If the descriptor is in the assembled state, then any entries in the sparse + matrix that would generate additional communication requirements are + ignored; +

      20. +
      21. +

        If the matrix is in the update state, any entries in positions that were not + present in the original matrix are ignored.

      + + + +

      6.9 psb_spasb — Sparse matrix assembly routine

      + + + +
      +call psb_spasb(a, desc_a, info [, afmt, upd,  mold])
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in/out.
      Specified as: a structured data of type psb_desc_type. +

      +

      +afmt

      +

      the storage format for the sparse matrix.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: an array of characters. Defalt: ’CSR’. +

      +

      +upd

      +

      Provide for updates to the matrix coefficients.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: integer, possible values: psb_upd_srch_, psb_upd_perm_ +

      +

      +mold

      + + + +

      The desired dynamic type for the internal matrix storage.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: an object of a class derived from psb_T_base_sparse_mat.

      +

      +

      +

      +On Return

      +

      +

      +

      +a

      +

      the matrix to be assembled.
      Scope:local
      Type:required
      Intent: inout.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in/out.
      Specified as: a structured data of type psb_desc_type. If the matrix was + allocated with bldmode=psb_matbld_remote_, then the descriptor will be + reassembled. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        On entry to this routine the descriptor must be in the assembled state, + i.e. psb_cdasb must already have been called. + + + +

      2. +
      3. +

        The sparse matrix may be in either the build or update state; +

      4. +
      5. +

        Duplicate entries are detected and handled in both build and update state, + with the exception of the error action that is only taken in the build state, + i.e. on the first assembly; +

      6. +
      7. +

        If the update choice is psb_upd_perm_, then subsequent calls to psb_spins + to update the matrix must be arranged in such a way as to produce exactly + the same sequence of coefficient values as encountered at the first assembly; +

      8. +
      9. +

        The output storage format need not be the same on all processes; +

      10. +
      11. +

        On exit from this routine the matrix is in the assembled state, and thus + is suitable for the computational routines; +

      12. +
      13. +

        If the bldmode=psb_matbld_remote_ value was specified at allocation + time, contributions defined on the current process but belonging to a + remote process will be handled accordingly. This is most likely to occur in + finite element applications, with dupl=psb_dupl_add_; it is necessary to + check for possible updates needed in the descriptor, hence there will be a + runtime overhead.

      + + + +

      6.10 psb_spfree — Frees a sparse matrix

      + + + +
      +call psb_spfree(a, desc_a, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      the matrix to be freed.
      Scope:local
      Type:required
      Intent: inout.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +info

      + + + +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      6.11 psb_sprn — Reinit sparse matrix structure for psblas routines.

      + + + +
      +call psb_sprn(a, decsc_a, info, clear)
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      the matrix to be reinitialized.
      Scope:local
      Type:required
      Intent: inout.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +clear

      +

      Choose whether to zero out matrix coefficients
      Scope:local.
      Type:optional.
      Intent: in.
      Default: true.

      +

      + + + +

      +

      +On Return

      +

      +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        On exit from this routine the sparse matrix is in the update state.

      + + + +

      6.12 psb_geall — Allocates a dense matrix

      + + + +
      +call psb_geall(x, desc_a, info[, dupl, bldmode, n, lb])
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +desc_a

      +

      The communication descriptor.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a variable of type psb_desc_type.
      +

      +

      +n

      +

      The number of columns of the dense matrix to be allocated.
      Scope: local
      Type: optional
      Intent: in.
      Specified as: Integer scalar, default 1. It is not a valid argument if x is a + rank-1 array. +

      +

      +lb

      +

      The lower bound for the column index range of the dense matrix to be + allocated.
      Scope: local
      Type: optional
      Intent: in.
      Specified as: Integer scalar, default 1. It is not a valid argument if x is a + rank-1 array. + + + +

      +

      +dupl

      +

      How to handle duplicate coefficients.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: integer, possible values: psb_dupl_ovwrt_, psb_dupl_add_; + psb_dupl_err_ has no effect. +

      +

      +bldmode

      +

      Whether to keep track of matrix entries that do not belong to the current + process.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: + an integer value psb_matbld_noremote_, psb_matbld_remote_. Default: + psb_matbld_noremote_.

      +

      +

      +

      +On Return

      +

      +

      +

      +x

      +

      The dense matrix to be allocated.
      Scope: local
      Type: required
      Intent: out.
      Specified as: a rank one or two array with the ALLOCATABLE attribute + or an object of type psb_T_vect_type, of type real, complex or integer.
      +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + +

      Notes +

        +
      1. +

        Using psb_matbld_remote_ is likely to cause a runtime overhead at + assembly time;

      + + +

      6.13 psb_geins — Dense matrix insertion routine

      + + +
      +call psb_geins(m, irw, val, x, desc_a, info [,local])
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +m

      +

      Number of rows in val to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: an integer value. +

      +

      +irw

      +

      Indices of the rows to be inserted. Specifically, row i of val will be + inserted into the local row corresponding to the global row index irw(i). + Scope:local.
      Type:required.
      Intent: in.
      Specified as: an integer array. +

      +

      +val

      +

      the dense submatrix to be inserted.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a rank 1 or 2 array. Specified as: an integer value. + + +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +local

      +

      Whether the entries in the index vector irw, are already in local + numbering.
      Scope:local.
      Type:optional.
      Specified as: a logical value; default: .false.. +

      +

      +

      +

      +On Return

      +

      +

      +

      +x

      +

      the output dense matrix.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array or an object of type + psb_T_vect_type, of type real, complex or integer.
      +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes + + +

        +
      1. +

        Dense vectors/matrices do not have an associated state; +

      2. +
      3. +

        Duplicate entries are either overwritten or added, there is no provision for + raising an error condition.

      + + +

      6.14 psb_geasb — Assembly a dense matrix

      + + +
      +call psb_geasb(x, desc_a, info, mold)
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +desc_a

      +

      The communication descriptor.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a variable of type psb_desc_type.
      +

      +

      +mold

      +

      The desired dynamic type for the internal vector storage.
      Scope: local.
      Type: optional.
      Intent: in.
      Specified as: an object of a class derived from psb_T_base_vect_type; + this is only allowed when x is of type psb_T_vect_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +x

      + + +

      The dense matrix to be assembled.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array with the ALLOCATABLE or an + object of type psb_T_vect_type, of type real, complex or integer.
      +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        On entry to this routine the descriptor must be in the assembled state, + i.e. psb_cdasb must already have been called. +

      2. +
      3. +

        If the bldmode=psb_matbld_remote_ value was specified at allocation + time, contributions defined on the current process but belonging to a + remote process will be handled accordingly. This is most likely to occur in + finite element applications, with dupl=psb_dupl_add_.

      + + +

      6.15 psb_gefree — Frees a dense matrix

      + + +
      +call psb_gefree(x, desc_a, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      The dense matrix to be freed.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a rank one or two array with the ALLOCATABLE or an + object of type psb_T_vect_type, of type real, complex or integer.
      +

      +

      +desc_a

      +

      The communication descriptor.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a variable of type psb_desc_type.

      +

      +

      +

      +On Return

      +

      + + +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + +

      6.16 psb_gelp — Applies a left permutation to a dense matrix

      + + +
      +call psb_gelp(trans, iperm, x, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +trans

      +

      A character that specifies whether to permute A or AT.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a single character with value ’N’ for A or ’T’ for AT.
      +

      +

      +iperm

      +

      An integer array containing permutation information.
      Scope: local
      Type: required
      Intent: in.
      Specified as: an integer one-dimensional array.
      +

      +

      +x

      +

      The dense matrix to be permuted.
      Scope: local
      Type: required
      Intent: inout.
      Specified as: a one or two dimensional array.

      +

      + + +

      +

      +On Return

      +

      +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + +

      6.17 psb_glob_to_loc — Global to local indices convertion

      + + +
      +call psb_glob_to_loc(x, y, desc_a, info, iact,owned)
      +call psb_glob_to_loc(x, desc_a, info, iact,owned)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      An integer vector of indices to be converted.
      Scope: local
      Type: required
      Intent: in, inout.
      Specified as: a rank one integer array.
      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +iact

      +

      specifies action to be taken in case of range errors. Scope: global
      Type: optional
      Intent: in.
      Specified as: a character variable Ignore, Warning or Abort, default Ignore. + + +

      +

      +owned

      +

      Specfies valid range of input Scope: global
      Type: optional
      Intent: in.
      If true, then only indices strictly owned by the current process are + considered valid, if false then halo indices are also accepted. Default: false.

      +

      +

      +

      +On Return

      +

      +

      +

      +x

      +

      If y is not present, then x is overwritten with the translated integer indices. + Scope: global
      Type: required
      Intent: inout.
      Specified as: a rank one integer array. +

      +

      +y

      +

      If y is present, then y is overwritten with the translated integer indices, + and x is left unchanged. Scope: global
      Type: optional
      Intent: out.
      Specified as: a rank one integer array. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        If an input index is out of range, then the corresponding output index is + set to a negative number; + + +

      2. +
      3. +

        The default Ignore means that the negative output is the only action + taken on an out-of-range input.

      + + + +

      6.18 psb_loc_to_glob — Local to global indices conversion

      + + + +
      +call psb_loc_to_glob(x, y, desc_a, info, iact)
      +call psb_loc_to_glob(x, desc_a, info, iact)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      An integer vector of indices to be converted.
      Scope: local
      Type: required
      Intent: in, inout.
      Specified as: a rank one integer array.
      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +iact

      +

      specifies action to be taken in case of range errors. Scope: global
      Type: optional
      Intent: in.
      Specified as: a character variable Ignore, Warning or Abort, default Ignore.

      +

      + + + +

      +

      +On Return

      +

      +

      +

      +x

      +

      If y is not present, then x is overwritten with the translated integer indices. + Scope: global
      Type: required
      Intent: inout.
      Specified as: a rank one integer array. +

      +

      +y

      +

      If y is not present, then y is overwritten with the translated integer indices, + and x is left unchanged. Scope: global
      Type: optional
      Intent: out.
      Specified as: a rank one integer array. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      + + + +

      6.19 psb_is_owned —

      + + + +
      +call psb_is_owned(x, desc_a)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      Integer index.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a scalar integer.
      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +Function value

      + + +

      A logical mask which is true if x is owned by the current process Scope: + local
      Type: required
      Intent: out.

      +

      Notes +

        +
      1. +

        This routine returns a .true. value for an index that is strictly owned by + the current process, excluding the halo indices

      + + +

      6.20 psb_owned_index —

      + + +
      +call psb_owned_index(y, x, desc_a, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      Integer indices.
      Scope: local
      Type: required
      Intent: in, inout.
      Specified as: a scalar or a rank one integer array.
      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +iact

      +

      specifies action to be taken in case of range errors. Scope: global
      Type: optional
      Intent: in.
      Specified as: a character variable Ignore, Warning or Abort, default Ignore.

      +

      + + +

      +

      +On Return

      +

      +

      +

      +y

      +

      A logical mask which is true for all corresponding entries of x that are + owned by the current process Scope: local
      Type: required
      Intent: out.
      Specified as: a scalar or rank one logical array. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        This routine returns a .true. value for those indices that are strictly + owned by the current process, excluding the halo indices

      + + +

      6.21 psb_is_local —

      + + +
      +call psb_is_local(x, desc_a)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      Integer index.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a scalar integer.
      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +Function value

      + + +

      A logical mask which is true if x is local to the current process Scope: + local
      Type: required
      Intent: out.

      +

      Notes +

        +
      1. +

        This routine returns a .true. value for an index that is local to the current + process, including the halo indices

      + +

      6.22 psb_local_index —

      + + + +
      +call psb_local_index(y, x, desc_a, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      Integer indices.
      Scope: local
      Type: required
      Intent: in, inout.
      Specified as: a scalar or a rank one integer array.
      +

      +

      +desc_a

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +iact

      +

      specifies action to be taken in case of range errors. Scope: global
      Type: optional
      Intent: in.
      Specified as: a character variable Ignore, Warning or Abort, default Ignore.

      +

      + + + +

      +

      +On Return

      +

      +

      +

      +y

      +

      A logical mask which is true for all corresponding entries of x that are + local to the current process Scope: local
      Type: required
      Intent: out.
      Specified as: a scalar or rank one logical array. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        This routine returns a .true. value for those indices that are local to the + current process, including the halo indices.

      + + + +

      6.23 psb_get_boundary — Extract list of boundary elements

      + + + +
      +call psb_get_boundary(bndel, desc, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +desc

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +bndel

      +

      The list of boundary elements on the calling process, in local numbering.
      Scope: local
      Type: required
      Intent: out.
      Specified as: a rank one array with the ALLOCATABLE attribute, of type + integer.
      +

      +

      +info

      + + + +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        If there are no boundary elements (i.e., if the local part of the connectivity + graph is self-contained) the output vector is set to the “not allocated” + state. +

      2. +
      3. +

        Otherwise the size of bndel will be exactly equal to the number of + boundary elements.

      + + + +

      6.24 psb_get_overlap — Extract list of overlap elements

      + + + +
      +call psb_get_overlap(ovrel, desc, info)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +desc

      +

      the communication descriptor.
      Scope:local.
      Type:required.
      Intent: in.
      Specified as: a structured data of type psb_desc_type.

      +

      +

      +

      +On Return

      +

      +

      +

      +ovrel

      +

      The list of overlap elements on the calling process, in local numbering.
      Scope: local
      Type: required
      Intent: out.
      Specified as: a rank one array with the ALLOCATABLE attribute, of type + integer.
      +

      +

      +info

      + + + +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        If there are no overlap elements the output vector is set to the “not + allocated” state. +

      2. +
      3. +

        Otherwise the size of ovrel will be exactly equal to the number of overlap + elements.

      + + + +

      6.25 psb_sp_getrow — Extract row(s) from a sparse matrix

      + + + +
      +call psb_sp_getrow(row, a, nz, ia, ja, val, info, &
      +              & append, nzin, lrw)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +row

      +

      The (first) row to be extracted.
      Scope:local
      Type:required
      Intent: in.
      Specified as: an integer > 0. +

      +

      +a

      +

      the matrix from which to get rows.
      Scope:local
      Type:required
      Intent: in.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +append

      +

      Whether to append or overwrite existing output.
      Scope:local
      Type:optional
      Intent: in.
      Specified as: a logical value default: false (overwrite). + + + +

      +

      +nzin

      +

      Input size to be appended to.
      Scope:local
      Type:optional
      Intent: in.
      Specified as: an integer > 0. When append is true, specifies how many + entries in the output vectors are already filled. +

      +

      +lrw

      +

      The last row to be extracted.
      Scope:local
      Type:optional
      Intent: in.
      Specified as: an integer > 0, default: row. +

      +

      +

      +

      +On Return

      +

      +

      +

      +nz

      +

      the number of elements returned by this call.
      Scope:local.
      Type:required.
      Intent: out.
      Returned as: an integer scalar. +

      +

      +ia

      +

      the row indices.
      Scope:local.
      Type:required.
      Intent: inout.
      Specified as: an integer array with the ALLOCATABLE attribute. +

      +

      +ja

      + + + +

      the column indices of the elements to be inserted.
      Scope:local.
      Type:required.
      Intent: inout.
      Specified as: an integer array with the ALLOCATABLE attribute. +

      +

      +val

      +

      the elements to be inserted.
      Scope:local.
      Type:required.
      Intent: inout.
      Specified as: a real array with the ALLOCATABLE attribute. +

      +

      +info

      +

      Error code.
      Scope: local
      Type: required
      Intent: out.
      An integer value; 0 means no error has been detected.

      +

      Notes +

        +
      1. +

        The output nz is always the size of the output generated by the current + call; thus, if append=.true., the total output size will be nzin + nz, with + the newly extracted coefficients stored in entries nzin+1:nzin+nz of the + array arguments; +

      2. +
      3. +

        When append=.true. the output arrays are reallocated as necessary; +

      4. +
      5. +

        The row and column indices are returned in the local numbering + scheme; if the global numbering is desired, the user may employ the + psb_loc_to_glob routine on the output.

      + + + +

      6.26 psb_sizeof — Memory occupation

      +

      This function computes the memory occupation of a PSBLAS object. + + + +

      +isz = psb_sizeof(a)
      +isz = psb_sizeof(desc_a)
      +isz = psb_sizeof(prec)
      +
      +

      +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      A sparse matrix A.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +desc_a

      +

      Communication descriptor.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a structured data of type psb_desc_type. +

      +

      +prec

      +

      Scope: local
      Type: required
      Intent: in.
      Specified as: a preconditioner data structure psb_prec_type. + + + +

      +

      +On Return

      +

      +

      +

      +Function value

      +

      The memory occupation of the object specified in the calling sequence, in + bytes.
      Scope: local
      Returned as: an integer(psb_long_int_k_) number.

      + + + +

      6.27 Sorting utilities —

      +

      psb_msort — Sorting by the Merge-sort algorithm +

      psb_qsort — Sorting by the Quicksort algorithm +

      psb_hsort — Sorting by the Heapsort algorithm + + + +

      +call psb_msort(x,ix,dir,flag)
      +call psb_qsort(x,ix,dir,flag)
      +call psb_hsort(x,ix,dir,flag)
      +
      +

      +

      These serial routines sort a sequence X into ascending or descending order. The +argument meaning is identical for the three calls; the only difference is the algorithm +used to accomplish the task (see Usage Notes below). +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +x

      +

      The sequence to be sorted.
      Type:required.
      Specified as: an integer, real or complex array of rank 1. +

      +

      +ix

      +

      A vector of indices.
      Type:optional.
      Specified as: an integer array of (at least) the same size as X. +

      +

      +dir

      +

      The desired ordering.
      Type:optional.
      Specified as: an integer value: +

      +

      + Integer and real data:

      +

      psb_sort_up_, psb_sort_down_, psb_asort_up_, + psb_asort_down_; default psb_sort_up_. + + + +

      +

      + Complex data:

      +

      psb_lsort_up_, psb_lsort_down_, psb_asort_up_, + psb_asort_down_; default psb_lsort_up_.

      +
      +

      +flag

      +

      Whether to keep the original values in IX.
      Type:optional.
      Specified as: an integer value psb_sort_ovw_idx_ or psb_sort_keep_idx_; + default psb_sort_ovw_idx_. +

      +

      +

      +

      +On Return

      +

      +

      +

      +x

      +

      The sequence of values, in the chosen ordering.
      Type:required.
      Specified as: an integer, real or complex array of rank 1. +

      +

      +ix

      +

      A vector of indices.
      Type: Optional
      An integer array of rank 1, whose entries are moved to the same position + as the corresponding entries in x.

      +

      Notes +

        +
      1. +

        For integer or real data the sorting can be performed in the up/down + direction, on the natural or absolute values; +

      2. +
      3. +

        For complex data the sorting can be done in a lexicographic order (i.e.: + sort on the real part with ties broken according to the imaginary part) or + on the absolute values; + + + +

      4. +
      5. +

        The routines return the items in the chosen ordering; the output difference + is the handling of ties (i.e. items with an equal value) in the original input. + With the merge-sort algorithm ties are preserved in the same relative + order as they had in the original sequence, while this is not guaranteed for + quicksort or heapsort; +

      6. +
      7. +

        If flag = psb_sort_ovw_idx_ then the entries in ix(1 : n) where n is the size + of x are initialized to ix(i) i; thus, upon return from the subroutine, + for each index i we have in ix(i) the position that the item x(i) occupied + in the original data sequence; +

      8. +
      9. +

        If flag = psb_sort_keep_idx_ the routine will assume that the entries in + ix(:) have already been initialized by the user; +

      10. +
      11. +

        The three sorting algorithms have a similar O(nlog n) expected running time; + in the average case quicksort will be the fastest and merge-sort the slowest. + However note that: +

          +
        1. +

          The worst case running time for quicksort is O(n2); the algorithm + implemented here follows the well-known median-of-three heuristics, + but the worst case may still apply; +

        2. +
        3. +

          The worst case running time for merge-sort and heap-sort is + O(nlog n) as the average case; +

        4. +
        5. +

          The merge-sort algorithm is implemented to take advantage of + subsequences that may be already in the desired ordering prior to + the subroutine call; this situation is relatively common when dealing + with groups of indices of sparse matrix entries, thus merge-sort is the + preferred choice when a sorting is needed by other routines in the + library.

        + + + +
      + + + + + + + + + + +href="userhtml.html#userhtmlse9.html" >up]

      + id="tailuserhtmlse6.html"> diff --git a/docs/html/userhtmlse7.html b/docs/html/userhtmlse7.html index 16277374..6e245159 100644 --- a/docs/html/userhtmlse7.html +++ b/docs/html/userhtmlse7.html @@ -11,77 +11,2174 @@

      +href="userhtmlse4.html#tailuserhtmlse7.html">tail] [up]

      7 Parallel environment routines

      + id="x12-1050007">Parallel environment routines - +

      7.1 psb_init — Initializes PSBLAS parallel environment

      + + + +
      +call psb_init(ctxt, np, basectxt, ids)
      +
      +

      +

      This subroutine initializes the PSBLAS parallel environment, defining a virtual +parallel machine. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +np

      +

      Number of processes in the PSBLAS virtual parallel machine.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value.  Default: use all available processes. +

      +

      +basectxt

      +

      the initial communication context. The new context will be defined from + the processes participating in the initial one.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value.  Default: use MPI_COMM_WORLD. +

      +

      +ids

      +

      Identities of the processes to use for the new context; the argument is + ignored when np is not specified. This allows the processes in the new + environment to be in an order different from the original one.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer array.  Default: use the indices (0np - 1).

      + + + +

      +

      +

      +On Return

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine, type + psb_ctxt_type. Note that this is always a duplicate of basectxt, + so that library communications are completely separated from other + communication operations.
      Scope: global.
      Type: required.
      Intent: out.
      Specified as: an integer variable.

      +

      Notes +

        +
      1. +

        A call to this routine must precede any other PSBLAS call. +

      2. +
      3. +

        It is an error to specify a value for np greater than the number of processes + available in the underlying base parallel environment.

      + + + +

      7.2 psb_info — Return information about PSBLAS parallel environment

      + + + +
      +call psb_info(ctxt, iam, np)
      +
      +

      +

      This subroutine returns information about the PSBLAS parallel environment, +defining a virtual parallel machine. +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable.

      +

      +

      +

      +On Return

      +

      +

      +

      +iam

      +

      Identifier of current process in the PSBLAS virtual parallel machine.
      Scope: local.
      Type: required.
      Intent: out.
      Specified as: an integer value. -1 iam np - 1  +

      +

      +np

      + + + +

      Number of processes in the PSBLAS virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: out.
      Specified as: an integer variable.  

      +

      Notes +

        +
      1. +

        For processes in the virtual parallel machine the identifier will satisfy + 0 iam np - 1; +

      2. +
      3. +

        If the user has requested on psb_init a number of processes less than + the total available in the parallel execution environment, the remaining + processes will have on return iam = -1; the only call involving ctxt that + any such process may execute is to psb_exit.

      + + + +

      7.3 psb_exit — Exit from PSBLAS parallel environment

      + + + +
      +call psb_exit(ctxt)
      +call psb_exit(ctxt,close)
      +
      +

      +

      This subroutine exits from the PSBLAS parallel virtual machine. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +close

      +

      Whether to close all data structures related to the virtual parallel machine, + besides those associated with ctxt.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: a logical variable, default value: true.

      +

      Notes +

        +
      1. +

        This routine may be called even if a previous call to psb_info has returned + with iam = -1; indeed, it it is the only routine that may be called with + argument ctxt in this situation. + + + +

      2. +
      3. +

        A call to this routine with close=.true. implies a call to MPI_Finalize, + after which no parallel routine may be called. +

      4. +
      5. +

        If the user whishes to use multiple communication contexts in the + same program, or to enter and exit multiple times into the parallel + environment, this routine may be called to selectively close the contexts + with close=.false., while on the last call it should be called with + close=.true. to shutdown in a clean way the entire parallel environment.

      + + + +

      7.4 psb_get_mpi_comm — Get the MPI communicator

      + + + +
      +icomm = psb_get_mpi_comm(ctxt)
      +
      +

      +

      This function returns the MPI communicator associated with a PSBLAS +context +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable.

      +

      +

      +

      +On Return

      +

      +

      +

      +Function value

      +

      The MPI communicator associated with the PSBLAS virtual parallel + machine.
      Scope: global.
      Type: required.
      Intent: out.

      +

      Notes The subroutine version psb_get_mpicomm is still available but is +deprecated. + + + +

      7.5 psb_get_mpi_rank — Get the MPI rank

      + + + +
      +rank = psb_get_mpi_rank(ctxt, id)
      +
      +

      +

      This function returns the MPI rank of the PSBLAS process id +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +id

      +

      Identifier of a process in the PSBLAS virtual parallel machine.
      Scope: local.
      Type: required.
      Intent: in.
      Specified as: an integer value. 0 id np -

      +

      +

      +

      +On Return

      +

      +

      +

      +Funciton value

      + + + +

      The MPI rank associated with the PSBLAS process id.
      Scope: local.
      Type: required.
      Intent: out.

      +

      Notes The subroutine version psb_get_rank is still available but is deprecated. + + +

      7.6 psb_wtime — Wall clock timing

      + + +
      +time = psb_wtime()
      +
      +

      +

      This function returns a wall clock timer. The resolution of the timer is dependent +on the underlying parallel environment implementation. +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Exit

      +

      +

      +

      +Function value

      +

      the elapsed time in seconds.
      Returned as: a real(psb_dpk_) variable.

      + + +

      7.7 psb_barrier — Sinchronization point parallel environment

      + + +
      +call psb_barrier(ctxt)
      +
      +

      +

      This subroutine acts as an explicit synchronization point for the PSBLAS parallel +virtual machine. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable.

      + + +

      7.8 psb_abort — Abort a computation

      + + +
      +call psb_abort(ctxt)
      +
      +

      +

      This subroutine aborts computation on the parallel virtual machine. +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable.

      + + +

      7.9 psb_bcast — Broadcast data

      + + +
      +call psb_bcast(ctxt, dat [, root, mode, request])
      +
      +

      +

      This subroutine implements a broadcast operation based on the underlying +communication library. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +dat

      +

      On the root process, the data to be broadcast.
      Scope: global.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical variable, which may be + a scalar or rank 1 array.  Type, kind, rank and size must agree on all + processes. +

      +

      +root

      +

      Root process holding data to be broadcast.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value 0 <= root <= np - 1, default 0   + + +

      +

      +mode

      +

      Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
      +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      +

      +

      +On Return

      +

      +

      +

      +dat

      +

      On all processes other than root, the broadcasted data.
      Scope: global.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind, rank + and size must agree on all processes. +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      Notes +

        +
      1. +

        The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

      2. +
      3. +

        The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

        +

        +

        +  call psb_bcast(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
        +
        +

        +
      4. +
      5. +

        When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

        +

        +

        +  call psb_bcast(ctxt,dat,mode=psb_collective_start_,request=bcast_request)
        +  ....... ! Do not access dat
        +  call psb_bcast(ctxt,dat,mode=psb_collective_end_, request=bcast_request)
        +
        +

        +
      + + +

      7.10 psb_sum — Global sum

      + + +
      +call psb_sum(ctxt, dat [, root, mode, request])
      +
      +

      +

      This subroutine implements a sum reduction operation based on the underlying +communication library. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +dat

      +

      The local contribution to the global sum.
      Scope: global.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, or + a rank 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

      +

      +root

      +

      Process to hold the final sum, or -1 to make it available on all processes.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value -1 <= root <= np - 1, default -1.   + + +

      +

      +mode

      +

      Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
      +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      +

      +

      +On Return

      +

      +

      +

      +dat

      +

      On destination process(es), the result of the sum operation.
      Scope: global.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.
      Type, kind, rank and size must agree on all processes. +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      Notes +

        +
      1. +

        The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

      2. +
      3. +

        The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

        +

        +

        +  call psb_sum(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
        +
        +

        +
      4. +
      5. +

        When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

        +

        +

        +  call psb_sum(ctxt,dat,mode=psb_collective_start_,request=sum_request)
        +  ....... ! Do not access dat
        +  call psb_sum(ctxt,dat,mode=psb_collective_end_,request=sum_request)
        +
        +

        +
      + + +

      7.11 psb_max — Global maximum

      + + +
      +call psb_max(ctxt, dat [, root, mode, request])
      +
      +

      +

      This subroutine implements a maximum valuereduction operation based on the +underlying communication library. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +dat

      +

      The local contribution to the global maximum.
      Scope: local.
      Type: required.
      Intent: inout.
      Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

      +

      +root

      +

      Process to hold the final maximum, or -1 to make it available on all + processes.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value -1 <= root <= np - 1, default -1.
      + + +

      +

      +mode

      +

      Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
      +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      +

      +

      +On Return

      +

      +

      +

      +dat

      +

      On destination process(es), the result of the maximum operation.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      + +

      Notes +

        +
      1. +

        The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

      2. +
      3. +

        The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

        +

        +

        +  call psb_max(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
        +
        +

        +
      4. +
      5. +

        When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

        +

        +

        +  call psb_max(ctxt,dat,mode=psb_collective_start_,request=max_request)
        +  ....... ! Do not access dat
        +  call psb_max(ctxt,dat,mode=psb_collective_end_,request=max_request)
        +
        +

        +
      + + + +

      7.12 psb_min — Global minimum

      + + + +
      +call psb_min(ctxt, dat [, root, mode, request])
      +
      +

      +

      This subroutine implements a minimum value reduction operation based on the +underlying communication library. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +dat

      +

      The local contribution to the global minimum.
      Scope: local.
      Type: required.
      Intent: inout.
      Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

      +

      +root

      +

      Process to hold the final value, or -1 to make it available on all processes.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value -1 <= root <= np - 1, default -1.
      + + + +

      +

      +mode

      +

      Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
      +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      +

      +

      +On Return

      +

      +

      +

      +dat

      +

      On destination process(es), the result of the minimum operation.
      Scope: global.
      Type: required.
      Intent: inout.
      Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.
      Type, kind, rank and size must agree on all processes. +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      Notes +

        +
      1. +

        The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

      2. +
      3. +

        The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

        +

        +

        +  call psb_min(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
        +
        +

        +
      4. +
      5. +

        When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

        +

        +

        +  call psb_min(ctxt,dat,mode=psb_collective_start_,request=min_request)
        +  ....... ! Do not access dat
        +  call psb_min(ctxt,dat,mode=psb_collective_end_,request=min_request)
        +
        +

        +
      + + + +

      7.13 psb_amx — Global maximum absolute value

      + + + +
      +call psb_amx(ctxt, dat [, root, mode, request])
      +
      +

      +

      This subroutine implements a maximum absolute value reduction operation based +on the underlying communication library. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +dat

      +

      The local contribution to the global maximum.
      Scope: local.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, or + a rank 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

      +

      +root

      +

      Process to hold the final value, or -1 to make it available on all processes.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value -1 <= root <= np - 1, default -1.
      + + + +

      +

      +mode

      +

      Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
      +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      +

      +

      +On Return

      +

      +

      +

      +dat

      +

      On destination process(es), the result of the maximum operation.
      Scope: global.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, or + a rank 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      + + + +

      Notes +

        +
      1. +

        The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

      2. +
      3. +

        The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

        +

        +

        +  call psb_amx(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
        +
        +

        +
      4. +
      5. +

        When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

        +

        +

        +  call psb_amx(ctxt,dat,mode=psb_collective_start_,request=amx_request)
        +  ....... ! Do not access dat
        +  call psb_amx(ctxt,dat,mode=psb_collective_end_,request=amx_request)
        +
        +

        +
      + + + +

      7.14 psb_amn — Global minimum absolute value

      + + + +
      +call psb_amn(ctxt, dat [, root, mode, request])
      +
      +

      +

      This subroutine implements a minimum absolute value reduction operation based +on the underlying communication library. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +dat

      +

      The local contribution to the global minimum.
      Scope: local.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, or + a rank 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

      +

      +root

      +

      Process to hold the final value, or -1 to make it available on all processes.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value -1 <= root <= np - 1, default -1.
      + + + +

      +

      +mode

      +

      Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
      +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      +

      +

      +On Return

      +

      +

      +

      +dat

      +

      On destination process(es), the result of the minimum operation.
      Scope: global.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.
      Type, kind, rank and size must agree on all processes. +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      Notes +

        +
      1. +

        The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

      2. +
      3. +

        The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

        +

        +

        +  call psb_amn(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
        +
        +

        +
      4. +
      5. +

        When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

        +

        +

        +  call psb_amn(ctxt,dat,mode=psb_collective_start_,request=amn_request)
        +  ....... ! Do not access dat
        +  call psb_amn(ctxt,dat,mode=psb_collective_end_,request=amn_request)
        +
        +

        +
      + + + +

      7.15 psb_nrm2 — Global 2-norm reduction

      + + + +
      +call psb_nrm2(ctxt, dat [, root, mode, request])
      +
      +

      +

      This subroutine implements a 2-norm value reduction operation based on the +underlying communication library. +

      +

      +Type:

      +

      Synchronous. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +dat

      +

      The local contribution to the global minimum.
      Scope: local.
      Type: required.
      Intent: inout.
      Specified as: a real variable, which may be a scalar, or a rank 1 array. +  Kind, rank and size must agree on all processes. +

      +

      +root

      +

      Process to hold the final value, or -1 to make it available on all processes.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value -1 <= root <= np - 1, default -1.
      + + + +

      +

      +mode

      +

      Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
      Scope: global.
      Type: optional.
      Intent: in.
      Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
      +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      +

      +

      +

      +On Return

      +

      +

      +

      +dat

      +

      On destination process(es), the result of the 2-norm reduction.
      Scope: global.
      Type: required.
      Intent: inout.
      Specified as: a real variable, which may be a scalar, or a rank 1 array.
      Kind, rank and size must agree on all processes. +

      +

      +request

      +

      A request variable to check for operation completion.
      Scope: local.
      Type: optional.
      Intent: inout.
      If mode does not specify synchronous completion, then this variable must + be present.

      + + + +

      Notes +

        +
      1. +

        This reduction is appropriate to compute the results of multiple (local) + NRM2 operations at the same time. +

      2. +
      3. +

        Denoting by dati the value of the variable dat on process i, the output res + is equivalent to the computation of +

        +      ∘ ∑------
+res =      dat2i,
+         i
+
        +

        with care taken to avoid unnecessary overflow. +

      4. +
      5. +

        The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

      6. +
      7. +

        The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

        +

        +

        +  call psb_nrm2(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
        +
        +

        +
      8. +
      9. +

        When splitting the operation in two calls, the dat argument must not be + accessed between calls: + + + +

        +

        +

        +  call psb_nrm2(ctxt,dat,mode=psb_collective_start_,request=nrm2_request)
        +  ....... ! Do not access dat
        +  call psb_nrm2(ctxt,dat,mode=psb_collective_end_,request=nrm2_request)
        +
        +

        +
      + + + +

      7.16 psb_snd — Send data

      + + + +
      +call psb_snd(ctxt, dat, dst, m)
      +
      +

      +

      This subroutine sends a packet of data to a destination. +

      +

      +Type:

      +

      Synchronous: see usage notes. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +dat

      +

      The data to be sent.
      Scope: local.
      Type: required.
      Intent: in.
      Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind and + rank must agree on sender and receiver process; if m is not specified, size + must agree as well. +

      +

      +dst

      +

      Destination process.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer value 0 <= dst <= np - 1.
      + + + +

      +

      +m

      +

      Number of rows.
      Scope: global.
      Type: Optional.
      Intent: in.
      Specified as: an integer value 0 <= m <= size(dat,1).
      When dat is a rank 2 array, specifies the number of rows to be sent + independently of the leading dimension size(dat,1); must have the same + value on sending and receiving processes.

      +

      +

      +

      +On Return

      +

      +

      Notes +

        +
      1. +

        This subroutine implies a synchronization, but only between the calling + process and the destination process dst.

      + + + +

      7.17 psb_rcv — Receive data

      + + + +
      +call psb_rcv(ctxt, dat, src, m)
      +
      +

      +

      This subroutine receives a packet of data to a destination. +

      +

      +Type:

      +

      Synchronous: see usage notes. +

      +

      +On Entry

      +

      +

      +

      +ctxt

      +

      the communication context identifying the virtual parallel machine.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer variable. +

      +

      +src

      +

      Source process.
      Scope: global.
      Type: required.
      Intent: in.
      Specified as: an integer value 0 <= src <= np - 1.
      +

      +

      +m

      +

      Number of rows.
      Scope: global.
      Type: Optional.
      Intent: in.
      Specified as: an integer value 0 <= m <= size(dat,1).
      When dat is a rank 2 array, specifies the number of rows to be sent + independently of the leading dimension size(dat,1); must have the same + value on sending and receiving processes.

      + + + +

      +

      +

      +On Return

      +

      +

      +

      +dat

      +

      The data to be received.
      Scope: local.
      Type: required.
      Intent: inout.
      Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind and + rank must agree on sender and receiver process; if m is not specified, size + must agree as well.

      +

      Notes +

        +
      1. +

        This subroutine implies a synchronization, but only between the calling + process and the source process src.

      + + + + +href="userhtml.html#userhtmlse10.html" >up]

      + id="tailuserhtmlse7.html"> diff --git a/docs/html/userhtmlse8.html b/docs/html/userhtmlse8.html index d4260b27..2e6b2ff0 100644 --- a/docs/html/userhtmlse8.html +++ b/docs/html/userhtmlse8.html @@ -11,13 +11,13 @@

      +href="userhtmlse5.html#tailuserhtmlse8.html">tail] [up]

      8 Error handling

      + id="x13-1230008">Error handling

      The PSBLAS library error handling policy has been completely rewritten in version 2.0. The idea behind the design of this new error handling strategy is to keep error messages on a stack allowing the user to trace back up to the point where the first @@ -36,7 +36,7 @@ zero, an error condition is raised. This process continues on all the levels of nested calls until the level where the user decides to abort the program execution.

      Figure 5 shows the layout of a generic 5 shows the layout of a generic psb_foo routine with respect to the PSBLAS-2.0 error handling policy. It is possible to see how, whenever an error condition is detected, the



      @@ -67,270 +67,241 @@ explicitly. >

      -
      subroutinesubroutine psb_foo(some args, info) + psb_foo(some args, ... + info) 
        if(error detected) then +  ... 
         if(error info=errcode1 + detected) then 
          call psb_errpush(psb_foo, errcode1) +   info=errcode1 
         goto 9999 +   end if +  call ... + psb_errpush(psb_foo, errcode1) 
       call psb_bar(some args, info) +   if(info .ne. zero) then +    goto 9999 
       info=errcode2 +   end if 
        call psb_errpush(psb_foo, errcode2) +  ... 
         call goto 9999 + psb_bar(some args, end if + info) 
        ... +9999 continue +  if(info if (err_act .eq. act_abort) then + .ne. zero) then 
        call psb_error(icontxt) +     return + info=errcode2 
        else +     return + call psb_errpush(psb_foo, errcode2) 
            goto 9999 
         end if 
         ... 
      9999 continue 
         if (err_act .eq. act_abort) then 
           call psb_error(icontxt) 
           return 
         else 
           return 
         end if 
       
      end subroutine psb_foo -
      +class="cmtt-9"> end if + +end subroutine psb_foo

      Listing 5: The layout of a generic psb_foo routine with respect to PSBLAS-2.0 -error handling policy.
      +error handling policy.

      Figure 6 reports a sample error message generated by the PSBLAS-2.0 +href="#x13-123026r6">6 reports a sample error message generated by the PSBLAS-2.0 library. This error has been generated by the fact that the user has chosen the invalid “FOO” storage format to represent the sparse matrix. From this error message it is possible to see that the error has been detected inside @@ -342,7 +313,7 @@ process).



      @@ -371,7 +342,7 @@ Aborting...
      Listing 6: A sample PSBLAS-3.0 error message. Process 0 detected an error -condition inside the psb_cest subroutine
      +condition inside the psb_cest subroutine
      @@ -379,28 +350,236 @@ condition inside the psb_cest subroutine
      +
      call psb_errpush(err_c, r_name, i_err, a_err)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +err_c

      +

      the error code
      Scope: local
      Type: required
      Intent: in.
      Specified as: an integer. +

      +

      +r_name

      +

      the soutine where the error has been caught.
      Scope: local
      Type: required
      Intent: in.
      Specified as: a string.
      +

      +

      +i_err

      +

      addional info for error code
      Scope: local
      Type: optional
      Specified as: an integer array
      +

      +

      +a_err

      + + +

      addional info for error code
      Scope: local
      Type: optional
      Specified as: a string.

      + + + +

      8.2 psb_error — Prints the error stack content and aborts execution

      + +
      call psb_error(icontxt)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +icontxt

      +

      the communication context.
      Scope: global
      Type: optional
      Intent: in.
      Specified as: an integer.

      + + + +

      8.3 psb_set_errverbosity — Sets the verbosity of error messages

      + +
      call psb_set_errverbosity(v)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +v

      +

      the verbosity level
      Scope: global
      Type: required
      Intent: in.
      Specified as: an integer.

      + + + +

      8.4 psb_set_erraction — Set the type of action to be taken upon error +condition

      + +
      call psb_set_erraction(err_act)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +err_act

      +

      the type of action.
      Scope: global
      Type: required
      Intent: in.
      Specified as: an integer. Possible values: psb_act_ret, psb_act_abort.

      + + + + + + + +href="userhtml.html#userhtmlse11.html" >up]

      + id="tailuserhtmlse8.html"> diff --git a/docs/html/userhtmlse9.html b/docs/html/userhtmlse9.html index 878297b4..f0fa9516 100644 --- a/docs/html/userhtmlse9.html +++ b/docs/html/userhtmlse9.html @@ -11,47 +11,728 @@

      +href="userhtmlse6.html#tailuserhtmlse9.html">tail] [up]

      9 Utilities

      + id="x14-1280009">Utilities

      We have some utilities available for input and output of sparse matrices; the interfaces to these routines are available in the module psb_util_mod. -

      +

      9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing +format

      + +
      call hb_read(a, iret, iunit, filename, b, mtitle)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +filename

      +

      The name of the file to be read.
      Type:optional.
      Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

      +

      +iunit

      +

      The Fortran file unit number.
      Type:optional.
      Specified as: an integer value. Only meaningful if filename is not -.

      +

      +

      +

      +On Return

      +

      +

      +

      +a

      +

      the sparse matrix read from file.
      Type:required.
      Specified as: a structured data of type psb_Tspmat_type. + + + +

      +

      +b

      +

      Rigth hand side(s).
      Type: Optional
      An array of type real or complex, rank 2 and having the ALLOCATABLE + attribute; will be allocated and filled in if the input file contains a right + hand side, otherwise will be left in the UNALLOCATED state. +

      +

      +mtitle

      +

      Matrix title.
      Type: Optional
      A charachter variable of length 72 holding a copy of the matrix title as + specified by the Harwell-Boeing format and contained in the input file. +

      +

      +iret

      +

      Error code.
      Type: required
      An integer value; 0 means no error has been detected.

      + + + +

      9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing +format

      + +
      call hb_write(a, iret, iunit, filename, key, rhs, mtitle)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      the sparse matrix to be written.
      Type:required.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +b

      +

      Rigth hand side.
      Type: Optional
      An array of type real or complex, rank 1 and having the ALLOCATABLE + attribute; will be allocated and filled in if the input file contains a right + hand side. +

      +

      +filename

      +

      The name of the file to be written to.
      Type:optional.
      Specified as: a character variable containing a valid file name, or -, in + which case the default output unit 6 (i.e. standard output in Unix jargon) + is used. Default: -. +

      +

      +iunit

      +

      The Fortran file unit number.
      Type:optional.
      Specified as: an integer value. Only meaningful if filename is not -. + + + +

      +

      +key

      +

      Matrix key.
      Type: Optional
      A charachter variable of length 8 holding the matrix key as specified by + the Harwell-Boeing format and to be written to file. +

      +

      +mtitle

      +

      Matrix title.
      Type: Optional
      A charachter variable of length 72 holding the matrix title as specified by + the Harwell-Boeing format and to be written to file.

      +

      +

      +

      +On Return

      +

      +

      +

      +iret

      +

      Error code.
      Type: required
      An integer value; 0 means no error has been detected.

      + + + +

      9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket +format

      + +
      call mm_mat_read(a, iret, iunit, filename)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +filename

      +

      The name of the file to be read.
      Type:optional.
      Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

      +

      +iunit

      +

      The Fortran file unit number.
      Type:optional.
      Specified as: an integer value. Only meaningful if filename is not -.

      +

      +

      +

      +On Return

      +

      +

      +

      +a

      +

      the sparse matrix read from file.
      Type:required.
      Specified as: a structured data of type psb_Tspmat_type. + + +

      +

      +iret

      +

      Error code.
      Type: required
      An integer value; 0 means no error has been detected.

      + + +

      9.4 mm_array_read — Read a dense array from a file in the MatrixMarket +format

      + +
      call mm_array_read(b, iret, iunit, filename)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +filename

      +

      The name of the file to be read.
      Type:optional.
      Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

      +

      +iunit

      +

      The Fortran file unit number.
      Type:optional.
      Specified as: an integer value. Only meaningful if filename is not -.

      +

      +

      +

      +On Return

      +

      +

      +

      +b

      +

      Rigth hand side(s).
      Type: required
      An array of type real or complex, rank 1 or 2 and having the + + + ALLOCATABLE attribute, or an object of type psb_T_vect_type, of + type real or complex.
      Will be allocated and filled in if the input file contains a right hand side, + otherwise will be left in the UNALLOCATED state.
      +

      +

      +iret

      +

      Error code.
      Type: required
      An integer value; 0 means no error has been detected.

      + + +

      9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket +format

      + +
      call mm_mat_write(a, mtitle, iret, iunit, filename)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +a

      +

      the sparse matrix to be written.
      Type:required.
      Specified as: a structured data of type psb_Tspmat_type. +

      +

      +mtitle

      +

      Matrix title.
      Type: required
      A charachter variable holding a descriptive title for the matrix to be + written to file. +

      +

      +filename

      +

      The name of the file to be written to.
      Type:optional.
      Specified as: a character variable containing a valid file name, or -, in + which case the default output unit 6 (i.e. standard output in Unix jargon) + is used. Default: -. +

      +

      +iunit

      +

      The Fortran file unit number.
      Type:optional.
      Specified as: an integer value. Only meaningful if filename is not -.

      + +

      +

      +

      +On Return

      +

      +

      +

      +iret

      +

      Error code.
      Type: required
      An integer value; 0 means no error has been detected.

      +

      Notes +

      If this function is called on a matrix a on a distributed communicator only the +local part is written in output. To get a single MatrixMarket file with the whole +matrix when appropriate, e.g. for debugging purposes, one could gather the whole +matrix on a single rank and then write it. Consider the following example for a +double precision matrix +

      +

      +

      +type(psb_ldspmat_type) :: aglobal
      +
      +call psb_gather(aglobal,a,desc_a,info)
      +if (iam == psb_root_) then
      +call mm_mat_write(aglobal,mtitle,info,filename)
      +end if
      +call psb_spfree(aglobal, desc_a, info)
      +
      +

      +

      To simplify this procedure in C, there is a utility function +

      +

      +

      +psb_i_t psb_c_<s,d,c,z>global_mat_write(ah,cdh);
      +
      +

      +

      that produces exactly this result. + + + +

      9.6 mm_array_write — Write a dense array from a file in the MatrixMarket +format

      + +
      call mm_array_write(b, vtitle, iret, iunit, filename)
      + +

      +

      +

      +Type:

      +

      Asynchronous. +

      +

      +On Entry

      +

      +

      +

      +b

      +

      Rigth hand side(s).
      Type: required
      An array of type real or complex, rank 1 or 2, or an object of type + psb_T_vect_type, of type real or complex; its contents will be written to + disk.
      +

      +

      +filename

      +

      The name of the file to be written.
      +

      +

      +vtitle

      +

      Matrix title.
      Type: required
      A charachter variable holding a descriptive title for the vector to be written + to file. Type:optional.
      Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

      +

      +iunit

      + + + +

      The Fortran file unit number.
      Type:optional.
      Specified as: an integer value. Only meaningful if filename is not -.

      +

      +

      +

      +On Return

      +

      +

      +

      +iret

      +

      Error code.
      Type: required
      An integer value; 0 means no error has been detected.

      +

      Notes +

      If this function is called on a vector v on a distributed communicator only the +local part is written in output. To get a single MatrixMarket file with the whole +vector when appropriate, e.g. for debugging purposes, one could gather the whole +vector on a single rank and then write it. Consider the following example for a double +precision vector +

      +

      +

      +real(psb_dpk_), allocatable :: vglobal(:)
      +
      +call psb_gather(vglobal,v,desc,info)
      +if (iam == psb_root_) then
      +call mm_array_write(vglobal,vtitle,info,filename)
      +end if
      +call deallocate(vglobal, stat=info)
      +
      +

      +

      To simplify this procedure in C, there is a utility function +

      +

      +

      +psb_i_t psb_c_<s,d,c,z>global_vec_write(vh,cdh);
      +
      +

      +

      that produces exactly this result. + + + + + + + + + +

      +href="userhtml.html#userhtmlse12.html" >up]

      + id="tailuserhtmlse9.html"> diff --git a/docs/psblas-3.9.pdf b/docs/psblas-3.9.pdf index 5e56d8b3..5140f9d2 100644 --- a/docs/psblas-3.9.pdf +++ b/docs/psblas-3.9.pdf @@ -28369,7 +28369,7 @@ endstream endobj 2053 0 obj << -/Length 8697 +/Length 8663 >> stream 0 g 0 G @@ -28604,19 +28604,19 @@ BT [-525(my_cuda_test)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F62 9.9626 Tf -2.989 -24.267 Td [(A)-222(full)-223(example)-222(of)-223(this)-222(strategy)-222(can)-223(be)-222(seen)-223(in)-222(the)]TJ/F67 9.9626 Tf 212.576 0 Td [(test/ext/kernel)]TJ/F62 9.9626 Tf 80.671 0 Td [(and)]TJ/F67 9.9626 Tf 19.082 0 Td [(test/-)]TJ -312.329 -11.955 Td [(cuda/kernel)]TJ/F62 9.9626 Tf 61.29 0 Td [(subdir)18(ectories,)-409(wher)18(e)-377(we)-377(pr)18(ov)1(ide)-377(sample)-377(pr)18(ograms)-377(to)-377(test)-377(the)]TJ -61.29 -11.956 Td [(speed)-276(of)-275(the)-275(sparse)-276(matrix-vector)-275(pr)18(oduct)-276(with)-275(the)-276(various)-275(data)-276(str)8(uctur)18(es)-275(in-)]TJ 0 -11.955 Td [(cluded)-250(in)-250(the)-250(library)111(.)]TJ/F59 11.9552 Tf 0 -29.243 Td [(12.2)-1000(Extensions')-250(Data)-250(Structures)]TJ/F62 9.9626 Tf 0 -19.001 Td [(Access)-232(to)-233(the)-232(facilities)-232(pr)18(ovided)-233(by)-232(the)-232(EXT)-233(library)-232(is)-233(mainl)1(y)-233(achieved)-232(thr)18(ough)]TJ 0 -11.955 Td [(the)-384(data)-385(types)-384(that)-384(ar)18(e)-385(pr)18(ovi)1(ded)-385(within.)-713(The)-384(data)-384(classes)-385(ar)18(e)-384(derived)-384(fr)18(om)]TJ 0 -11.955 Td [(the)-247(base)-247(classes)-248(in)-247(PSBLAS,)-247(thr)18(ough)-247(the)-247(Fortran)-247(2003)-248(mechanism)-247(of)]TJ/F60 9.9626 Tf 299.187 0 Td [(type)-247(exten-)]TJ -299.187 -11.955 Td [(sion)]TJ/F62 9.9626 Tf 19.098 0 Td [([)]TJ +/F62 9.9626 Tf 11.955 -24.283 Td [(A)-378(full)-379(example)-378(of)-379(this)-378(strategy)-379(can)-378(be)-378(seen)-379(in)-378(the)]TJ/F67 9.9626 Tf 229.675 0 Td [(test/ext/kernel)]TJ/F62 9.9626 Tf 82.226 0 Td [(and)]TJ/F67 9.9626 Tf -326.845 -11.955 Td [(test/cuda/kernel)]TJ/F62 9.9626 Tf 86.402 0 Td [(subdir)18(ectories,)-278(wher)18(e)-273(we)-272(pr)18(ovide)-273(sample)-272(pr)18(ograms)-273(to)-273(test)]TJ -86.402 -11.955 Td [(the)-259(speed)-259(of)-259(the)-259(sparse)-259(matrix-vector)-259(pr)18(oduct)-259(with)-259(the)-259(various)-259(data)-259(str)8(uctur)18(es)]TJ 0 -11.956 Td [(included)-250(in)-250(the)-250(library)111(.)]TJ/F59 11.9552 Tf 0 -29.238 Td [(12.2)-1000(Extensions')-250(Data)-250(Structures)]TJ/F62 9.9626 Tf 0 -18.999 Td [(Access)-232(to)-233(the)-232(facilities)-232(pr)18(ovided)-233(by)-232(the)-232(EXT)-233(library)-232(is)-233(mainly)-232(achieved)-232(thr)18(ough)]TJ 0 -11.955 Td [(the)-384(data)-385(types)-384(that)-384(ar)18(e)-385(pr)18(ovi)1(ded)-385(within.)-713(The)-384(data)-384(classes)-385(ar)18(e)-384(derived)-384(fr)18(om)]TJ 0 -11.955 Td [(the)-247(base)-247(classes)-248(in)-247(PSBLAS,)-247(thr)18(ough)-247(the)-247(Fortran)-247(2003)-248(mechanism)-247(of)]TJ/F60 9.9626 Tf 299.187 0 Td [(type)-247(exten-)]TJ -299.187 -11.956 Td [(sion)]TJ/F62 9.9626 Tf 19.098 0 Td [([)]TJ 1 0 0 rg 1 0 0 RG [(17)]TJ 0 g 0 G - [(].)]TJ -4.154 -11.975 Td [(The)-255(data)-255(classes)-255(ar)18(e)-254(divided)-255(between)-255(the)-255(general)-255(purpose)-254(CPU)-255(extensions,)]TJ -14.944 -11.955 Td [(the)-232(GPU)-232(interfaces)-232(and)-232(the)-232(RSB)-232(interfaces.)-304(In)-232(the)-232(description)-232(we)-232(will)-232(make)-232(use)]TJ 0 -11.955 Td [(of)-250(the)-250(notation)-250(intr)18(oduced)-250(in)-250(T)92(able)]TJ + [(].)]TJ -4.154 -11.973 Td [(The)-255(data)-255(classes)-255(ar)18(e)-254(divided)-255(between)-255(the)-255(general)-255(purpose)-254(CPU)-255(extensions,)]TJ -14.944 -11.955 Td [(the)-232(GPU)-232(interfaces)-232(and)-232(the)-232(RSB)-232(interfaces.)-304(In)-232(the)-232(description)-232(we)-232(will)-232(make)-232(use)]TJ 0 -11.955 Td [(of)-250(the)-250(notation)-250(intr)18(oduced)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG [-250(21)]TJ 0 g 0 G - [(.)]TJ/F59 11.9552 Tf 0 -29.243 Td [(12.3)-1000(CPU-class)-250(extensions)]TJ/F59 9.9626 Tf 0 -19.001 Td [(ELLP)74(ACK)]TJ/F62 9.9626 Tf 0 -19.001 Td [(The)-190(ELLP)92(ACK/ITP)92(ACK)-190(format)-190(\050shown)-190(in)-190(Figur)18(e)]TJ + [(.)]TJ/F59 11.9552 Tf 0 -29.238 Td [(12.3)-1000(CPU-class)-250(extensions)]TJ/F59 9.9626 Tf 0 -19 Td [(ELLP)74(ACK)]TJ/F62 9.9626 Tf 0 -18.999 Td [(The)-190(ELLP)92(ACK/ITP)92(ACK)-190(format)-190(\050shown)-190(in)-190(Figur)18(e)]TJ 0 0 1 rg 0 0 1 RG [-190(6)]TJ 0 g 0 G - [(\051)-190(comprises)-190(two)-190(2-dimensional)]TJ 0 -11.956 Td [(arrays)]TJ/F67 9.9626 Tf 30.302 0 Td [(AS)]TJ/F62 9.9626 Tf 13.165 0 Td [(and)]TJ/F67 9.9626 Tf 19.571 0 Td [(JA)]TJ/F62 9.9626 Tf 13.166 0 Td [(with)]TJ/F67 9.9626 Tf 22.958 0 Td [(M)]TJ/F62 9.9626 Tf 7.935 0 Td [(r)18(ows)-272(and)]TJ/F67 9.9626 Tf 44.005 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 34.087 0 Td [(columns,)-277(wher)18(e)]TJ/F67 9.9626 Tf 72.949 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 34.087 0 Td [(is)-272(th)1(e)-272(maxi-)]TJ -292.225 -11.955 Td [(mum)-211(number)-211(of)-212(nonzer)18(os)-211(in)-211(any)-211(r)18(ow)-211([)]TJ/F59 9.9626 Tf 167.954 0 Td [(?)]TJ/F62 9.9626 Tf 4.424 0 Td [(].)-297(Each)-211(r)18(ow)-211(of)-212(the)-211(arrays)]TJ/F67 9.9626 Tf 108.255 0 Td [(AS)]TJ/F62 9.9626 Tf 12.564 0 Td [(and)]TJ/F67 9.9626 Tf 18.971 0 Td [(JA)]TJ/F62 9.9626 Tf 12.565 0 Td [(con-)]TJ -324.733 -11.955 Td [(tains)-218(the)-217(coef)18(\002cients)-218(and)-217(column)-218(indices;)-228(r)18(ows)-218(shorter)-217(than)]TJ/F67 9.9626 Tf 260.482 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 33.55 0 Td [(ar)18(e)-218(padded)]TJ -294.032 -11.955 Td [(with)-315(zer)18(o)-315(coef)18(\002cients)-315(and)-315(appr)18(opriate)-315(column)-315(indices,)-331(e.g.)-505(the)-315(last)-315(valid)-315(one)]TJ 0 -11.955 Td [(found)-250(in)-250(the)-250(same)-250(r)18(ow)92(.)]TJ + [(\051)-190(comprises)-190(two)-190(2-dimensional)]TJ 0 -11.956 Td [(arrays)]TJ/F67 9.9626 Tf 30.302 0 Td [(AS)]TJ/F62 9.9626 Tf 13.165 0 Td [(and)]TJ/F67 9.9626 Tf 19.571 0 Td [(JA)]TJ/F62 9.9626 Tf 13.166 0 Td [(with)]TJ/F67 9.9626 Tf 22.958 0 Td [(M)]TJ/F62 9.9626 Tf 7.935 0 Td [(r)18(ows)-272(and)]TJ/F67 9.9626 Tf 44.005 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 34.087 0 Td [(columns,)-277(wher)18(e)]TJ/F67 9.9626 Tf 72.949 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 34.087 0 Td [(is)-272(th)1(e)-272(maxi-)]TJ -292.225 -11.955 Td [(mum)-211(number)-211(of)-212(nonzer)18(os)-211(in)-211(any)-211(r)18(ow)-211([)]TJ/F59 9.9626 Tf 167.954 0 Td [(?)]TJ/F62 9.9626 Tf 4.424 0 Td [(].)-297(Each)-211(r)18(ow)-211(of)-212(the)-211(arrays)]TJ/F67 9.9626 Tf 108.255 0 Td [(AS)]TJ/F62 9.9626 Tf 12.564 0 Td [(and)]TJ/F67 9.9626 Tf 18.971 0 Td [(JA)]TJ/F62 9.9626 Tf 12.565 0 Td [(con-)]TJ -324.733 -11.955 Td [(tains)-218(the)-217(coef)18(\002cients)-218(and)-217(column)-218(indices;)-228(r)18(ows)-218(shorter)-217(than)]TJ/F67 9.9626 Tf 260.483 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 33.549 0 Td [(ar)18(e)-218(padded)]TJ -294.032 -11.955 Td [(with)-315(zer)18(o)-315(coef)18(\002cients)-315(and)-315(appr)18(opriate)-315(column)-315(indices,)-331(e.g.)-505(the)-315(last)-315(valid)-315(one)]TJ 0 -11.955 Td [(found)-250(in)-250(the)-250(same)-250(r)18(ow)92(.)]TJ 0 g 0 G 164.384 -29.888 Td [(163)]TJ 0 g 0 G @@ -30420,7 +30420,7 @@ stream /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [121.315 282.176 133.27 291.182] +/Rect [121.315 282.166 133.27 291.173] /A << /S /GoTo /D (cite.MRC:11) >> >> % 2045 0 obj @@ -30428,7 +30428,7 @@ stream /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [253.836 246.191 265.791 255.601] +/Rect [253.836 246.183 265.791 255.592] /A << /S /GoTo /D (table.21) >> >> % 2047 0 obj @@ -30445,15 +30445,15 @@ stream >> % 585 0 obj << -/D [2052 0 R /XYZ 99.895 349.256 null] +/D [2052 0 R /XYZ 99.895 349.244 null] >> % 589 0 obj << -/D [2052 0 R /XYZ 99.895 231.912 null] +/D [2052 0 R /XYZ 99.895 231.907 null] >> % 2055 0 obj << -/D [2052 0 R /XYZ 99.895 211.058 null] +/D [2052 0 R /XYZ 99.895 211.056 null] >> % 2056 0 obj << @@ -31454,7 +31454,7 @@ ET endstream endobj -2209 0 obj +2212 0 obj << /Length1 1383 /Length2 5908 @@ -31525,124 +31525,7 @@ W ò6S&ªå_!“½SÎ|esU›FÌR™y† ¢Y‹¥ýžï­§N endstream endobj -2211 0 obj -<< -/Length1 1956 -/Length2 7144 -/Length3 0 -/Length 9100 ->> -stream -%!PS-AdobeFont-1.0: CMITT10 003.002 -%%Title: CMITT10 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMITT10. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. -%%EndComments -FontDirectory/CMITT10 known{/CMITT10 findfont dup/UniqueID known{dup -/UniqueID get 5000779 eq exch/FontType get 1 eq and}{pop false}ifelse -{save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /SFGIZH+CMITT10 def -/FontBBox {11 -233 669 696 }readonly def -/PaintType 0 def -/FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMITT10.) readonly def -/FullName (CMITT10) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle -14.04 def -/isFixedPitch true def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -end readonly def -/Encoding 256 array -0 1 255 {1 index exch /.notdef put} for -dup 65 /A put -dup 67 /C put -dup 68 /D put -dup 69 /E put -dup 72 /H put -dup 73 /I put -dup 75 /K put -dup 76 /L put -dup 77 /M put -dup 80 /P put -dup 84 /T put -dup 86 /V put -dup 97 /a put -dup 99 /c put -dup 44 /comma put -dup 100 /d put -dup 101 /e put -dup 33 /exclam put -dup 102 /f put -dup 103 /g put -dup 104 /h put -dup 45 /hyphen put -dup 105 /i put -dup 107 /k put -dup 109 /m put -dup 110 /n put -dup 111 /o put -dup 112 /p put -dup 40 /parenleft put -dup 41 /parenright put -dup 46 /period put -dup 114 /r put -dup 115 /s put -dup 47 /slash put -dup 116 /t put -dup 119 /w put -dup 120 /x put -dup 121 /y put -readonly def -currentdict end -currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ3Ã6Uöÿu4 -lã.ÂNk©4¡õ8•DPËh>®[M†E7wk9B½.…H‰ì.³CÍS†oøZçèqO«£­ OèÖ)9ú‚îOHl~Q:ÔɾD0j‚‡— J¿¶Ñù'JZ ¶ì÷­½`ÕÖÄB 5Ô†G -t²ðbY©7:¾Í¿2úh½K@¾À,¥Š 'çó,¥É÷ÙL’~## ê¥gˆyëMŽ¢iMæç?à úi8ðZ¼‡ý+ÐÕ’~Ëó[\3©èMn‚Àû=r=_­VxÎgpÄÁ÷¢%vïbÒ¨ˆSr\ËÆ©ˆµÉ*ó£'"$¶Æ9iVˆ«ð¬2Ý;äxÕ¿£ü§`e'ËôóÄ«fz•+‹;¥æ€îZÑ)¢Ëÿ˜ýæÓ£ÿ°¸¡4š¸ÍêYŽ,Œ1i$ÂWšºVÁC™oì _±S*`6 »Ij$¨I‹Ý/}„½!9'n0%3SV´ÝP}vÖÂqôsF}¶OTÓŒ+¼œO*½6ž½¸’I¤é*\-ÌRÚ3ª°ÚNÙœ:gd¿;p/V;aZþÀ6è©¡Ü:£“s–wQu+MÉô…àåF€Ãœ•Õ¡n>s ¯­›”Éå*ŸÁHPëix€†Î¡\x6²¥u±»j^tüâTg'Dà¦i&kò|í5Üé.Æ+ E¦0E,¼€ýâì»TÙ™~Ìçô#N™ƒa½¡X®Ï–Ý.-ÁŒüEæJ:êcx¶éΖçxh¯¤N ýT#O½£¤ #k]ºFÖ¼‚…XNÉá±D –Aj†ùvÕÊóìfú rÆ®–/*{âŸ,Ed),É# òƒ¶¿ñvihÓÓÍøÊ@ÂŽ áçÌ`ܲ ~êy)öΚgÙ‰Š•ïÇZln÷ñÙüÖv»´ªø×é̉˭0È2L7Š‡ÏçPkg÷¦ûøcWÇïãÏ%» Nç «|]§É¼Üûgñ¶n– ÉL¢è³>DšõC¶çj}§x›t¨7$ËÀ 1€Œìµ—ÏõTõ -"ç*v’OBdÌNÓÇ#™/Ãõ_,N¿Ð f/’.˜1fuU¤oëÄÝ«ÿøcýtÜݶ.±¹ÆãaÂÿLœEE¬Ô O´¾¼8NFôm|@*7Ȫë–q—–EïIu£eXÊŠ Hm{+¹œRZ¹¾“\ Düç¤o þºŠ6Ã;‚%GÉí÷ã‡þC`•]m%4B­Hi*Gßö§Fn‡Ó²Ý/‰Á„ðÕ.'¶…–1#F\ÚŒ£SÆÈàÅÖ†Ek˜Sò*KbÒ¯ix VA‹yÍqÊÎÜøô‹·iú,ki”X—¡¹áÕWø&ô`BAâ…:[2« ôºGÖ’—LQ2”}—O6É%ù™Ú‘ÊšX;îåäñýóa~ŠS?ø雂KoÚ»6lðúTyª6D¼È±óF‡D“ðï×¥À±±W‰Ó\g?¢""Ñv™|]íJÊz`¡“%i~üH £õ‘ Ä69Ûtö @Y¦ P˜a¾÷Ъ' ¸é› $|,ú­ŒÐúÏký¿©ß’2Ç{fÉåC,ý1ŸšiJŸ•Ý¹`zÑ$ïR·I½)ôO!À¢lG{žINÇüNù~zj"(¢:ÑË.AòØoÓnãÑߦvSÃw_#a\ ÏöQ½THÁÌ5̯EUSûOjŽ¾Dò¦1ì7£Ø³BKÿ­TÖ*u¢»ôNºEh†ZÝwïÐ…Â?’rÄ81…¶ÙÔÅ:›ËðAíðJОö»Qú¨÷[»“‘Í¡`첈ã~lÙ÷&P}Ǹ­™?p™µ?{ªêÛF§.§t@…í_ÌbÃïkîÏçqµÁí‚ÓÝ ¤¥·ü 7/# àio »jihŠ¿÷'`±“7ƒ]7DÏço·™¶VÍKN4Ÿµe˜ïÍîí©œu -Ÿrp²Úáòùnó×¼‘ñÁ!7PÑ“Ðœ€šžwe¡®ŸÇÁ(ùíB -ßþ•É ³€Õg‰»¶ì­ D¹ÂÄ -þߦ¹Ñ‡çPqË2t…•Î¾Xm7š+‹tº?a’ä­å}“iÒ/Êk±5Ýiªœð:ˆS`ƒ$‰¼¢u²”5³n2ù/Ä¿K‡ô #é&}ùî•Ø 6ªÓÑf -{a$ò6šÊŸwºAòKY¤ó0×tÒ…dšïž³Ât¸¤ åIÀZ´IÍýù+ñàcÄC­Öð'àÕ0ë@-hœ¢q—m -–÷™ÅæyšóÏ3ÒP^Ù>]¥©à†A•ÔÓ{G -ÚI7®Ðè{¹ðp™ìõrÈ œÝµóÕ(órècÐ(j|/ Gç‚ì%! ›.åÍ -.I›ãö~NžKõGd}ý ]l5Ï, j]~ "¨´CçxûÊôóCHt¥ x@¶x /ÛÄ[ìo=Î~¶‡h7 /°æOawÒ÷Ü>0xd);?uš^q#+ˆuI£Ö¦| -ĽT,ïG…f¬Ku(˜‘ÊU ’êcÉ]~n8:>NA ôn›”åíc´wŠvZ÷ý¼B3]%ãö#q×c)¸Á„!ñÿ56#ý¹D!Ø¡oC¨>ÎFb£™2§y° X˜pätl‘ÿ0;µ¸4bO¿Á˜÷³NÌœÄmo OöAgn£¨]¤µumÇíiMC]GE½ŠØýoé±nÝõÞX5ÀcI+Q] -Áqe@E¥±„•ç…+­LíNv{*Hï‘!¥‚AǯÛÈ™ð·÷å?=A%;W xBÂ6JŠ&@;éì§l ƽƒ&=ȘŠ˜eÌi±™æyΧ®ðñ‹ -.ü} ì Ê-˘ñ½xÝVJÜï{npJ¬F岨ÇuÝ:õJA/‡õ%È›šý ØVဿOÄb0¸SK¿Ê”‚ëÁ_äœõñ4C©ß–J`ÔìèÐ@©`ÅÇŽƒ%* M º©^ÃA{y뿱)æ—šØÄû’ˆ³ÿ’dô«=§UÁ'oXRxº/8ôÊ–Ý~TM> žÙ¢Lêi;g}c[Í«.òàìlýv—ÿœj˜g‡)G£©66„û[þ>kxÙÄ-´áöSð´Ã¿³ë,M®/sVðiæ«êÍúŽ‹‘ÌÒj£³jü]’wɶÿ“u/¢Ûº*º,y¸©LðC÷¡|ŽQÔø±qSA†ú™pFî~_U³—ó¯ÙÅ0øˆ«|6¹;è"å–s;1Î(—V¥Ã$ŸÅ -YGr®àÄ“çÊă‹ÄoX¬r6¡0ź3jî‚wÁ…8«6*ˆ¬Ì£ÁyáVK£­¡evëG=Œú€¢>‚N£lÖðÑNÊ -…Ñæ¬Æ£ñ+ª@ðËñòÞ²¦î–g5ƒ¿:ƒ0_D¡‚’*Ñ… açõÈ·½»7¦6¨å¾M·ÿxv.Tï­RG†»&µÚßÛ!'PM7>ˆ‡z-™ˆ~‡±¿}Iì”EH}YN8‘Ùlj"ÞS7n£˜£yN»½§£R¦á_c–êá½…›äÿý„£+˜ñ¤ÅgfQÉ›ƒì½æayî³áêø ÿâô@< zß~ÕCÍÐÞ̃ùú´D:®`12Y¹åzåÿÌ—›­ÀçK€D»|‰ænd­5Ç„à?MµÇ GÄ&¿Š”ì‡û¬³[®;ʱÍÖþ G­ŽxIÅd§‡°êXhX7 ¿`⟵ÄmR¬IŒhölüöQ¹ØÝó”:CÍ–õ‰¨æ;;EÞWrU–k&´‰8Óþ.®[Ôh% fäYBÅ‹èÕ5Q —Á¡2m'c ¢F #Êׇ žeDõ»”ÂFJ0î7Dņ¦¡¸󩔪°é\g¡W}Lj̉Xÿ€ÔíÎéwFuæè§ËÂœº¼¬³ž¡T0C"N¹^c*´<À¹’wÛ3|‹ñ™B<¢ÁaYˆ:…»¢¢ÉŇ…99tÎ8IÚEäL}w½…mxPiòV§<4àÆYzänOZ×aH8x¦m¥ùºQ¬—Í¡9EëË‚sr6íàï‰öþìTü?~N{3iÄ Lh±Å.*s®Úai9J1?Ç-B[\†SOSÞ[ Ø÷h‡ ÈŸ bïug2ã£JUlÿkí›vgÇUÚû X»=ç>CxíYEk4è Ø`½UçV©ìèˆ0N sÔö–,—KU þŒ–!ä¥ú„lÞj =òÊ”}ÝwfXƒ±"uŸ'ªÆ”BºéŠsŸµ]P{­b‚×ÿð­aáܱl7r^c¥JÝY% x~Ú@¦£„ŸtýÖ åN¿¨%½õ†=œßb½Wa[´¶pT7·°29w¦¡¦"ƒ£6œ3M‚"‹¬(žMÓÊlàúY¹Äl U½&F†ÙFãJ®å«‹™‡Çj#™È6åE¼Šú¾ù¼éÒæäMkðwæÃÑ-hÕ£lµ½%s󲘮K6‚}žu‰nYA(uƒ–nrïÎ]‚Û ŒæRø™Wù?¦8^•Áv_Ö%4ïÜÄ5…]Z÷oH0ò¢gÜ?ãÛˆ®¯½Å2f!‚¯zËq³*. Ós¨¦¿f'Üö:"¹ë5aãMóÔ“6Ù·Lcùø$¸KØtÎÌÈ…¯¶•Ar’SÞ'\sW°F¨•™6 û &™µoðíß›L‹ y¶JYÿRVRnðØZïT 8žÛ7´=ù”n4P’ëärŽZ\JÌ¢t¿7ô¦v½h¸˜P¤;?'’Á\©z¬òÀ*ÚÛÆ™ õmÕ⌟)ÿ‰õìÈØVÕlÆÅ\‹…IG/r›+sã÷íZ|Ÿâªá§ººÍ5Ë®­ÜDªàCt C ˜ @'h ézHê]/mÛ)”µzýk¿ÃtàÄ[寴ÉE {êä¥NX¥cŠ¨KyÀX—ÉþG¦R |ÿÎûØA=1WÓÈ‘EJŒÉ½<¸ºnYr[ÕTƒ ‰asÒϼ=£1Šá=‹r%SíÒ¤ÔZ6T’’Wa9¾Ý KÆK­ã®0ÅýWŸ1"mƒNÂü6d]w#t>6ç¬ïÏ‘*½^œvú´[¸S÷JÈàž†÷$ -úÝ`D/Ì>¼w³­°jêEÕ*¿*ᜠXú(H“‘ö ò¦|¬ r¤=ÌŸòéƒz¸#-1Õ_1¬kmàøG\Ëeò+Æ@T‚o3©'P’îjÞ~-cÎv~û H3Z¸êW»-Z×ëzÔ"d¼mŽ+:éäšZoǹ Oì@aØ[C¹*DôÈ3* fT,í¤1´‡ˆÑñœQÐÙĤÓjôçJ àzÔ6þh¿[Ù+J£ÍJ6L|Þö"?ŽÀ°äN|¼ûieÝÉÔsï{ïÝŒÚ -A¹ehX‘ª'v2d*U1‡¶ã{®¶¬d^ -_g\e+J`Ú¼uÜ4ã¤Éî– -þšÅˆÈÁß"þr?ÿ±¡iœ„'5´DÇ*,uƒ¯Îêžö àeòH%. $åÇfØß'ÛT¡Õìx„ ÂýÉ®3aâÐ\©<ò;3&“à.8‚±Ìoé aè­?shp/Ú`ȶB¨F–{ˆ·D1U#¡¢¦†ùžË'õ@/ÒwNwú¿öæ½O{WC°Sž2ºtP=‹'B—\n’ ‹m‹…$`ˆ¼Õ≲Xÿv¸G^¹Hf0@Ǽ„8Dz”gˆðtÛ…ôÕ×-4ŽÌ}ôýGW -Ú ýó£XêYÖ¾q¥7¾D(m‘Dç¶ ³¢‰"ögFA­uÏÁÛshB…!&«s–X¤Ó’ü}¼W·üTÅÃZÈsµæc@ŽŠÙ‰úø`9’ÈîzmÚ§ÊuËÀxç“leà :»wæšýÞn§­Êa`uú‚•ÑÃÏ©n¡ëtÕvóí87”JMS”rrdI8üMïÔƒi¯6ß?é.ôÎÃ"{O-|ä‘lòÕÊð ‘vÐ~6|¬gªÇ|P.³¾SD~nLjàyDZŠãÓÛÒ®o‹äåW™$ÃQp$³äÿ,;øøCÀ꺵nÝÆ­ó|ƶ法dÓ…ëw€ÝPÂw?WL²Iqß>\h(óÉFÚìÍS*Ñ—Qs¤ä™“Á3 -ŸÚª´ ®N«×À@I€ß;ƒi±eg¤Ü|F\•ÿ‘;;MÌZk±_çä`÷½£ý:BçY–±9 »LxqyKÕN,H`#øWè$ãV=U‘DxkU.¬Þ Ò"*4è´xSR‹è¹?õ €±ü¡§PrP#‚ä½ü”Ýý¤®>³½iª$!¨E–Ö°sG&làzÒ¿g$|oüëjÚ0ÑPˆëËsÕëffÊÄXú ô_ÏÉèÏç°»ÿ§‚ñ£^ û颤Ñþa9ïMh- téŸÏÁ¸\œZéZ³çŸ ™¯ßHzÅŒ°)XHm]cÕ&Aƒsl†¾ÛY³² x=VZ]wq½¢³ Ù¾fp™n‘v%\Ü*iš;ü2ýf<¸¢ž“ŒÙªÿÿcZ}‡Z‚;“É{,X¸]“T´ûÔ]ÐÙ4~jýr²d×.1º²…ò=ºâx%LE(‘ö“C_ÇïLлF»|zyå8ÓÅ‘;èênö—F+G'jDWTÓ2-ÚÁ–ÆhZîVÛ‡ôÐ »öcAÔA­^dÃùE¹É_{0âŸvÇÈŸ)Š‡Ë7Þèyëß΂€Lhé ³á§9‘SFúsþù¨Úàf†®³_-vAYµó}Ö1ï)GMÚoÑ\‘<¬Ÿ+Â’‘Xc?«[´ªÖl&/”ÿ’øÇ_®ï¤ÐÝiËÿºâÇœÓT%˜jéü¡Ì.2‚Õô­ÌF"C"Ç#«à¥UѸ 4?“h)DÞnkcœ7@ñÊ*üãY­ÇÄÌ7sáîùÕݹõ=°žœRµ½ï»9YçBÝ…TZÞGýàkÇJý‡`íÊž`­ \þj+@{“kF]ÆÓ Oì­ç‰`èwŸqŽrÇÁ¿1-—F8©ÿA(èpóÔžèÆ@f]‹ Å»ƒ#xq"ï»Tïã›À× -*!Ñî¿/Š°¾µ§ãñ’û½š®K¹ÝBdî$¶ɉڊêý`²ÀÅiõç‘ÉçÝjÍMz³Jü7âä]Äjœ[¹‡¼JXö¶p h¬핤>ðÉÃ’mþ@¿ž4G«ëÐ׆‡õÁ®7¤ -¥ý2‚˜3>nå–“ éjVwYˆ³»2¥8Qד%ÕÊuÍh@}y‚S€._6dž‚òw…Al‚Ÿé¥Œ?Ê¡- -4ö8ɹ ˆÁktÃ’ë:öÿ×Øý]XŒû¼|jÄ9ž' 7ˆ,¶} -öR±{>þ¸gºõ7,}ù™¬‰ÃP+Þzm5ÞÇ˺ÀHõÿ‚èU?iSÂr,+ -F½Aa´´ÿ.P’gvŸþì˜Çaósl ø¯ZBh·õ´gæÇ –Wµ Û-‹BѶ”N˜°ŠÜhú`˨[ÉW÷jßí)qˆ(µÎãÓÌý÷ÍŒþð.wYÕæ‹$ÞQ¬ÿ0±€®Ñ]Šü -endstream -endobj -2213 0 obj +2214 0 obj << /Length1 1442 /Length2 6151 @@ -31720,88 +31603,7 @@ n^ r sb¢VØb)l 2ÅþSŠ„T£­H+PMzjÔîëzËiÍfí$ŽDáŒMkV©° Ã?ã\Ù¼Ä>Ž¬߈ƶa¹<¡ÈèEE£ÜL½¦D…¯6=t¯–EÒ÷µ€ðàÀ.…(Á%FoÜ~­´ô6€rý¦\l•ù;séñ'$Öµ…ïß>ž4¹ÿr$ï2¨DZý~*®\R ù·Yi$=Þ¡¥k‡å'¯Öå“™úý.m…6¯Â–„tY0’O$ð)ºQñ’ršÈUQ…»+¶ßªúÔ4(¨«,‡°Qb8¬—mÓ©qý±âÀÁÝHä°=‘œ,YŠ8i  ÿ‡½ª”SZ6Žöã½mØ@ˆ»`é7~â"L7\ã¹ßFdN#FþýHR´K´KHpb40 2ÂÚ~HÇKε֊úøX endstream endobj -2215 0 obj -<< -/Length1 1478 -/Length2 6403 -/Length3 0 -/Length 7881 ->> -stream -%!PS-AdobeFont-1.0: CMR10 003.002 -%%Title: CMR10 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMR10. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. -%%EndComments -FontDirectory/CMR10 known{/CMR10 findfont dup/UniqueID known{dup -/UniqueID get 5000793 eq exch/FontType get 1 eq and}{pop false}ifelse -{save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /SOSTRQ+CMR10 def -/FontBBox {-40 -250 1009 750 }readonly def -/PaintType 0 def -/FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMR10.) readonly def -/FullName (CMR10) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle 0 def -/isFixedPitch false def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -end readonly def -/Encoding 256 array -0 1 255 {1 index exch /.notdef put} for -dup 91 /bracketleft put -dup 93 /bracketright put -dup 61 /equal put -dup 40 /parenleft put -dup 41 /parenright put -dup 43 /plus put -readonly def -currentdict end -currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ=Ó%åW˜)-{Ùr½uú•)¯œ‚ßröA•ÉÂÜãE(õ@Úý{ë¹´‡º“Q»û|ü_‘RÑå» -ØÐÆϤëA³Å -T@æ|ýq|Vk¹¿J% qu8P¢øwÄGx³ÅªÛ̆ÖåQæ¯6K üªÒ-UŒ\§Ô%¡bÕ"t-*ðxÔõöÓŸÏÿJ‘+ -}ì3¥{Zà2ŽùÕzݬT2sÀ$ZõÌÑ&{Bè–J×{¨¥ÜÆØx¹<¥Ÿ ,—Ü-.è2›¯iR‹n·Ã±vÌÙ¾1ä • „'Æ䓱©·_r"O¯»_‹t±3keè¾fBݼöVÁfj—Ú9Ò³ÿ Ô Yh뀸Á{û´qÝÉÊÆ-÷†—ºøÉ·ÊãÁ}W§?Å?gw1*Eh[ŠÜÛ:›—ªtÍÀWeI,Úv•8üa…<{‚ñBú1â¤00Z8ÃÌî5 |ñŽ}­µðN¾àÔ×lw]ƒÿƒ6Nÿb¿—ðð¤h3E`œ†(¡›EÁ‰¡Þ'Q;²eµÔƒª/ðà­.D©yL’â5ØñÇ$¥4IGÃå׮Ɋ\‚yjù9£.îV­U»5£Q!äì‹-ÍèµÈBƒ…Ä9ödèâö««B^éoVùð­wˆBÉŽáT7„¡‰¾R€ŸÀsO™GA‹¦÷ãÕà b#íTBv!m·WT=úãÍÀ¦üw,ªID%'¥ÙMÅKé<‡|Ù]ØD¥Ã°€@‰õ·€2²½Oø( œÝŽ%œæÛ#_½É·VñþBSTø¢#zàÃ×ÒÍã!ËÑã${l­\§4ĸ ;5Œ™l‘&!×〚óß=(ŒŠþfŠ¸ÞUwA´Ú±›ÔÆeßìÈÌB#½30>ÎìH ¤©'œÞL¥+ÿÖ“žjâz@ú¸=®s\²Þõ<…«=\ÀY$ßD?ˆUê Ðæü6•ŸÒxG¡QnÇ«šŠäi¯%Ö”ž!×Ñ -L2±[žšôã¥)!Œf…X‚cè‹ã<Õ'Ȳ–ñRP3E¡!„-:±'àþÐAJ¯.ÙIJCJÎJtª&Ã{76± -#VÐÃÐÀëÕÖHÕ“ó‹/©ÊF×gw"F5”wñSˆ$|¯ÿoQ<ÎçΗ Ñ`~¶<10©|™ÿ<»émï´·ùØœÞÉhðÞ „߭逵µ°°›?É`?ó7–ë _Ì [•¾ó8xP<41Õó=tÏøHÚ ;6™¹¸}rKu1é3ûšUCiš´Ht­²ÑÓ}öyÖþ~c%\ßqï€\ººæ4O4v![›_÷Iº¢°ï³¯üÆæc‹0ß«òMJùMŸY0¬Ìv{6ß”Ö\øà rŸqÉš"3=÷”TÒYK.ÜKêÉ)Á,GÊK{aˆCZBGž½ -yÛ(ŠK'†Ã2O0ÓV—®6IP$$lûéú¬¸-“à[ÀëÔeÍÉ‹øÇKÌ×&2%g”b¯z‡÷JÌ™,uo~M¬2ÄzÞu’™ ‡Ô6*š»*¦ Å «Ñÿ9¿Î¨ß?Ä£K”c)ËÏ÷á\ðgŸt­&?©ãŽÏÄôûnTëw½“æ¼$ dN\C¸›¤ÎÚOöÖÏ@¥W2xè8q!LdÒ®ãoë~ä1V/ŒÈB=@±ôúí!0’ÁY¡ozù$íùÐ÷Ç€%Ä­¿ÑÉwáÞá2]oªÃß ®_ý?IhËuè¡BmîF:µ1Ø–d©Èy6÷‚ÀÚÞ F ¨„Ln5Sòw&©-ëÓ°¯žGzø¨9ê=È'’ä(Ìý¼IühNú´Ö{äO ˜1LvcÕmiµ¦À&È"MìêmÄ@·WÕ†­¤­mÜb%0Ù˜*ú`šëÄ¡“ß“/fõÃl½%PE2¡:VA)SBL/æ<ªî5ªCžA£ß÷l;®{ËÀ (#ºØXnF’"‹˜„8{RØo’ÁÄT(ä_WðbAþK|oM _ÖÓ—Wßþ‘½ yäMèÿ6€« ±^½÷atž@O1ë¡/Fñ”¶3y#™ãcMM×ÐGÅZýÝ[m¬ÿºzÏŒF lªù«"ÂJ>®eØVBWB}çV)ÜSÈÛi,¡ïÞ§ïR¸¥½»FǪ½P.è½LÑ€xø¦_[ÎXVˆÇóôQ ¯ TPÿõvƒ ù -eÊ,’pþÚƒrh“ú¸“uñ„Œ`+l{÷eQ÷Ö_@f63HßPavÑzUÍ_°ŸÍÆZ«1*ªäǯM+Çbp€†;{ÝªÌ ­Q3èVV;Ø¡¼ÍˆÓ ®O.Ožài,¼88«ÖƒìÆ…gŸ=\=à@"FàúKFøT¡bû˜‘¾VŽl(lg®ÁÚbÒ­Ý!¸èªÕ–ÕË\áýùa4Q1°Ì~ «9âqV¥¸§Ì¥ ’s^kí¢sdŽ›nz‘Á­£ç¤:ÖLžz]Žðªr Š¥¢ªƒvND¾‹MG79$ILiþ&“AØgánl6y‡J FOµò;ð,zÉ´Ñ% -o»¯ß xÈ0˜s‚ÖÈW1FxÏîÔè—ß gþm‘µxVyõµÚÑ"]#z~—§pvòÁ †‘™öØŨ.rB9‰$n¸YC­C1úÄtªd@#÷¨8ÐkN‡Ô‹K° Ó;£ÀêÔ“lFú( ØŒNóŠ¦;àá¥dþ¬Âûí’­üß*¼Š<Ʀ¡•ãÅ„ªsÀ¦9<µG CE`…Vé¢hà½ÉÄ%Q“ÎF¥oP½Äâу$ª½"Ñ33¼÷P9×9ìZs9‡'`ã­ ìrÚWù:N”¥ìÈsÓõÇ”·@fƒ œ^WÝ…&24w©lz æ$Ô2Wï²wÝ…ÆìO9bÿ£å"O¦ŸÊÍWÃ*F]Ý/¦`yªŒQ± ˆÉ”\Q•ÂF*ªÜiTñè^N öd_£M`ƒ¤†S%T³NºK7¯+R™Öì[r¥›üÈYõÅíYqe}¼±Ø ¥‹÷ØY‡Púô -ÎFUE¢ìóþŒðÓKƒÀÕHŒ'²µ>ZQ@ÎIÙàt%£äÿ–œÀ[¨)7Ä&¾ü hÎn ±éY'ÔÛa¶TÃñ'XrÔß™/n_n•JL¡ (–@ -²ç ËК -­µN{KFµßÛð^k›bÞÂnê~í`L¯£]Ûa¤U¡¹Å{üˆvâïLßܺæ¢4“ô1˜$rmAÿîs•ÂÆóònZß*™»=©÷Êsõðo›b@{‘yÖ !CC‹{c.WZ¶ìïæ0n àbp…gsP˜d$-Ñ\ö¯ÎÔ›&L27J»Býóµs/g(ô©xQ,t>[®â8Í“GsŸ:R$9]6&ˆúmy®)iµŠvÉéjÂöW¨‹LÄRB^RBal¿ëx³_ ×Y:²5M§¯ßæîÛ»­µ¨û/j¤…àÅÙ¡!}Þ`”îëD&W€ÈªäÖQX¹$HߧçÌtI¤­¼ ÈÕ.ÙYñ@Øn«r–_ôìnÜ ¾OüãtAN-L n–‹—?_Þ^_…Ûg’µ~žÆ;`°Ðí¨KB‡Þð6eĶÁܯ‘¸ ->”èl¿Qp‘†«¢Ó)À†¶ùµ¥ò/;S' ^¹õ:ätJ±j@êx«³ƒE(0GÊ;κÛQqŽ©Ç”kT'¹ðž;ž ÷ÉDHj‡¢ãsr‚xÜ– -æ.Â>ÚÐÖ¯¯¡Ïk—®ÚlÄ;&–¯#<çÚš°òe)fÑäFz›`·Ç·?¢ãIc@SH&–<ÿ¼’îevn8F‚\ãV±ª3Åóa­A!yy¼ã:ñÓ+žc!£xð–‚bÏMþÖ‰á/f}3ß–›ë˦Çã"G©–­ç'fMsÑÔXWUý}üöµ·¸Yĵ^ €Ô×%jÍÇ!H¾ï‹OHÿÉÍŽ_ù¶‹;臲Îò‘âJ¸v¡²]Ü# -$ÙªÛ'Ch?5™<„Sý»íË°Áñ@“­ ša¬´€-™AcŠ†éloW´M(òr·§#Û`×ÐnY{jÛ`3|lv®`º'Il ýà±!å´ü¡õÓPÂÊì{”¢E¯˜ï=Îpaè^ ØhëOk`·î›ãpøú ­8uyÜ$rò–Ýó•u -f}cqÃ+5Áy~!ýÝ]ÕúøÀìU,­¾t )æi:Hò ¾O…ÄÛ(®_eOéFV¾þqÐ>¬uèàÄ-G:ˆáñÄzƒž…›HZ\%UívØÀ Ðzã^àËBy%€NT⸶3=®Xëà?¼mJ’ø-Ó3„÷iã×H@Üfσýk¸R.ë›õýv'€Çm.'ÍSãÂD»ÌÙlyü7“¢ YõÖC³­Åméµ^¶¯Gu5_G›øµÙ1¦ût/ŒÔ þàÌâð -à4‰±1­nˆ_ûD>}p+æfªg9h$ú¨Å]@ïMù`Ø^Ò¹GKö;5«¤wÇë»äIrqïA¨3fA‚ßYiu¬€ÂÝäÃò½ÑïìÒYréDõö:ÚdR¥Ý “At ë-ª,)âºaª¤ÉºhâÏ9[Ô§uÿSç9K’„ÄTK©aX$äwgÉ“¶Oƒ[âx‹qǃ£yI(–ÿ¼j5L§ªIÀcµ}|hÀ$‘zDâUÏÂw»Y+½ý¬Z:ÎïçÄϤŜ1¿òµlÙeÀÇ`uø@|¬ìÖ܈贚Æ^Ú6î/žyÑû`að¡¦È Š[¯ÎŠ`Ò¾ÌEž£ÂwÁìa ¡zæiòLa„}jÊ°çÉ@û£Šå}ÅxæïW¶™ån‡sŽ˜0n*{¯l?Z©zó‹•Ê‹>¸ç8íp-LÑ&È£ûïK`> `´¨%Â`–¢îÝ¿Ésª?ºvË»Æ^\ü‰*’ÖýQÊá¨Nbf…+Û¼¾Þ„žo" Ù§U ¼zÑógêgÍaäŽu´F Ñ™<ÃXúyݸª¬Ðú*¼Ègöÿ4?X2]Â4¨HšÍ¢›×òš^ÁÐqLeo²œ˜Uíó~ ÷¥’’Ð7>ì)Ƥó™Hnžº¨­A7"é{D†Rj)ÿ×¢Éÿ~áeàíVDJÕ|—ººX}ѧCrŠÈÆTÈAÈè fý˜£’”P};Š"Ó¼KöÓ#ΑT ·F÷ú\çý÷^Ô“ ;™ À|êR D\Ä‚ HuÔ6ÿQÅýИ'Q3Èb;x#¸Ž3óSÌ:ªjbíÛ%¸tJ‘/>Ê¡OÿîœrVÞÂx]°¹¼>)âïŸÁµ]€lù5o¾[ç¡ÐF;,N&¨'i©Š\±NQÁIòHÔÍôÐÀ?T:í@ØFïçNÿ SXÁÝ0±<¬)»hàWÖ½4f¹FD·hÛ.سT)^SÑ-…nw}ý, ˆZN¸EÐCô¯÷ Y ReµzÂÔöð3¼Ò\ -¶ØtíþþYá5`F~_›xÁƒUž|‹'X’µÇ‡„G@UÖ6I \™„çÜö>ü:ÉV_ûÉ>Õ¦9*I0}Óµ\"{Øï3UÚzÆñÉuäFœfÌÙ¤…õPߨÊ)Eˆ8‚üU¥œúw)®ðgŠËqÔ:àdj#¥¹¯ÜˆhqÇ ¥œ¼so -¹@’ -KG7ã"­|QЇæªúå Zòzs|…‚}£ÄLÕ´SŠ¥:€fªÕm¼§ÂA3ïÖðS 2Ä=‘Ö[³d·Ø“üJœ\ܳ­òcìÚ€üù|Ët¾j1ÑÆ›AçrþŠ®ÀÑ®þÌJvÂ,K| ò´…Ï•VÞœ -˜ü›Q„´[ÜÞ‹ ÑãÛéA£EKS/Œˆzýÿ˜åKcÅ -OÞ,¨4¾•e|Ó{‹G»Êø#Rvðà©KÎýyÝ{ K1èEúŸÖýVËVw¯b´æûÙç6­«ÂªÎÖ=…»ºyeçŸÀeìÜ‘üÆ÷ uEØ>M%;,ˆš1–W}w ~hW¹ «—¦˜_èFÌâÑî9QcyƼ®E¸~ôÙf®áz\©a’:a<ÿÔ2eugg°SRŸ1ØEÜ’(=F™=Ž£ƒ;âq')^1>Çu6 -GÁ0ÿÑûòÿ¦œÿ+B°¸+›æ¸žb¥qGYÀðitâLxRIPv®Õ½·hÎÄÓÔ¥ºÒÏ‹^ž½:nwåòœÕp5¿Ð>^؉R¥Þä}ð4ü¸¿Œò™áùÞaŒgül}×^™(©t7ƒaÝÔ &ó½¶)±ZS«ˆ‘ ”¾8ÿn}@f0;­b.Û£ELì›7†%·¹›3Cª,ò@ä7¹Õï[I [1sI¬îÅXÚ<Æü[›fÜŒ¹¶Ýa¯:?yºÈç3"èu/'Ø\Ödñ ÁDÓ¹Ú=»qUü êškóκùH‚â$ y§ïQ7¼plŽ*žmhS]ADäÛäøù5Îã½üÆ«ü‹79µ -endstream -endobj -2217 0 obj +2216 0 obj << /Length1 1688 /Length2 8444 @@ -31892,45 +31694,273 @@ p bŠ|Ü<dF.ÊÂÃOü¶½£æ[–ßÈÝv°ó¿ö±O¨hgÃy‡Qì®—»å¶ ‘«h›0}¦ª»½˜Üõ,ïåè:Õ(Ñ–~¨–À–’6uü‹mXóR·VÃ<‡­Ð³JVíM\Fw%T6VÍvýÑQnéϨÿ¦:Ï M¤\Ú…Uá-:φ57M}ÎÞ!ãZ‘D ês‰ ç {$Qû0Ý„në®êÓ Ê(ãN3°Þ[( ½¶?™ÓŒoÜî9T½”ÒuæÒ«6nÀu÷ðD!qáíZ”ÞU›·ÍÜT”!pÚ¹VØžïó‡/…È\ýh^‡:ñ'.AµEõ¾S‹6P*BËY‹LŸ endstream endobj -2219 0 obj +2218 0 obj << -/Length1 2589 -/Length2 18452 +/Length1 1173 +/Length2 2916 /Length3 0 -/Length 21041 +/Length 4089 >> stream -%!PS-AdobeFont-1.0: CMTT10 003.002 -%%Title: CMTT10 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMTT10. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. -%%EndComments -FontDirectory/CMTT10 known{/CMTT10 findfont dup/UniqueID known{dup -/UniqueID get 5000832 eq exch/FontType get 1 eq and}{pop false}ifelse -{save true}{false}ifelse}{false}ifelse +%!PS-AdobeFont-1.0: PazoMath 001.003 +%%CreationDate: Fri May 17 11:17:28 2002 +%%VMusage: 120000 150000 11 dict begin +/FontInfo 14 dict dup begin +/version (001.003) readonly def +/Copyright ((c) Diego Puga, 2000, 2002.) readonly def +/Notice (Copyright (c) Diego Puga, 2000, 2002. Distributed under the GNU General Public License (http://www.gnu.org/copyleft/gpl.txt). As a special exception, permission is granted to include this font program in a PostScript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def +/FullName (Pazo Math) readonly def +/FamilyName (PazoMath) readonly def +/ItalicAngle 0 def +/isFixedPitch false def +/UnderlinePosition -100 def +/UnderlineThickness 50 def +/Weight (Regular) readonly def +end readonly def +/FontName /IKXQUG+PazoMath def +/Encoding 256 array +0 1 255 {1 index exch /.notdef put} for +dup 165 /infinity put +dup 229 /summation put +readonly def +/PaintType 0 def /FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /QGKXNM+CMTT10 def -/FontBBox {-4 -233 537 696 }readonly def +/FontMatrix [0.00100 0 0 0.00100 0 0] readonly def +/FontBBox {-40 -283 878 946} readonly def +currentdict end +currentfile eexec +ÙÖoc;„j˜›™t°ŸÆÌD[Ï|<3322ãý¿ô9I¸fÈÂÜ"ýÇXXH`ì{¶Ú(Ìb$`á«2 GyµÀ<“»»É\ð&’ÌMê¨ÒêµÂæCté+Ë…B›®JvÀÆ·mo÷Ïš}^ß¼ éYTY½·ÞCÒ]Sü=ÚnðÂt9x¦Ð>ÌíJòêKÌ1¾‹ž'r6iÁ’Xï¯Ü'l±­é Š”6ÑŸ±Ã=ïvª1]Øöüö•mñO ÀCüQC>õ¨iÔÜOLÈU½O³*TšÊcàTÙm(™ß‘À߀ ãº1ƒí˜U +nƒAŽYXÖþUÙ1ñ2¨×èèÒrÕrúE^Oy¤"â;ôDmÒà{O0Ò {2ÆR¿BÊi³¯N´íÌMÃýÙöéufû–L´”o¬ñ¹nõÛâ>å‘Gl»_ëí1¶#<ðW”\’ŽïJºÄaâ½æ„_«òF˜bÁ†ŠÆ~˜ ”M÷PsRƒî“ÛŬK5B|«BuG‰ÓŒ8öer(ßVΫ0¶\òFÌ+£S“hdr=ÊéÊVd^ RUl^›0ŒpEÉTXÛ½‡Ç›í—ë]Æ%’ÁÒ[õ”áT8ݯœû%²Uæ~ë½25ƒì£‡µK÷±;¯Fª<´FS$ÆÕû²9dwY€-u&‡i$ªò¼(O´fvdz¦‹ hº¾Ó¹ +s;¯“Ï‘èB]¢rø„£œ* 3Ü”™ Ãæ_&+Æ ÿªñ‹œÍ4‡ôÇ9¡Ï^WJ òÐlÆ.ņä»Y¹~^à³¾¨“òXq¹=ÂË;agâ £CgïÏÃÄ _%‰Æ˜¾JéD­í>‘ÚYÎ> Ø‹’¢á½næׯ ,G#¢Ä²›‹5À.êFŒE)a"™¬²ó p‘˜ßqÖo-Ù4Ï`7µò}§Óº¤WÌt_ceuÅÐóJŒ—h>ËÌÿÍú¸µ>Wþ¾ªÎ! ôA5j¾-±v`òç-Š?R ïtl.ƒe)öµ1=Ìï–kdSã¤iõä!5Ëþ_lŒœ<Ÿ ô—²ß-^ó­ª+éÍš‘ŽôùbøގçÞ©ÈÜ–™nWÿ%yïg§õ͚™ý÷3¡N>âVpä9ÿeŠ.•¯oÀÖÛv’£ˆóý“€hÓ; ÂÍ‹ ÀfW´\¹ü^yÑ_øÜ“±š¢'Úî1︖œø]c¨*Îç–" Õ磺û©@A.Øê’æXïècǽD&Ê<¼r”—kö¢ ¾Õ|lØ\†Ù·[ne_@úÄ_TkϺ©åhô´.5G´ ô­­†ï[î‚\?… nä½;göƒà¦ŸÌ’\]%»ÐAí¥ÝVY 3ã”è×?¼ ³ßMhš•L”I•*nÁFçÕqÄ™ñu §Šeñ÷~ÈÀA‚õE Q€P+%[sÖÐVÔÓ$—G… YRœ}aâÕÉèüHງ´×÷õÃPœ  Ð1t'yˆwBî`©Ö~›× +‹å$óe1cww9üBî6$Ê—TĶ¨³;ÀZæ (4¤9g»ð6Èv¡¡f«´§æ4J Ÿ¿¨]5(§™Wð ø„šs¹Ô˜tàaȳÅ<[EŽ¬p­üT˧Uü¥!µ”JtОÄò ©ýª?,–ÎL?!w ˜×ÃçsŸðg0Môh¾BÅ1êä³Í$sÉyѼÏåÐÛç8G•º“Š(ûûbÏí>z¨uݬ£ÿ­·¾‡Do­eHÐòñz)Ž^0 +9JžÒW –©fÅ'ÈUâ€ÀIõ•c‡‡eiÒä+¡5¦¤ÄH,ârø73g0Ÿ3{#Y°«ò_‰E^n2ÖÝ¥»„ þš®Á¿xdR$;‡ž9ÎÒÌ(ìúû·ŒfôñÂ÷n*±Ï×eHýc¸«AóìI…#†y`©û÷µ%,tÅ¢\µ ì)ÿ_ãV£+µó7À\öMþ7AÙ4R‚ã"З…@ðOÉ!1í$¥¯*ϤóÐŒsp%>*¿È–, QÄíŸÉ¥H;ºEÖ×Ùº~b@Ž—näÿ¦ sשÌXZðÛ–…ì+ ”¥ì, +ƒy¡¯#ö/–!A¨§L‹9¦Ú8üøž¢¼üT»a/±äe£T´,‰ut/u¼¶ÀÌî6ü~¯® áº1ûQAÔ‚\ñSøâWä{N?2¤±â/ïóG½?ÚÆE£æ« 8¼Ó®chbK3¿qí$£þ;+^ÇB(ƒñ¬¼¨xšà¸‰k‹:wMá…XÕ,³Óƒ~õ‚Rçÿþ[&¡òh¬8AµS #L'ɧÑT‘p¤®ú#{Ûª½¹>É;+â­¼’MÞuU§”‰[¡ä˜%Vû0ºEÄ—‚Ù²ˆC9Ø…ÀS©×Èu^sIÓ2\ØþÄï’¢ÒÊÍ. +>Íj5ÃxBÄ’Úª]˸fóŠâ*Á÷‡g|Üšx*c·fr+µB!ÔÃH9Râ€2Hî4^ÁEìdöãöÄ>>ž›J1öu£¬•z¹&¿ÐMLLœ÷i¸Úo™Jè$!¸ótîFoµÜ¸§_µO>yù,ÞªôÎ+ø±#îD‰ Ùqµø¾ýWô2³û  tE"Q×IŸÕ…À§Ø.e]‚Çùl wÙú,•Ó*¡"ŒœMf]­—ÍÆàiðñ/FSóê‚’.÷ìsãç_.nÁ1 àÕ/€ßâ8,‰}̨ó ÍÊçðW˜“‹‡ HFPšsµWkn3‰áa¸9`6I¼åŒùÿ݉Wi¦>|ä =ÔÙÙ1u¬Åª4ÿ¤ÅÔ…X·’HAH +ƒ$ü-UÒÛÊ3¤³# +endstream +endobj +2220 0 obj +<< +/Length1 1188 +/Length2 2740 +/Length3 0 +/Length 3928 +>> +stream +%!PS-AdobeFont-1.0: PazoMath-Italic 001.003 +%%CreationDate: Fri May 17 11:17:28 2002 +%%VMusage: 120000 150000 +11 dict begin +/FontInfo 14 dict dup begin +/version (001.003) readonly def +/Copyright ((c) Diego Puga, 2000, 2002.) readonly def +/Notice (Copyright (c) Diego Puga, 2000, 2002. Distributed under the GNU General Public License (http://www.gnu.org/copyleft/gpl.txt). As a special exception, permission is granted to include this font program in a PostScript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def +/FullName (Pazo Math Italic) readonly def +/FamilyName (PazoMath) readonly def +/ItalicAngle -9.50 def +/isFixedPitch false def +/UnderlinePosition -100 def +/UnderlineThickness 50 def +/Weight (Regular) readonly def +end readonly def +/FontName /DUJUUF+PazoMath-Italic def +/Encoding 256 array +0 1 255 {1 index exch /.notdef put} for +dup 97 /alpha put +dup 98 /beta put +readonly def /PaintType 0 def +/FontType 1 def +/FontMatrix [0.00100 0 0 0.00100 0 0] readonly def +/FontBBox {-70 -277 902 733} readonly def +currentdict end +currentfile eexec +ÙÖoc;„j˜›™t°ŸÆÌD[Ï|<3322ãý¿ô9I¸fÈÂÜ"ýÇXXH`ì{¶Ú(Ìb$`á«2 GyµÀ<“»»É\ð&’ÌMê¨ÒêµÂæCté+Ë…B›®JvÀÆ·mo÷Ïš}^ß¼ éYTY½·ÞCÒ]Sü=ÚnðÂt9x¦Ð>ÌíJòêKÌ1¾‹ž'r6iÁ’Xï¯Ü'l±­é Š”6ÑŸ±Ã=ïvª1]Û +…Ä‘gåŠ@ìFÌ7žªàpqûÓÇíõ')øjjOh%SÂÊösq™·=ÓðŠ{ãÑ.] ˆZ§O7Ñ”î‘éEÕ„|Ûª´u±BwLà Ñ`'ª´&ÏÄ0C¶žg†)X0F?nÂY¼›«&‰Ñ8 ˹r¼]lpK¹GC°ÓpŒŒoÄŸ³g\´t/íþ–bj©ê}t SòEî š²£¾cûqR7¡VÀ“L%ìŽY'1³¼b¦êÇî ˆJ71Ù®SU3ÿ=ºg¡OŒä>b¸H”t ÈkK‚¤Ëc:ƒhíý׳A‹:FÁ.ÞxW˜(úäÿáüLÆ^œÓN1K °Ø@_»Pÿ̹>ýnéæn¹ïÂoùf³µ7-Uš EøßØÙlÇ#©+¢ÞöA5FÔ”¼kèyêî¾tçÚ:÷âR¶R¼ª2\þ¤3©Ìøí’ l ªvÕ"‘Úª> VÓûœ³ ù”íb¤…/¼¬þ™÷“&’¥íµæÝ"@Ñ4 +¹nD5¤tvÀñ_\…ž$ÎS„|Kp°ÔCOÂÈ1vF^É(âByÒ@ˆ‰`gˆFú—szVŽÖ‘‡ûŒ0‡ôÎQnÌýÙ .ßÔÒÄT=>>‡rÕYA,{ÓøÉ ÉQðƒ rçFä!Þs߸ߟ0×€ߎ5¥Ë¢Vô¼<æ4d(íênL¤Ø³[PCŠ©ì²;üŠ+qÞ¥œK V²:¾d£§&>J3)éÅ›ôQ6Põš|ÞB5 e í£ñ)\£@ˆ~7mÛknË‹ª$Ô¶úÜ–P±+‡‰™C}èòèo×*Ú(¨ã ·-ßè.4‰ÓÆ7§ŽWá?ø8"½N[.½^7/ÆŸéÌ3§þ/3±j(ĦwÿNÿË º×þ¸ wLjŽC; ìf&ŠŒcv^%€:ÆÃS,ƒ’yr8ºN@K}m>KW¿÷Kn­9×è}ê%F¢S–GhpŠx³‘„CŒå(¹€@òr XÕ#?FäÓØ0éʃº¾’#«w^øcuÖ»¢Î€…y’þ‡"¨-ÚlEƒ£#»~»Áõë¸%UŠôŠ²Í1b[ ^¨Â•1€›€@‹¹¶*°ßr«¹ÖEŒPâW‰ðS/õÆÛ?ÉûN!™õö< eâÙ* ‡ðWxaÏx"(äZ„°ÿ5Å…Á‹ÆQÎu?ÅéÊ +UªgHß{@^‘Κ2¤]Ʀyý±Î1°QÕÇ'Ç,êX§L káx2 ´ß]mÅÈŒÊ0Ó`픈âpº³:ì*:_k£Ãp +‚º“ÏŠÁ÷×%¼þ,‹•™véÈVíb]2q2嶖ćVas ¾|HzÊNê@=Ü«ˆ7ëik¬K‘\É}ŒpYöNsÏS)#â èôŠ„¡cÏì,áE¿×€ÝçZ†üí=D=0»Måaú‰‘fü~nÄľ»<ë“vž­QI¼q¿Š[ ‚]¹@²\ù‚$‹C{ª¸ÇŽžÏÝÞBŽ®0ôh”u‰9íp?{U¶ ~sU>í4ߧšïô2º¸ËB †o´b‘ ÜÎêbþObww»wªÏ>Â|ÎåGkh´6°–´„p rÎ[#õPŽ±"7à7vôÑÍEá8Ê–óõ›Á$;E£þ6×zóдR]}îâ¤#¤#ì+µëè0É"i%uCjíÀ,±|¯÷ÁšaÖSVMŒx›ËßÉ5àê‹Š ÿýp‰åÌÔ6‹ïUj(mEä*Üè~†mø™YÕdÇ¡æ—µàÁ¤w#ö`–9­Ôk?ÏÁ)€Â픥…Z©ÑÜçËW|Mü%H*'¢¤ýÝa>˜"VÑ›ÿ¸w5t¯’0QËPG“R%‘à®"ò*¸½¸“ý„B…u+Ö8®Î~®³j Xí< @œ±cêNÂnâøg¬YGй34ª¥ VÐ])¹Œ-l ÉŠ÷5þ¢•‘iþmÕ™UV#YÌ‚€¹{Û­õls»1å¶weØË(\‹OL‚õ EŸ¶É»Rþó¦Ô~–—[шP»så1\èŠzLê¦ÊòþDÿžg•SÂS[OÝu’p6æ(@1…Àâ£R›sP1í ?õ=ç¨úEìØœÆneM6N5‚†åw<3dÏq^cF‹‹òðž)ÿUÏIÂ2M õØFÅÖÆ¿äë~ãq +1йmدNIà B}ýfÈ·/ZºåMYöŽE(Y´Ö‹'á²¹¢ ú +T;U·Åì‡÷F7®<·{¬ÈÆLN-^×(¥£U'UAf0GivÄOð0P$Ý 4ÛªZ@º/³ˆ¯¤Ž.â}·L‡© ŠdlI/_Øfá°>k[عW‰¢SʪÇ1<" Ì£Çó—UÇÞ3 ±ÃØlÕûÆP.ý+\ÐõBÀl{½É‚ÞÄH%Õ¹éú'%œ’@-ÜƺüŒ˜¼žRÛSöÄëÊÂð覟yNuE—Ê|ž÷Ïù£ÝÝvæ‰^Ó+¾‹ÖåZúûR·kÚœ[ÙÈȈRÆÄýûÔ¶0ŠZHç: +wÆFe@g¬`!$à ûp±eš„pdwíé,Õ,#ã µ„Œž<€öx4¬¦«=u^‹Ur± ^‰ ÷´º×hIo Ç–mÝÐΧõ +AÖª^§Î±:™µd¸5âDѺL•‚:ÊÔU¨Ôœú-¾L::V +6» f žÑrº^‚7£ö1ë5aj³à¡Ašº£¨nWÉ©!µp© cílov®>ÕªI8©3!ÕÜ/ª˜ JÝÚ?|iî$dØ_ìñ(°/;Åb÷üÒ49Ãï•Û%X¾eÊal¥Øw픋v_Õ̹ûœ@dr®Ëp.álä§<çêÉJt~½ŸM«÷[£†yœ ä™û3¹î[Î{ +endstream +endobj +2222 0 obj +<< +/Length1 1756 +/Length2 19614 +/Length3 0 +/Length 21370 +>> +stream +%!PS-AdobeFont-1.0: LMRoman10-Regular 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 806 194 0 +%%EndComments +FontDirectory/LMRoman10-Regular known{/LMRoman10-Regular findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse +{save true}{false}ifelse}{false}ifelse +17 dict begin /FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMTT10.) readonly def -/FullName (CMTT10) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMRoman10-Regular)readonly def +/FamilyName(LMRoman10)readonly def +/Weight(Normal)readonly def +/isFixedPitch false def /ItalicAngle 0 def +/UnderlinePosition -146 def +/UnderlineThickness 40 def +end readonly def +/FontName /NCCVYE+LMRoman10-Regular def +/Encoding 256 array +0 1 255 {1 index exch /.notdef put} for +dup 91 /bracketleft put +dup 93 /bracketright put +dup 61 /equal put +dup 40 /parenleft put +dup 41 /parenright put +dup 43 /plus put +readonly def +/PaintType 0 def +/FontType 1 def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-430 -290 1417 1127}readonly def +currentdict end +currentfile eexec +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿äŒ¥?ü‹(ä›:øE(ùqÞÚUé[‰s½ $ªgG '“\ +wšÃM•ˆÏ‘¤s¾u¢IÐC'žƒêµÑØú93ó|Y@TÛ‚—EìÛÂè—k$:™mGzR›†g»?·x= w;•*E ›¡ëJ²úM8Í›,õuøàý²èZmYË +þâøµÙ`Š”n›ë¿.Ë£†ýî/J +E:– €ÉB³Oà÷/g^Mj9Z6ñ¬ïF脨0 B‰KBÕˆ[—°GÓO¿rEÞ÷µå符õY8ÁÉà6ñ¹¹n¹XÕ”°*w„€FoÜt8CF :ÿg6}dPâ§Ék»©™]«!­ð$ú¼3h+Û†^]â;J˜ØRÛ*Ç-uCí4È⚈uŽðµ6Lj=!;Í`‰,7TàÈåìØÍZX Vʬ]w)ߧ±ÏÌɸ)Á²ê%NµLß±‹VÙŠüÓ’ôÚIþŸœü”|5ú…sBŽÏMedO&ÿNg°’$q&äàYשA Øuþ­(†@HÞV¦qûÃQyåï?>&_Ø@GKf¸0ÊZ E\¡¯¾±$ÿ3e­‚LÛá +°Ž:G(,æ fÔë‚Ë[MÆÜ]l¾n™ ±ô¨uEäÖÓì~0¿Rø÷š‡tö›»^¤S Pêþ–¿jÀòw¨£Wv6ä™ò!¦Ýé¬ +­Ðï‘1²nC22OÕœQµS ê³tyóžn”åG + óýµ¾Œ}=_÷Ž¨ÊóÎnŽW?^è‚2I·=>œÄÃ]ëL£¢‚‹-‹Í¬Iô®úq^ ͘´ EM +,|K93¼¬ðQÒbÖ¸Z×¹pu ÐÖòPÛ«%ÃFùj`(¡m-¦>¥ĸºtGT‹ïf• +ëb0UAã¤xð“>±¬|ïºz40 +—BŽèCGÒ¡ó Œº%"èânHè¯â¨#Ü•· diKs¿öýx‘²cî³"ÕÖHc öAå2Ÿ·¦áskÝ6¢.ab?ìÀCGZրݣêÞ~côc>U¤Ú”!«U ·ð…Îìæø‚ب ®Zl¡p;ÿ@6CñÖþá‘ÑžØOå.©a‰ÞˆRÙ&‹UÕ÷ìÕtºA’ó•²_Kã·Éfq +á(CŠÿ¿$gUà^½†«/¨—è"z ×RkÌf ÿž¦vPGÃyQ”}ƒiòî0Ïñ}ãÄ*‡Ïí+»z>³šnÓ ˜ˆy!2XÁŒô{”Z6’”ã0Ŭá‡Wë ZîxVêŽ*ü2'PÔ§ˆxh}·C}—«Bºû—²â•.†ÈŽ”ºp&…îj…Žÿ©×·ù—dãqŒõª×â'gº¶0ˆéù·!gzŒ”$Í…AÊ\/=Çj‘ÂëU˜EB·ÎÈÏm–/ºöðÄ §ÎÖy²]Â"Q²5Q.Žš_xÕ(çæº]ïnõ´R©Å{æj„Fæ.6Á‹SµÇå0&‹M…P!>æÞÉô>¾ú¾òÇ’Ü¡Q°Øá±A&%_¢ÆÍ¡¢°•¯ß ~æ›eài6œSËܯIƒ_1pº×$€$~á<À-’.Í/¦pY7jU‡Ëcd适™ÊM“Tj)9Z[æaì +ƒIjðŽÁàQЖÌ@é1¾°tªž2»ú6“Îh½e>}[ˆ’E»fagŸ~ùg¯$íP81lÎÑ_ÿŽh8é*îðµ5îCÑÕ,ý áUwF`‘àeүͿýå;;1"c%¢|PüPh\CMNñ½‹ Qn1:ü .«hÑ+±ÁUe‚w€5­¿Ï6e–õãÜKì0k‹º(@çs¤ìQ-rñ¹­DÉ%úÝ}òzz®®RüãÚÉ7ùušÌÑKfÉÒ ¶1óú(±Æ¾Hµ¹^PÁ(ÿó ôB'vhuŠ fgÀ›ó!ÓÆá±› ¬š>¡Ôgªã¦‰ZLºyå’ûM«SŒ({º’÷ÔCªE§‡]éûæ;ïþÇ&m±ûP†4 }:FkR‹{˜÷"¯KÅ ;\Çc¡‘/¤[ápUÝ)6)gÔsô‡ÔÁµ +„.Œ +wçøʘXóÔ¼Ï68…|%Ç;xõÃ_ðÝêÿò¼€jA÷ñw(¿šdŒÿ—BsÑŒð¼é’>„èú~è>‹­bÛ þûµa™‰5s8«Û÷èݯÿÃÐ>ןqˆ§ î!/‡ßÑÇjWKŸpDzÂ¥›Á™ùN=0óCÈ72K™¿¯0Ôër¦|]´*LÛí­Åø†Æxå\þÒ¨’ëü +üG:Æü—c{7&Lš3ÛÊP‡ÙàDúîfº$FV‰—ÒeŽ‘ü=…X`5 :cZ+ˆ2a ]ê.…D`©“KÊÇk^%÷ÂIh´MHžŸ—/F? A¸]bu¨\Ék)¹šT¾Ÿ¸@ú,³¡ÈØö®d[‹ñÏ¿ÐBí®‚, Hh«¤ÁŒª^Û¯ÏI±±*ˆ™qjT‘qûbx`J@̤HIæ{…¾€Û£;§qOÒ]ßûTe„7”xwy÷i”CåoQ.<èÈV¼ÓYx}½ +kLÞ‡1‡Ú†ÒÁµÒ'mȾ­ZzË\0©çž…Ë‹õ8FôXqàŽÕÝêTòt-1À¶ÝAºˆrz€†–&‚ +ß"iå/3 KÅKÓ¨÷å ÙŸ7bA-ÞÄ +œ¾Ë÷y“²#ĹÀWMN«ë¬]Jþ—ו¦ÀU *É;.Ï ‘ %wB©¡åy;h¢"1ӡ환%NÑ©p1ú ‚Œ‰ð@+VJ–ÅSƒ€).—¾&µ˜yš‘õ+¿p<l)Ùlw†ý¥?Ê̹”FfG•òµ ~r®#ßýÍ]%?£ÓöLh0R"1ƒY,”1ªÌì´¦ˆ`H+blb„›¶Uû¢—“Š<˜nÑ[s {åyA$ºÁÙNwtú•Œ&Bºˆ!L‹3e,DD12–r¿T5ˆ ÿ ¢Hù¬í{bLÔåëe/wyBQ~ªæíÉâÁÿOžQk6¥ÐðàÊüºñÂ$+æåB ^<‚HZ°~“ÓNp¼þ¬å#³j*µô¯gqWkM6z¥±½Ã·Ø2‰w¢õÙÖ8©”¡ùèýTèÚ|JÄÊÆ,ܱJÖŸUVzÅ%C/•Tü­R(x÷j‡‹Ø q?táÐhSÜõL陼æB\j äò¥Ei¥cB ßgŸb £š=e¨ÎáX2)ƒm ØÊ9 ý˜odšB°a%\“ìÙ¸Ü *sr“ê„7ìzóE±ë;~‡€ðPØÈÙ¨ê ѽ„Ð •Lù%*DCî̯Væªpdmô™›öÿo­Põû›B¬­6¨é9OsÉE‡Ríœ"(¶$@A.6'y[)GG¬˜`Ùšqè!ØùfGŒ—õÓ(OZEü2u[\îÓ澶-ËXu[é]’¼ËÉÄ…üe¡[þ£@×$@ù´¸zýJ&„ûäàŒíU, ͸ +Û«+û|äï¥íØø½ +¹ËF– +‘Ž6ždnú¨å‘§Ô[°E-x•þÔ»‘ïÙR¹,*[c—ŽPÇLCã 6'«ç»µûøÍ~ô8q!÷.ë:š¿÷©ÄkS´kúªn98@Ì?-±×Tùt¡B€Ô¬™Æ×Eš!3`yl& WAÁÑvïËØ8æª8 +˜Wç2¢4åCqMAÌa©Â·½qqYqfìžžœâÓö“õŸ¯„bۘDŽ5Ÿ(‰ZÊ$hÃߦ‘·m±­ÜÞ,Å·ð,ï]ªx¡óNõ8Û4 öøtè»G—= ØXv´Ògkñgc q¼ÜdfÜ%Mž› SG—øPïm©«É,,ÆÚÈî¯ÆåL ²åŽAvŠ¨9l1ð¤Hë€`™Cø⃗Æ!Tà¹!2RÅ9QN›õIuV;¯D\õ½ÖGCöhtAÀÚ†‡eS6ã ¤À[2ÆÑV$¨<¸oÄÚº®~ Òro1U>&=éþÓ³ö¸‡Ñ%<ÅìÊhh +Ð*8‰‚ƒeõ”NåakU±u%ãvH²EDÔÐÒŠ58ê‰võ"ȉª?H‚}Ù( 6Ö˜s[1O=xZ1ÁÏ[$Ñ'ÏÐ!Ö„¯„«W$†ê?ÒãضdsÇHˆ» \éfÖB–Ë6ÁEÌ%øXãKm'á:Nw;b%Ì÷šA5Q•¹ÑœyacP»)tLq=͘F.’ %O -¼Iˆhke¸ÏÌ!òw†™¬«ï(Lö<tÉÏl%"Én…Q´)ã4a +iw+ 2bgr8%#ÃnxTËG&yû«ñß1Ño¢•È0LäúewÕÈz¶ó3áÂî«Â +i„¡e9Y½o¥“÷ò÷'ºâ}zf *ê8!é¯ï6¹¶Ôï8S´("úÈ,7MXJ‰ªALìÎÃî©…ÎI1sëÚ6$)ϺÑøjbø¶ŽcÞZÉ͔զIG™'`þ#áÜ|ˆ5+…DÝ÷×bm6›iæ¹Û>‘]ÇÁ,2¹ÿ“Jó ÖàŠo5@|JK6!CõÉL&dxXñBÝ8À‚p9™j4¾ [ú™Ù‡èGtö´bú.Ô/ï _9"Å:Nªx ðèýùV¯ª)$›®ÕúÎÍÕ¥þ»tùV5è©çû4jô·°ðCYÈ­§ü·†Uçï-ÁÃi73)‚IF27¦øñsÓx¤ƒ:¤­ùqdAÃÙºXHzõÏlaV/Úõ·üDïû8š4&MÞ¬–ög¦V®1¡±Rü­¯„˜<éÍ8„Îí ì?Ðb¥ 6¸[êèZøŠ$ŸÛR ¨ÖvƒT-Á|âî0àV¿ ¿|Ð?Dµ£…7ê•Fê<Ǽ”i;zHèè­8Ùc,o“H_š‹³žK" oo%££&T€»ðT{ݱߎZ¼ï\Ù=øŽ`ì㬀:ŽÛzXÀî Ú ©›QDWn®*Ry£Ê£çW­^kDÑ!&™Ÿ§ÏÝp`|Oú*¾—W^¿k¸ˆWn\Ÿa^È0§AzÓk­@ñÚß66¢‹zOÈçË¢^ƒcb-!„zp$‰²Œö'he–÷WÅÁ}Êþ?'Ï˹Yp˜•2þ¡íÎü¸xþÌtÚÐßïD;9\”Äwè{d\’w¯ŒŒþª€*§VÒ•ä²³1Í }¥½÷¬FTîÂÑ/¿oôs€G‹Sú ÉþˆyI‚ˆzüÏÞxÄÙP¬OZãMŸAôÝ'FÚ·jf×zM†`7”U»˜µ‚ë*÷…0š†»@áW·ØòBÛßX€3Êo¹Ê•:c6˜‰Úø£÷R‘ôEÝÄ!Pÿܨ^F£θej]Ó¬¤!mð®=Ü¡ÙPàñ:4®ZIqõêvknåZUêûÓËšË]P$ÏT¶ËýÌØ%.‹þÔ3¡Óÿ‰8‰ƒå`®1ã!¡MxéRîíŠ ‰½š=ŸåN‹­ó#xPtâð#Ï&Tt·+i~®u“$Ž­zÝY(ê=ìÅÏ>T–q–ZÈ„›;ˆ[}ªp­ßa€~ô¿ Ë[pî‡ÖsŸ!c yÞ;àôÅ.e\éNhôéî]"óXˆRñ€‹‘’þ3å+w^Yë$àÍ­’÷a³a{›ÍÙcçFäSŽu1\ŽÁƒ,*ÛFD­ý6, ËeYÃÚÉ VOªñ~ÿ<‡Þ¹•5å0„Þ´Sc†YNr¹t¨E{óÖaœ°Šç,Å»:4 W ¿¦\…eXµÝ¿+ÆÖßWiæôÝ6çÇ :ÅŠ²ß£íÔ–ÁŒ‰tì˜ \1O|å³”åÄh”<“¢"­eøuÎ ß"¼¬¬k(Fî 9õ?ƒ?'y•Š‹xI“xCf&Ëó,»·èfö0ÆGû°Ã£¡^Ül‡5\_|‚*‰æõÑm 4ËIžÈd²3$)Éf˜ÕΦÔeÌžÁd<‰ùp3W¬KàNîmÊB˜‘' KÞ×<2†‘–§ÁxÓÑ3œJÜ‘j¦ôõþŠd-¢åOB$ìA9ÙáVtËÍÁ!- €¾/EJ}†ÐÏd¬[ÌyÚ{öÐÚH/ã8ÿqb³Žpí¦w?…àjù§¹Y¨Þ¨+?¯=Œ¡—ë²ôþÞˆ‡Fò›#¯N˜:îáznmy.!Ö‡¸ýA6q#Ïîà,Ænc+âžo +rCR²ß` +"h³ZSÏC×f'Öƒ|ë‰OÅJÑ¢Þ4qmպă0¼á×¹­‰ kÆFÅÏ›$y´çöýT÷ùÓJAê$碬¶e•’˜˜L||gBø|ÿ{Ý$·W“CÚ^UGè€!±ßÚ·‰¬ÐþZ/e‡ºj†³Î‰jž‡$,£:»©¼ê–CŒk¿)µÏ8SJc¸ ®¬ñãêŒzžÅÃëÓUM‘ùNðT§þû¬f»{¹·ÄK‰×ÏS‚?O~Ý¡8RÃR¾}Íó¦™Ô@‹“\aI:^·Àlââ†ÐyÛkH—@—¤Ý".,Ôÿ˜ÜyɉÌÀ†Ê©ûo!TïÚiâSPz8çC3öF.áÛÓJ¼·tÃà ^ ¾žâƒ3xv%¶£ ªn`ñ=Õ 7ˆ"¥^>hC2)+›qLvW¯ +g–ëZÏÙ­㦨̰ÀôÅBäƒ$é£@®Š‚ñvÉ…º&ÝvP²Š2o‹’ôát›8ã~v8ÙÍžc»d_MNö.¿lßÑvSÒÖ¥Û(ëﻺtÉW:HèëµàcÚ'l‰[“)ßUžä÷4–ýÒµ‹ G[^@Á^r\Åò‹¯ÑéLý:)®zpÌjm4ÕÝnx¡÷Ñ¢TúÙñ%²Y›Y¯COè…ĤșŠ ­b‚Øܤ!Ic=ô±¿Ä!Ä€ÎÉ?3¤—ÇÍZuv£ÞîàDI6ž!u±Ü¶ ÔM±?Çb5‰£zùp$EöÏìÜE¨Ùm{ƒ<燑 ˿ÆÔ摪‰œžoš¾.'W@/•|au‰û¨t’ ¯¸˜‰)muP#4ü«Ñ¹©Òß¬¤Á2ñðP|ˆZŸ9#^†.p×é40ïîôÊAÆ5&Íj|$mGô.1ˆ`Šò8Z@$9¨Õõ²Hà!Rá÷iŒ²Áƒ6©'ÎÎïÒ‰G¼»É1“tânkõnÌaÎô$¥Nb}*Ú@[œ´Ã“xB#Sí.ÿL?ôÀ˜öü¤'Ò¤S`©×‚vªŒz¹×Ö·#dº¯Øëf0Ì‹Káǯ—ʳc%ýÙñ¸ <á€dà!šl:ÀXKW0®m¦V—)-L%€½¦kRYMX Ùœ|,˜Àyž©.™B8ó¶ꄉï™çpî׊–Æ*ܾÛæô?ÞG.Q¡{åáQ&³;õ­ÕùÌ¿ 'öç!}·ˆƒî ¬ôo¹£/—܈í‚D-¯;)ø+lÊ”ÍÚ lT${Ñ‹zO‹Å”a¡NÚx¥¦¸R‹Ÿ+€G¤^»óá$:àÅÛ¿®ãÐå²Ñ‡ûj¶»}`˦zRÃa0› È—rmÆFbP¯4'ã#í´•š'0ç¨âô ™ ˆß¡š>Î:!C穤¯ýqG PjÑ]ŽÎ6yâROFË©éX¨m¶GºUß­tK®u‹GŽþ\»«Q8Öä«©:(vüIªe41“ù=ò9¡)’è`È™ÈxÂ|`ZÕxüŽ0«ÖϾöÉH©&û¹Éqáá å¾ü*@˜çp  7© Yy¦Bï͹xÝ¿¢™¶qß°Œ`ÁHšÂ»pÈÓHÇýÉj¶ÓÄX »DÕÿ±ÚÖ×µ`1’åéØE+yÿ88#¼‚Rõ}Å– •  û~ LÈ…"ðÿ¯´*¿nD<ÁNXºBŒØr%Çÿ(ýÍO¸˜ R ±¤ýd’úókðpEÖ+Úë€ÛHDòÆgæ$²MDí=?v(Í-Œ§¡±3¦‘¡=N½•­Ë÷rrËuH³aÒÎ ©¬!¨s¤¬»ä¾™Ww`'K´¢Ubúó“6‰¬Ò^ê*a%†~õ#ÂÖÏ'®(Q³ªp…AÄg8¡àÌÌ@@[k‹ÿ,2¢£ŽS“EôµkŽ>7 Ù¥\BŠîUIÓ¼£©”H0³L‡©.G³ ñ )œEæÛ<ŽžíMÝí ,£raá0‘ +]á¯XÊÃðmÑø?¥ FꛢcR9|=ÅZÍ•'ex‹WÃ4«-¼›rÃ×Å›o2•ÃkúƒÌ¸á[,÷þøÉœ)½1ᶪ¸Ÿ ³›¨ozqw‚÷¹¶à{]ü%ö ,pëˆÍç< ‰.>eë(ļydgÄQ‚£ý2=\bÙÉ8Z .xÙç««7­%M}ÈïÕ¯jÿãWvD ˜dçG¬¬…|D[ÜÕmh—ó|ø×çxðgúúàîjŽæ£»ùÁRLO+H©oâêå%ZF&Ÿ÷'ã`}ýŽ5+…DÝ÷×bm6›iæ¹Û>^­ÿWÂú¿5ÙðtÉŒm˜b°©‚ež†“D{ùýM§ë/µ‰ec@MÈH¸{q¸8‡B<ŽÀ•ïžB•hFÜ­ÓÔ÷½\‰÷éXµ„)ƃ1â…Ùªv‡(å£@—õßlä|÷Eä/ò9L{'\ðã}̳¦­=ßòõ|pI:ÿx8l¦ºsÛÔ>B¥ÇrÔÍ5)ٱģ Y—‡q¾]óÄ$&eT31ÚEŸÚ‘§ˆhÚñÈV+/YƤ(ã¨X¢–~E>¦¬š¦<&rïy|kVqÕ6Ò—í ¼uÍœ0éPÓÁÕ}g¼½ÄX´.ˆåHÒ.¢{aýl‰Ö[óNf4å0w kihxàŽ'…³œ¼_3bßÿ'QJC”z‰a â?¢]Ð$!:€‡ Žþ<ilì›­áÙxeøÂÙµ\KØ'{Ý.†‡¦¸Ô?ˆ>yK>鯊\Ñ +4~Ç9£Ÿ¿ðf+“² cUÝ$d¹íšïMejÏ¢jÃÅS]©6!W#u:$Ñ ÛN“ÆÔ•\Z?äžëoò‚[¶§oûÔöä½Ý‚AöŠ{ÊßË=$v>–òÃ…¹ùÃN‚ð.¸È¿Ÿ˜ÿß<.­ Õ­—å\¬ +åcVOÁìðåÈðš& üçÀ@ÑuKÀ‚·\ß•fšA•[,¼fp˜–ÖŸJìÒ~J%÷e¼µyTÄËZšBÆÏn Ø ly4D¥¢"ôš»#¼aï;³=Ä\d¤Qü§PsJo§]»„’yHN‰Le(“3Fªszˆ©É, ±K²Øz£•ïnŠj–pSäë—´€(^8Æ.þø–ox6®FWÌÉ Tú^|ÈܦBÏ天Jª¡SÐõõë<¼­èË +YXux›Ï—‹,#8 +4’ø~FšJ |Y&Ñ°låç iòB€EÍ`ž#Î7yb•·B O€»O÷y¶¤e²V²Ä;'âŽ%ãN6HDð¿¾PB`‰ìpq?QqLƒ\œ•Ö-€‰ä®{Fó;:‡=3󣬦‡û³ô*w»Ö‚;°‡í®ùZRE— &*TUrüÙöM**=`D“±ÂMYâ¦õ…XÊ'N@qxäæ¥?ê”Eûá*˜Ô›‰]¬yʃ?Üçv¬ŠõÓ©ÞˆÇÖ¸B7XmÔš)U¦tÆ Wtç~ÒNbõ¨]ËL¯•¾‚Ϫdà!ËSì2 +­lìÐdüõ¹Š ýóÞÈ­I3òš:ORB>¢EVp;'vnNŽ>6ùØK‹\qàëñ_4ýÙ»N’U–þMÚ“lÆ%¡[.ªê_1ؘ—‘%Ò¬4ŽÊH±Ñjw¤yeŸvhºD¶j„c®0…]8ªÐuɯü`b¬Žû9£ÅMÇ&Õ¯Š±}Ñ :ª~¼z¬é®•Ÿ{Âî#É£yý©÷¿¨Þ,ZíQ‰I>š`ÄÕ>ëD 6.V¿S³o0†ìr:x¡kÿ£°•|4ŠÎ¢G´ÊY +]wcÞ©u]k„ Áv©ôV—î‡úW¹ÍíÎ[åìò=°Oétw*"$ü;Êw–ß{Œ©´ÿŸçpK†é€–#ó㣮55RÆÖÒ¼ º·Ú–ž’™ó[&Þç>6 ¢F¦´#˜‰_•ÕöŒê -‹æÜÔž{b=i7fÞÞ‡5Öz®“ÃÔ˜§Ó¤- óy#tbci4Ýñ0-Î^èTH¥ÃÏYm}€%x¦¶ÙͺhŒ0A±:]øÄéT¶9[‡þXe7¢Jð„çTMŠvIB¬ÎÐ:w…؉5Júàþ€@¦À7à ¹`p»¢º„upÝ&œê9adœ ÏS(…¤•™ +œb1¡%™,hh2ÚzQ+$2%l@izɚϓ­g%sAâ8¯óÆM)¸Õ= §A‚þâ8ËÜT:XJâ+·;GýÑ5t‰…oå D+ëŠwù)Šu;C`øÞ~ºL%ƒ—Ím§bG T\æ¸ÜJŒ~7‘atðê© Ÿ„@ƒÏ•œ›œÇy‡½—š™qe™~Á-n‹ÜÒ?¬®%ÞÖéütÞñ*¯ã7š®†b{¡‚e(ƒ—‰¸äzaX ,Û[²‡®m¥×Y˜!»ªd ]’7ŽÚµiÃG^‚‡àQvĽž@Zÿã:¥3ú&Á*@šñ~nò1ìzÁ• A¸±Þ2>­¬GûÔ‘bl§q⶜åÊ«ªÓ§vvI™m²joX†¤6%Ʀw£}]ô>àþ6Þ_'¯VŒÁô,Û5ߨyO‘ÓTTÄ-æÿø¬†ƒÝbírq&ò6Ç]&…òý3N&`<vØÓ¼êì’XÕ«G“Ä M}Bg"Mu|ìõQ­ô³DÆ’dÌü.ŸE’b®±=“èD$h‘&Ÿ0þÖkÙ)‘Š)Pw†¦ÑdÂ:6›ûp¢ +J.Èã>rW•úrìi8(±ˆ¸¸P›š||”ŽnnJ mTpB”G‹LfJNŸ¡Uø®ÅBé„-yɘ—JÕ™Y~_§¬Ìã²Ð£”f€ßòá s›ÄÅlE'Þ‹1àõSþC†:T·ìFÁ Ç÷‘RÞÙW :py†¹s³ÍœZ[å‹YÀ ¸@==g~6ëˆôRõa“ÉÖÓW*ªÅZ@[ðô‚nid¡³É²Ø(D£šÿ_æ†UàÃv<Åøîaÿ$]á1êf° I“à©”L ´q‰¢°ÐD Ë„FߋПś•GB½²t+( +ï•ûI-_ï‘>xb!òijHèÞ¿9M’`챓öOîóœòßôž:š%Ig8ê:!zPÀ›QK~IÎf¡¶¹ rq›5It{²éãþ¼â‡“ÄD©‰†ž¸JHSuN‹Çf.>À›ÒíS©²Òv.P6)ÈÞø*CÖOFŒs¢N¢c|€ï É‘ÅÝÅqü³‹j‹i{‰tÎqÆY…¼0dbè­YÜãÿÇü–3BšÉYÜô%P"?6 +¾û}FrĘOð-Øe流Pz˜Aþ;,ÍÛdòÆ-†$áü¥Kµwô´Ò2¨ºj +eX¡è\kE*ÿ2(P†_?f Yä÷Æõž=ü}LO¯UáQßï‹Gä] J×ÒÊñwP—5zBuÜ9C$9<Ÿ–Šü3Úsu´ð´Òª-K°æ¦–Ô¢Ií×ó¶võ#Er¡q (àYܽ6Œ^!b’4˜„Þ.UÿòÖ×1Ä}`H‡ö½‚ͨžò=œ£þ:C…ëþyI¸CÕÜôœ³ÈÝ oXÛ¹›r"CPÉ]r¢vw2òž+6èÏÉ2¯-Ú(Ö1ÅY»Ìô(_ÔíùÃ2³Qiµà:m\5ëp = î‘YU&…xZòù^£°ˆªP²æãþQ‡Þ$ÝEèTÆd®S=”PºÛ†Lþ½|æÅïWké…¬xaÝW|œe×VÇSlaòU| …4ÐÊ’äT~ìïBÚxlb4û˜Û†˜ò”·Ú%+K«ï,aY3à$rçPq×ùÅ/æU2Û&p,Ü‚òÐÆç ðP€øz›`IMô ó”” N=Æ"(¡wÿ¾qŸmÒôBHÞf¶êÜÊj ÷+h²ÈM´A`¨+zx¶m×0% hRZ³œ‘Ö¡4A”Š¢ùÖ HÙ0`BžÜb=Ð,2=pd”M‰ë.l 0/¦àmµÆ‰ÛãsºŒœ ?ólˆª©½X_5aüX²X ƒÜbi«jMõq‰¤JWïügÏäß`£K°óÔoä/˜[Ï=”“­Ì³q}bÝG`ÚMÝ^°—³o¾¬©gLxê~!ÛqWS¡ÉñÍÆ7rÂú(‡Ro= +½((›F5‘|l.2@ÈgP#€'¹¸á)ã“—Yš­NåS“žîaïK7–öòWŸß}&gÅ +9ÐÆÎiîÞ% ÅŒ›5¶°"r2æþäaKÞ ù¨GI ‚î.ÚZ󆛾ßØ!cl§¿yµ¸À’^ȹp( >¬b …´ÀÆ7î)uv–Ãñ5©‰Õj*³Ä΃n™Þó*RPvZKI@.w»h!®9>xq<½QÈ©ÞáYkh£ÚÜþ _‡ûœScßjÞî©+±LkÅ)–6„ÜYåá?nWS yTHÄ& MÈ«*²=$OÛíà˜a5ê™™h¦Æ_¯¨@ô£“Ù¿Û›åÒM6îëC)¸Ú³SÉ"C.’9J•m© ,v"³e]*Õ×*X6ÊDŽžøù?®Û˜ÛÇþ¦ü L†ò˜M£¬D =¡Rnó]Y»—I€øÐG‰5ÅwªÎ&âU½Ÿ}9ôŽåØotÏéä<Âd2õ²=ˆë6¼.išìâŽo+87Î’»d<‡Ý”Ê+±…¼Ó|x ÷ž¯ «­~9oËÀZ mÅBûS-£k9àÄ•iºý¾d,–4p~AÚW+&T{¤t&+åÎ}(ç+ž”zÕ“´‹=Ð] +„¾oìæ +ÀÜéŒyãà5EÃÙX’UxR ðÂ]é¯Éò׃ ƒ°¸Ü§gl{ßâ4Å]ÓúŽkØ·™ShÍÐN⻦1¦\Ã(“TÑCÍžÜÑr8‘Ø´½Ç˳bíeqç¾1F…¡¶Êƈä]e;ÂŽ.{ñ:Êk;µNXrlè0Ž­y¦ù“š)³œÖ»$¦?ŠÓá»ì +bñøjù²¥³â¨ç­‰0ÉwzЫËtSK‰e«‡’{µì¡š·é9GB'süKb×f²WÛ.IG÷PP‹vßD0Ÿ=›¡mQÿîðKzõó‹!šÌ—Cãè€auLªcå&˜ +o•Á\tÚÆä(s% üDÜÊu/W€mpê» 5‡u„ŽŒ±¥ï_)CÚ¦'Ü‚ÇÏVé“rzË9ÈÕÇ ×ÁªáECápÑCyN/ãk¥¿éìˆç€ú•‡ÝÖ`9üßÎø»C‚1SæÂ=11!£ÇÆçš~Ęۺ‹4“ŸeãDù_‹(¾'Ëbß"* þ³‘¡x£Z%ˆ>ߊx¨a.]?a}°˜¸‰ŸË3Ø\”W2ë¹À·ŸhYÒçŸKOµ¸Ý$A‡U³@¶VÙä™d×ÁXV?ù¼À´Â­ãتI)4^NŪÂ×f¶pÚq+”s–Î&s}6ÿ1{H‚NC¢1&˜=~ ¡aÑĬ…ÁŠZ®é·$ð?1†]wô×OzR–?î~ f€X"Ëû2¨O«¼‰Õ/¹ê¡É•·Ñ ð÷;ƒÈ„>š0¹;n`ð!§ÒZæ?g¹±À.ÉhQ³³TºiÀ'Y.!í¤O$8²…·,ÈètßÞ,~õ³ÅB(ë¤ÒßT=š_ò5½Ý_$”œë‰©9Èk*çh¿osØãÈQÜ?1aLZ³õáŸ:9ìCæ‘ ÖN«çÃÆ+S8VöWWE^ŽH¦;ˆ†¾&R‚^ˆoJø¸‹¼ÙèssËnZ¢ãx˜e—mqâ-|2Ýj%AÈNݯg“ÚÝU©$…‚QC³êhk5ð-Ê¿R²l°mg®³Ç¡Ã½ß”ÌM¶Â.ÍõºH +èËÓ~©žaÌËÌ 91/#MMSÖL_A0§’Iöt_P–?Ž+4‰§å§lĦöú½:ΊÜE83ÎÑ$OÝ<ˆ3–àÍ/¼ºÇ‡ ‰É:ukYµÐÊø¯âü÷ö×'´û?› +ågšª¬È« „Ë»)žÓQÍÌý2(|?òÀ¸#t+•Gº—²°â>šà›ßŽ«;0ÍO“0Nž µ]ÔØIt8³ªçú—CÝ 5üÛ¢¾ KtúA8ñ«|ºÕC?2à°‡¶˜ÊIâD¨§L‹9¦Ú8üøž¢¼üT»a-Û\¢–~E>¦¬š¦<&rïy|kVq×’z™Œ‹Žû©ök¾Î¡73{0rÄ‹v +¨á̆1ÜëÃ݃ԑ,oóZѨ!¶¾ËÍA*}j“¥|9â¿ +šé °/_Lz­F?òïò þÆ”..›ù£ZÐ +öO1PZ®ÉÓŒ$|!ð}ãLPþI÷Zê°8½0ëvA +ôT.rçdH¶Î*«%­¶¶²¯õ )I»Z½c„qï€C°-¥)—Ô:Úõ©A$«YÅ‚¶¶gn6gö¿a¿XO×ór)îÆRa,á¹tÊ{â@S jÊ"ZéEUåµY¿ g7ѳ钅»çlöó‚kævª9ùíKÎ5×pŸu#ä” ¥.˜>¿ÍDr”±ùnÔ¬[¯{ˆlÙõOïï›T +™¶hØÓη¿ëÞ +d}bJDZîÀ.ÿÔ~X] +ÝìO•Ã­g»A'X·Mt˜Äïå™Rh¯1ØQûJ"¨…(#‚ûTâ1z»aÙò‚g}ÐÙ£Ãx3kt¯‹™1¾ÀÄÏ‹Hu˜5ëbTþáR¸÷‰ú™ö´ñ•P£7äUÜ¿¯Ì„Ñë°«g+ÔZ„â’N䀣Š}Ë¿aíwiõÝ#aË“@ÅxíÛ^ +!ŒybT˜¯Ý¤Ròª<÷Ì("©ñ "kk5tf6H‚†*“ô6°µNáÎ2º’n4¨øÃ|£ŠAšÿ÷ßpe™+MVvEîM/Â1cJ6™†tøÍaSØ7À˜4"`Áœ~À8SÎ^¤Û¦r‰FÁ5¿§Z›d,)™ÏBN;ï­SA¨à¡›ër¼Ítê"æ@7ZœªREf}|•òÊ•™9 jÅ ÜÆÚG^$EÔg&¦ÔQ‹øQ ©òCaêLˆs1…âü[Ór ¯{.¿¸ØÐ5©êÔƉÑÅT¼s!†… >l:«–,¹áj$Í´óõpã±í„Bjø[66Ì࢓ù—J 7 ï4Kn‚:±Ô ²:ŒE[æïG¸å³¢™:µÐõ§Ü;㸅™›øÚ‡3R€·ÀÆrC²žXɸfVÅñȹŽ"PA:U4ø±Ä:‡7Ñ×0^sÒ£˜B—»QŠ‡DE2vœÞš–…obPÖ%þsÂÚ82 £q¥dÍÄàoTO Uö +ÍJ_ +¢yÆIa‹âÙ±á ì© fðŽívF±ò¹w“~p·!@i `Ù‡ÕïC' +_€£’yÅúÝb­ªE®ÖÛÎÆs¶»8ɯY” «V€¢n/`–>Œ³ñVÅ$á\~]ЄÙÃý²$Š­X=K>Ö¾.ð!‚)|Rb¬ªW‹ýƒØž`°½àd8ÎN.:ÈŸ–-"Ñôñ »Ë1‚^?ê;çQÝ8WbI¦‰I”¦ÿ@ì ïCÏäÀ\€Ãâ¹I¹=ÑJÄ +NB&õìÓÚêË&ê^ÉQÂ<ìT«îÉÐ"#H8ÐjJ"D“òò¡°Ñ?Ý÷æG®(² ½ž3^E¯4†à`v‹¥ê"‘SHÔ¸5ÿÐs(VVr¯äÒ`³ÅËÍÒâÁׂ"~‘eµ62p» tõ+”›A}»•ëÜp´3·A1µÉ5× éÞu@zéJÄ);Š}Y^ãùÇé°×‰î؇ôÇåâ`„XÈJ˼ñT*¤­sÔÝek\.ºM‚N{ƒ>ˆÐŸ"‘ú¦þŒLÍW§¨Éõ“â 7:œQÒÅû¥/ÙlVˆlzgRˆ!Œ¤‹Æˆû)œë0¨©•Ÿ7L&‘ÍTp-ò•’ë}4Á°<6¨PæÒò¾¾9#šÛG½+ †·P!n±ÓÙHå_àüFJ ->÷·÷dÂcM s x!…›±‘Õ3˜¢ma S®jZ˜›ÒëO!ËçuA£™F,– CS@#ÝÒZZÝ"À1¢&ˆ¾@ÑËüNÿßuá|¥šWº;r “Ð.>˜ª_ò2šÆAwVªØ š*2èô¯–’qÿRÙ¹žÿn*Ãh•À$”ñÑçýÐ2©9t-µ`As$àÇƨ>^£„tÆš:±Á)ñ+µK>¬œ3%ýô+븜œƸØØP þKø ˆ²Ùy/hû¹§–­ Bÿ‰¬„: @†Îø[$~j¥Œ{:eÙ E˜Þmòªß~@È“ÙBÁE Lü֥ƟïÂ[`äRp.,©QK„Ügj.Þ¶¯¯ôé'ÌцúÝ}z¿‘äyS~…BÅ™Ôæºdøý¾4„êÌ(º“5¥“da’<̓Ê’”3áòPLÜñ'E5šãâWàÕ¤ñù®ŽÆ[q‚½H­1EûjU[Ç&cœ~ý98U:ê¶Æý °â›ûâ5b5-c° æNW@$#¿ZTe¿BÉõÝE;88YÔ ›ÝæRJx7&4 Ÿ"ñn@J6—Ãhäàš![ä‰õâ%Å컿Ë}^Ü÷ßßG¹}‰ÜîG* ]…¶Ûe_dÙ#Ü°3Q–ÄõQw (Çüz3ö-?ä¦=Ið+oûÔãr³¸^Á¶µ9“&̯Núç ʪ´ÿÑ„ÿËîEÒˆ»Ž:²¨0â©t„8þòCx:1›/ÿfO¼á§°eÇÇzŠçöjꮕR¥ö)ÌÅš„ÝåÕÿ™]ëU  SÖ$}zÈ£îoþˆLúH5âÐaà†Ò{AÉQn°ZŽ¨¥½1âýJOǨû"³ÓA½´gð›úÓ^NUo¿ÁtӕйD* Ê TîJá_&§0Jš—°ä—úÜ¢ž¡1Ô‚™ ¶L dÒÑ=wsö­ñ$_àq—™¬1¨»NWrŒÊÞ n ©Íçeæ€GKŸ—“uñÚÖØ1БÀȯ¨ý\@û"„/æF¢Ÿ7|>ÐáR5³²Ä34A—tg 5&†v|§Q@3z Õs9\ËnÏy¤vÇŒŽÁDÞ_”ûºÃæßv¢jŠˆÄ`Fuês„ó9ŽI@Jüá›C]? ‡8¶ê<^A£ææêçf];ÑŒ§[LÀb*µJâ„ïAÿ.\”\=PPxesQŽ‰0û•ˆ3 xc1pŠðØ‚9#Φøzž4%÷A.’ %O -¼Iˆhke¸ÏÌ!÷yâËì¦Âçà¦ýÀÑZi¹›6Bò°T%=÷êñNÔÞ[ˆO§¯?\é¸m¹ùÁˆÿœ;§:Š…ˆz{':`Õe¤ϽgÌ|gu¡·¶¾ª|ù/Ü47\A ûŠÆɆeÍ~ø€ù’)ªÙ˜È‚¹URÅ!QƒåûQ`[ªyHÖå|×à0*?’:ìy/4Å©îJ(ƒ„ž|Ï5^O,MoU]¬šmÂcö=UÖBL^FÃР: +´ØË)Í#>Þƒ¦Ã £1„Ö”‚õuω®ô'Ènþ¦úNUûÍ]€´±%½+Œ?ov ‚E¡­ŸPŽ0BkCî÷UDëv•%ú}€´§Ø §lã û¥ ëâ¾(ýŽmåX"DH'Gs… ùðÍp'@ïÁ;mûÃÒ…¤™2ò\ ÆS7?.9R’¶^ue¬Zð&ïDsŒb»¢Æ­1 ÔKŸÓéèϼ0 +Ú€¼3…nÒ›š´1”Ÿÿ] ïÊÎgÝ‚Ñ¡Ò—Ñ—>a%ÿ€^ÓŒvõµñZÝ%óø¥3\7!o¿Ë{á%Uƒx.…b 8€£ÁŽÄP=?U£7DtñÔ¾rqüôN×çˆíÐC,—·Ï{¯˜3ÍJ™rºê£¬üæ qâùã­°pS:q‹òYº Òá“‚Ìßû%æo·d½ sýÌ{oôÜHÞ„z¤È¥béH\$è›ø |Ú÷Ù9ü}RCú¿|«ÑnBÆ +3ÀãC»D™aó 4Ñ“1wåêÚŠñv‘¯ãÊ{XoŒBjqߨÎÛãß|>²‹A’x$Ô÷Œ9ðDíjð<¤E“þoÆþéŽkųÅ’±Æm1JšJýèz²î]¯@3ð9Æ!M(‚ú•Dƒ˜ŸKójrMAHI¶=AŸKE»‡'ÌR\]•y-¦¿n «UüþŽ²ÖÏk¹5 WÙôÏÜLþøÒ°Ê.({í“ß×Í°ÀÔç¤`û!e0ª†Á 4mŸË,¢x+A…xb.j3Ëy¶tn…nÙTãªÙúA–bÙZWš¾Vfz›uÈKs²ÉõÝE;88YÔ ›ÝæRK9ü=ÛÎ@`hKðbCëI¼F‚[ƒ¡)šÀ¹i­Ò^¿wÛÃø£‹ÀbU:ÚÈuªVW ä¥X« ©0W—2èfm_hy…פœ~éÐH¾¬ +Ì¥-úøYÁ.ùëšÕ„“ZXgivÝÒ¶áý@–ýÊÅ› * §e/í±C‘ >4‘uˆ”¡lSPG‘0¶\ñ@QM}ë©›ñ“Ðb-£Ö%SËž³§GJï|wïý »‹³KQÉ´®z62j[K]²mÕ¶Êö>¶wÂY›ª&–Ýð 'Ùáª-œ‰ ñ¼ü˜ðð58ZI¥ÉÙ ö.6ô± }l1TêÉî$Ü_Ùßßñ¥¾ÎÇÐUªØÈ\a–Ü(lôÄ£º¯ÁËs IN*·wõ˜*öýT¸Ác“˜³Ò¬$À5f]ÙŽ&`l±£¨Åz‚KMP¹ë óÐJ;”i˜‚m“%צ‹ U6åVî!}îuˆNô¹ÀbΣqÄ|Р±4*±ØÍqLJMIÙt± oj^¢ð2 6¯%K:ï:»lå'›úO=»§Õ FN?)z,Â>´—St:™„ï6%M`w*íÊ€ #FšçêoÁÀ+_Ä®øÑ6IÂPGÕ£|Æ™™L§”# +UeêH‚Mž¬ ³Ž;þpn§[ÿ¤¤³QÌt¬¸²³ H(R¦‡‘ÚÿY™¨ ¢jF”@ÇZK¨nK)Kú×%ðÁ»ki'üÔÚ"•$ §ƒØ4¶îÌwÑc„~cÓɪA-T»ì-ݾ®I?íÓûîfgí¦¯$Ïî+˜óÛfD†Så6£Ø“†áCQR6¯êÔÏ6¦ä«V~Aþ¯ª_9&š¾šœÈˆi5ý÷²£±|dV¨oùÇ+Y«û©ê‹Ìdo97$!_S_é‘’†öWLŠwE¹«§Ùøû¢)¸uL“•˜•(~Éçt—3Œã ­áÅ¿íH60¿·1e)f{Û+y¦É{T¸¿è*m?è”æSa% R/Ź¹€×[6Îñò;•Œ%û‚Z\ýÙ¬ˆ<ÐzžÌg^j ¾Í¯óÝЪ«¨¶¾a ÏûÞŒÂE8Ñ"ÇS2Xh÷ßÒ“=_  »E«_vI°2sEÛÕPôÓ¨_ë àŸ@‹o°@€óÞˆÃej}•Zèž ¬”nü-¬¢sÆ®ù¥åðFtî:&jþ-é‡qÚ¦ÈhtsÀFq g”ÖêÑpŠîCñ o÷Û­8Єl%YvZ£{¡ÑÂÙûní0 +^©D0yK¯xYUŒyŒpÈx¢9*„ !^µsÇx[…ñ¶vxØãÈQÜ?1aLZ³õáŸ:9á«56{MØvrv_ÛÀ&àDª’N[ÁÜáú$9󯘛Ù˳¾ð‚eó×gñR!iæ L¿Xnf'µ–‹[ÆÀ׃¦ÈžÔºÀÅ ÝïRñ!™n$ƒéÄò#< @ö¯†¾—øÚ¨2n±¢´Q/j µæ$;(þhP3ÇáØ-Õ(ÍØÅ 3às!H­6=@üuë?”ýó§0u|˜!í5©H€܃L?%ZN:ÙžND‡œ•F§ÇºÞ+Yµº·Æ"ùt£×âžjÿ¤4·wÚqt(*µ]z–óî Ðþü‡Íä{„Øôò×ñu£è‚Áö¶à $¶Á.YÿÿqÛD²€G•£ßô<½$ðÄe{Óp*„ê7U–‚óÓ1v|ê/¸JZÌIOÉ Y!Ó·ì±öAk2WT<ËûUŽù—A‚•ëÇŠÔ@j ³ÖÁÆC* ´\4lCµó%—»ŒÒ¿cÚÚežqñÑеé$ÎoÑ}1ô‡¡øgܤ~önƒ+¶5-ÛuBlÏ<_YªµbXjr3F³ÖñßL0F*)WPg…¸ ­QeoرlHoÑI_–B`mæ]˜zñ ¹cŽ,àn›z;’Eñ;\¡Ñi>0ó$j5Íg¿„¡û©û¾$‹^æ.Hc =`h)e$¹¤—Ÿ¦ÿbE<]`QË&ôŸÎ‘SØc°WV`´ [u‰(³lå»J;Þù Ïó€åwþ´ÆŠD‚Lç,i©+G¦]1 „)ƒ „ßž Ï0‡{à g$*'|gùcjNí ¢ø7¹;Ñ%¯ìµß´±d=¤Ü4¡{cŒgÔzéæâlÁ'à'·ÖÝH3LY,k$ 1Ä~ÞÔ…§¶jOf;`§|^1j*=¥Ý•iÒ_˜›¨$¶‚³õnHͽæò%us1ÀXÚ8‚5^³¨¹ü¹©„R§6¤•>ÀmIeí¦r“îE„ú —ãÅ< ¤@¼­'[Õ}祠Ÿ˜Ô¥ØãIƒ/4>æ(«Ô„:ëo¤Ý”+L§òK  9'ìQܬµ$±bŒ0Øff4{lS²›„GõþˆZ‘od½Gû9Jᚤ¸y2—Њw³Ø=Lã£Òw|G¿¿q| +endstream +endobj +2224 0 obj +<< +/Length1 2854 +/Length2 29958 +/Length3 0 +/Length 32812 +>> +stream +%!PS-AdobeFont-1.0: LMMono10-Regular 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 778 222 0 +%%EndComments +FontDirectory/LMMono10-Regular known{/LMMono10-Regular findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse +{save true}{false}ifelse}{false}ifelse +17 dict begin +/FontInfo 9 dict dup begin +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMMono10-Regular)readonly def +/FamilyName(LMMono10)readonly def +/Weight(Normal)readonly def /isFixedPitch true def -/UnderlinePosition -100 def -/UnderlineThickness 50 def +/ItalicAngle 0 def +/UnderlinePosition -167 def +/UnderlineThickness 69 def end readonly def +/FontName /XKYJEW+LMMono10-Regular def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 65 /A put @@ -31993,7 +32023,7 @@ dup 37 /percent put dup 46 /period put dup 43 /plus put dup 113 /q put -dup 13 /quotesingle put +dup 39 /quotesingle put dup 114 /r put dup 115 /s put dup 54 /six put @@ -32010,124 +32040,181 @@ dup 121 /y put dup 122 /z put dup 48 /zero put readonly def +/PaintType 0 def +/FontType 1 def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-451 -316 731 1016}readonly def currentdict end currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ=Ó%åW˜)-{Ùr½uú•)¯œ‚ßröA•ÉÂÜãE(õ@Úý{ë¹´‡º“Q»û|ü_‘RÑå» -ØÐÆϤëA³Å -T@æ|ýq|Vk¹¿J% qu8P¢øwÄGx³ÅªÛ̆ÖåQæ¯6K üªÒ-UŒ\§Ô%¡bÕ"t-*ðxÔõöÓŸÏÿJ‘+ -}ì3¥{Zà2ŽùÕzݬT2sÀ$ZõÌÑ&{Bè–J×{¨¦Œß=מ0).ßÌÅ|Ë¿!í:[LI0lrÙ9GÌ$}»³/Xà œåS`p*²pë×+fwwFG¢O¸¦Ä^æȇ ÷ˆr+dMvNy{æáÙõYëõKëí²Ï[zÊ[fâ -âÜ·BÝDý‡q}Môæ8À|¤j›©$±{ÃjÏìpÓxæxS½Æ«Eîu -¤•™¤š¶7%É‘õ±4\ ¯9»b \Á)[ÉÚFUÛ.qñæþ4í¡ÌZ˜8Kûzwwµyl”ϦI5b·jc‘íZ¬™Š,“7Ÿ¤ÖɪK±:ø#@JÛd®@Ñ»FìÍM‡ç÷Á_ª¡ûd#g+²úë=·è>ëŠÔZ‡a¸·—WŠNõŠH5Y/ÚãìÜ .rå¬pf{0DÀ|6@«çŸé;]ê/¯¾h|ÏÐ.ø§RAÞÉ°KÔmœlö^Ü&WÜ€‹ÝÇ{·¢ðýmFÄÝï«´—¿ÚÕ™É3FK~I†¥7@T7AÜMûOPŠ.;ŽÝŒHÊ+i†X±ž;}²’øžæjÅYXõE×u«¼£5Ç»pÛBI¼¦á¡ŠS ü© 7pQ½40cH™ÔÕ4ðu[ì wY½Ã–šžå2¼[tÐÒÉÊ–Ú£Œ´œ?hô™Š¸bœáþ*Ù$€ -dÆ¥`Š$ P… …›‰:"‚ ñ©)Ž©|UÑïÆ©ëŒfù«Dµ¤tøÔd—§ =¬›œ5Íó¼+¶¤îj7Á%ëi@ÿ—âÈ)шËT/ÐqˆUÜÐFé±÷¹1”tEÏ‚Ò·Ý¢@ Ñ¢€1ã²F^R?–-•ò¦q„^œ!ûŒ*1gw÷ç2À¡4/¦ Ÿ¸3¿@«â\°7zÛazêÑ1é)‰·.à™ðó‹Ex‚Y'ºÈª÷ê5ˆFc=nj¾2â½ÕÖcèw1 üøÌè3€bŸI]‡œŠÁy¸²:ŽÌä fÖ¡©Üê‹“åêñ¦û¨fgì%¿p¡ûÑ^`´(•­Ó'm™ú K'KâÓ '>ŸœEàPN,:Mè{ŠS3G2R@6\Ÿà?-ðÕ^X'€ÛèõF.qh˜Ã÷YŸòûl‚aª¨wÌÙN®Upͧ`­­ôâšM9±ä_JgÒbw!◠̼sÄÊTÒ”­DÀb‚É:Wq!]iÆ¡w»“`ÿ褩Œ'åÂ`àh^ TTI«nH‰el䶢½Í’òŽ÷à`ÝöÖ㘀ö×Ý '¨pˆß(=«Y1¹æö«‰ U¿Ñ¬¨/ìOúT -%`&ì{„(KÅ:.CìiÍK9¢¿xHÅ-vâEüÔ4x_5ˆå ÙºüŠšÄ,ôž§dâl5'bËbÝq,–Hö. O“°è8Åá NNn÷ZÄGòquͷDZÖM”¾ôÁgûéÔd/.oí~*Ľ–ẾÊ¿epø˵ -1î²}o€Kì(–ï{AÐsMã`?  žµË}|òœ¨^ÒÚ§(ûÚ1¿vŸMXüØAj!Vóƒk5=êÆ&›Y¡ìiÙwó†Ë|¨†vC¯‹PezãF±Þé`FƒâJO!À«þzÆ¡ÇãE.)8£Aœt`+—ÎoêÙê”®­ €ƒz`æ!|þ­a*®ÍdçU ).tIhq‘×þˆ*0 q±€¯>ç„NÇU Å3ÆÈà7Ä€9Ot¶[3|ð5r0ó¹pÆØþûØ8ß•W˜qDLŽ¼Ž{Üí`²“Èdw”|ׄ GJÿz/·œèH'¾Z$„‰Ù™ØMΤ8¤àŽÆZ̈Ž¤<7ß{¯LpÞÃ÷V)%áèkÊÜ3ƒ„ì™To¤üCn}4>·ZÿrÈ°{»rÁUZÿ9EªŸÖÔG®áu‘49Ú>à`oq¾Í/‚¤á~éxâ–S…|œf9׬?ÀXñ.uÈBxfGbEü.wW;û…™o,À ¥BLkkKµ{9fggü,lé|«&¼€ÒMÈa*£Ô$ö‰¿v#ܶ>ì7UìcéÍö«'·‡n¸NЙÎ1Œšç¦WÞÆ«þ¨ÙÿVÜ ©˜Nu+9ÿÚ47‹yG(ÑÂþßß ¨ ÂÀŠ"­zÊâoOÂ_”|¬cZþzB¤æƒ@·Z€Aݾùà@a£ex–åb?ç>"ΕÿÉYÀ…ïîœÖ‰éT¦gB´lÙ9UýFêJ¯?"[p] ÚÊpÎhŒØÁ ÎÀ’¨ñ†U8€ÏÕ“z¡çÞ¾L†?³€ú“ü:K‹;®õú¡Wÿ%ÌX”„õ$؇:bÉ9pã´F^dšBÎÖJR(6[-T*í4!œÑyY`'Mµý( ­¯íÊM«PA#Æf‘€T"fŒ"?e÷—þX§&’R¾ÄEÇë ]¨2L€ê†Ä6 ñ?h)8&Ì/Bafƒö–“!º0ˆµðt·M<Å‹X0z½1ß$6$·}ûW<ß1jåiâ76çý$¾4±e°´³ ç©Lå,vΡTuQèo¹:Ýò=ñ}µE´Ë¨(ãSÊËÍ0.“¿¯Ž1Þ) dû‹ƒ#óÊIÍ;»€P]½Éµñ¹R¿þ•ü蕺I,”"È?›“öãbBŸÏÎÈü×8|ÜÛGÏžÞÝï\;ºH -nÆác2ˆß¯xXΠ1!?:àá¹–åt#Ãà?%¹Ñ÷•g„ŠS¿e[zšŸeÍ…} Êßh)—úDì j-ú…„ÄìO„6võzu¤@bƒ ‹Å ix´cÆÝ`rÛ&rÀç IìêÚ¹_> 1SêSdôåY$î…`)¤Â­õÑÄ×:Šî¿•] øR]ÐFI®Tkxð›jgbžH“d.(: -Áâï«œ.ŒÐ—%¤pq¿ªô>¬>^ézx,Ú{^’Íc]r›â6o®·†µ¹õ¡ÈÞ¬tH“TÜCýÏ× •O;Ðqâ]8ù$y¸·BˆoLr €{=fÀ¶¬$' FøÅ}*õƒ%ù*‹úªútds¢›CNzc¶»š§û‚5G~Íظže ê2‹Ôd«Šã]bsz'¿È:åÏaWA´ª™>4+ -ý¦5uß=JTòú­“Hˆ8¨Yá·n¸ÐÏ{ü±-¼øúžtÿÒ}Püì”tÄ¢§Jx³•8×ÈÓRÛá.\{Ê`uÝÿ[ˆµ§ô%é¹3Áˆü…7Â\Õrn‚†NJµêþ‹Ïò™ý F­k¬² ³ò¾Oj¿—½®ßÚæ+´WeÕÐs©MÕV—‹)ªø˜–óÁàªJñYVñË›8´ìÀ1Áýd#JL1?M´È?"Å›b§ÌQÁï¾¹fc·ÐÓŸr´½¯ ar¼4דÕ@ùd¾ª—Á”çv¯5³꺜5eÄDfR+·c=Piÿs«ÛX‘8þšÔÐmm‹€šÛ;E­¯LêÄ]œH|­S“°±‡· #õÈ¿¿§ðøË´§åç°À»Æªlçõ²ù™ã È÷¨ò„S6ùŠæðe¤îÁ¿!3€BYTg˜Ú±}T¡Ta˜Îˆ‚ˆŸ«Ïf›ÔgŸ¦»ÔþðX¦_+t”1¡2)AÖ{ߨøïãcƒ×° ‘«¨zP*QË©—S×Råd*çlNíØbjɇ©w½u_ŧ0ß3¥_¿mÃ=/èßQñô³ufô⢠-ñ'I N¢»â?Éqjªñ™) -ÊŽëšÓ²+ž¶¡AÇÑÐr¢â»©6&HДC9Ömr|Ã7r²óGù%:‡òJrþ¶<Þ(«gG^ÂÄ´³~¯xPé^€KâÞÔÁàYúY…DË“"ø¸”$?f·7—Úšß}¹’XËäoèˆç3üœ'â+T…e|7õ~Ÿxø9F¾’ÖÊ“ •u -Ìiãh -9Nt’…9‚»uicæsÍäJ€w -cZ|¶Á$CÈ[ÑiŠ×eðƒ Ò"÷û²à¾Æ;3|A‘zFáY4öWØSHʘJ᧞é)~ÿ%œ§b[¿O?ÃñÅ;ŸRw‡ß¹ÇE­5£?"žWå|€;Õ®I6î”X'\dŠèKùôs}2Œ{QA)wJêtXàŽi3uŒR3T›q5‘b|~ëú’<é2ûÀ7bA}…Ÿ™¥LXÔO6çû/'`:ÏÔñ½´¿´‘¨ºñ8gŒI$õG,•ÿÌM`y}GꦜÊ?)c’ÜÞó–šÿ7áK !8BʽŽlÉ.Ü oõϵÕuˆv -K·„S?Í0Z<ª¼òM^žoEáÖï” §´"1é¦í\­eάI¶ SsÉBIT·ŒÕÔvòÙe赡 R»kvl$|­i÷ì0qpð½ -QxÑroãHúQˆ³™vzñç›ÅƒðeˆÙñ4`¨ÇÌe©b÷˜º?«V'¯axð Â*áÍ Ç¡»~‹MåCCô¢‰®”õ0Ò"ršs›ÎSC®æ=ÂÒyÊ û[ÔûÔ^þÕ”Iî1‡d#œJ9î&™I…#± ÏÅgŽß¶c*»B+¨­ÜFWnd†M}ÏRYu™bP·‚­všçc?_gMk5ZoÄoÝ䨟³ƒÒf †~ßíGÂ>Ôüصë͈,Ÿ ¬˜ãgkÉ]E8·?˜ŸØ`aˆN/`p§çÎâé22i¨útË (/[Ä@ã|;C¦ß]ar6ãÕXõ „W¸,¼C‹ŠOö‰Qì‰Y`¨ç`J_B¯àh§Ôqñé8 !;É.òÏÇ¿*Ä¿LM÷¬Çïç›b¿”¦MK—a{boK›IænÑU„o¸wÕgò3’ÁÕ|‘R¾q?ð«'ú©æSúÖoIÖV\ÄÛ]iÖTgÈ‹[_ßZµ_ìj†®£þ”5¤¯¨'N8wMˆ%+E:vŒ -ŠÊ"„¸Óªï©á“a¦x;ÏY Ž`³m ÷±ÎÆeòïï©"bsàiq>,ÄZnÊè›3æÂŒeÐÌ(¥±gÆØoû¦¼ =$ìRù·ÿŸµþܬú¯Ÿ'âJ:cjª3¦‚f2 -N’µ:3CC;OÊv"<ȳA?9=¿Ô‡a’ÓÈ{úúMË»Š¶ö&}Lænu¦¥4ÛŸV[Ìà+.¢_…bê¨$tö«1ê.¶}ÉÖÇÓÁcÑü¯{ä«<<›vì÷ܸßÌzÖô‡<ú Íñ–ÈУÝ9ÌrÞµ"œb‚t¶™Ê˜$yéЪ֡Vì ]W–ÂÖÒÔ>£Ýã0žõP¤B’·W*ZCÉÆ›ŠOžêS€ ë0³é€Õaº‚ÎÖÀºåS„±5Ε÷-}7‰‚ÔÆÙ-Á›*¸IC®{1ȹ†AŠ˜ßZųä®rO‘(G n˜6ã¼¢9iã5ßbDýN÷²'wL å,²j"•éWv³yMÎbfv›¹¤ù&,Õ†H®†ƒѶ¼G[‚f…íÄ&“©PÀx¸´&Iš™ÿë¤i=(Ë— èz:‚[} š$êú>ÖÑ]´¡çIlv®yPôÙüdŒÓ[‚tºzÑwä;Ñhc¥9–¯éX S8ì{‘ÕY¬J4ks¹ð'$r+›tšý‡æ7)„)ßm&‹LWÌQÔ ãL7“)­³gö€·†×ó‘Í‘¶".ˆÀ¼ÿf ˆE›ý*â -°MÊö‚:7¯õjm›˜ ª!µ'¦¿3¹xÄ<[r îä«ënÝ^™sºÉ:Ÿ^—M{Ã9E“Å·ÑÌ8ÑÝBãt<ÚW#ë³WsÛ 3’}Âæ~]ÏNAýÑx!¦ íf”ð%þÒ™ÄÇø°wÂÙ €ìˆëÃ˳,Ø¿QÖ㌴W싪ËŸ«D®t2ïó̉BŽ›°¸JD_æ£ 9b ʃ>,Šw©v­¯Ëà0ÍüV\åaµÙ4ŸT2G+¨óÿFä]ôÍ,Ùšå]©z ~aŒ1›æ›CÑÈãÓJU­Ús/ 'Ú«À±ÂìÅ“k‡¿[ÇM#I8ߦò’)¨´óq±U‹ë$ÔràCO>Å°½âŒtMÖý>úIóªŠV­Ì&¥õµ“€Mi`ªo k¾Öà ÊPìÅ^\ âm"¹eð¬]V¯D‘¶ÄÛ7uë\£»»~ò&øbÉìÀýOŽÄñ4˜ÃtÞ¡–KÛLôÔ¢”¸‡N\™-ÎvúaK’í¾D¹­~2W^€"á‰Ã¶¨ 8Y´JBX5Ó"ݵżo¬v›¹Föv$=MÁ ù˜}( ˆ7@L»&5ƒ|®L£.`Aù@‹{åšßhrÖõ£Y´i¸(ƒÀn$›we‡Bp{o‚ÙǔԽxM}TàÛ}Qž¿4ΑÈq’vÛBäfSDžét•Q…ë±¥t»y%שÊÙå &[ßÓ€£¾˜çIB¿|ÂBî8™ÞLMGÊU "ç<ÿ¾B­tbv@¢1~8·„[&Þç>6 ¢F¦´#˜‰_•Õñƒ¹>,òomðq™À;¥Fo£ï½w圠¼ŠGÐÇ€G+WÐ¥á©øQ¿®„‹wÕ]é6ÆÀ®:l¹‡áÀD-K¿ ¨!”c‘z¾íÚsVriÔ)¢˜tå8lÆÑQ€ÒC·+>p—o"Ù•ÎaÛîwÁCN“n“>¾nÊ&ZÛ*ò~µá¬ Bb”„0mòUsQ_#25}!ò¬PršS­Uj®ISDòÜ®ià•edð÷;#ëà…ñÅ>¬PîUþ^2w7©y…vÉÏŒ›±!nÚ{Éa•|¦˃{ºŠöÁ/9L0’€P+%[sÖÖ­ #ƒÝSôÑ_!’—`%J/v8G˜¿X·v@ß;#b!O®­ßs~¿ªa45LÙ×9߈6ÎûuŸÿ»Ø&ÆîÒe˜h_GrèópÉdv´œçöÃÎ<‡™¯„ÿ¢J_?žëqÙ. z¾‡ý„™dyoŽ»¢(áw/EUÁ¼€ýäÊý‰**î-|Ç ýXL$ž—†íÔƒµÀõÔ»ÉÌ`z -†Å#ÃnåÔ y•JüÄÿKÀt7Š7Ó«ŒÂ6bk^1bÝç -Øy:šM_£Á­ìvÿ×BènÂÖÄð¸QVECÉ:‘÷u‚þ®ÙÕ®$PO.’%S¥î™Ô Õè¨kéÜì€ÀE&Hõ=´Œ<Æ­ª¶4i Ú­07L'죉úp;™í;IàªXbÁ[¿-DÞ£CFi¤ÂµÑX°”lhÓli÷,¹ÂãŒæ¼¹öƒº®-†¯S¢£CeÒæ`¦˜EÁ‚ûï…I`ÊÝqv‰˜å¸ÿRMáSÌ@Ñô+¨Ë'Ìí÷>ÿQäe—ÆòYh…™@ —w¡Çuæ¢J@Ù]ÐT¶ƒç„J¼ -WÊ€«N’qžáQiS}Q”§ÜrÖýµ¶2yÁ¨]X’aXÐNý™.ç‘8Õ¹ýÔË×Ãxäpw‹xñ鸵º…ÛÓ4‘ÛûªjO¶Y‡æ´÷žÅ Ô«DùŸwFÔ>¥ß¬ì¾¥Xó„Ü'±4µÕ¯Ö›B‘‹2®’‡ _n€¿™ŽŽÕ#¦.ç¶ÝEœA{Sÿ›…ð*µsßcÜS¯ -ˆ\mãY.n² ±š\B2‚áúûE†¯ðOð<´_½˜ø”&Ó6Š ÇCöN&Y‘Ï2·1h I¾’Ü-Ĭ±ÝDœõ§œ¸&q(ã7q{é?º:9jà„ÝA‡Ná+?>í~Âx­¾rÞlt(+—–ƒÅšÐ ”ˆ3ZÌ2 ÖJ¶ÍÆïv8ÌHÔšx‚ÔQš>w$ô“<èZn:¤Cb¡•à„ËȨx]ªøg¢¢EϸÁ_ןž©Ím„Ä(célšø0/¸µÜü*•âMžÐ|‚PF–ñ¨bK—K•1O¬–Õ.åµbyÔmñ2&Ö.@ µ_S»¨0-hB#Ã+½ýÛ¡f2œ™°=žœ$É‹¢Þ–KŸ_Æb`,¸Y–[å„©´l„(hPú\|ë] -ˆöÄÀ*øcN¤À3kéìÓžÇrzþ-¤µ¾\ÿWÜöh« Xa:-i~O#kyö®Gд•¨ -JØÖ@;hÉÅ-‚ /ý‹F!?*Eô~öh2zû3 -µ…*Oô3£a½Dôu°Y[ò‘ns± ì]CÝ)uVŒJI6U5º0í[ц§îډϙñSÙ}›oÁxv¯²I‘lñG)Í«Ý¥WÆÓúz=UXÅíŠYÎk¹×_žÿ’6]ôäQò^bw8‰SŽægý!r+ól~ÌÆg«ß®¢…šŠºa\æ”.ڂˬÍs °¡Ô;3øzN¥‹oöý»PªJõʤÁRãçooªä -æ©õ ÷BeÅŒÔCD ŠïI2ñÜ·­£$ꤢþt#ÿCë¤W“¦õ1ÈÝ÷L 3ÝñƒÂa\éÀÛ•(çC²CÌÅþÏ@ -Hõ¾±¢ŠìÜüŒ©˜¨jlZ%Ú$è·*G¾Li#*‡ƒ5 -xÀ*‚z´~ šrà·Ð9ËëCˆt}'h–g½*æ6 ¹LÑì§ÿÑWM[”çXôüŒRÿ]MŸC‹–ÿ¡?È+kÿE!ÈK±µ,6T£ùùí ð姓j°a-C¥ç=‰òémô!Nz™û,ÿßYQÙS½’ËŒ’'ûènF4M1Ä…ÜøC„—sã¿U2jUxüE-v/Ô}%Â3A†ìÃ-ÿYd,0N ^(®B·P»SiÜÌ9" ä@çÔCúE͵íÎŽ:Óûß‹¡üìo|Ø‚®ŒÍ sóªlR-ð¸ùÀç~5ñÕ»ÁVåiQÚ¶”ÇxÔP«ú úÿ9Æ@/ CáNöóZÐfh!­›QëvK+Zœ–éxSV%ÍyßIæpûô$À{XØÄÿtûÛ¾Z¢Ò*Zâ×ɸÌ0¦Àžü9B«+ù¦œahÁ×ót9c)ÓÉWž‰¦œz§²þÃ/¦Å^i€lYˆo»gÅkæÅ(oÄQœ”4ÈVã~2>e°S¢ê¹=Ÿ°àô®ójHò'Q±í¾é‡ðßêà™ß~ýNÚÃuÏŒ¶ÿÌèü‚øQ¿Q+ÿ¦žÅXÒ H ÇÖàL7¦æ¿oú>Ã}¶'’ †¹Ÿô¶>.¾~çQº1+Å%ïcÏ»uÝ öݽ‹­WíQ5C³Èf?Jk!7Ø#ûå“÷—'èþÿ‡®NÊÓ\Nɹo­4,¦·rè©Ë®uñ>\Ê‹«EN"ñÈ€´2„Сëûäöä¸6IÖÛFà!YÞO#ÿæ/Ì’ ôÏ2wƒ0e œîÙ°ˆÓ -pE4S¨àg¥GT·¬œ"`šýshÅ´š¥bש¿/“0;m‚ˆTtѶ¶÷õPDA;Ú PÙ'<·)U”aŒ”˜w Ðf]XÈsUßSÂ&ݦ„]×gÝ 5{¦!¥òoÊã÷mÐô¼ÕT+tgì„ÉÄÊO®<¦¼=œ{¯ÁÝ=ù»1Qµp­zú/ä=ï>•»±»J2¹/Äٳ [(·¹Öôx/ m¦D:¹¿Ûá_EèZl‹“í.ṁ×ÀòõÁæTÏ+`*õAAšI‡&^yŽÞÜlCñ„gl1#;~0-bFI\ºÃ%¹Oî³Ir{¨Ê­qe/ýn›æZVDIœ2ÝÒÀGml$а Òà“}pl2p#cu„^æëSíK”dø=ã®&æ÷wðçíÞ€ÔÔ6_<ËÏ3m‹¾«š[ûû¢ß!þÒá`AÒA¤òØ@Ð÷DMÝ£¶iVj³©j=PMи½N/â#/Aã?ûÊë¨ ý͘Õ[^zM¶è¯ ·Å¼0n -Ÿ:ùÞ)ÓÉí+c;Æó¨f×N|G·º¢Ó‘èQØÏÞœ×Q3!e.«Ž#¢7%ˆàä:k(½më÷_@W ”:Zºîò˜R˜C¶¨v|h[ -[åó->#´íjá×md}\.âY²Ó„60“k´¬ÑfÈ%^h7÷ú?>Ãda÷XwL€÷¸‹ër$ÒãÄÓè¤w3ã]üú#2E2¡«hùñŇ–R) ñšú–V±%‡Êdz§"š6rGzšwv€WŒåJñˆ¡ú/7V° Hšíb1óõ{ãñ~¨™Èw¡÷õ·¾‹òÎj—µ‡ÉšæÝÌ$×ÛÊtèÆ@õÒª_‰3WÙ§u*yÙ3›ÑÉ@ž-9•jà‘u„§S>É$›ª§ƒPf ªÙsê>­ë5Ÿ¾g¼Q¿vª·´W\aaéK™üâyL,ãò‡É]f‹£6®>â’×íQj%[êýOê§Õòq©â\ÛÞ"ðyID»B‡fjªgÇ-¦¨(Y&tÖ³÷¯C‹€Üí’`Šušw­ð°&±§Gjl¢4ǧ٘7W”vN5NnmRänZ[QRä:¦ó»8ÛØ;õÐrS+üMàV\B2 &»JïàíÐM¬¾¶Ð=­÷¡R½^aùm!Þ­™@Ð3ÌSÛÑ¡ÜŸ9Íî`<&³FÑs‡h˜¶¥pREãVWÜÊøQÇ÷Ží­æ]± ÿÓŃÆp½H›H¿ç½14H×þ²•y¡Ãeœç/¥}z¥` Ìé÷)•èo ÜV$r§|Wt¾´ä‚ HÃÃyá«:ì†b>]ô1–#W/ŠµŒs‘•ü-ÿ~*Ÿ&¾YèÄêKÿc º¡Î¢U–˜¶”Œï^¶áÁûÀøèEBz5‹8Wð°5lçl‰T=.Hà«úÓ”òûhèqw“û•yÙzœ1îY»*²ÍpŠWù«PøÙ,¹^VJð¬Ù·hÉ{_ ?@‡ªšùÈ™N!ƒü•4—ü“žá.3ÇéXr%'9*¨û‘‚žAÉ"Jß²@].˜oXc I ï1úÅHè¶ý?õóY·öÑ,H9¡ÚYb/ Á²½l£åéƒRÊúy´çcЧ#¹îÑ$eŽe«1;]âkè²Í›™™ñý§W9URSß+@s -UÅ{d™úæh—P¿nü;àö·uñ1Dò„ìÕ*þ+˜¬‰lD¶¥³œ¹7ä[($íש¦j—?„ä¤*(àvñ÷hoÂÇÑÞ¢[Ò†®Ÿ¾/¹€­°aQÿ1U—l®³*;B$NøxQV§È*JÌˈ„®cæ“íËÏxF0²JÜ{ÑOz±²tí×תàñGØÒüºò|éd¸åX̹8*fÆE”üLþ¡=A91ãéÛüÌÜ[æ‹_óµ®ÎUõî«#˜JAû@aKgì¬}•ˆ'ZEóóu·C¼XÁÃ×®Â% ßô•áI–8pdM -âã÷9{ÄÒ{jpAû¯½o.Çx]ùü“|±$ýx},-tJ¡| ±Èô|ýÚ—åé¿Sµ0ûKý­°“¤¡§GUêY"™?DdøurO u€»O@¼ú$>Ñ·]Ú\H>{*X`À"gˬÀDw¦÷ÞµV€zÌÑë€ZˆçÔKqHì‘ž&À\Ö -”º}<†ñ{ì‡Yãp1‹³ÔZ4Þ–JÇÓÕ…UwÞ.ótž—0Š0sÃÇ޴uŒá$Ñcn×-uà£CQh@xˆ™â`ýÂßrÆeÊâ“H‡P؉Þñìñ1‘¶åa9ÎòÆQuq]Äé˜<°,íBò>z–PÒ €SuQŠÓ'n/3LÙV-&'o–†ºl va¼y\ä¾}É8Ñ,¿¤…ü2BÆUSÒ» ÐÙ8n>û1H”a°½Wê¥}$ý/$,èáú%žÞ´¡fÍë*¿‡s·Š©Î9¥TXJ[ž\¯ô¾â +ûøU¯¾©®í&„¬a‚Ök3i!$¿pÈ*‘ÏgñìèL‚„ ­ä—Öð/þpwìòœŽØ¯@Vó 3…=,TåÚºámc´÷ÛðÞ‹ÙÞNÖß.Ÿ4øŽ$ŽVV¥—7¬´6¢÷nø3î:þS$Õëò‡f/ -Á3Y“&0h8ñà2…æN©—xj®°ËcëSþ$G‰¤ÓcÒªY‡.yÆiG€C¯¸?–ÞÍ^¥”bþÀ^¶ñvsx| Û>­tÄï·õÛi1ñb¿„×^ÓÖÂ>ïù¡<«Ý¢l¬&‹Ûå8@é¿Éž\¹]á•ãÞ±¾húf}™Š%DHË E”AP¥•Jä•÷ÓéبÍû©™€Éƒ›Õ8÷tß²üÆÊQ =¨HEc°¦ΡкŸ(ɇÐT`ËðÖP2šÉÿ¥Ã8Iñÿ^.{¥WhõmKX%B´¾U©Ôä¸wÀ¥Á¢oá/ˆmòÖz{·#?)ÌV  s+Ò7yÁ -gh$àKq|¤ÂÙ¤c¦;nB\°Utû­âUÑI&¹¼š‹¯f}^`„°1Çšœç#ÌÑ~cA®,Ò0pW¸g1Ó=D¹C2;+o'}¢g9^uÀ*à•Ó×€‡ümµJ®ÐK‘Ýz;ËJÊd–­*:=>éjE‹À§ã0ˆ^¬sÉÂDëH—»š²DG2̆ÿô¹x‘ÚõâJ#O:»åé'J28ˠǺÝXÚI¾ té[1ÆÛ?WÇö‹<>/ýi¢Œ…$ÐÆà aäÄsÇc-Š8–ÑâúÊæ‡&jlR~>ün@ŒíízB÷ú9‚ÚÃaÛ÷R¢¼Ðp¾ïXT ^ÜÔJp4©Ø,¿9ç5Þ[~ʦÀä.·3gÊú‡_°²~`þ1ÒNq”5» -åe·ïC;#gŸ+¢†’ é”Ô<ÞóIK¤‘º19ˆÖÿZH¯ý:2Þ˘>DRrÄîaV òð{àÃL®*茌ȱ‰,rÄ °µpÀòX_ÞX1<ù’¯lå]ZZ µ¤ígt°oXgZ:òzk£÷8ì ŸÂ5šGƒñm±Ÿ^_Ú5F!Úð…X \OJCNU¿ÝÞöU”‹Þ艊âHbC›¸½;ï.ø X„:†V^ñ°a0ŠfÿY¯õ1Oš@N3%=FU`»¦.žÔ¶Âªz€Ï5¼À¸û©wZäyävbÈ#EÙ2k+gd]{èyüಠ-&‘»;IGº{ÜÞ‹ÇTbÂWþC›6QrÓ‰Åÿî1fZË ÷ -ÏQJ¡ü¼Û**û~]´û­ǶÂk6tõO‹+U›þÅлr3l92èéDòÈåWöl…£‹Uh5ˆçÖGÑ™iÆdÄihËzœÞ¥°‹¶Ãºû˜ÞÝE‘ôH÷‡AL\Þ[ñ/ küp᧼Œ¿Kc”úîðÑB ”>S²vvÔŠÌÕßìùšœ 9\£ ´`Ô-–Nú¯‹LéùdÌZOïTs†*£Jg'r31 8++g³,@döïr‹÷3ïmzRVô¯ãÚ6 -E‘‹N¥»Ò«”ƒsŸ53ñ>„çë.ÇFª!Ž!`h:þ›fbEêãü¢ በ-ÝI_+qÖ§9)jzuYkéëË­¤ùO+3¢N~;›_¿Ÿ/CÿT; "Y&'cAŽšð¹ -3üù>ü1=ê¿°€²á0Ñ¿ê|mÞ¯³ gY"æTjÍõΫÚ}YÉ0ÁyÆö=²|ää~Q(š ÝëŒTî<þÅôœ± ¢7ߌ.°ÿR†ÝÀo³ƒ>`F«áy©çœ?—dŽ·y=íñ‹ò -ðÉ%Ñ“/Ùš3îN9šKU[áôÞ¡¡}ߢŸ -ƒ…* zÆfÝ| ­4þ«µäÒœjšˆ_¥ðîîù´«B—¼¦ còÌ_&ÂDÛûLÛ,Äå†îœ@´„Zþ˜Ì—ú wg0?öºêy«‰]·ÑÒ×fêMº -ˆ'y<úb"{4pИ¸w÷ÙPè©-¤ýlû¢¨ÒÓJŸó›­øÆ›Åìg)#KÏöˆ/Ìõú€ÏïîPÖ]Åg×`Ø#)hËóÚ3þÙu4Ïí¢$y'`¥·+w´H°ç[(µU åñÐ$j½WÆÒ:hØáçìC`—„ƒ×}%•IˆB½‡úî •úßXÝò ¡ã)vX~Àûgÿ¥œI8/Ô©œ¥T¸Íëœ{IêÂð;’OMÌnÏ…6”{ÄAUv¶ó°$jGª`þQ3œR¸/ô§mi­—Ï%ÉTCS_×½~è‰Ï ÑBSʜƓz)©eBÙûvX— H}ºV_yE0­¡ Ý3wl>êjNN[QËÀ›ˆå¹VHåjLù–€ß4Á“ZuQæae8Mƒéiâ>Pÿ„D„MX —’~®Òâ¼ôv4ÐXAß Ç… -U<|>tU¼”­ó ²Òò®Ö³?SNˆ™½‚Žúl´=ÐõQœ»¯ùá~dMÑļãmhÙ˜dŠªœ_'u»x”Y¨b %ih»ø©¬î’Áõ˜5‹®V‰16^Í¿Ý#‹ƒCú½IžŸ¨ÜÊV%K>§PL¢TR¢éÐxUkݶ¦ÑÁ}\Ï7=+/ÙÛ£ŒÂC¨Áº¦ðJ”Vdà<Îÿu³™GÎÙ~^NPóÎ…Ñ`4ð•«9³öÝîð/Y“T~ÆgEAÏÔ5}º%¢üµˆ9XqèL¶îül¦Ì'Kk¶xCºà¬6BRšx‡ -Þ—?4 \PlL‘³Âü” V—ö,@JÖv]0¡ðì„RÎèœéßvz١ﬠS§8÷'èS%ó-g¤‹jfqÀœ‚$UÁÖ…5 –´DÈ­|5»ñÖ­nÏâ¢ÉHznÕæi;i©­®ó ›ˆê¬Â×Añ‡ª¾ÞÊWœ«¶+Jó:ô†0Æ„A£U9p¤öl¢‹#Ô4vv]1jâ8z»;’Ð~Ÿ‚ ¤̹¦Nóœ“2ôŠßg÷$)™¾Db¾|3$9ÔsÇ‚`¶ZaG®îh¤B3ˆû[Ã?н¨^¸£@\²[7)Ü@”€ÛR$Ö[¿bˆ]Nnc,ÇÎOH¬ñ#‡,ªqZØ°YP7=:cèÒ4ÉnݤõLÔ3xU_™Ô?ü=VJÿ©Í¦&é±Öø²ÎØ$÷Nr\K·øðÓ8?(”ª›'ôˆ¯ò»æÏÊ¥ÃJ¾®ÙFê'ŽæÀ8µ§Wåo@÷¾Æ¾î—™s›^p¾t÷!ÎÖ°œj°x¸í·ÖéÂœ.eÂZÍÎ.]Ã' ÍWvØED  -–Ê|cƒ±§‹‘^¾^iêÛŠ8lÈ⯷Ksú6{7Ñáéã´ð$€œ…Œ¾6D¸"‚NªK -¸2’ @GsŒ/´ª,óµBéOÏ_•—ëù,³•§ë2;îÚ~”Gêê°#ÆR““×ßÕÖ—=}gƒ¡!Äê™ØµKééï=¦f¬®ôïh–nëÄôýp˜Xô_Äà87»ª³%úd?¼Fü¬«*šîˆ|«Vv£8SGZêeª¯5MΠa$úd!»BúTJïZõ58wÔ¥Êâ6ê>©5XjG3Í Š®bù€~=¼F¾Rik0ÒúÁÑþHa¿ð½C[¸x0*yè;gáŹF)²2Å´-‡Ámr•u%öÀr›»ôÙ²«A4`"­}w;Q -Òj7e²ØáµÆ»ÈãKr ;Z‹i¬ÙøYp€F=Pðg)³K ã|K¤J¿š%Ï4*…^b̈‰ ŸEd‡·fæÅŸ¿ýàZ ‘6&±!Ñ¢7À~ZuŒÎwxЃWuñcØe¯å¨¤o=¸Ï<…e̸¼ˆÓS CXØ¿ò)/)<Âm›'õ[3ýT%߇ž3Yú÷Š¿èV€’}yóÿ=Ò}#YvÞN›°ElCc»¿-Ÿ²^¥7m#ƃi‡q?Ö2Øþ» Ø,™/ÁÓç1xHcuà·$éEy±0ô4âéj3xÃbZ{Ÿrï‡O]èœ÷Ô­YEÝg¦]éZ3yq7{µEüñÔÌ)cçDLnóZ/Žè›Tà¾û˜a4.IÖÍÙ«ŒòäÔ³nZä*/6ìÜM ð f¶â& ºŒùïnŒ²,*ü*ìÛÈOŸ¡“|[…¶·/î2ý÷|äÜ §Þ_ ?d.†×m½â/ÜчÅ!И¾®"ºP¨F×'/ØûBÇ ! #MËœ8Ùv\ÁŸ¾a ‘¹ß •¬8ê>]bE0ò¿ò,$«·:—©ÁîÛ¥(c™•ß"¨GU¹fí9©CéUÛpBø^ )]tdùÞÖ¬îFHñ£TrÐ4§Vc'"ZºPÆÝ"Éá¹ãX»­†n¾dïÿœæŠn»æ0ð%Du‰ó‰šZdñ4Á"”©™¢úE`«ú¨P²cãÈ ²~þó¡F1%…´úÖ·‡áâÑZ/ wr,uk¤iøÊë2ÃÌCÇmA¸d²^aÔ ‡¡öRÚÿ¿[:@nˆRêSZÕx•D©ŒPá‹vÅànRÕr -‘²‚îWfâÚwÊHC‹×µ>[âBÉô.¶ßµw0?où¨CË‘àÔžELB8òS?žFDtsÚ¿éTnæ+×½éEe¶¤‹ÊÎôJÆC.9œâÐHØ`~¸à$îÊØ;`¡¼|‘-G,é'湿zÐhGoMå´¡á‚ù= yˆ°©·þ˧ÖsÑ;¶VÏ9Ý¡P±íæIªo„Yü8ú}† 1o(EùØÉÁœÇ' -ã5hIõ]3 Ã^å'•qLÚ›ºõÐa‹  ÕwcñÃ}$ˆ7ÿ@.wEžöË¢Qëu—¥Ü ®Õ‹µ¨§Dd~ÆÙ8Ó§ñwmöÀu4åáwè­!ö,Õ¶sJ bÿnV˜Ž_ÊouÿQ>gÝ»bÀä!õ¥ƒ?µÍà¥äÔœáoâ¼~]´êÅl•%Ôd! Ø~iÕºø( Ž>¾m þNF3.­gû2 ’»@Å ä³Ió½º:ø´Á»DzN_êðTš`X©^^ôëág¬Mc›rƒp—•F»|ù„7¢²ßsÂT¢|Óÿ±å ©zHƪ Â1ăá¯00¢¹„qCÞ£·#S–˜åÆ@•ã‰ô`¬êí-¾è‰ú«3ÕŸÄñš¿Á…î°G1?ùßpýÒ+Xý•ö Ézfñi\ä%Æ#[Íl+udzä$ŠkþS)èý-”fÛ©|øˆ] OÔTÙÓJîNd̲¬ÊèØï*Ë<Ý‚Œ*`v×7«zá¡ÁŒŒùéY šuøؤõ?ÂÄò×%·š:$vÐØ×8KòPdµ W>úÔkT‚{ïYC³È= -§†,„eÄƺ.7š0q{sÅÑSKy„“e½WÜ Ç­²1Ó|Q0^—µf=Kúè©÷@=À}&l™/|óôô¶· Qfs5»FÌò8ó<ôÉyì¿ë{2xrÝ7tP´¯É^gžîúl¥d.…AÜ3xé# ]›Ù9ÒMÁƒyÞdŽÙ$sšØå7|Ⱦòtדּ逼:³(3{GÖŠ«’”±ätTäÁ†x¸1kæ;ÉX×Rà N%Ü’|z²: àDßgèô0ã=!…̲ÊŠ¾€5þ_¶€_:4‚|è*T=öþjyr2û›ÒIûÒÃeú%F².Ž÷‘OÞŸ§Što]UrÖÒT?6ö‹Ò>3ʦ•Ì/TCX¡U½+òÄ ú0tÜ’qÎÃ.Ö ÌgnÑIJiNd?sΔA±Í’À`« ¶"/åYa'e^Й%(æ„TZBø›X®×ÍÜ$m*5p°ÑBëNÛ·¤GÚL™f¶F •cÀ™Ø¬Ê9çÙܽ¾Æ/ÜüÞã¹ì8þoŸ´WO°ï¯zkxÇ!xAÓ‡¯o;kPþÆ7_ŒA;ä°% ä1:™º,ó÷¥¿›cz Çò±‡¿” -¾¦ad!¼ôoƒEZ]9z㹄öƒ‡”(âf½Âï’KG±Ž•ŽQ0¶ tIÅv`®60_÷Ä‹8•Ùxã1PÐ÷‹¤wq¾61%Ø÷ö.§}/idåYvK?Þ‹@_c´¹ÓNÑ/®áW§š^îT2Ó¥œÝÅçJú&zÏô2¸ÿ›IÂúè@jNG×±)à:Ž[Þ…ðM¾½suïT…öþ»ƒªëØFRË”Þßæ'–-VŒèK^¾ä×*6Ä¿7aTb­¹Þ¦™”tjýÃ…¹þWV\Ò8vœž¢ÇNÌ[9 -…õ°u•R¼âèWjcÞp«›0¿0 Xq-Ñß•¢yVPRŽñC7XIt†‡“EªõZ£«‰fν˜TFH_Q5YØ°NdâfçèOSPßibØå]Z”€¿¢j¸€‘L‚%èJM‰áïØD³X‖ÜÚ–Æò!ã;î‹r)#sG§ÚwŸ6äÞ⹑ۯº -Õc6Eó‡@›h‡N -&¯cü1ÓWNûêHÞèZìÏŽ,Í9kßO“ê(/c/o6ÍaÕPžßewcW­²•Íˆîù8ÌzâÁ -Q%“? -ËŽN¹h¬|ÇóWò+@Ü4–yŽקËJ,n -ê.¥§FùíÉyÓrÖ›Øt:£iµ°IÁ›£uLÞÇ*­nüOC|e!êU0Âñ@ö7ÈZx§îl«¤Ö¢kÚÈäh†•¶›˜!-´'¢`RcAl•sCmía -endstream -endobj -2221 0 obj -<< -/Length1 1494 -/Length2 2555 +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿Z+—¢TXêÀ{˜¸ñ«ÉÔ‰ßZ4 +œ3<¥¯@žRî¥bš¨[‹ž£s¢Šilk˜Ð`LØ®_/É{ªS”µ&ÚQµR`øSZC^鶃?í\j!m,©Á€jêk3\Ð<‡Ë5](¯R¿•_4šÉÂQŸ¿†j|À«64Cÿ¤ƒŸ¯AÆt7Ò1[ CŒ-#‡Î×€°È—(¿PüÔNžƒú³GF¸©a|‚säpa¶³¨m6&bê» ß4NaŸaéמY÷ñ.Í!Õ +ª>\•¿rénóÜÙ±M0/¬ÞT*fáv×XòA္í Ý-wúÙrÒËOœ± +BÊæg(JCÅèIøÛÄeP•ð9Ûçúæz»‘ÖÿêÿgþVƒ¬"ÒÊzow$ïá¼È1¨¡Ç=ÄVèXqmû¤°­XNþ…¦kžMv!:´ÐäP3Œw¦€V?¤Zõd dCBß»ø1sŠ ¨VžÃA‹µ¥rÛXUÉ«l݈“œ>©ÐÚáç>è;¥…‡íÞFý4ǤàõΪÉÔMYŒ©]±ºôW‚ &R`92.:ªÄBàBä:ÏÇ? Ò‰ªs%D7ùΚð’Õ©vã(Oá+ÝÛÊ}šÓ8Ÿ©<Ð:ŽÖUy±ky€ +ºïeýJÍ&^VÂ-ó/gvDgPgç Õ²,ÍA·¬*-äˆ7püS˜¦+zZxU×)\<äQ˜AÆÙ6Z›*G@†„áó ç²n‰Z·' pÓEøŠT¶ Úô”pܹÍÅ9´Hò]Ycð}’vÃefP×m›£m¦˜õ¶ ¾q|?TC[OÝzÏ¡;^3¦Øщ!'ÔXÒ³S‹|cª"¢2ÑóÛÇ¢ +ó|!èf±´Lý¦8QR«Ž•j¿’Ï'òÌj([ûI’Š¡¯ŒÿMÃæ¤TY[Òèyg˜½Ÿ±gÈGq—yÞ 6¸_Pa²Ì™MAïêªZ*Aãú¾$ÎIòs-{¥ÍŠ‘ÀhI6ž—;‡tf€Mj©€A{7bfÅ…¶ó±{#ÂkByÃ{Šiˆih>¯ÈóA»DŸf}Ù…¶Ûe_dÙ#Þ=Ÿ"ƒRõþ£X¿LØv¡Lëë3ÑBòæjªŽ@ûÏÉâ=$edèKpëÔÈèwwFü b|3 e™òà\`æRït?!ËðdøkbÖ;À^m;·p¯›Zt~¿ –ÈUhçm4­]M1OÌS‰„íÇ ›n$£âzIÎŒé“ÿ5 ô 6Ȥ—šˆZ +J¢oø4ŽÆ!‰´"—†ŠÒĈkâ:¡ƒ=*RÁŸ†•©~z¸ ú˜Ù·ÐÛýéBÊp`³¶,€6ñ`CÜÆ»·¦ç®øËÇÿ›jÓ¼œ€â˜ød’³Aƒýãw¨è¿‚í¢/Ê'NŬWÀ#—¶?¢"b€º-DHÔ­K‰¶}ôÒÊVš^þ…âM”tàOÒÂe/ ž y_Šêòû4ïs z\¢ù×ÌgÏðPníƒî@ÍÝww˜yý7u ãçQ‡ÎÓÚ•6è W¦ð.ßCÅë *šIŠ;²…%E–E§Þ|±oš$AÑ¡6¬ÑÉx + ò_©iš”hA%£„ø¿fóGîm¹TaY\™8X!f=;ìbÊÓÀ'’ËD¢ %ŽÖq”«éÛ-yÜ<¼þ4šígþ¶Y>ì¼™q“¾À·•Zsr뇙ž”»Rø{MWíJ¶@®t¸Ù$䎎ê{«~ùÒ>Ä.KsM\Z §pæ5™B’_à„ã:Yg‚`ÌŒ ’ÎùÒ«qă.S+F – B¼¶³… ©,דÀcYhwRÙ>·ŒO,˜“+ŸfúûK}Ä|Р±4*1:½Ïç/´ +¯ôÔ¦sºI,ìPá>¤ ž¨+‚ðÀ¸p1#û!  º'¹ 4ÐwäîíPÁ9Ë1HGPA,;²€âÿûGºûù' Å N\T4×21Šç‘íÇGò«9£þf&ÑK/‘}ß#XF»ôù¦ðDÌÄ–«qÌD߬ïIA¸åYŽý”%A[OuDT¬¿DÜô–hÛ°dL„·æ•|*SSv\x›æé„î±tù-ÊP<ÿüåÌðA (w·`P°wJbÞ?o¯Ì·<T§®C#oèÂIäÁm¤?ÌÖ§µ"qöŒnáDUÿ¼WXÿ ‘» ¶âBÍ벑‹“é¹Z™ÇÄ÷óL°ºªù:ÙC[S"~ÐVïæˆ3*5÷Ž˜#«TŒ+îÏù äÿVVqÏ®¬Ä>c ¶×•Ð™•Ú2h©ƒ Ÿ›¨©¶Ôk‘cþµ¬KÄ@›š—Ì«¥ÇaHˆ0®Çì:핤>ðÉÃ’½ +RS”.+¸szàÌ%^ëdsOÓînË?‹YSy šê¶n÷ZÍ!_¡^)LB†[ÏT“PË£ºì¿xþÌtÚÐßïD;9\”Äwè{d\—z(f„D>¦ýlS²ÏïW¤ ¨à­¡å"©Ÿº™”+Ølä´oõÆ›«Ê|‘MD}ºæD¬Z›ö +xÍš;nâæg§žK4"ƒÇ3=Ï÷©ˆnóížáÓ´¤~¦ãfׄ§z‚6²û?«&«»¦¾ÝƒZü,XB%ZmËÜ•»2䨉 ss[¸`†:OˆSÔÎOb_WAÚ7âš•„ìv´kïÃÉ€ù¶…ªØw<™Ü@Å;°ð¦Œ|IÁ(Pd‚!·íŠ*V?hÁÂðo0¯¥Wy¦€.&-ðúÀíÜqú~G2—ž&KmàÎòÿS€€hÝÑÁJ~ܵ„ýxMœßÿ‚m£—ÇÐßmˆ­Äÿi|\Êí'] A׿ц؊¸}¾Ÿ)® Ÿ¹œ$2Ù`%ä–¬úTŸ  QÔ¾¶ Ÿµó‰h‡ðÛ ” Õá +_çNdŸP!É¥*„GžôìÎ9µfñá—4y¼Ê×+çø¨ý3Nb¦ª¨|K‹ 1z™hxºy„¼ðV©Ó‹:ï±Y,P¦ãÖ#„ùìÁg›eÁ׳¦ 'éû‹ 6³³E¯6) +—;ÚusWÕ`v'Ü“©oaÓVM Dé¸ßÚ‚ÓxL‚Ö…‡ÿá­ž³€ÅÂïæȃ†á„fÆ ê ÃÄÝs'3h«‰ jýäXòŒˆbK|ÑþÌäФò5ÙbV8ˆ¿¦·¿ùêµæè^{Ç5¾²ñ{.çz%qù;LÙëX<uZ1AHƒÂœhÃ&5ƒ\¤¨ŒÉà]{$úÜ+-2d`&º9.êð•Ï¾Ë ÿ/f=B’ »UmÕºJŽCεĽ¥ “ô“õRx‹E%¥ oMs®‚ª›HMo‹fÑPYü¢k½zŽPAÿzæ}¿Ô­}|EtððQW]=1C/]L¼1 ̶ŸPbn«ªé?UªAÙ‡ nÒŽ&Gm×úlI,ĨûS›}Û„A7«5èbÎË—aÔý5ÇaR¬2DÓ¼¡ÉÑP྄á”CXz8ÀRº ÝlT£jI§}©!'83Jýÿ• +í1æ]€H³ƒì$ +â×léĈz`DÞy÷;Àð¼¢mWÛ/ZP³ØâÏÿïf_ŽÉÅw†m’‡š¯/ðSÁ7Ÿ@?œ*É™êõѧƒ‡Ã,#‚ [öq j°—íå¾›—oÊÿL½†1@rW.•Scj‘5/o8cÄ(k/”ÜݶPÂ÷ƒYMæ9÷‡Àc†féE‹*¤`È»™²6ž’2íÏ¥* +VÙckb8x»¡¿ fª €R”ZÄ1ïÃáA' .ÃͳÈ~™-æÞKÉO£\Ái¯Öœ„¡é[;¤î.Õ@ÎÔÆ⯂«¿Yçටû¤÷âË'Užœ7.u¯äêùÍWY uñÕ?þÜKWwÎþ“ª¾kÄž°aô* ÙÕ›pÏ e`öG°~iJò Ÿž|n_GüÎœ>sÜän‚¿¬5iY¸2ÿ9c$ôÖdL.É pµs1³}yll×'c&ÝWLU +£†L­/äÀ9÷A·ÔücðeòxÛ!t’H݉Œ càÖµpeW¸1“J¾m=Ì6·b™PežPµW[.¸~þ ðƒ0†ØÕŸ`ëå€&ý§€– l ¸yÔ\åIs5Å=ö°Š(J)£r„J¹—U…/¼n›!4-æµãã„g¢©5 Òo.¸©_KŽÉØÐÙh.­3⦞ðYuLÌûíuÁ5}N£Þo¼Ðüõ;L@{ü=x0 ZåçÍLPªd–÷¹²èJÞ$—+ˆB–ª·ú2weU±`ê+Aûw3¼ÑÓë¿‚ÄOå.W¯HÖtÏ‹ž¶“ùËÇ–C€®óš³—AëvGŒ &Ã’"2ûmÕÅáó ³¾uénqÙ»¢½ÇZf˜73¶Ò±¨S[½=*9¼qôùl<­Rã üh–âô¸@Ôû—†ç‚²((Í_É™,¹ÇÎTåþа¾òµÆF1†qB°¥y”IHWÊšN´ RR¹Z_L[–ÚaÅ_m|ZßiL8ƒ»)”‚j”É +D-ï ¶œÐÜÕDd5Ï"¨¬‹Æàxijt0¯ˆ£øQ2Ú%ZN:ÙžGã•dw8† JÚöñƒ¼V“–OA‡þΪϡ‘‡ ¬®Œý6<á<¨™ ÍÝtÖ5R¦¼—‹e̶è¼ýÛ«ª KsË.N¼÷„ÅáÙÉãßm +·÷Ñú!(0:¿eG]½ò%|оn=k¶¶ÁN¾ñÒ™¡gÛrD,Ç]+eóî~Š¡ùF§{ê!Xßj@yÙ+P~Ýíí Mgv”:2HW/¨pF[pƒâè؆Ù??Ec6¼Ãø­ÑbÞ¢g}FÔx®+k­z­K³½+´ýÈç©Ôj …&¾ß&ñqÅôÈq¬×Í2|²E/fÜÁ@>©L›–Ì¢ ¶õ˜Ú×ÈöÓ¤Û÷bIëG³Ü©øÞ~(ø~69Б’/ÉдÜqÜ §€Ð3y~åt¡[š$+Õã4 N ÷í«)(C,‰œYÆß×à‰¦ÃðÁ‚>xYÖŒß Ü~†áºb¾]H/¿ËVŠC¯ìŸóZÿÈá5ð¾ÿ7n‚VýG®kòý4¡À÷æO›ï|[}@u6Ï[Ö‚¾`æŽT–”—oÞRuÜÌÎ.ŸÍ~»v¶)-Í*ïTßFa ’ö,\)»mðê˜Üg?¸v4ºû‘:‡2ÕϯpáÖCxLÊ_÷ƒó)Ä3¹]Æ’…£&Y-c}É÷ ½ÉÄ¥Œ+ì™èéf¯3ãûɨʴªv½ŒÚð¸c›ÕÒÇU*õA[6lÜú‘8çÆä±Ìf_K†7f¢A@ǯŒ=Ä”EÅþ¦÷ +@õ<@œbmnÒV›<´ý²§%ÍsÎ,fa‡ÝÎÑ:7: BµpxãotÐ0(”IZµŸE®ýÔmËŒû–àÔ8ŠÈOk&ŒŒ/Ân|/6Âû²0>_Á• í…ÑÍÁ"M³-¥Z¶my +eC,]OF¨è‘èÅ­0?ß´Xõ;Â-ŽÁÄ×ÇÂí…´föLRð² Ú»…ÒDRà†9L·{eO¨Æúá7« +êEêÀÞ•¢$$ž/‚(ìÔ4ìÍÚÓG²cfM}—Ï&ZNÖ¥¹·+·ÇÂ;{ƒÝ%.Iv_7|Q¨ÿ9*‚"DõU Ýâú”Š·ó¥=ݼ®0¶)k–LV¡»Òœ¶b˜œ®ßcàÖµpeW¸1“J¾m=Ì6·g†Ñ!C èz¦Á ƒ²š®©–ú’®ú€·†×ó‘Í‘¶".ˆÀ¼ÿf ˆBy®+k­z­K³½+´ýÈç©×Šè,(PL¡ì ?F{3©¡ÞûsF'ÔÅ´%실Y ÓU\.K¶—û²2H{žf›ÃÜ,«—ôvÌ|ÜãÛ]ß“½l¯ª¥c¶¥›dx'¼o¼,<×Íïñ—ÒêÉÈB>û?:-,®y:‚ +%»-“ê7#¾Øܦ-vÅ;x¢‹Ä4¯L“¤”k°Mɺ¯¢2ï×ä=ê_°Nu3Ô°ëisèº[·6S… ì-í°û¦‰o ºæ‡¯îsñÓií?ŒNî¡&¡ žò.O% Ò4€€E¢44ƒ¯Uò  ©1Œ Èj<¶õH¯6ô!›Ï.¢)çb-Ò·ý&yЄqkFGìÍó£èËJ¹‰òÝÄcÝo„Ú¹cõ!1OR+ÏM—;n¥ÿDü%Ûè§ÛÑŠ‹ž‚loøA•Gß/7DPî~&ôcýfúÕaf¨øUaßHÚÄÅÛZ*¦°ЮMfä³D‘L¿ö„ßa%$øV¤ÃU8Ά͛²2V˜´8àdT¢ (‘{Ä–Öª*©Ÿ/ÙÄ•|z¼,Ëá-©Pp:ÃR˜=oˆ†‚ï;i0>#vNš]¾d ©……Ú*JùÞZPF²_XZÂÁ˲Ø{ýÿvÝ`i5À™RdXµo‹LÌj¸yf- -/Åx^ìç iòB€EÍ`ž#Î7yb•±«yuûa3X•ôb&´Úðšþ']Y`‚ê;5™ƒ®øcf8oàœSïuôù7Ë3¥q“ t§\Ø»sM- WÞÕ CU5<6 † _p8D ºE?tÐîŒ V™ëÂHºªø_‚€âÛmHÆ/È"ötø{Œd>â·ëòèð\4ZOø(?qëÆt¶¾X>14FO^mD.}‹X +` +ÖÍô#\’˜ï¨Q¹ZOÍhá9¡S Ò æu¯ /ÖQÁu§èKÿ±Ù>\¡Xí|›ùaXFî6n1ð÷MinU°‹g*Ú§,6ª9x‚ŸÝ÷‡X' wTw²:¶/KÇŸoš•½Û .µs+·t½:wðŸ¯\ ¶¤>òY‹s½S-ôÙ¯¼@‘x®b€“ƒl| ‰¡º¢Dg²í°£CpqÔ…½òùÑàviübu¥´‚íÆÛ… yÇ•°Ç£ žØn™ì½P€õ‡÷ˆÓ‹v;›6ʪÒÿ­ÞÛ#Èó#Ybã(‡ŠR˜©¬Ëß8=JУü¨4Ä.fxňCjiudZh½@'œ”ûûçV ¡ÁAù)Òž¯D ¨ µGDå¡¡\¦vï@ð±ý“kcWµdùŒ°‘·Q¥D©I¹æèËÉêªÝÎ6yd·±iªg>·‚âuí¿àξw 1Õ[¿Ú ‹ 9êðÛè׶bÌRt òø„„áIJþ!¡– §ÔÒ슎üÈOX0–ÖVÉy,áGÂéÕ½4•íR*7±MwCÙuèt¥Ð5_ ÝJ¥Y) +Ó;¥Uâ/´hœ˜ž´œ{ϼ¤( Ê{ +v6^m„á¾Þ{»,²qN7¯ËvÁã»aÛŠiALnܽuTZs.€'mÇzþý¬Ý„3†šÏ7[U¸lš +Æ_š¸tìþà™³?Ä€v–7JJH¬¢ô,›–<#ZY¼„¹¶¢Ø{…ò9𤻔ôBn]'5¦Ò„DDØ “ûH+$ôñi€U€ôëÿ‡¹*@@ùõ¦UˆÔ›¡­M@è%‚‹Û¦|©ˆ›1ª‹b?9þ–jÁ!NM£-*ÅSEø50⹶Oü]}¸_TJGV-‡“‰mFCœtÇæoÆþ(|Çoïý:XDý©¢Ù.ËÌ—pÐ&(×Êá#MM®‰ ¿$š!à‹’ éúŽãea:ÿŒ2¸"LHå½À"”Íщn>-ƒA‘+‘âÈp³:Y¶WÙ +·ŒiYñ®Tâ§GÔTU bŒ~ë]An#GŠÕl†\ô‡9—jµ¬/¡*oäEx'ëc[ûm\ŒfÃÑ„pL†4:É%‚C˜Â“zò±î¥ˆ2Ý|õÁâ‘g:ã Ú·6)¡Ü×ÀX­ M[æÊüÇÐOCAˆºBÓiz>(Q{ HRÅ3úT(’!cëìÖ¥jØ;Nŧ&6ÔitÞWp›M­5WÓä8ñ’¯Ê*ïÙ—­!ÅV7m!ûpm&%+‚Ê— ©ˆ€Â±‹²*wáÕêÁzŒ.Ó{ŸK⨚×-Ò¿‹Æ³Jܳ‡ÞpÇ(ò<Ú1akÿû³©¶*w0lT1H¯³.Ü×Hͺ¯ãァØúQ ZÞˆûòÖ¹Oäê:›Û([íƒ RPo—®åüw‡ 埭Qú°­ZÁ¦´Á–Lëƒ ¥&Mkd7w® T‚‰yH°V¨fyr’¬p#ÌÉ´uø’øh‚<Å]HYÀ‘HRŸºW£‘c1ˆ€{W›¶ ÈöœÙ㶭鷘Ö[ÄvâJO²Ž«n/„7³„â=iË1+B(%®—æ8å9Ñâ--F0Cwà–!!g­â+W(õù±æ‰Ù¬,riaŒ¹§,F=Ô')Ã|AÛsGÑkœBV˜_æÛuüï›°÷Fe¤_ ¯!4á’bUýF¬hx„£T} 8•ߧr$7H÷ —R›œD•¡öq5B˜ÍÃD“f-s6¦ÊìÛ]a—˜”Uy*dRwv>b¢x”µ¹7ú­4:aÿ7|_]pƒˆ³EÌÛéée,qª2qÕq£ñ:iª£9ã‚ë/¥å°UæbbX¢BÍq§öá4¬ácÅQ ›Yqà><ßÆÓË^Ø3â¢ì¢ÀÂ꿱VÜ…®©DA¯ƒ\…¼0dbè­YÜãÿÇü–3BšÎF•miII³µáÜ!~Š‘Ñ7ŒAº¸´umõP’»ÍÏy£‰}>ÃfÞú¤ v=Y½u(r`àúà“x\:Æᛎ«Pä]îOÍ,¸A^N8Uô™[Š%{>¹[ýJ’VLßß·>í‰7‘Z°Wí,Y›B:Å$ò1?3˜´’8pÑ6k]Ò:ñ; @}âàÆ»y‘ËneM6N5‚†åw<3dÏq^cFŒ°,=1Þ^g·ouuüµÖSÚÑJ‚0ÖÏú7ÿbGV¯øñ +,`1 ý×ØÍs²²¨ +·¹$ñ¾«Ÿ`Ú,@ï—æ“Ûñïš+4m™ÝµDµ•JØÌ„×Ò^åý¹¥_óŽåV²³ÅGo°övÕ'⾧uKôLì°9¸o´t×.ŸÞC¤¯x´!Æ~Q®jDÑ´½Wɇòb‹±ÊLXljôøÒ²Ò¸;óÏRvFûÐVÝÐÂœ’ÃɃ,ƒi‰ÈðÑU¬&Ë™­ãïð!1“éeÇÚqc°– +2Ö’d©œýºê8¸: #ÃmrëÙ” ¼êu1köHÃÔeá,ÓaÝH’\¢&¾aø†–fÅ8&®H€QãkžWk˜3nWÃy9@ñ~y{!‰«o¢ë$êAßd>Á¤"$Ë燡¯óà„<³2J¢œ+Rº(íØ»'w{H)Ìa,öwñíÇ9e†Ö¨!,`µÁ)×ÆBùœÖF8k;Ðe× ó¶tz4µ,M6@Ê:6#8£²ú­ã6‰¹QŽ;ã? ucMö%{vµá©®å›ëÃáÐÕ©2±¬ÂWBKùèõÕÛ×»2ùvâ®FŠ4ø'‚‘œÍ°îocc¾ïŠIåÀB©5È>"‡Þ¡Û( ·œ´[0Œ¹ê¾ÑMâ2œ;'µú»¬Hð6dêÎ×:ìʾ¢*0CUD‰ž›Àælu]p½^+ðÝ޲̿šæœ–¨¾§næ²VjöT©ÇÃKìôŒÙrlò¤Cà}=/æë3 +Sœ©]îuk#„rZ‡ÌÂ_¹!.ÖÖr¯•]½ic¿B{y!J¨çï8ÅýÅ>·fʈÁ‰(ûͨ,‹ñX–§±ƒFÍ~€í2²Š*f‚ŒL«~os¬ Ȭòïja`¼”zêÖÙg Ù–ªŸäÔ&Š?Ûøñ­Ó"l_}å‘j^Eq#l°^%À¶î(UÓ3Ì5‹£…s¹š«é)u-ZŠÁÍR³7GÒ…ó„‹¹ýÑ/1Àd£Ä°Fe $ ›^˜ÊZR pzŠ+.õ1¢øo«”ëÖµ%·XÀæ¡Ð”(lŒ ë KmbÚÿ”ŸWjV!Û³V"¼®§1ËD}ý`ùF¦`ÚÏrºqTgVnþ(ßç)Šßw‰C1í%¿iUá= ´ÙÊÊ„g% ñÚ—¨ƒÛî÷]š˜iÑa-X'm|óÎüà—{h»ï´¼Äss6ê×3Í'DRëàv¸ú¬õÚ0µ©s>ºÑQ$ÄÍ¿sEu#·!²¹ÀÆÈ@^Õ³1 ¦0Gè‚êørÈ}5 •ŸÖåŇØduKwM¹ÍÂÛÞ¯4dsÄñš ™¹@׳±Ñ¢îüz‰øîgü†¨jMåÅ‘˜õGÊàÀ¥qŽÆÙ‚Âl–C“’Wq”¾ÙqÇ##’$§UMwÊÓÿŠôøÍ;'|(œ†HWÁBÉÔý*.è¶=f9oŠéß ø4?àÅì].¹Z7Ôlƒ˜£t¼BŹ<Ũ0µÅÕísò÷ÑYÄ[(hwDzjáeŠUä+üJžüq“]í\WÏ‘ª/w`uMŽÎ£4î&rVr lä±ÿÊd¾L:ÉQñ"½ÕÇ$Ë€#5ø0¡ŒS¾Ó’äA™gàÕi[Ž/Së=§ò9:ÐQÖ€…­!É‚ÇÞT—°šZePÚìs_Žü@—6¦´yÓ_¥‘£Ì»é{Pü"–Fe¼&ïdù| TzÒX¬wͼ–cnª¶vç$ÈD¿¥ÿήž—fG2kcÔ7èNÜapr’åŦêüì»á6`Õ69òÕ™”.:&ÆtdæÒÎ? ~#Å,ç½V}§ñìdø…^€öÑŠ½Eÿg4pÝÁö9íx·ASt­W[*äÓáVÖy~WÆ ±æÕ¸(Œ§çú,i JF9vY…o“OSB`÷½Yj&uÁý¸äKþ î[vî>“*p- $rÉ…w­m`‰\(’`èx¬äöU6$ §ÖïYwî±…Ôâ®æ<¢WãÀ͖҆óbÅ«¾„×¥¸):RNB(2—äó–F ÜÈqE¹ÃÃÅÎ"ÔµÞ¡x†²ÏÞÍ|›æ·Ål˜­…ûv$õäŒö¤áDû3mó[Ô³"‰÷þŒf¦¨-}u`ÑÎ)a vi?¼b¥LKðbì_Jƒ®0¥Tê ÀâüèPaœÚK¬úJ•‰‘çŸS¢4óÜ.Ò«ú?aGxë¶)SµoŽýO¢ ¦&kš•,ªîýªÜâÔ/¯Nkc“4:ÝÈN…#ì^s®Îcxxx!„‹C¤Ö±ºìÃÚŽ¢Wck_A»©ÒT”žX…/Á,m┵›á5¨•úe«cd6® å§sS‚<Æ~6rjð¯Ùë«swn{&Š†^;¡Ì?ZÃyƒ_\¼£¡9ö '§âA/BÔƒx“Ò¿ ÏMm7À.©ÆàýÉ© +Iß,S—U +£2³4ƒ%Ân ôt ýuö¥:¥ ˆF¾arâ^|¸*VY/'ü'ñÔ_ô»Àáb1€~&‚ÝÒ‰ksÁdÃ-»½ Îó=Ô/‘#)âËã÷ÁDžOlŠçÖzî›éÍEì4$ +jËùÂÞf™,ôÚ³6ŠdÉ«Zv…Äfè—–rúÉjÈâFº†q½Þƒ¼bKàÿÓ‰"Í¥Lx’ùL1»õZlC‡'™F³­ªDèª}|\ýò…šty.á° à7åKn¾JrQDØ”–v”`œ“ ªö„Ž:rßк„D)½;Yv:ã9!Ú]?.D„ØŒï@Î1<‡T¸XÐZ¡³­r5…Dk×9 +°ã9'΄T2WLš[fÚ­ +” ù©ceº¦.% QáÏ&!)dÛ.ÑÏóé'…h„ÃÄÂþàP©“Ù«Âe£žä¤”¼ÞàuùJçÓ’à’wûN?ÐöÊà+¢9‹öâOmÌò×"xB1êlò‰‚¤G +qVþJ>pËd7ÁŠoæ3AЙ³‘Õ†b–¥drIˆxcb&¬4­¡Y{+#Ag¹ë=¯5ç•¢Ó!“Ü.åªìmþ,†t÷Øÿeܶ»ß—Ò»w@§Pžs“•Ð€ßŠx–ŸóÀ…bÞ@ºï3Üq÷G{†:ÇæÌÓÒrÅ€!3©Åáhè¶ HÁ "’;ÞöAˆ¿–(EßòÁÆÁIÁsÇ(-¼€rñ^˜Ë¯’{Â8"„ŽÅõÕ\¡“Õ<5I“¥U ÙÞ(B* ¼ÅÿŒb¸”àw Té•~dƒ=f¯‹]r]!õ1!€ÁØcòFÂý¾n(ùTòž°þww}¿þøb̔΀OªÈiŠçFpVÌò Ü×ZCüóftÿÚ¤âNx¡Ž(QÓ®ŒÿwÀÈÂþÿAèá¿~œ°/.F—kQNIk´ìŒpƒMÈ‘ðiÕ((UŸ¯ ©0Ô`ÀåV94¤ ‘…FV¯¦Ãê†&~žD³KõŒ‚ +ËýÇåóõÅÕvm‰ÒÚ µ»3M[2÷NpÛ1,)‹ë ñ2©©sÊR©¾…¸L¸2x/Å°²+pÎúW6a¹ªƒ®+‘ÀÆå‡æ2G˜lò™!âzŽe¹ör/™ žÑtÛÑ LV€ìJ‰z>b¢x”µEñãx[ÿâ޻㨬…4¿l´K&CïddL_ۢʪN&çBU7?J[Îþ/ çÆPt©Ÿ¡5ìÇAS:\Æâ5¶<ïL‹¢’“·êhr¥Â£«piÉÆ'Ä£ ‚3N|v‡éo—Œk+%™Ã°lyq謇È5ðžË¯Þ(z2áùj7Í̓ˆUÞíP˜¬Y0T[cªžÒo&kѲ=pci ž1Æ‚àÙ£»¯ÚY*o6a ñÀ‹õÆÈ·0$}‰4]QÅ=";s{~-z…ë‘QJ€øç"og ‰pX+“ŽØÚ1ºî¾sŸú2TP‰…nT³C3ßI 1(+úåŠëX±$r¯"ùÉ™îÔr²Â¸[`¦±>:9X»ÊQå°6m¿mY+{ ÉÌ2¢·¼ËrÖâÝxŒE±q›€04-GûÉÇ‘|qé›xºÁ†Þ.øHD³ß꙳DYç¿‚­Yý6nw_+߯‰ãpWÞ Å²y›Jkb©É»ÏÀ'<>–¡rc¹ð6›fºläA–í“Vç%ücUÕÝœÿ!ºégñ8›¬ùï§IahP²wP—>QaÝÕ¥Çhx˜ +¸HÂØ 'r°]¦=ÁZZk¯íÙÊô^%Ù£s§ú."ãy%®ÆÈv¨´ÀˆCkåx +95]!JYe«¿™[À¯B–¢7åpÖ뻺é†×“Ê@=ÓépŠL3\A| 3ëT·¢µ%'Ö?=„Ïn¹×aÖ”áGÇÑþpñËÆGD"FË'@V¯j!Ϫ;Ù?çhŠôeþ|(D¿ƒ.MTˆ ¯&JD1ˆCíh·õù¸+Ë-‚äëkÁÒc£÷žÈAëg–n›oOÝÿbè+ËãÔªÿJe;¥HLÆÂ߇ðñǘ‚Ö—…à€tô`'¸4é°9l•ÀE’z™ªAdSÞ|fÕÓGz•ÂI€>cÝœ–ø[.¯š@R±£‚ª.¢Dð Ýθe M.ö¢Z'HR¿E0à» ÆÏ[s +¦eèúåÏ–ìñH¨¡è»!/":ÙÖÙ-ÞK ¦¿C`þ¤dýéà¶öô’øÓª·â’Ž‡D=oàipŠ‚O³ N;7q v'—g®#1—ç'ÜEé(p…¢«qx;¤ß§{t«÷Z¬ ì‡ìxòº[~e¨Ë¯$ó+N1*GYGÅ~›6äÕrß²ò…;(DÃóÀÀû1XkELÏÞ0c&Áyëòd»ÕŒš¿œÜåDTøë2¦š"ÌžC9.K^ô:’Ž«´gGHø3O—%ôìÆñS3¦y+K¿•Ñß8]¹Ï€šÐ“]m,±€=†­‰-™0v}æÊVÆYn7¨} Åh•K.ý.¿•Ù¢¤HÙæV\½™EÚË0 +QÊaözÊ"±ã‡ó6óPú]ÑÎ6ÎUa-5f~­r[- Pò^Ïô3[Å. ˜Ãjy¤±XeTc˜9lÇåNëqó²ÖÇÝþýæC˜翴r晪?çé+xýk²*¼àÏáÃ1¯pß.‡"« Þ ˆU 0˜Åo³+ËÀ¿ Ž3WÀðXëÓ}®g¾âÄ+r•Ÿ{©Yò}Z;llÀü-’X”ù½™qŠœ³sþ>I ú\ÞŒÚàÏñ2ÃÏ3©¿Â…‡`XufI¤è°"c•ŒØzqðÈç„a-¼ð¼×šÉ2šÙ?HWÁ‰„Š;áú×)›.Õbª•LªmæÆS#?žÍº~îw³Ñ~íëŒeë|<±pNLO?ÁJ©éˆ«º²0Îøø¾ÌÃ_â‹~ÎnÑÐRþL¼ˆTTjÀå×Ód¾TærWÉîùyŒYØ1uô CjlfÓ_Û³•lF_E¹¯Ÿ”(ãi^rÆK<.ò;á¨axEçëCÔˆW…ÔÑð}Ý7<µzB&p¢• í˜ÉS(?£ËÖºY¡¬+¬ñWê=j›ì-™ö0…Ùh(ô!UöÈÁÈP¼COrŸy4Ë¢;Z¥¶Üõ±m% ÛžÒñ]<ë º!D¶îgÁLÜgç°³aŽìñ~ãzbg­ËFÕó Ê5Eºägë´eP2ÿ;6¥Ê÷™j. ÍÀ7XuÒ¢ (ÈÎÍ2„Ș #Æ$cèçWwÖ#1 ¸wo ±  Óo•·˜|1oòçaüº­d/5\ÀŽù²°96#lôÈ£W+÷¬|ýr…u`-ëþé>㤻8ÝÒî”g¼ÈÝnæâcÆ!ÜÓíÞPns´ã–úìcur?´ b]t(ƒnAI²×hÎRiÄD›¶L^äÎÏÒúõwA|5+: ±Ž+­Å§"(Û‰Es¤rã!Õ  +ˆ)î›ÖÖ}fÀî–²fÎ*o¤]‚y:«ÆC”LvRÀ|jÕ€Ý@õFÌ¥ù8DU›?QfÛP >EäoúßÒʤU›Œ˜x¹cDPï?æ8ÌKŽi6nC:‰˜S #ˆ®oÿPý$Ø~†½IØ»~E»gdøˆLÀ»ûû¶üå—Ãò)Õp+mÆ©Ô85¡/BDë2Ê+˜­¢¿‚Òà¨`šŠ™ýÔSWy`+Žé; ø·ÚÇ]‰ŒP~ùæ•ëyQçqËçßQÔfŽH)Hê­x•xm™åuÅ,Š],CLÝ»¡ƒUí>nOdv—IƒZëåˆhtòÖ#r8ãò»¥ò*ã.W/fBê•CÝo@4ƶR Nž_pI–¯;rÏ_²áË8áRh>gÒÏœWŸ^pºLUÀ{‹•¡ùVñLJ” ð‡mÿ^zÈCµÍ‘¶{î~\â‹åIy&Ÿsð>‘£kÚèër&Uç$ýå8Ëž•FD©9yáýº4ÅakÜ:byàéÆ "ú…é¥+Vî~'7{y‹.òÅ)j¿ÿ‹%¨Un5d¡ÍìñO\¥ºûÓúŸm4¥œ†ðVœörÝÓZ§¢ÊKUqA?üî)0%lR$íå"d°ÉÜbîc4¹‘šÖï/ÓŠ ¿NíÖߺºv#*iòär€œv’]UÆÇIDí!ôî!Â10>J\Ý•Åiœìw}9ç W¹„ìR=ä¾Âí¼}ÁÑ—ƒ}ÇɤEÜž§y°ˆú â횦)'íO²Ôxw.¦æn8nyBšI)·Ä*"¢&Œ¿Dë›ë_øTåláuÕÙ!­ìL1¥È¹ÕÈ rkÔñ }Á"„å¡=á­c"°Ž‚æ½òŸ%BÂ6ãnËÝóÆÜ’oäÀëþ“l¥Y¼,Žê¶õ-ò2ÊùÄ{d´pÆ·ZÌhdã=¿cGU´.ãŽi¡Ù(ù²Îþ ¨|tù.`!ÁמÍšLnSë£ÙŽÐä”fÞè‡ç£¨¾ì„¹’ÔZ&€ºÆ4—Å“É(±+xÒ¿h/è7—K¤¨UƨÍóQýi~”®õm7ÔŽiû>˜5—NT9“è€$‘x‰]"ºFà¤Ôm[îU#ßû`&ÅÔŽå@C3ZBþ'ñ£ÅŒDŠ¥S…•XpfÐcˆ.™‹ÜCQR®Üü˜o1£KÆ„£!VCKÌ*'óCv6i›_HYÑ=$§‚ÀÔ«„ff3Hº‰ñ«»8Ä„ÅN¶¨;Ĉõø†ãÅ}‡OåÇÄõlup¼ÜÓ_³"ëÌrƤ}Ä+T–MÅ0N{%9³A]_F|2n3$Œµ&1©Ã¹¨Ø|9fuewã“Ã}ÍÃѱ‚¬Øg} +z¤¨ð7Wü|ë}–Z°O|¨4¶?nï+QæƒT|ùÊwU9åòœ¼„‘=è×~SÚÎݬì +©„~ÌX¼ r1ˆ˜E'³¶Í¦Ž"žË†onÃ÷X±%Åu¦ÕUö ±‹XgØ~ǯÛSODGÀ5«zS†Xèú5™A¶lÐ0HÀ5÷ÀÅÇ34's+)boC¼(s +°×ÕSò"£°Ö½ÄrÓi3—O¬Ñ`×Î'¶§M^TßL¢/Ór³ØYƒÕ¯ g¡GP«„„µ, †Óåihë^²Òðüo SÐWèH߇…µí ×t°i7l~–¦‘$ñ"e—±LêïP|–“öf£ïG˜Tpi0Þ¸"ê˪ZÆx L‹/8CèŠà nZKäÜHcI Ïãì#ÿÇÁ_ÚWy;l´iÜqÃH‡QéÚ–”rCéq¬w7ôs*ƒ®´ðÓ/ÅÞ~Œ~2žR__L™•,@Ž(qØš03¡1øb*vŽ©YdblÚ},%§‹‰ˆošÜNã³O6IXØÞYæ{£¶#) ¶¡uùh¨u=:ƒTiV—>t£önq%³Õ¯DšÞYžÖ.O_(reÄbÅø@7"›÷8Ó›Œ`_¬ƒÉ%ˆ*FË{𕽙YÚµ¡¶r<àô­”Êó0iÖ( 5Ý¿íä9Üðň»PIÊ´Ñ‹ù³óà„/ßγWbz¾uc“i*c!jµj†îä†æ×ÝÐåŒø+ °Ê´&<·SÏ/í°€•k½ªŒSu«i±wH ¯D24@þ¹KùQ¹ðy"=²m¡ ¹+UR6Èy¥ò²ù/9+öúã„2ú Déµyœ$äNAÿ$ÍÓ;ÊDdÿ 6-A{ú¥ä1þƒ³µMjôm ‘a‡>šý4ÚòÚ±gØ}Ëê.‰>æž‚<~K=‰.,2z.àˆˆ1Ÿ!Nb_ ÍBH±]>¢7éÍ w°ZÁØ”½ì‡»–ÔìµL¹ç…À‡—«Eyщ> vS…Á">ö×ôÜñe(va߬ÏGŠ¶qLF*K(îã›÷b,0%u îN¹Ó±âɈÂ|xÖ­ËÅ8Ï·¨ÕlÞÏfýà½eªÓÕLRN6i96O@ßI)Ž5#©$ŠŽººï‰è,¦YYÇ3û%Ÿ'öŸz2ét„j¿¯ÒF½ÝóÃ^MìE—Y×ÓW_oüƲ”–ZŒ,:|ŽE¯€±~ .ärÎÒ’ôí bGAQ]”¢È)r”3¬ÝjYjkýCoQWï ¯7>œìªÿNMkÉH²wÒ YÐý—\ÒK©­Ù¿L¼6QN¼CX¬Û‹ó.Kð\nº£»ùÁRLO-úü6 Ïɺ*!; prц؆Û`¬^¨9>QµXíÚÜQ|e,þ$·/Òó¯ g}&w§½…ž%Ý}»\ïü|‚ɘÑ6¡;ÂWu^¤Öv®Oj0ºS¯‘åe÷ǯC{ŠÔòõ©^Í»Jõ6b!߈ó!¾Ý<á|Å%KTËé ãÐt·çTÄw¾t5²´•y¤ÑV–T®gŠ]ÜìDÌuyèɳÉtº={Å·ÙÛ " ®7ZñK7îðËM·œäsû€A¨s‰ƒ2¿¥hü˜é¤Å[¨® RŠuUaDë0¹ Žà +ÔÆM¡_èkL51ÿ%Šgµ  Js‘?³3,e‘œ…S/Goƒ®\í´e©ÕuðÖ¨GVh'e¯D$NÜyê +Êà@—mY\}±)À¨:{ÔÈ‘p3~X´Ÿ |5Ƶ´{±g6: +JMuÇ^” J&Üše†Ü¡Uzı42Ôm„ÑqNÆÿ+Aá +ÕuÝ,ë´iÔ 9§!•¹±—:}ÙSR¿2ë‡ÜsFý%ÚPŽ‘²î>€Ôà’رTøNmô=¾ø²#q&+E w lËßÁî÷¡ swGjGtYØ‹uH‹ï„áŽÁ|2ëá˧¿'–?ø#—¯ƒ(†IŸ™´Š’ó|4¥]£[÷„Ú 7Ç@ƒ›Wz!ÝÐ÷1³!TŽ)V~è•!¤ ´˜—8øuåAåõñðùÉÎH·zÑK¼(?%Ø¡ˆB+^Œo'Æ4 Í ?ìðúH¤NýÍ;t5°j½Š!uð2a†]ħۭ Ö¢gŸŠ7de×)I[œ-HÚj2¢wÁÀ~dþ¨.êP4¸á á΃y20·¶§ÕõØÕÐUƒD’¦]œdBc3ÈA{‹ÒìÅÐÛ®È_PÕÍ2aS"G¨W ‚6¬ƒ  ‰žáÑYì[xEv-þ7¾ñþò2ȤoïËõ‰¼ ôº¤ÕÙeVjôB?âM&%æFÎrX„x¿”p•Õ'ó¸Ö4qëõs–‰7)y ¸‘€ciý@#×q`’|ÿ•MêØäëÍo˜§ç>¦•Ø] 5£¯§ ÿ¸=\´y-n4¿ç„ ¯qåcVåh¬Êº~å6 ‰g»uêzÚ‘L +t.¨m¶,?.?3’z[79ê€Ýe¦?Ò 8à,qÈsŠÿDsm¸wIѶó—A=ùl²!àþ¸Ð*]VMY²?#˜®\/d„‹+á«e÷mˆœÔ{^š¿¶¬1åMظs÷eYÇaЫ1ÞØ™~*)Gg±B(2—äõ 8GÏŒ²1(aq7ݘâjsº”—DYÔ)kÃ¥„ac¯/ðSÁ7ŸD€#¬v¡óC»>aÏMÏÏØ}pj0‚|§BcnN/“± +o%¦]à¿/¶Û€ äùÇã늒îŸrê¿ÁÜ4É÷H4Ü4Î{!XéyOö‘ÏéGÎ\šQZÀ®*T©œ¥: E·31Ooá›dù¡É¾ªøÈÔVvjN=”>“çhâ!¾¥*2BC¶åÂØLůώ·yL€øÈêîŽÁy¾‡cüß RC‰›ô—KªJê2~óÚªé2¯qHm€¹ÔârÑU­TÞê‚b¶¨bÓGSŒKV<ùŠHY €‰èû;é^ø•†TUœÖÓ ³6¤è:²ífyv/Öƒ[mH¬¦. {‘k+õŠcYaq¤­°Ü-(£”w‘n$ÙœwÔH¬²Õûç~çŒõàµòjâçùÈèÄùñò®;•`Éß_æšs‡=“Ñ{_E£X§¡bgžR… +)*ÏÓeÈM‘Jq‡ÓXO×p19Ë9Á¿}—YÐ!:o6`|Ç!Ö¸¤SÐè&-\ľ!ýˆ©2|6 ؈E$‘{uau…“–S&5¹ŒP¡V„ù¼ÇšÖÛrUcY¨¶¿`Jýº¢hž™èú€™<(6ûY=§ÁÛÁ‹Ë‹¸FÁBÔ©K?Æó¥Fqª3vÌ),É# òƒ¶᪫};°ùÜ|ã­%ïcHã²Cø’Ä•š Ù¥/€Šgwšº…£Ø!ó%¨MÙÊú ®¯ +’}\}Üà=̲)ÄFxD=ïv1dbƸz6¨ÐRÒäz—%è2ùÅ/Yý½è‘Säá¼0bTË$U(Ë¿¤(»64Àe¤wxƒ•D¢âÑGØcgCRWcƒp±azuŒ¸ôž¥’ZáõqÒÔÈîID:ìÒºÍUµ1•] Ýt†9ÚÙù¿»î“¿+Îò5î9ÖÃÂì9ÜqO‰G›·‰\æó¦Ï‘2 ý û•s‚t„gC\ ì{¼ä´/7 d-¡tl)4JñüqRÌ †S^ŠjÑ,•ã^HùO—‘d'=N†Ž³†i¬¥wÝZ³¡”‚Ö”"ÒN´¤¨ýµ˜ä¶¶ +PUT?µ™àP[ÑVœÁ±ŠÓo2­2°W˜ þ"á&bWÚ¯ëÖz°ØMxÌ–DéÌŒ&:ПR~¬Oäë¶ûïƒøE; _äög! ¬·ù þwV‚5¡“õÓLõþR}5R•ÙVë›z“•IÙa9e˜dßö&Ìñ¢£b&gl\6©2‰Ûv[Qp2C|6Ð$Ì,>OrÿC†{¶"ãnNùPÒ‚%ÎFþ° ™@ù$× ¸¶Î*˜g°Û×X†§:¤][÷\JzœtÖ¢)¿æ¶¥+(àèõ*UNýÖruÃŒ¤ôaÆî‰nÈ…´;6ÌëŠËSl‚7íÂâÂI!’1ƒHrÜ{Aøh`Ý¡€ Ð~¸Ô˜Ö¶þR¥‹«hræδ!þp—±NJ[iÁouGˆ¶³ÃÂwmMÞ!ÇŒ\²ÆDÚ°&*I—‡Ð™¦`¶vbý=7|»ú3‡þ%ÏïCR’qJI*þ!Rü³‚j¬ôH›èÿ æ +ÿ„Ô_™F(Úåö߇O;èǹ +ŒDŒ;lB»Ç;q´KÖ¦µuÿ@¶ƒ™—Ïè_µ[V¬øäè­ûûÜ{K‡péb2'ŒI%s›£Ûã1Á!ù䤗{žùÁUÓ$È3¡®ôuJGU34n+Xa¯Võ‡4:¶¬åõMQ +Áqe@æZ.)G•z©¦3óŽ ×閺̱V†ÍºîöæC #T][™MýÞ\8³„YÒ ÌÁÁ‘Œ–½/þ` í4Dy¨a\/5:TîdÍ·i­`CNò‚BFÍK|u’u.¸c&;ýÏL±!Si¡ +o€§œ! ÂŸÀöí<çZ´àÁê Ön…Üešê–7V|ø/qŽ·ó¾±•êGÚQáèßYçÜ êÈuJâž®ZÈvÀÂhU•“üXÌç)lµÊÂGÝÉ6YýÎë‚ÎÖMùŒÈ@lXÿ XȆÒ€P=zyhq`.Ú1#7¾d*OèmË°i +À£”ëžòGõíóÎGï#qzÃ:”©K±¢œ¾fÕ*cqO}Ø…~öZô04yc.\µ¨V1¡ +ÐçD@Õs`Ñ1í‹”âüVMéœD|5ß×é¹lÝog•ÆOŬá‡Wë Zîr|Ë×/ݪ´`™{®Íäg +M.UÔ±8àž·Ò0Öî MüÐQTNìêC¯Ðp!l0ÀÎtº[X 6êÝ´O×Sis&õ +µNòÖ̲oà^1õâŽ[¬è „O~qð0ƒ»‰óó$Ý­e¼„HÉ7K#W]©¤3¡ŠA'Ês2åš*~2ò8’U°IR¹°Ýí1‹É©È©è×±¡Ö¿‰ÔÉ=¥…Ý/¥L ^a?t{ö£kŒ(´~¥¸ñ=È»!×¾2Ä‘x&Dˆ+l¾¿öGq +ôªuÃë]&LöF‘Ÿ>t‡™Ôœ0a*™2ñq,±¼ â á|CR;0̦Biª‘ +©ë–} 8,É’Ç”áv8v kþ²¹±fRÊ«6öZÄDΓ\º´1—5DœÂ«|Ÿe³.£:Ki‘»a*“ˆãLóšËKŒ"ëh€~°—Ä™mJê¬ëÞ=eêO6ÄÌÕç(Êx&¯$Ÿ”°»½Èù]Üÿð9Õàñzyv–eÇA~).÷™Ú‹\xb•uDèHD ~ܽýÓ{SÒE=Û‘ÝÂñg¸óƒB/“Û! i»jèœEP{á±dÁÒO³òè@‡ý/ÕG ˜#\ËBVv^ÌÌ´„½gÏ#ôôî:.¹‹ë\!PV“ý8 Bðw§_ÍÞå"³D_ ®i±‡övÍ/眊0ùÜÚ}ɤE…A5ÞÂ?6Ù©Rå.^iG‹ŒdY+Ή† +lºŠ±Ë2©îwA+žFçùË!‹á¶p묹Èwñ,û1ÚÛb­àf41`35 +¡?„tÃ"Þk¼ˆe•bÂA¢ñÏÖt-NÈ÷¿æŠ'Öå“‹˜µ/‘ô5ÍDu ,8OÜ/ôµÔ³½y6OEÝžT²±Ã¡%—RuäÓ“õó–ö{ŠÏCœ«þêÊHE?w옒¦;´­Á•3€{ÔÁÏÒŠ½£¤¹Œ%´ž®)£®Ûcð +ï¡|å5`}³*y—=ê±Ïð_à¾Ôy^@ÞïíûGÒ6ñßHŽ© Iš‡À­žML·³êx¾ÊMÚ9¤Ens4|Ã6ð"ˆSKd:d>Šƒ€)è2ð§ú'Êæd „ñêS™Šþ¬DGWDM'½Þˆ:Á(þlé<Î-þe‹ +rϱT7v%µ’OSpéÁ? +fˆÉOöñ©u;ìíûgCš÷Ó·°$ß]ì«£SòXCÁk&•D[•¦§Ht…¢uÕ«kýöj=Ê< *i’¿¬  ¦k†:…¥õ”×áýˆ³™<«¼™%Í㜠¦T×"Xðoµ+…ŠœŸ¾ ‰aÕ¼¼lŸ²³˜:Û“ŸÃݪÆ>–¬Ú¯,ùJmþÛ›Œ½ÀÑFÂÌtOW7|n•¬;t†5–ú4% 8Epdðt*-•ã@ 2› ø‘ÓÓÛUöM ;¶5ÓÆK ƒ_̤ÎÙH4G’yîÎ8ÓÛé“# +âJÈì°?ª©‹ý‰j:¦wÜ_Î6ÚXb²š2±„÷2u™Å (†9¹‡Ž¾4mÏxy7„H–â…:™"'Çæî?d[2ÀÆÅš†²bŽ»ŸˆÎ>Ú%Œ½C¨# +yzÎ87m°2}+3ÃzéâÏý°úÓaÄ• †Ê5g3ðIguTûãœÃÔ˜§Ó¤- óy#tbci4Ýñ2§½ŠÙ™N–2iÃÝñ¦qpËEò¬Ì]–ŒÞ#ƸòÒ< +ž üŸ‡£'?¸WZ`}çõÍ0(7ºIåŽ ù>Î÷9,ÍL=¼>âÖ¡…îjR£˜'W½…£{=cìüœˆ]cƒU×n Õážìz;¦ã¸¼z`„¤NýÍ;t5³ò3Þô à‚µD´¯q'êm[õ sVØt´åm?öPá¶ÆN%6;dl¸>yÕoå«® g©Wu—uÃòKµ=d<ŸrÑïVOJ{U„E¾5nË)»5Ѐ}Æ¡u¡Ìb§;4GƒÐe ¼³K:âwè?$S/’(À½àŽR‰ççjíù„ +‹td\7îiFŠÖHßuc ^)‘,×æ6.R‹ò!þJr¬‚+5²ç™ž\ÔÎZ_¯–"æ»ÔÁ1n^v&¡·¶ú¬t¥ØôwtÞ:Êá“jaê1¬¦O¤m¿Ì«uJvù^ÉvW/ä@×íÆëÍ*ðÕm\³Ù +è™S¯çΡXéž7äw;V\ÒîLÂÀVÅÿì0Ç~pôÒõÆ×ôFdÀ n×3 m õW'ÝTôWéÂÒÚ-Ò…³‚òÀ +àEk £6 +Ù™B m°ý‡(ÏG¡ƒÖ€˜à…v¾]{øhàe‹*¦pÔ‘•GÖb}šNíY86Ü”OFÒl>c|ÈX˜–ÖïëÉ™sÕWQןÁ;/§æv”^V’3P»¬ ÷Îñ{9û*—¬mˆ+ÚŒ&iø°qœ!­Õ.¶Åí‹Dð÷În0=*›æõs»¨’²§D–nA´%õñ®\Ô¢ýÐÝ𤞻Ÿ +سÔmᜣÎ,'õ‹Žl»iJzRW6—MÌ…çʹ=ëà›²;h…ö“˜oÅû9Aib&%òI:Ù^+ÿлé\Ó!OwãQu7 q“CºKAÉ¿nu_õ¥oï˜ÎWlÌ)§`UÆvLcÃç4Y ƒ&ú¡~«sOÓ0Ô¤KYñ¶ô~œŽ\))½ÇîöØhFD“ßF®ë-È"ötø{Œ~G¶4$CK† +ã® ïŸ 3ÄYë¨} +BO÷15=ËÛâH‘AøÅÏ’Xúé÷º2YÀrg2JWùb+é8Ñ“y—L¾ ïý–Ã¥¹b~Âÿï'ƒÄÌ5g€U?ŽÔnZˆ]ÏÊPBg+¡y¡%öTß[ù”µ®mh¶÷̉ê¯X[—î +…Ê—¶´ +ÀÕ-Š#«Z Ö¦XVÇ™IƒgÈìmk`j¼óßÜ£5§Z⤜Ðí㉠Œ̵ÿÙŽ¸Ôc-“yó16¿‘÷IÇkoŠyÄÙîop†…ƒ)ªÓDZÛÝ­áŒÌïf†®H/X€Š¨:Š3„Þ÷§÷<]KAåíú>QøÐbY¦{w§ OUØ„1˜žÅ#ŒUAk ß@Ø΢wÛzº{ +ÉùIb©Y)I£ÓÀcg³óÒCK$JŒ¥ù÷vð»"2º%ŸÉ .gmÀ„öÞ^½:Ò¿âÔ8Á¨p¯¾ŠI©Ñ¾ËŒY©ôv?%f§¥¦ÁTÚ-‰›vòUlíÆsò úBs@{\”=»Fôsõž•H#´ûk +§ˆŠ?Ie¦g¸*ÇÑüWY9>![õ¿@áªæ™¯7„«[EÙI"¥³Õáx¶#X¿^%öQÓõdK¶úl{]´Yvf Ì™ùTªÖñ„:Å©Y‹q½äoÀ´à±qn+ORN'¾Ç°Q¼WòBéðwÔ&ú@s¦…'ŒYˆÛ9iô.”ÏD×ÒKgŸ?8Jz-Aˆ+Ò0¿ +Sàp¨Õy½.+¼ÓŒo[+ /SºÇ¢¬T…½ÿ úÜ WµñO«"¥zfiVD#ñø£«ZìZTž¥psSõ ö„Ìþð¿±HázÂotŽÌ½Hwì–ïrÇIÍ(!M„ª zŽmY·²ª={²·Lߊ7œ"1r1††ížDŽ[Ê&Æ…âÇ%‚>åÒ``p×Ý`^R h½WÝ\Á[ŽP)1s¥‡19ûùdzÆn¿—Là‰`GVÚÔ*}îËuÂõtnü¥°.s óÊ¿yýTôÖ$ÉILz[$ }ãýWHy¥Öá·. áÿâ 0Âsê[ƒíjú\9BxC’¯ÁD°]ørëŠ+K¾t0ýf3È©—y=¬ŽÌmH[(úø¥”äß²3/7šåÀ{éçL#E»#;AIÃ`k±ü9Ë_ßçCÔÏ‘Œ_Xß6N)aQ`XøLÊxJõü>³òÙôû‡Ø‹1ê*¦ÄwQ˜†ý¬óˆƒKoã¼’'…IãÕã0nxQ¾¦b ÐÁÂÆïÒ)7’Èc²1’d kG20,?‡ô’71˜³`éÕÑî³ûßÞË< +…<–kü5€Ä]53µR_ƬۮôVxO¥9òJåw{ám™8že‰ 3ñÀõŽ;´Ò °²­©èy{‹‰@[T¯=·}Çw„–½(aÛ•-‘Õ?^ß›xyƒ–¤rfI áJ°J¹¦˜ŸòB¿¨›Ãý¿¬WÒ®á£M¥õ\SâÌ¡‰Ê™R$¶ÇDÎ[±§öê$X-mךl¾u¬iƒ`0íZèåaW_ÿb»óÔSÑR–Å-J&ö%ÞyŸ_!í>‹´ùJöÌÃüȃEÁ®ôßa„êñ X%”‹~Fz¯¨× +îQë +½î¬”¿6¦ÃV +)|½“Áøzèâïì`I¾­&½´^ÎSåÆ“wë¼Lº­¯Ià˜aG†$¡NÁ')ØÌ·‡NÁn>p±%=EÁMþâK+§„ñ3$ÛU‰£J}‚Ç[œdEjˆøuL?¥|XÞ¿´\z<>ÍÝ4pÄ•B åJ2Q—àÂ/ Æ%KÇanéÃD¡>coEZ%h«°ÀUä§ß.cÃTþíÏÕ|vÆHŸv;£=Ï€…éŸ/™)RŒèX‹‚RáÇNAûÏ¡‡úg¿ÓðôNŠgEžFävéÙ¶Bà“3b«\µƒ5³pÉBÍÁ._ã'ér‹|÷õèÖN|ûOš3.bž«ÉºEQûš ±H> +´½ðktôäšyN"S¥HOh}/9ôϘÞò•s æÅITîPÇ"~ï6Š-º¦-ÎLßÒÀeã)3j5Z›ØåÒÉW?“©éFî™ +w­‡tW&ÉTÇç›Ø'ðáí’TQ= ®ê8±c’”iTÊ8ÈjG=N¦íip@J-x„…îê³<54ŸÛÀèkGd%PY&½Îüb×(eÈ°&4ãàn³jO’Ô0Ó®Â2µ@ïj¥…ˆ ü`5ÙeþB|No¹Ít_šŒññjöØéDJpRsÖ’áÞ.š…ê´‘QI‚!Ö‰Ö²:e}eï½RÌ#sPŽ/õ5]ç"]í˜õÎÞ8é ^p ‚)äifVä6Fü”5¹eÎÀ¬àܘ#¼íëÁÙׯàæµÕƒÒÉÄ6{·L¢E\{†|6ŒîÐ){…Ö#$ÎYWÜl‘H™wé…H6Ûá8có¯ÄmTpZÈÇ¥Òøдï±uÿ¿’‘µšRWw.Q-ïæ §K,§ÌëI[¸Óäžø +Ôc\—2†¿dÞK*6ËÅ¢2_½=ý» 9x@÷¸ ‘%)ÁM¥ÝÇOL-²Æ›h&øùí÷jÍÕ5£2*aæ¼Z¢ÎA’ASe£{°lTLCøößb*â¡Óºn¡œ¥Š"vRŠXDÍó‘RÔÉãÙ^mIt0;™D KØQ¶ –!±h‘_„Ë×á€Îo«Ipör¡²Ýà¨jc°Õª“$‰EŽ~ ýÙ>YI!âcŸ6ñ¿0t’zb«þù(Xºh+¹YëWQ—™_Z»xL•\Àššò‹× °{mUÌ. ©sƒùÓÓé+ÛP ðÔÓMe¼û¢ âԢ͟6_Ñ^ è“f6 V\½ôqû[= W¤à=zèv¼ÙèssÀr‹kŸ,B:õW¼c¿Ã²t0§ªbÜLqáíüˆ&6#Έú^ ÁÑ&ÞrTMK_,óû¤:D §µ¨;»Ð˜÷¼À×ì°­[GЄœëBaFÛ¯8eì ι…ÜätV2‚e'+‚¾€ÁJ$¤„Ô“ÀÕµ…þ´•ëœ>LyPÏp:„9"q˜"nÚ«Ð|†*ÙêX?èýS‚²ÄÂjúN÷Él®M’€Õ)³ˆ`ñòcL7¹Ó«®ò.ͯ?àJläá +õ.a›31*.¾CoÖqW.{˜“©ÆÏäbÓàÞ•rM£” ¬ï ¢×cøIŒâq›µ—(ã%š`.AËDcyÙ‚BI½ƒˆg?ꃑÞd"·9,d£V³*xýþ£ëU…w{LÓ$tǦJcÝ ] +Ðç!VÝä ™j\yºút©¦4ÅØ-2W-™Ÿ%Tføòe³¾!†3VèqÏ!?H6`ŸÏŸ \V³Ö'ƒ–|µQËö¼­òZ4?ª¶‚ÓO3ͺ)ÅyóAýbj¯8&tÿôý”ÞL\y[Ãz¡–-ºÍ \©äÁr ö¨[>û^úE´?X„,^ÖÂܼt,K‡ÜÐ󔨾?¢¦ä´äixýç•`ÃË™ÅÉkÆ #ú–Uƒ"Ø$àcøèüƒõp˜@q® åñ’öBÚ Nq0úê­W?ÆRqŒspŸéð'~çØZhmëÌÂýç"ôÙ‹_º 5¹@M¬–?ù ê.æ*‹ ÊewµzºÉß™åž çP÷éüí¼XÏw‹ªq²»êþD$»úŒ>F*‰è§ÊÔ[eSù« ÀCŒ_š°ÎOþàaÍ¿Þ”Ú£Wn$ØžáôP¿SǺmíSý¸™sµ×´×~ØSÔHiCfÍ*rMä0Å,5bÕk0pÛurÂV'WfTn(Y¢Oì´ð˜,Ûb½ö±XŸ'dÇ—;@Ð!Â,ƒ-&%êÀYé°÷éž™K,Õ‹C^è+–ûˆõ ûBJo‡š˜e )ê>vÃ'”ðÊU²Û‡õ«ñ×%U¹A¯ãG˜hÂÛÝX?o½¥»Ðv¬%LìëÇÏàÙ[-x›Zfñ3,j«t\¤ æ‘SOcák!`¢¼„!º²c›#ä{|êÈÝÔ\éxT—ç dA¢’70þ•t3òb¨þbY¿Ç¬þ‰è!ýÔ¦”}š´Ãª×®US@vÌòc>Ü®¹¨IÖ'Ãò$8ÒÝÍ)(5›b1Æžm;º(±:ÑEPź/§Ü3‘Üù—ö(<¹ (_¬r‡5Yî1Ú¸ &–‹†3n¬âRkf¼8°Ì–ÇúÄìÄ€EÁ·Qgìâ(öf@Þ½¬Ä]GÖ.oëö‹ +ø›wäŽ]ïn8 'atò©KI}âÁ&øCüpß«•ôP ®™T'“2ôvÿ@e±ž¶ +s½³hû'@‡ÚÜRU3X˫ΑÐshwóþš³³ÚF‹™Ã”´°:ÚµáF2Ÿ[~„¼Zu%[ÌRL(fû€UÉì~ñíþfCü×õ^r”ûî r—¹-+:}:©½×÷]W.fm·¾÷QÒ +úZÒàü˜WÖÁT}©¨nSv+¹V†D0ˆk±Gã,ŒÊ‚kíw¹y`ç +ó»ßhÌ@4Áü p1fÁ%BÆGÌô˜«ýÙYBõî{ÿÊêGO&`“ +°¿?µ·ƒüâaXîˆg”HrdWGlM‚©Ìó2AËF<Ä*Îèb´ýP‹@•¡Àóᩈý§8áއĤÁ…¸x37ßN¼ ”ÐX¿ÙuÉ"?E¼zZá¬|qbuAX²€‡•ª‚’„÷BŽT2üAË$ˆCí{ÑhŠ¹*à,–bM^¨šøŠŒÿÞáa äh)©‰2ÍØ*M=·«Ñ -Ç¿YÝ4^à”`Ƚ 6¼ˆN­¹§lÞQ¬²Òy”É i®9'ÐÖªÆA~–£Hˆ$!’&û¶":] . ‰'13CÓ)“Ó·îX$¸$Ï~®Ö#g©³À$‘+/õTù€Ìê†e1¨!0BðJ¬TËò\”Á4æ†É+qù©Ýú B&åÔÅ% -È«ÆÄz¬­¤¬¿„„[a1ŠkDNž¤OƘc—®|#|{óa€ç}¸‡NŽ>F­6!—Ž ¡ºk,|Á­ñæh;/L+©ySŸáø¢ƒ,vjbß¹B "bîö3Iûä\½ÃsÉÕ/ßÞçì@ „ídiÀ|­wöëæü…ÈЩt'±ÓAÌ·x,[KëÖ–3LÛqp¤#øH‡ìàÕRV{õ„—ìé£%LŠŒQM[LJ§•YÊ‹5ÐfÉ;Å7º +£"ÙodIH +&Åd_$T–ó&œXòæŸA(Rœ?ƒ‰//¡€¦ïY @§Î‡5ñ#ÜC'­ÚrWVÈ hïi³Øö¸þ~7èÊ—icÒt>¹¿Cæ!µàÁóú0MBä1‚eÎþp[Ìj–>šQÚIFPBh,y;tEKër¡6#ÏõÿèuKÔR PšÓˆÔ#_ë¬È5#áýÐ0.:E™bá6ŒùÇêÀ ôIÝÞñ9Ôéaœ Ÿ»´\$&€tÐþÂ.KpàìθKа­j{ÂÊ â Å€<î¯ þRQ—.¿O/“àÿÕœdzó‘Ý“hN#«Å‹æ™æMÏ$îÝÁ}öy4šî@ËÿuµÚMÕT"¿1»wGjSôôœ¢ÃÖ~CstIZó ³š#uA͇8êr{gíÖš`¾LH7§˜’WŒ‚ÑOQÆNqýZÔ­ÌøP·ø= ¼ú¬š Ì~‹š%×RLõ@ц·óóÕ;ttûIPHjL± ô#O·º™o•Sw~3Cô ’«GÆctô«Ña ¿’tÌQÓîÇR"ò¥0mMvdq‰3ïì"ª)c'¢{´óF© ‹\hº(v³Oèc^FäµáÃI&”ÝøPôÞ§›“X{j›ÿä»ãdCfÚª¤uꉴ ÚÍVB;¿€Žjâd×r#?%rŠ»,Ø2T)¹‡©¿¸bïÚS±¨9Nþ¼@ ÌŶ]z„œºópžÃàm¨{UÊÈ‘“AãÙ¯¨ 8ey[µ-e)¯"zð°D«Òm~…òÞÃØÖ‡#ª÷?l¯vOý¥ µ7VÜTО¸hæ=|lM¥¯sDBçkîÂÔjt³!¼Ù9v—õÍVˀ̈¼½ÔKù¡W•]õk}!-cÑ¡õÃ?ž¦-wô·žöí¬Æt6¦ýï*iÐSß¡|Ɉ§¡ƒâkËåóí-SÎÖ[£¹Z­o@_ÞÓº˜,OÃ|ÉLœií¥þm>:Vf…$gú¿tžï¡ª«â£,¥{`— Ú”1 ³-unÈï¬ÕÏY³±^7RYMYáÄŠûÞo§3¹³ÀЭå¸.àƒ¸ÏXï£Øx­ºÅ¥}}òqä­·äØ‘e)²°KÏ»H_U÷#F£ÔyJŽXðÖÝZ†sà'‰îšÓcÅ< ç0ƒ‹Å¦/Èà ÿýÃïy»Ga`ê£?Ï¢, +JZRêÏ+–/¯¬º–2…Ëü.©j†ôìŠR +[—dXsFŸ‹¢:æ<àVÛ+b7וQ[úGèM¼–‹=Qìj°ÙÊÏë±g¼"œöÉÑõ† §×®Ý‰[n6?,Á¼_xôšçÅê“R'Ü»sÐÃà +?=MÃÏÐæFš½‰Ž@Â:Ì«2ËkKÖêQÁÄÉ ¡Ur C‰òNt*¾8)¶Ž… ,d¤F*¶‘'pQ>L„:Ÿ÷lÝL,j|'Õ7ˆ˜Ó9˵Ì3¢žliRVòÙ0":„=jGK¢Rá9ö²£òÚÁÅ'Nª×$;ø¹y˜dЃ©ñŒ#£ +æ r´`³m\J²ò´³3ò€%b´dëÞQ™¬0qrù“žÁB$Z¤ù$‚Zïò™ykÁGq0m¦ý tÒà¨Q³WuðéG +endstream +endobj +2226 0 obj +<< +/Length1 1761 +/Length2 18831 /Length3 0 -/Length 4049 ->> -stream -%!PS-AdobeFont-1.0: CMTT8 003.002 -%%Title: CMTT8 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMTT8. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. +/Length 20592 +>> +stream +%!PS-AdobeFont-1.0: LMMono8-Regular 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 778 222 0 %%EndComments -FontDirectory/CMTT8 known{/CMTT8 findfont dup/UniqueID known{dup -/UniqueID get 5000830 eq exch/FontType get 1 eq and}{pop false}ifelse +FontDirectory/LMMono8-Regular known{/LMMono8-Regular findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse {save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /HZGQIC+CMTT8 def -/FontBBox {-5 -232 545 699 }readonly def -/PaintType 0 def +17 dict begin /FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMTT8.) readonly def -/FullName (CMTT8) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle 0 def +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMMono8-Regular)readonly def +/FamilyName(LMMono8)readonly def +/Weight(Normal)readonly def /isFixedPitch true def -/UnderlinePosition -100 def -/UnderlineThickness 50 def +/ItalicAngle 0 def +/UnderlinePosition -133 def +/UnderlineThickness 61 def end readonly def +/FontName /XHVBMR+LMMono8-Regular def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 98 /b put @@ -32141,57 +32228,133 @@ dup 114 /r put dup 115 /s put dup 116 /t put readonly def +/PaintType 0 def +/FontType 1 def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-456 -320 743 1014}readonly def currentdict end -currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ24 ÆòŠôWäEvçQ‚C<ùó3£‹ØAÀÔæ‹ùàë2¨ÿ·kX0k^ß|™‹:Ù´¼fbã,|Ð=úësLu2æK¿¿Z`3ndgï¸RÈwô@Ó),qñåÕœéG<&¸®÷­hï'¶ì. ΋883 „½Aœ½®B±AÓÔ¾I$sò@Îí“Féù™ÅË25âÆÚª,iá™êê pKôœê>˜èÂ6K`Ð Ó%äÂEû(‚ô¼„(bsÌP’ù,‰‚…³âW‹K¼T"0KDǯæ¯Ò–Š›¤‹kIU–ܨf/+WN/ƒÐr’±Ú4÷¨½qðÕW@À¹d³­ÌO ŽÅá¦G|Babhyè52+ÙÌ2›¿up·a²,-ôIÅ"‰³ðü±ŸMC㉮à0×’v\’›ÝàäG’‰\¯% ØŠZ\+;í7t›‹ÊaßmŒJsX÷€Œ´Pcû0ÛNqËL¬ÌÙÄì%®‚tãW‡tåį,ëœ×Çñ ‹);âbÈë»õõ&L¸î}NœÖ²òGÒi$ß²:[´<+s\6qõG•úq° Ô±%E~@‹kÖ´ðøÉÖÎþ„÷[iYKÆq[Ž‡× Pvø£brÒÔ~ÍÇã™Æà1zŽ¼6‚ð>)廄¾\û¯o¶xob÷Bi€n\!¸[ï(;¼"0¸¾ª:È÷7w…Ð yqMom¥D‰$cºqÎKRXr šÿÁc¶À3–m¢½ƒ‘œx]Ï€¦òï†â‚F†Ð…»í4_~nµ} VìF¤çuM;¹²[¹©)(6!^c«0Ü?;´´ïÜmÖXþ¯æBö󈊄±èÐ<ür#£ƒæ5ªö\ÇÖýAµÎçþÝzñ­‰¢ÄÝ!»±C`0î~°îȾÆUt[¡ijÛÏÖAsÞÅТ3X¥ñº´‘ûh‹ ŸQ‹;l:²´†hAßyÑPñw¢Š_R˜4žEwv½a¿XåäI› Íf£ö”JÀ{‰ï“ùþc -©šû“ÓTƒXЂ¯?6ß_×ãe—–?[=‰Ük³§ºDÅC³ÌàDר-'íÜþ¶Þ¥²_:mȹw‡à¬i)EÝ -É!Ã.¬â(’4G¶Å×gÿÐMwùÀÜŠeŸªãoðtn“¹Eí§Êy¡xÕ<ý¼PqÝ;AÌ÷Üâê >OÍÈ8‚}y•€f€2r¹ÏÇD§˜Õ’ù%Æs¾}w`À ÛÓ°ß’Ö/3x”Œ®‡Á6& ¨?ügçÙ‰¦óÊ.Ö\YEIIö~Å:Iß`àÔ‰ôŽž—Òµläæ±WI1‚{6oy"˜®Þ¤ûüW[ Í—¢#@xsä*¨·å®“dæÎ4]'oK·è›%~%j¼Ã{ž|FA1¨àà´@Æ*z}òÔ\1±ì3©¤%ÑÍuÖ}jyÌ<=í/Þ ê6l§@CC³À \+ýÄFè!} †kåŠ9ÓI ”F _’›îW˜ùÚª+Ìš~Ø"'K²È["ŞР˜öººß4§³±]#)Ÿ±¥»ý l'ª¹iÓ¥Úì¡åj§I:]Ïe± '®{—°½åͲ™}ÃÏÙÀŸïü¶Ù|€+¾ÁÙn­ Ð³Î™Yçòé0ܬªs¦d¨u¦;?äDz~Å!À3È·gÒÎǯ^@œ³¼Üe¥Ã×xƒ¯òÔVa؉ýåH±aÄŽqO*K{ŸÚà¼Ú‰¼ó Ð%½8;'FJŠ`‰K¼»!-%l•ëR,áܽ†x!öòÅz¤÷á ‡óóCêB!ê1¾ß@÷¾ãùÍ K¾=\ßߪðb•(m¿Å4ôwÇPòZh6?@„L½C@²äÊ8uwç1ÆrÇ¿è¶ù©Ì$ïÊBóKÅ„ÂØ -&Ì¡‰ç·Y5<1B îdlaà;À«9^¿'l„Hn,³ÛúºW˜÷ƒ{ÐÊY³µ¬Ô[œvñ -endstream -endobj -2223 0 obj -<< -/Length1 2299 -/Length2 13894 +currentfile eexec +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿[úKª•LI5Œƒ(¬Ú¶Ÿª>H·Vhg!ÁM8óyiÆ4´óW~Ή\ef5S›ƒDy¼îè ™/ÇÓs©jeX¨Qö²yýsÜC´rny,qš€ÀwʘNó>~ï:Þ„¡Ó98ˆ>Æ0—jìÐõãºE™ Û¸t–†”=ÎøxÌqÚ¡×ÍæsaÐ2Ô… )–cH.#{2­ÐVí°”ñ#ØB¦9Ü;öà‡êT§ðÿO›z9R¸BѹŠ:µ2@)s¼UFìŒÒïÔÕEº’»+¹ÓöŸÑ~`Nc’ŒdÙ`À§XXyÿ¯R1ð×T›ŽuGxei}¯¯'h©YÌ°®ëC·s¤ +E€ˆåψ#†poyL0 !~DðûÍ,ÃîYÀÙJC–Þ}”¦rH6¿ÓŒeN Ó¸l¥HŸñ¸È>·Mtô·Ùn¨cÈð=¤,LØ¡ šÛv~ÉÞž(… v”w¥*B¤@nx̽J 9܇A•&oóĬ‚ßžßIŸdyz„Ézg ªðNž•ÏA¿î +ÿ})/Äþ¹Ñ‘Z=q·´çx +ÞxG~½ýÇÞÕÆ»xzòˆW™…5@CütÕ¾ÞЈTîG©=ظçð’½A¸DÁÅ\g1*mÃaXŒÃç‹ò΃Þf»Rý +žàåqõÎ÷uú`e(Å¡Ž"av‘. œ{‰ÔL¸¤#eå¼ÇÓ(ìvŸKL9’¯ëeÛLHAkά&™õI¡ú§á0Çï¢q\JL›hÞÑ(Y|o=æ£6¦‘[aEáðŒfÄ£àI>8É»{Oæ*dÉofkÒ¢fŸ8V”J:_#w¾’.yŽÿoPxY/Ûð±T‚0RÀ|K¸ °Yk+qœÅHfƺ'/,þá:Üõ¡¿rÌSW÷UÄŽIôtG`㚺ÏÓý +¥Û¦IÄB£î“á6mz ñ|ÝrÍ3|«=5­º#“y.‚ø•E\®!å.³î´Z&f>hn§<¤gØ:Ù7Œ¥SÈЋk4ûóÝŸìH™ŽÔ"ç{¹–¹§ â 1{—Z«ðß›Cp¶Ä1¯(:_H.VO1qáÀä–B7ÙÄ€yÜ?Q:þÙ Z¦„­wÊ7#HyuàEŸÊüA¹yà…õý$_ëþÜ›±DCÿùÉ+H*ÅÁÀBzÒN,™vI F¾l€2«6-eÇJEmZw°Ä8!ðGìãža†gÅž¤™·D¶ÜU]'‰h›í–jÜšÃÐn$Û²Ïûï&v3Y s[íÏç$ Z*1Íðëãì¸êu1köHÃÔeá,ÓaÝH’\¡dÓl«yBŠ/$a–¯ ËJ•Çjj×òžúîfSy‰Ay[%:†Ç1dÀ²ž„Öø®«æ-ŽˆMt“ÅÊäsÉ6Œu‰xNõT …¡­îcõ83Åt!%ÛÅ-ËÀ')o¸ Mó>~61"€í( •¹ Eê"b /§×R}K?Áôr­Ía–æýø/m’£ñ~¡þè Qêéïs[ûG·âEMY ö Íù¥Ü E†Ã™/Å?ÑéiÌâ~Ž¤V¼‘wö0‰xøÜpUñòñ0}øeékñ;Ûóå>QÛE‚…Wè;ê¦ël†-0¬ŠØ›ùhî1!,{h®)TÂ…ÙT¾¹Yá`¶éK­\ûÄS£X(…&2/GÑf'$rˆé¿\ŸDä + 4+„‹c- ˆáH/Y™M®É£Ò‹Ù€ŸîýF˜–FÓJ)AËDcyÙ:„Ú}Þi(0Îê/#/_ðáVƒ ƒi£ÝBâ/XQÒcÌÈØ8«½‘XœõëÅÝ Æ/y涧6q%A¡æJYÑ64¹xºi¦1îzN æÓ>×kö’û"ñ’Ó¼hèuh¨Dî ŽK':Ë]ÏŽkPÔc7aŸ†¯¶7= îkFq{ø¡xâ&n¬Ùµ‘¹/)ið¹t‰¡Ç¹£PŸ—O@Ÿ ¹^wŸ.“å +eƒ©”¸È}RÝkxÉ´iªajØéÁåû>¾ËÕêSþ^mš ë7A3¯sòóÿ4ß{Õ’¨eø´ýIêœ1Ö\Ù§i½Å8j&g7©Mfä³D‘L¿ö„ßa%$ýBSF*×@´Ë`p¡xBwy~ˆÊS©I™cŒeô[W×`èü%]îsû³*ñ‡j‘D á1Eœðû!°üÙòkÚ;Fg»ÖôbE°®ùîrX1Ë»q+ÔC>I+_ß9æ¾Õ_åVz½=™QõQƒÊ ˜G<îwwd2FF…røJl}ý¥Žµu@ô½† +)T`g-Ô€Â9HûÿÎÃé7N]+ÂŒ^é=#fQr¨+lÛ£&M¦‹Ãù —祫Ô'>ÿ©i–jxŸ¢ó}qìtßÞ,~õ³ÅB(ë¤ÒßT=•Eâ øzsQòn™ÎÁËÊK EÛ?Júah·N"a¡°t6-fõª"¦\N…Q:”¾‰BÞÊ/¿o©,ûR£¡ÝaE½ž¾Èñw¨!™Ð§Ñ>ÁÊ,p±—w ÎxÔDöíù„ +‹td\>\–§Ø‰èy¢2·•9f{ƉË(`÷äËèH?ñh† õ¾-YǺ;Þ qÃyWè…ãÔ©AOßëÒUW¸_:ÙKc¸ÀBT®v˜DM06$þ³c  Ÿ3Îä^Ix_ß$¬*]Ö4å7ÒÛbŒ<™Ðë¬Ù‡[Ôô*Ñ& z9àø~zÕüð³êL9שŠ€K›<j +ŸhQÿŸñ þò™.–JÀŽ= ¡|R3ÂËÉ-Ð<ŽÅY|`ÜøEb÷qX¿¾­:¶–‰ÝrµYHÌ 3~!ï\úúdö×¥ÿ¾,Mxß„ ¥‰™?mƒ˜q…툵ÎQÒ)³¾Zýщ¹Ñ(¿ã$Ò´Q4gÝ.Ÿ!Nb_ ÍBH±_œ·íWY.Ëù€h@ ¸jžæwi&2wÝùª®æ+0ÇCôM&¼bD“±ÂMYâ¦õ…XÊ'N@q{{Ô-½·*îTK`WPµì%ãµ fbõ̇¦—âó$ìX°GÕp>‘vÓG7é$-ÙßÔê-¥S*-³ïÐwÙSÈË /»¸^ÈhÔòGwG•"Þáú$9󯘛Ù˳¾ð‚eó×gñç |Çx}T” HÂPð9œÎ¸tŒE×h½(Ö¢þ*A„ÇwŒ§ÁÒXºÄR½‹µDµzÏ/Ÿè>>h›*¶•µT¤J@ßp+E •ËÜ!õŸèž-ÂõõÝt_2=m‰;Q˜ˆŸ¢Uû–` y3Ìåë½ý.h´ì»°ƒ},îX©§Ao×n÷  gŒ°LJYÛ¶MV}åP»߆¾rû©\®­$dØØöÆD¾v™ÿÊs´6Rw•º¥†ƒgýÕ³ºi;»9"¶c¨jÁL¦±Iå*Íu²æ+I*à–m]Öý”ŸX÷‚ÂÏÿ°¸¬d¶zƒ%¿†úBå$³i‘€oáΘjNoñ‚ak›:íc5, r&&©œ¿=8-AjgË“îX×Eÿ€ »•¹sûÞJ×Ø6­!xg#vâ³üæ>¦ƒ‹Ù[}¿='XÐ)þmÁ3jKTØÖâdí‘h5z[j¸˜²_ü +¾ô?ÃÐëÙ& þ +öÛ/”Iƒ±š\B2‚áý +™“kz[66ºLœÓ²gi­¢ LÑÒ´¡6zZ—QÔ_´sý<ò™•aÞ/á×›Ç;ä³”ésŽóâ?w˜™–Í骋&D$¢(¿‰Ábëhêm±PêЀ¹ýÐ|ňö•n¥9øÓÐ2¢4óˆÏA8—0lÚÙr‚¤j|jHõ£yéÝÞ`S +ËýSî#$+LÇQÀˆñ~ÿï×ò[Õ—ŽuW‚J­6¶4Ÿ­£¡ZP Wš_a šP7ˆº»1;üâªÙúA–bÙZWš¾Vfz›uÈ—Þe›ÃŽm¦å‡Â>p$œ26tàaýP¤L½ó‡ûú¥­,´©Gšë2°È2ÿÖàègý¢S¿á­»ðFݧ,lÎ.ÙnúÐÌèx¯•PÖ÷i‹–÷­«Ë™Êk ºCT¯øдâ/å€g5 ÎÁÖ ²Öšº–g>9“8¹ˆ#Nž–ï?A‰ªŽ‰W„¹×D$ÄÝUu1ŽPÕÃ3u–Ýœ®mr8-`¡§S*Áñ3©ãhs&Ov¥–? —àĈ}ï¯^PÅЩO—[Èåyþ”%¥vä¹»Ó…wVÿ „Ü‘F%öÏ!ó œÍyœ$‡[öõlûoÚݔՃÓÖ_VüvÂö)Ó>A uhêÝPû¸‚"ëð¿žøUî愈#À'n"ú@!ÔÝ ,á"gNO ËP4æ`¸éøWÇrøî¹?4‰630þ§ÊÇ;%ª Ý"|˜ÉõΙN¶‘ú8”<~9TÀBy ‹ËtÌáùylª´¿Pojüf¯¸ôîrñoJƒý;o|Èíˆ Ö <†ÓÓ(y¥õHì÷[˜Ȧ–ÂB¿I¾øùém¨G$U‘ maò@S²Õ<þ¹êTH`ÖˆéÍò^˜vŸÚ„ú˜Ú¦)8¶lhÚi±ÂâHÔÙø€Ý—Ç8Qn,^ÍrÓÆ57oˆò‚8â¼4Ñ^]ò•$4ðîèB ö´x÷±y‚¾ÎÕÆi'Çe8ØCJHéƒ ±ÊHâ }Ýb †­Ó¯ªÓô˜ë3õ™©³wJ¥»ìM÷÷d3n¨!fSº/°[ý€ÍôA»YJýºIõ®îúË‹ämäj©ìá6ýáye•p’”̪HŒ7Êïßï]úúÊñ ¢jPŸqinýj¦_èÝÏh–fYG–p|;²û¡]Ýð¾Ï·{«Zjg§ÀίãéUÙRn} d¤Äw4‚¯Œs…ø;”fUáXŠÎLü“š!žÇ§ÛÉDÙd¤ågšª¬È« „Ë»)žÓQÍÌú¡áæÛ + +cü· V?èð‘0î6°ê¶ X#|[ +y¢qííìJ½›yk™+!æYvX‹£àkå嶌˜7‡QLÝYö"Þâ5k”)$%Æ< ØZN¦ß n ©Íçeæ€GKŸ—“uˆŠç„DJˆy³â]ÊÝLu ±’Õò,o|òMç•fi[ä9£7È4ìlŸnÇŽðó¼IbGø<P´ ,Š2ƒì ŒÜî“ÝÍõ½ïµðˆrÇ{ pÁ/|„PÉqoŸ&^vÿö`ÅJÀ1µmðo¨ÁÊñJ(ú¤Þ.“~tG2ãú¶¨±¿PÈ@9À€?ÜW” Ô"«šúã•«ÁÀÎ8½§5zkQ‚‘~aͱO1žßF>Jöø ;XEr¿å礻îÝ$A¸ÉW¶ +à¦9úÖ'{Ö{D&ü¿Ð € Ì’Øy (0ÎBUÍ å}ù©h¸³H‹±Ë5rÊ|**Eï4= ƒºm +Yl½ÏZ–ú2Ù !*ó"%YÚ¿½‘J¼kŸ-Lp¿¾^¹ªs´~|/ã`®yÚË•…kè{Iþ§tÿ‚¾1FA +×7vtÛÍŸ¯¦Ä9µ§™^”½ª K’óRêÙç†!«ÝöžËÃ^r ›/$íåz™Ûõê´r/lö’[3ãÊ#.^À +n^ÇR½Æ¢ëÅj.‚ãÄØZŒ•=4áÃÀc)JPšZ)y&‰qiœÙ¿¬Ê§Éz‚4OgiXËçdì«èJ¾ÑPÁofÒ¯“VóžÃFTÚ”?U÷S0Áºò«« +Â0Ü°(¯šIÞ£GXà@4°ÂU|'77¡b±Ã »µžà˜h6Ÿ}l‰ÉhىϚ1æ¸õ^Év8lBR997ÉÆ2™äV|lÅE™v›/!ʘ\¶M²Õ <ÛNCæÙ_7 ²‚¸0ö-ó|~>äõ±NbÜKF°eæýH,Ý»ÐÑ?ÔºòlVÊ'¸fÇ tȺÛ@ád¨±uÆ®‚Š,2³u! þŽ„åµZ“·3ÕHu”Å +Á¹ŒlUNpZÉ4E“ãHg•ôÎhR‚} 1G½ !»Œw·Eº˜ùåÄsA–µk·ºK;“]{Ÿ†¸Õg°O·¿¦…²¸£>ÀÁ]ó¤€ýÀÆ2Øûu,×Üã\Ò«b‰è¸¨Ën¤;‹µ±={¤ù^Tçn0h¦CÙzA"ü2åt@ôË‘_0Kõü mW@.}yÆë© +ã9¡úàö¹ÊgÖ­0ƒ¬æê0G°ÃœztFD™CRp±{šþ. )åña¨Ý½@‡Ÿt(”Š!ŽÚ†ótÊë¸#vïGìQûQMï»ñõ6¸HÍ_¦…ÅI‘ùåÄsA–µk·ºK;“]{Ÿ†¹ƒã|yžÆ;ÙOõÙ§-ÅeÔì$n‰dù6¶Ù¢RŒ]™Û¼¸wÀÁå“<”ÉçOOøR9‹ +ZQ$¬’Õ±—N‡ÇöóÃüWÊØ…Ÿ3IYçù°¯ŠÔëƒå}€l·Ñ;ÎÇ 9 >¿×Ý­ùEð(Z$Ì9ãáak[^Kg¡åF3¼=—Gà‘xYαWv[¯2MA†íKóé)ß(Ž±¼ÎˆØšÞRã{¶â(uýÕÉÙÝ׼΢¬ñMGá‰P·'ªòBd!ð <`;Ö8'âÕ|ì9ž#œC¡ \/bN¬®îŠ}jr¬ê®!ÏÁzëHOØo_ùC¯³Åî’cký0ân‘ay¢2ÛŒÊÄ6WÙ8üJêꞈªö×+À³gFOÌ„ƒ)¼¾„$ZK^–º" +à\4ÞXaà®…Œ9•žzlþ!Žéì3«ßE¤ú%ÍhAŒ^xýpà?ÞñŠ'ã/ÀÁàûäúÏ•›Wíç$^k!öÌRj)9úûl0£™þÈü×âmçdiG™c»¤‹xyËŸIlä¨Jc¯ºös¸:‰È¾k,Uez^lD©w)_óu`QnÓ¬QÞ×?EvtÝš±ýaR:å¯Ý‹ŠÂ›Ó¸Ý‘Ôš‚îù‰0B5%¦üÆ'·²M*`úDn7cÙxŠÎÂÏîY£?ǽã×™ p"D›vz£©ë.\'ø±fÄœ?CŒäÐã†UÊ u LSÇùÉŸ‰¡åE 1j +„.Œ +wçûVÄ‘{çÕB=¾vúï^eSÍš…,Ž¢t)Í5,1æ_†RͬûM<ùùù9Q$ì—&6 ¡Æ?X•xƒµ×KÿBì6 zÉf 'Ž}ÞÅÆ5iO “ÇŸÖ?`˜3nWÃy9@ñ~y{!‰«o¢ë)÷*Ä'_Ýê6KΦåÙ[ ;ž7ÊÛ•7<°øÀˆó_,¶–Åyoí5•<®äš}¨ìÙ… ïvˆŸ'êZ2x··ZºµÉÞóáeAÒìaB#»9r°Ž§ý2T–™ãXŒë~[™˜Ûõê´r/lö’[3ãÊ#.^ê}µßÏ$P' ·_ü:Ú*›äãr"ëj¡µ&”Y[LÂUlúªñ]êF!Ĥ^ÚîÄ®n€b;l¸«¨Ù·À!¦_ù Ì8ùÄ„=»!áL==ÑyXÒèÑq zÊsÀSm„V¬Î‰Ú‚b¯÷‚Ë%à*ÃZI §4ô&ÔGÍøïÜÁȪ’4Æ@­šûãâªþ—`'±^$ì‹…hɪ@¬xÁg åΈœà´JO\W¶?¶Ï˜FÍ8=8WñÎÿ’¦ggU ê=~r¶Dùþe8 û>ꙂgûQÏBî­{°«w +¨}´Ã@ÐGõÑñ€ä“Ïw“J±vÕ¹Üѹ. °:BuÒýròΈ—ÑëárÆüÖ³ïö' 60¯+Ø—Û<}ÙÓË,J˜äˆøÔ:K²²|ƒŠé„²EæÓb#«—'‘å“—ïiA\jM XäÚ?0-p³›ýbŸ„µôŸ=V]&€²]§óù±µÈä=ƒØØuÑ¥X Eþýò†TP\µÝî,è+íQNÊß®ø¶>@ä'R‡ÊàS”ë4—^bu×o³{1ëv4)i÷/<¹ (_¬r^±zqt•¦¬ßö9(¬rÖ6®–’VÜ‘”AP<™ûÿ4¬å»pæNíï ¢G„¡Û”]£É1 ‘æÒK˧÷oÔ¾{$ŽÙo!M[}°«Áã‡LÝNÙ!F< +8žÉ¢UUï^nÚcIŒ¢P„¸óøòaH‚È…ÁrigΈ Ô;ŠÌù¾¹ý–è®ã˜'ò•²_Kã·Éfq +á(CŠÿµÄ~X'¸mÂœ¤g\E™AEäI‹Ö¶¶º$ÉknÁJ!…ýx¹Oùb…ŠåH—è¹ð ç3hj.ê.P×Ì@96Û^ÿòMK½„Ód†+B}–¬IÒ-U›­'ÓÅe'3Õø1Ó‹wmS+ɯsÍgö³:WÒŠêH‘æ­ô –³RpÇ2Ùž LïË?«B>â-‡a­Ÿè±ñóÎÃ8•r%£ÅgÅ +œ{íçVÍåä;»` ù;ýåå1è)„}p<ÖxÈ4ŽÙM#ª±!DÕ_ÍïÇɨ•Ôè7@M)¿>PqÂÅ +ß®€™˜à¶À,ñ+蜠v@ÖŸe¦õnÀuE«³T3ìßoÜ£lóGy}õ€Ú +Oûò %ß÷†Æ_ÒOÄ0ÇžÔÉcÇ *=á38õ/.r”L|AIÝ–6Ù:[c*À ߓױ#ÝG*þ“êÈí^™IÍI݆̺¿s§"…'%Ž}Ó4œè?—Ó§"áܧ5”©)ÓüUÁ.™…}á3ÿžÁÚä80†´·“7EÖ†nÞ€…,]µo^9¦—ýÙ]ï‹-W>ü,¿·ÄõH¿áic‘P3q'Æ‹œy¤­\‘÷—|ž«â¦€ó+I4§hø’£ÜZ€W¼Ó±¢(Š|¾CiüR¦¢CÔ¬«7™l5A…׃ViXwûýaGÖ¼:N&l—”®­|f·Ÿ»LÀÝð% " +ª×â'c`¬™ÃW‘þEði·”k^±×¥ÏÌWÝÍ®b Á¤[Ÿcƒ/í·Z|<td’ãÛ`w   xÇì±F÷ÐH•[‰z0ë!+`)ꙃ‹¥¨Ž%À=7 l¿Ý9.þ“‚¥p#{M‰Eë0tàÔÒF-Ù 0M˜×\ps(†ïôâ*¯YO¯¢ “Ó‡º* 2G°|Ù¬¦rg¹þ¥›Hx#Ÿ5ıˆÍçÖÒÔädün‚ãòø./ÎFŸuX¼åÁ.¤ÿ(SÈ~±&¨Š'›s a_„¡{¢-ζ CJ:ÊÁ$VPüÄ°ðþAðx1Ø"OP˜¹(¸ +íãöZZÞ¼ŸËÓÃÇàÓ9c“+»Ì^¬±HO¼U¨1â¸(+ís’Ǿ+ai¥/1±Ž•%ëàÖv= "lPÀXÁ/И<;õxŒ†éÒYPºÁ•AŸçÈKúD34ò­ˆžJ># +šoZƒ§Ð­´±š•Ì*þhnUØŽG%|ät#û„m¯Ó)}£GJøU„°T¥ïd[ÆÒ™ØÌÀZ_"þý¶NB·:›€S!8Òü®/,ûHÕO@afP²H9ÐÝ `½ûýÿ¨¨|"RW Q;Krç;5@—šäó§O$¬øyõéÐ$sé^ï3w*ÝkÓ«ÆO(䙵ËØÌB±:±ý†G' ~j¶F§QAÀ}Yaù&öA€-1ùª@ÃÙ 0 IÐóíe¸Œy9 A€;ÒâŠa†ÎØ~Uåï÷‰”!]«Õ HÙÝÌèùæ—¢¦œ`7ÎÑhÿÀ÷—Ó¸m¤t²<¸e qÔî³O6IXØÞYæ{£¶#) °íŠ*V?hÁÂðo0¯¥Wy¦€.-÷Ü4!fw׿‘8)r k£¿ÔGeô4H€j²gX3§!Ñt¦[Qãè© Qšo€ÃݵÛOÿç2d‹BTa˜´ÀoR‰ÁæK¢ÏWé^áú“Ì)™Õ!ån®OGþÁdÑKCÝêoBŽÎ›¦2ἌÏ(•2†¥³OaQ,1~’¨ŸM*ÎRhÿå„"râ1˜<µévÛÛ-§ýS¸ó}₵¶)„ÊW1¬@4£¹©®n½73´‹é0O‚±k5—è®U·ìÜï½ÓJD?&;A£²e•Î*}ª©L* ÿuH7gßå²×ç`Ÿ.[ú{ýš•›²OŸ±‘PŒuÀXøûe9œ¼ÙÉá)PE”²åßqˆÜ}ßM|‚s&³Æ/XÁç)ò$ìûNÀ ïHçål§¯–¹-Æn§=<=ý7Øük?þ„á<¢KîÞˆ»^± +ãMÓ®aòTé4¼)C5P,(Ö&Wgæµw7Lo‰ä¬Ö&Õ2ôõ“:"†·€º+l\TFšã­JQ Ve5½®äpïLÛ¦»/µ1^v {⡨%-¶r×jSò0K§Td;hWÚ»R» óÛ¸‡_ô˜SÉQsÀhU7%«©B?K&ŒX"AÕlÜ® ›Y¥kW´õ7PJ½k¯—ݨ꫸Äè]B?@ý©¯>ûÜPK÷ûG‚páÀã6PâO5>B:’TÜNj^ÀºÂ¶éæ´•À=*Oñõ ÒÝÐí1! j7bŠÇQË$ÆÀÔ›¦ct†?k\H>i#en’J:è±ÈòSNWíæ<?:Y¼ˆ¬õúëÇ$% â×[ÿ¤¬ŽÕŽ4N‘ÙA/4˜Õ;±´ÜÔÏIº7¤®B›y`‡C‹Ù–ìgXê캽Åüî>´T‰,ä)Ãì¨c[z§Od&×q¢ôõ ÑÅÚŒm&‰jA1;*?äµ;-a# &·'vÉè/bô*Ë–$®·‚M[ +é(Ú@sò@¨\y¬9 Fêªè^ ºè·@ejp@Cu`ÎõþgÑ$­¶ õ.N›ÀýÄîE­Ù4w'<%ænÏö;1-k»ú¯«È?ê.f'Â’(¤(ø ’Þdå"p™)6ö¦-±…”Ü.Xƒ +}è¡WTGÌó»j`7 ñŽêÅË5Âqõ +ŸŸ§W“GïýIƒ§¢òXWoîÐÆà¡›ƒ°ýt = ԙð8?ëŠ ¹¸Q5MHË»¶1ÂrŽÉ<î°íé¼mÿõg]n¨˜ðú(ཧ½½ÏzKº¬,Ó—hh4¦º¹nàñçS]·Í{n˜MîIJ{¶7ç~¯HL#ŠbÎÑOb ¦';1a¿-<ãÜ=ôkÿÁ„£Æ¡‚V?¨øö ¢@\›\ÛœkÊgÞzã ècqÜž(‘ÜR]“Óî*¤Cb¡•àx A*`m3ÀsžÛDãPãÍøåiîâ_‰Ç•“}’Wˆ ›×Zõk½âA ²±6­’°ÁuŽ ’QÝLh"o&|¤À霣 u˵«Ú°W"â+Ï8+ø’Çg‰IýäRík:a¡‹ èjwNÊcg"Œë|¨2A°‘P?â°_uy®ôš=O$³Ð óþ }˜žû¼?‰e5ÐÜõ9™*2›0ÂÊsçEQ]û^<Ø¢Ëo0>ý ]…u™}kŠkõHYì¤åöÔ£sȈŠÔL)Ç_Åuå' ýæP×E¨Æáþ Ü"1NŸ(Ï7&QHfõÀQ†õjž9Úö³9€Œ> ¢0äOUD,@S³ +ªËÜÈÛX]íãèEÌ…k`lÁ’‡G‹M!]°w&Ä™Êáø¬÷—q´ä}ê‡3_”*šHŸ°Ñ;äÜðåuL… ¤J@}Ù«qˆÿ¼ãª)ã) +=qXŠ jŽñ +©0Ô`ÀåTª|ÖGš4á·VW´ý¸¯ž=<é_bœQ? PKX0Ú€vsAd‘‚Ε³:ÑaÀ>’ô`^eC5˜ci³`KÁ2­­É,ÛˆŽ{»º»OlM€ˆR|Ú¾Æt±5€£'"¾ý¨Ñê~ ó:Ĺ%Db¤/A U¶õ@'ÁX)cq,}Jß(Cü€š ’‡ðÇ“÷³"1\œ³Ð+p)s˜.CHñL”òÇ.ÅÉ*››}õƒ$®n;cÞ2:nêþ©d³÷”ÿ%;qTL£g{ö—¢nŒ1ƒË‚š´U£Uß¹œ§äKnÚ¿Š»s"Š´ynTë£n‹Á"±C*k©¨¦tÊ-_8úŠôf­*ö~‘ì+KèÇMr§˜ÈÂdºM¤$]5Íψ…rÃÔâð#Çd¶p6^Ùí*Çúk—SÏ(’±ŒÅ„ ä!˜Øf_ˆ¤«òÄrÑ’ù‹©LT¬vtÜùó¯8 Özh¡X¼ãýiÞùep¾©dªMpŠXLJHºHàø¼³†ù*†aÎnB೦ÖÍà=ÛçßÿMÖJ—hrõ›c¿¤E`þõ§ò²2jØæÏ°ÒffyÁ:eáÖ‡+w=~ ´WŽÔs%¢!éÉ;Ä)½Ëäõ*ÅÃbz#Ú²Î-ìSÇ o33”Ëo·B Žº­'!´:4Ž~ƒ‡1П`ëå€&ý§€– l ¸yÔ\àFå#ËšÒÂlû&q·-' ØËYÊ tÅ»žýÔ°Óf{_Ük㣀Sx†öÝÙN”:ÌÍ5‰6†è}äê–>'nòNä<Ö!ì2úƾ+³Ö\K?bˆšó2%oË>§Šž·KK µ¶t° `ôùæíà2Ñ$(€¯ÓžÞøµOàNœ›8€Tå{®fÎDq#ß\CIV¼{¾VÞTˆÀ0lnå‰Âç3¹¿–t,­§pû`Åâ7;$ÔÔvréª +Ç.c„)*.Ða0 ‘ø¯· ,)¼»vÿ¾q€²ãßäZvÑ |ˆôþÛûyOÓînË?‹YSy šê¶n÷Y\ï„1Q—>©½èûƒG0ážFÆøpõÚr©u”_»dûÄòX‰ÌÍ„ã6œóJQ YAø3çe ¿áFÉâµ2CŸi÷%ªEõ#t»ûöHQ0RX‡[Š±½ÂêY£Ô«ùFUñMï´íð’²5Ãñ¥83ÞêC¾U&šúñŠzŠn×!òo—Z9æÝP¢fГZ>?@ÌM—ˆÀãËVdÆ ŽRJým§†ÖqB!Dîrr~|Æ BÛÂ`£xßã +”DïUHçº!ä¡÷1<“™8´þžšÓeËS籉kðªÓw÷ô3/º\ 5cž„—› „,öàá~º#RÞ)K†ë _Ę̞,ŸWïˆG†ÈÝÇð¡Ò±–zÍÀ.L…‘!`lr?Ö¢ÃCûÖl÷nw;hØ˯Ô%3¬˜-÷B'Ìj=ˆ:]ê°¡ÜH‰ÿºÊ|KKôG8”‡ç!\ü +*ÍÕ£“;÷µ—t‡Ì·ú,p‹MÜé«JúY w®eŠ<Ú¥!Š:KaÛØ}W&„®/s˜)K1‘µî]¹FÏn½…‰Šä‘äpùœ4úhxW,[ëýÝ€—ã’ õlÙÉ÷_º’«Ôz^ñŒ:C¹·£Ñïsvóè•_˜¶ëOƒ­YªÿÎ̳j£^ѹ„†#ÛQÑ-Ì_æ®;‰ø~_V-_ªÞká†d!BÀy$"Š‰àdY³šŽ¹†‚? Á¼æj)(˜óÛfD†Så6£Ø“†áCQR4Ê_Ž/ŽB«ü—à€|õÀ~OÅ©Ï-$_ <²ÂÿÝ]:ZðfïSän¥Ôgs†¿ÿtÈ>ÚVÑÑæ¬,}`½Ïö©WæÕÏ™Q©Ç*'Œ[¨s öLœ>ÿà.{#1­SûŠq‰&EƒV bn«d$ă;tým‘/i6@SZ1¦½œ3ùCp\jx;4»o#0Ñ?NGùkaºˆé•Dá ‘Š°½Ê”Ľқ=j÷É6]ßKmyp<¢ê +’âµò^wf#÷7 ÐŽá"Ú +ƒ³l’ªZ¸K²Å=¿i@ä™UÃ3ðÊq„á%Dn\Ë2[ùCÃ×ó-IeWD¦±d7¶ºA?„jˆ{ѳ¸žïsV_= )ƳÁÃûK%•mŽ3{ ðmî.€èò‹hJ"D“òò¡°Ñ?Ý÷æG®(²¶iÝ¢žwùAžN˜fgRìûäbáG€ ‰óÐw&j9³OK?€LfÉ9 ÙURÚ›ê§ç L \³€~ú¿Üùø6ÑCm¶Ø¡'—˨––ÃìdT2‘¤–h‡†}ÓÔŸ_1° JØêîWaµ5YWÌ€2N…’]>Ì™*®{mãX$L+¢U3E®†~Ã)d|RlaLXÖ­vhÀ}LE#´QÄ>µ˜ecá¡IŒ9@¹E‹·µøœÖ–º”wÖ)ó¨-HÊ‚šábàá‹"WŸôæŽÚ‚›L‚5†Å«~ý#Êél”² +âñÑGª¶鈜£Ó}º=´1£f(ÓqÂÑX¡ºeÈÒÔ|êP.~Ö®$ÁQ®è° ØâœÄ`¼ûçsƒ«ˆI :ee‚˳Ÿí¥ö†=F€N9r©¡ñVìC/ïhSúQ¯ÔSÞ ‹@¤Y+À"m$µP6N©É%s‡C ­Ô¬ÍÈðfÄÖáQ¬¥¸!ÇKä¡ž†"›ü¿½³C£ö è½¥’NÝ—Û²mb…ðyfÓ=ê;éÓfÌ©öyC߉?,¼Ó«aÅ”hd¡ÀëS–ßÞ™o;Š.'º‡"½ÑŸ‰‰t ¿‰V ˜þÕŸ ŠWéFJ+V> rÍØZtcP.د¯Å}Ý‹þ=ŸÌn‘ZfëNŠÞøü]'1ÈAs¤››É¤h +& vòXØÈhÚz +j/c5~„:¤Ò§ÜD«°™üäÚe¥üôLÞÍ«W‰K?óä_ÑîF7]Žš@òU!ÝÒ’ÀÕ9êÇ‚¼?aTzøÌ¿ 'öç!}·ˆƒî ªºöñÇó6õ—Ò!„¥S·£®÷š£ôÅÿ„¨’|ÜUˆ±ªÞ–€IŸy2Õò»Ê,0Bò-ëq5«‘Þa)¦¯tþ ¶=&Âýõ{¾ª¢Û}‰Â(xzu¡öå­”JY ÓšÂ4`WFc~—ø˜[h#¼•P +¡q0ȯ‹¾cÖ±ÌÞBQç6æ±ËÐuAB`¢¯ù[rJ`ñ|ðVúûé壪XÜòwbM|ã|ïdx†' +W6ÚX”A@?ã†ß¬"G¾ JˆSRÇ ìô\¼£ýÞ¡Èbɽâ±R„+øþE#Zâ9§Ï¶Ö˯·fuL‰4ù«s‰0ëqaEZ ¬‹²·'HÐí?ºBÈ6±ÿò|¶jèú|jþò”´&…ñ…y$–žŽ³fª)ŸÆqx»Å~¶só‡0âÂŒÚKx÷ˆÏÙ!ÒQØIíÄAÿ0Z¾oôÝ‘:¾â‹úàêãªFůnù./ÎFŸuX¼åÁ.¤ù}dVû¡²_+Á!ÏŸOV‡Î?¾r™!ÆmsÓ¨K ƒ2À«“Âý³ýLAÈLf‹å+•ìÏšk¥Ó ë j߀ùs’HªR¡ò<4äÓª†&`=jÕ„é$ÖyþlxêCø˸v3crW¿´Þh‘~-›ø|øëHÇGŒa´>ïÀêå’C—[Õ)ÕÚºê†Ö‹tŸRÓ0ŒWsK9àýí¨H¦;|­~éj Á­ˆœ·…™?Z§J†xJ°ËX/Ä?iÛm‡¬LcYŒËéeô‘œ;¿Á…k§{E1óT] +lRgÍúæÚ‰žÜuVË´Çf¢ÒA­/žkòØN¶0Õ)a D3Ž h ÁñÈ »nìåÆZRª>Ð~«6²ÇÈ©t è—aé˜8È ÄN¦–€xo’a×~æ³$ºáRÒš{_ÅñtUFØáˆaÑfÅÒ1:ƒÕ_¶ +R„â&…íËZðnàÍ—Þˆ'€:cŠäpüf±¯qMá‡ô_‹E«rùvµe¼ +v + æh#)xÑüF­@aý²W.ÔîxÒM¬ô¶¬E æ®ËH£HÄ-y$€­$J–ËÞïp:¥€‹þ"p3qú| G©lÄAœŠÁ·£œ»ãç] \tš'¾XNR€\bªW×rÈàò£08•É&©9 Z,ÒcÇwåô Ø@Q1k¬7n¿÷Îi†«½Ôï øÒt¥· KWòG4 4aëµ0D>šìI©L‚îßcBG˜ACÃÚd¸µÍ¥ÖåY{Âë÷æÕdÖ1A±í æÿÑØ,äЄ5.—ð‘`ܨÐÑê%‘ÌíkC¶pRÍÓÙ™RpÄyÞšOkR ÔëgçálêB½€Ïˆ§ÝTñ.2Þ†fX Ü éû©ê­ aXèáX1_í©M×;n{B>)Š¨ž¤ÇûI\óg`ÑÊZVË~³—Ë'ý ï†DG‘Móí,q—S»íœC[î£Úµþmí.»ÊÆ‚ /D€ÇΫËs.€yN§tÊÒì¤XÕŠÛj±TÚç’è…S¨ewªV½âÄý3ëP*Ò†…}Ö}‰ŽS8Â:àã(ÔýÅÂèe,Y!])Žö0_ýüfx7k£*»Ó˜A® ¯Â)½ª¼Ç¡Ïèø\ÍÍ#é&Ë]RÝ7ÎQ&dÂ[,à†q>9d7ÓzWƒÛ*¤ømðm÷j"QÇyÛðdŸÒoÐ^±jwŽNئ–§Ü3"Õw„¿©ÜZ‚¶bÀ¡VjxVL›·öfÏ®þ.¼x€OmÚz[÷íÃlxã ÌÎNë§Zå É/_ooxj 1k™/T}é7/î ¾@Tcý&q6_ý5|ÚÁÛ+×£–.„r‘8dv±lhéwÊPQ=¨Eé5U% +A!Ða¡C¥”ò20CÆ^çèKæPG¦g4˜`Y®„Á†º=ߊ=¦&{²rAMF‘/˜æ¨‘åßæ"úçñ;UÂó Xʵí¸CKqJ:ø$Ió൚°i:k‘Ã}À ˆ¼Áù¡ ß…½DÊ<„B(ªgj÷5épMnãü”:£-.\6þS ¿-8è´’‡¡ØèÍnó%ܪ i· “á ÓeøvÚƒ¶FÓ]¡ûJ£Ž¥^Xõœ7MÆ­J¨±%ôˆŠdyÛöi¼*»DbƒZÎà(ñKhR§6;B¸¯seh:ž¬@Õ›™fl[ã,Ç"ç!'ñ?3­Î|[ó°z¢´Y¸³ü*áæ¯]+­Ã~KŠqJ&‰rÍáÇ&ô÷`Y±v:ó/Ÿm‰ˆp ˆÖ(M§ù£·þ_~Öâh´Ac´é*ð¹?% +`§• 99Ýw s`¨|cs•ë”8†T¹öPgÃÄ‘Ì£¸Ü7çÇÀÊMµÃ™?èÁÄ©'[Dº=An^N6ÙÅü 3·ª Fó£þ,1£ªÜç?_½„VåS¯qŽ ö¬oLæ cÔxÛÙÑùëiY±Ó£—fQÌ\™LÁ¼'äQ=Â-JmIð RV›}§÷(/½9„"õ-×Ôñ™»ØtÝD-g×+JŒ3,ˆãƒ$»ÒÔ.2S)tÛ`¶°Dåëv)’€eñH^Ny*àc<3Èä‚6À¦ؘÅqbQ—«pJÙ Åÿ<¹ÓÄ Äö£Íð ÛÑDÎö1Q»b»>’´„þ‡æ[-Þø¾úRè«SDè'–8 #ÎF_ýdœ<…µ7†ÿ#»0B—Soep°\ ÏÉJËîŠ+V;ʵ{Å¿˜±[жâýYòï°í[š?ù{3?Âáwx2‰ •oÊ^Ålƒö½]¤ñNvÄXxŽl(öDµ´}¬è&-â¨3C÷tMI2û_ÆÏšT }];»žÍîÑ çkÅUÆ1­­8÷lzÌæ£+±7HÉûE€[adô'X½ÒÈÌ-2¨g8üL.z:D8ÑÑkÅSú¢l™± •}«,»¸¨| 9Äøy¿×>Y3 6y4ZÜQX }tÉœ›â¶»8Sšå¶L>àªPm²‹Sèhù·øXÆl¦ ø ƒw~ ¦vÁÓ"9ûYzû†{µ]9÷O™“Ptµµ£G ¥óCa®TÍI|IH”ö¿í ª¥Â˜3ø•‰üé:Â8(Œ{¾ ‚Þ3ú‡¨=«ž#0m åðÏEãO>goW‰.{„éñ„!H> ”ÉšÓ泫 ìlB•øÿ‡à{¬ØE4ÜÓi¼eIØÐ%–Þ”<zRáâO¾QŽ.¥.AƒóZ½ð]# ôI›;“ÿ–‚ö‡4>öIÜ;õV¦2Ñ ;ÓÉ¥x™j¯•{–<žÚÒ±Kº˜”QØñô;_,“ãÞM!xÜHK€  ^i|yýÈ?e°æ¿zbûl/r§ëµæ[81,ðôÿ)Ô=ãBŽgö±ìÑ +nvµå!™ü®þ{¸«ÂÌT:À+[›,‹á©Œ-TåÀ2½°´jõǪq\P+½Ç>@ô`íC+ýÛ/¾*>D¾~ø8Ç‚¥þùDfŸDJ¯—¼¦ø›Ñ}S&†#8V-9dGÅó;T’Õóy_13R(qŠ³é#:eŠímxD´p”þ¢[ýš(L‰ß­µq“71Û,Ö‘·ƒR<…_Cm4í·me;DïÆ—f"MƒF0Îã±{2r+5ÌɹîËÙ×\%±Þ‡µGÖW¹9ΙKÔ—ã£Ü¼*å@žzõÂÆE$¯o× +¼—MgÝŸªÓŠF{K¬<—‘½¢üa ùŒHžGÑ峟€ ´m6ênêªñÇðTÌÝX­9¼™Úí¸ðYTºå‘‡Twu“g“JÿÚåÒ ^ž}­Š<x1ôg"cÃVb™ÞÿTv)‘ +ç«#8‰|¡ö’¸¯þ¦ç +ˆDn—µª¨¤j²Ì úöúªŸ´ARX‚ü’;éx‚ÏKé;#xò–•ŒCbˤœÔókÛ7L*­ª$Hošƒ¡›.£`+©¾£@ô´TÎ"ï`yÝÐõ!PkÊ5/ê@ÓÕèIq!èg(ˆ`X(ªØÆ’æÙ†º‘ ®7ž0€OPÂß(p£oVÞ¹šÄÆ ´i¬,,ÜAWFY5¶…¥£÷<Ü­e¾qŒr¨@ÅÖaàØÝüèñ/ˆ^!ôÍñOý¢N„ò/Ù$¼OuQ&_ªKЗ^é[2TêuHËÁÕH©5öM+È™ãwÚˆÓa³SË5£‹â †‘&H|Á'É#¹/zŸÂæè¿õ’f|ÎG· 5*PêIJhŽøé²Vƒ÷¢ýÞ¶ÛÕ´©Š´£~ã@ÚïÒ‡FÔBð#À¶„8.ïneôÅâ…$¨nVŒ§”r¾Ê]PD#¦„ßtJ°®ØÌ-yk¦Ê“1üöy‡ByûC¸wõ$¶ë‘ª§ ­@ö¯†¾—øØü"£òE%~ø#K",ŸWoÔ,0ÑÓî'ü'ÿN Rc˜³ØÞ¿UWL‘|ò ®yÊò²sûVj$S+´‰}ï™Z{&ŸYT#“ì­dŒôÙc4æü†n< F>¡ü±A$øûV§Õl„†ÆV)8/¾˜Y€™ï4…•ƒ–“àÍ™@o¾"ë”vh0”U¥Yu^¢q¦ÂÀàVÉ>‹ÑêÞ‰€m·Ä½¸„fßÀÙ¬%††”&¾YèÄêKåëd®w{•ùÉn7¦ ´]"=¸²ß8C²› ò5;EY¢. âW¾’LÔgØêÿ¢•ÓôAs +?KžÀÁ–]^´²¹ð¦]R4Ü=¶»/òŠ7ÇÍg7;q&z Ïzy¹ËµæÝm[ï&ÏqàüÔJw ”ö1²=°)oî•63ñ?jåÌPx“Æ­®©7%J' +m@¬u>…Òé·æKV.ö {s¡ù$»¸Ðé3ÙµoÖRC²Êaòs3”Kã$"{èO#fΤ¬S9ì’ek/ÝÑŽ8½Àõ¸ñ7Æ£ÞHÁ­¿îÁ¦ rWLX`²‘ãœïá0O·ÜëÓp +endstream +endobj +2228 0 obj +<< +/Length1 2566 +/Length2 25126 /Length3 0 -/Length 16193 ->> -stream -%!PS-AdobeFont-1.0: CMTT9 003.002 -%%Title: CMTT9 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMTT9. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. +/Length 27692 +>> +stream +%!PS-AdobeFont-1.0: LMMono9-Regular 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 778 222 0 %%EndComments -FontDirectory/CMTT9 known{/CMTT9 findfont dup/UniqueID known{dup -/UniqueID get 5000831 eq exch/FontType get 1 eq and}{pop false}ifelse +FontDirectory/LMMono9-Regular known{/LMMono9-Regular findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse {save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /RQJPKO+CMTT9 def -/FontBBox {-6 -233 542 698 }readonly def -/PaintType 0 def +17 dict begin /FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMTT9.) readonly def -/FullName (CMTT9) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle 0 def +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMMono9-Regular)readonly def +/FamilyName(LMMono9)readonly def +/Weight(Normal)readonly def /isFixedPitch true def -/UnderlinePosition -100 def -/UnderlineThickness 50 def +/ItalicAngle 0 def +/UnderlinePosition -150 def +/UnderlineThickness 67 def end readonly def +/FontName /TWMFXI+LMMono9-Regular def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 68 /D put @@ -32236,7 +32399,7 @@ dup 37 /percent put dup 46 /period put dup 43 /plus put dup 113 /q put -dup 13 /quotesingle put +dup 39 /quotesingle put dup 114 /r put dup 115 /s put dup 59 /semicolon put @@ -32253,172 +32416,255 @@ dup 121 /y put dup 122 /z put dup 48 /zero put readonly def -currentdict end -currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ=Ó%åW˜)-{Ùr½uú•)¯œ‚ßröA•ÉÂÜãE(õ@Úý{ë¹´‡º“Q»û|ü_‘RÑå» -ØÐÆϤëA³Å -T@æ|ýq|Vk¹¿J% qu8P¢øwÄGx³ÅªÛ̆ÖåQæ¯6K üªÒ-UŒ\§Ô%¡bÕ"t-*ðxÔõöÓŸÏÿJ‘+ -}ì3¥{Zà2ŽùÕzݬT2sÀ$ZõÌÑ&{Bè–J×{¨¦Œß=מ0).ØnÁ¢ðBd^ö/¥!2-G×+~ò¦bÑ7³ Nø&lƒþdsõ˜á%o5·ð3N¸ûÍÐP±c9îU,”Y뱂0L²¿öç±Ê+OAÁd<¹Pgqíê‚!i\žhßx¤;B+üµX¹&ÜŸ)o²™ -û­¾aIñ`¥¦Aï.ËÁ…o_Å Ò‚«d™]Ùì©@s—p!ÃÛ×—;¿ù¿+üàî c¨M›)U *m¥žÁÒ‘¢4+o$¢U´ƒEc”E*­ü_G -‘õÊd.¬»U£Ó9ÏøöÚ¥#˜]æ8! —S}/a—§»¤«é"ÈöÒ48EV< >™N;œ¶ÓžÜåö½[9Ë“!¥"¤k_‹Ó†$RÑÓbÕ˜ÑgÅó ÉuŽx¨ÓVMhìÔL$3P˜ÛíÒÙy»5œ¬,¿o"Ð7'ˆçº*íE¤„<±ZÿÓé§c»ñªÍb8r$`FÀäÞ§/±úröôí¼…ÝøÂ]€“:BõóåI\$~—=Ô‘¢´ê€Ïˆƒ·¢Ž˜ÙËøȜȿ!äê–²Á±QÀ´S*_LÎyäXÊg*½!#B;Fdyú9 vF.ºxÂ&WÜ€ÏçÒc‰K[Òé¾­újô­ -ƒÜñ ²ßTœ-hj¸˜ÔÀ’:Ö²êÌbcÅÓ¢xù~Ðgk0š^£Z–]Ψ´&Œ33gŒñ3æêwm2÷ÀŽpBŒœIÌ€‰:~q¬Fà邉u<Ý^ðIE=Ï*ÑJÐ.­Zw†Òcî3Ö8ìY±Ö.1jKAs;¹¥ðÄ»Ž÷`ÎR^ÔyCQ4y»ˆŽÞÈå ¶Ç&{uðQgF -Ð¥vl²P¡ï¸©°¢ÌÁÀ -ÊeçÄ=Gylw -x]v¤ºŠ£&ú¿o9f·ÚiQ¼p´ÂÅÞÑymQÔrijËFÛ\ÍÀUBuª :sé燭™„Çy7ë䢼R'åT½‰»…ÐÐ{crœ/ðbìŠzfuÌyõ¸ÞÆ-_KýhÚ~ž sö‘£¬¸Ó­>a(—z•–‹fz!#ý¦…vÝZu¼ßZæ"VÃUEÉŸúw­øý-ô@»ã é¶Ùi äûaI+˜ RêÌ¿±P–ÿ¦¾;X¼C 6e½ë›À+îj‰e×Ê[µÀ7ñö-^j’¢â[ïÚ -g‰Ž°9‹‰3­ ØÃ…ÆD©Á’ùˆ‰ c% ±¸Ø™Ûy¯JC=©zê”íÒ[t v™™”Ê©>‚VæÅ3¨m™ÿ®9Á‚ð=Wp¦ˆ+ãlÖÁnׇ3g ¨#7Æ:ÊQÁ@µP~Ò-ìu@Ó±DïV\sS©’;þx; -ƒJ®¬Ï ®'p—ò -ºy?Ìþ^CÉõòùI ¶$HGY tÓ` „é\ÛLœj%ýf¬œÎxÇéìS›ÚŒrìÝܾ¨0þ;=žl;°äÀ MÑI>o Céµ" - mÆÞ³»ôªI•^:ÊO|¿u.^âÑNçuóœwb`ìÑ9º5>²¤øÞdJJ… °Äð0íôóX¦ðÉpöŸ[ÎtóÝàÉuè^%ئò`'õ -0T¡ I{ …6j=¡úÌwŠØ ɽ¥¿Ú‰»ËÅ7†øȘý¾ÏÛìÅùÚ:{ú¦©õŸº‡?lH´¼™)ò—ç8O®f¤¢mXn,Š³˜Ëß— Pš½iiþØÉ]X&•íBûx{˜5ǬfE¾ˆÞ®¾DtàçzÖ9nËy¡B…ÅÄ ›ãt+ ä4ŒˆþÊ, p×$¢@x&ý|ïĨ4H~4îŒJ7¨B‚àš<¢›Ð«C±ÉPæÐœÁ}uzèÂ|÷gÃr-š: èuäó¡ôà[ÃDè\; } ±bUûIS{™É27Ú=ñ©Òì FB›k„Þ' -¦ç€ ‡.~"ÄÈôätŸíOz-3RûŠæÙ·ZÍ[——‘“&ÒãÉhDþ÷|°AQßGCH :\'Š¸`aуã2¹ö{=o >uÐ&Ž0ý¹ß»”;e†®01]VÎÅ2c¨ø¤œŽ³`š †“CÄJ1/ŸÑL¯Wþ ˜Yckɲ¤¤‰Ð õÃ}c§8ÚLC|¨±Ågƒ%¼Y«9.Œ‘òµ?›óš»e‰²Ð+€‚ÕÃPÒöïI4,}# yE؇™ö]%TÌ>ÌýƒðÚ},ÁS ¤!*Á.ó¤ “˜v˜tåÐ7†÷ó[O·£Â±Rø«Y¶‡†Éìõ–SœSÔª5Õžl+>ÍŽ¼±¸l6¯åU®Yºï©'­¨ù_¢p%ï=°ü]^:k2 Åb¿3Ò²vu\þƒ—k!š†®çnúõ2Ì‹ƒ@Q‘*vÑ}Ô5z3màØšò/Æõ¨¯³¬Ÿ´_‰ó~¿éÐGfðG쪑“4”‚,¥­‹ê=¦5ûø%¡Ú¹]äEHŠfnÉÛ€x‹ÆR´òúŸL/'¹ 7.œ·ãñe÷Ͻ§„¯‡‡aÒG}å…èÂr²¡ -~÷) «]<$•ð¯2uµ¤<ýv<ò×NP|¢AÈüGk¤Ì .PY­ÆÑÓO;¨»½BRœ)­©o¬éVê°ÃðSþæ<Ä‚˜;aÅQ1Œuz0ˆKÄ%RÉvng;‘{®võk«ÅîÐä~؉nË^…‰À8@Þ»ªYö·Õ¸(Ø®xðÙ•(tÇ&XYò-»­7*«ò0ïx“)ÌEîtC¿w;ga"15Ä][f³l. àèZæ¤ÌIbŽgÕswàT&«©¦Z̸ý)D…H­†tªe ¶Ì6¢¸ ~Â5÷׶?œØ® ´ÏÇûáÁÂßkˆä…r.)¶•l´ö¡•Èx_²?ˆQ­WÑ32…š(l¨ãë ŠæÑ|O×b-F–çÑ].–®RPD”<Ô±ãDûOàD¸›]D®ÂL=/å™È¢ÀõÁKîÇõ­°¤`ÖàÉFƒgQ_ÊI -ânûqP¯»º·«_(˜gØGK&mĺt¸zÂéü­_ªo ×`4§z4¤-71kïds•w¨\x ZŸ’8´ éy\™šR¶¢ßÒìðÏlÌ@Ô€Û›…óÜæµ´$¨¯ðÍ#‹ÉÂa'·{Ô‚i³~KljÑåVBîñî“zU´/X1 S¡äq=¸þh&7uó«GôV¨èA_ÔÁ3ÄMüùâá~Â^…›tæ0‡²G&<ÇÈûÔו}Òðøh޵Ëäú0„ɤcÙÒ°ë•>„¶ô‘o6ˆ½)ß{QB”¬X[#ÞCÄI3jƒþeó’ˆ/N€ŸºUX+¤ëÖ`‡è‘"ŠP[\ûàƒºäAXºa|i¤–” Hn­k¹,· Ó? ‘J_y3^ÂcÍø}|at½ ü<â;1v’@¡Þ{EÒ¬«ÈË š¢Á/šEì7+?`µÔ+7ÂTԘɈ/¢Ÿ~£2’¢Fe匊·I›ó5)S®XÙvNŠ´eƒœ“…È BÒ®¿my‰Ä_1Ö4tüÅñí ÷Ôl—0w–ÿ -3Kq -TâùÜÏ­ÌÃ\¬Ó¼zlª‡]%b¥pýgÜ7G)½tâ°[àV‰ÑN¡¾Øþ~ÇO^¦‡˜çŒÔ v`Š«üv`чm -ňX h¸¨ý—>r±¿:ƒ9e1SôNCìà$cæØø% ‰Ìõ$¦,T¡M2ŠÏCOÝüV¡eŸ=•øÇS7áu£•_„{xœ •Ú$ì:#Zòi¾1ʇªlœ y ö^|Qûž¡°Lψc¿&ÐHn’)Ü ϨÊùÈ|heÃÝ À}ñ›IøºMÏô¢Ç•WÕð*+-Z¬ëß,dÞªyf¡¨mvÔ›]t þ;`Úo´Ø]”ŠWH¬°íàYÝ}'PN`ºSuBf$,ïÕ•ÓLöÔäó·=úwm³0mVÔå –l™ ˆTOnt£6×M"X˃ãöq¼åDŽf0Êms9?Äžš_šæmnH`ãA…A×mÒšº -ÖÜ­“a’èŒîjfnÌUØÛ™ŒÃCž›¹a£t·/ŒÜçÄæ½òvÛÜ×ßÚPqE$NP^_Ù…‚dùèv±¸ö§"TŸC[ þ¿|¨½¦wÒ1|7ô—B%Óÿ¶èf‚£W­"dÀá6QIa½÷Ò†ÅE.á<¾SiœùÁ—Õ™)Õ+µž Ì%ÐÅY`; —Š®5!‡KoLS»—NÕ’öhPhäDðÚ¶R‘™-¾„®¹Sòéæ‹„ç»? ò¦åÕ¤S8ûŠ!ª—,ÊÏ „«vÏN²[¼ `É"Baæ’Â5ÖÂ#ŽdwºéW%Yl‰¾~:/´‚>®Z¨;eÔFÁ{N# {Òí}Ê?7yøg -˜hS·…¢ à`tZ¦" *èQC&KsulÊÔJ·š|6×iÞ$rˆEÖFXggξß&vÊÈH_I«¦)³ÇÝLRá*Ž±·QÂgÉX¯ušV,Ð’"ª%_©L΄¤"4vܹ¥ÂÙ©v€Ã»ZÇõ! -åÕÌîûßY@øLŠ‰{Ìvߺ‘+‹ÎN*Y“Ë+qi]øî/^L)㊴Š’myß1=óˆ‡K굎 ™¯~{iµt{ÒnÚ?'†‡2²d,µä½›ÎÐrˆît‘ß³É÷”ö-¼FÜ…­¦Š¤ˆFl L y‰fÆæF?Mè€_¢‚Ý*aÁ–Œ¹ØÄÁ5ÕºäÓ½•ÂŒ!»¬‰£d§ 2‹ãΛiú¨w/"Ï;qfWò"Ž—^®ã0$go„ï¯aWó¡vêm.üÍ¢BÛÀ/c@ž{:¿d"€Aú·vÏø]P}Cð*&kxÊ;ô¡k€6܆ÒxɃ%ºÕ ^X2ÑLŒg]¡’øó…ÌVm¯·­¦Mhx÷äŸP&ï(Ð{Ÿæò‚gû¢Ð^Ö£Èý ĤԒË|í±º(ìdkÈå¿ì/¡ÎܺH€Ô”qÏJ'w¬Bî£t®½§€—žgOÝ`O± ¥ßcÔô¹“ ®\œK*ó/˜Íû´®¸™¼g²9Zö<0väB‘·ôWK=—äQ‘]a9<ÏÀV<-­Meb^|Þ€£q£À"Ä^ÏÌ×&l |ésêgiÿÑQ=¶ÀéõrÓå m[Ò:rL(Ó¸ó…„aXÂœšÃIlµ«eŠøÓý€wƽÑ?Z®Nm‚CU†´úØ ü‘Ö•;l€Àc;^=§ª÷¼/ÊE#ëà…q$SpÕ TÀð /.qKAEÍ æ6–ñ­¦ËêÃ`ÍQDAϪÐ'Ön¦6O¨ýGÒã$áDÍ’ô[Q»ç8s‰Ï2F£rïß$>•©.N©ãÝ:;f7÷ã0\ -^M -åaQ±u`8‚@jûk’¦p–XDS›<€–§µ×¶nh×yãDIÔ#!“‚MJF. 2E7'GìYN¡±}ÖÓN4Qs¤©Ë)­yÀ*ÿ>YAV`°upmSà6œƒ„ò)¤3F@ -k·Èqk‚‘áüû~ ¡©½Û]¦ -š¤Ù»”cÁM*i»[S.ƥʨ ýÍ5…M/ÕôgꦘÅ4i,ô–ãèü&ø=¶Eºáò*‚<¢» -&~ˆ Žfð”@ºa-Ò¥!Eî=3jŸd+aNé@#˜‚R/ëË甲’< öyo·‹¦oæ?”M˜*¤]ÿ¹ö¹„± -,ƒ™Ûsw[cöUŽørô¼IÔáû8#…ð0i·a³ÉFk‚4•5û7÷>ºók0ÒÇ v*êDÙi*”Ù°zu;åœÛÌÖ °\ -¬ oØ´úñó“ÐÑña;rÿÞ( Ö’ÚÈÀwz«KNB¨á˜“'Šgfä¤W?»Ä1Yr!k¯U`)´Ù˜íùÊ«µ\®$<Þ“€ŒX¡<9)ÜAžÈV”(`ú¶å­Ï©r›ëÞ).áUߊ]ýò–ëX/^¯Ó:bå²[BŒ'’‚ðA§’‹ÖÁ—«+u×}ƒì¯Ï"‡ñ -pæC3ˆ‘q'z}SdÉxÕBiؤ+GøFÅ.Y2µp£Ø8þo˜ªv}§áü@ÏÈëµ*‹QzöñI°DV `Ê¥ˆ—Ñkéʦèä.œÏ=­h9µ—8nA‚’@$` H¬Â@0ˆQ-HË1gýÜ­¾³«—уvÅËS›=í“áôç©]m¶¨;Ƭ›µÝ–V¡´Jêd™<*§ ¥Ov¯æIë<]_Žîcô‚K.Í&Ø CŒÈÊ­µE­] ÕW¿÷¡îšz7Ìöá=/ îè¾\­`ΫTÙ(~Ö°Ñ.øŠ)lšÆ$üÊaésZyñæ‹Þ§^ÜpÙõ2Bü™Cö [Àb€@ñØÎ1£X“<9µœyr0vƒË!êSsíÝK¨/æRú'ô»ÖÞââèÁ3eÛXÛ¸Øjótoåž+9n{Á„šcg@f)8}ÔE¾°€UØ9Ð_²¶£uð-¥Íyœj\}qÁëÑ¥Þ~¦'qêX™GÞ«kú`$ðq¢pï×vq•š&¾°"àŽ<Þ½¼ãK¤Üñ©‡Ð.Xo|%’Ñ}ŒÂ¯Ä|ãžû5®F½Z¹ãX»­ƒšKbµ X½p3Ÿß¨{é÷Ç/x+HˆvœE pº («ì -µL 4öæ–m¨(J«;¸Zs·r÷°%ÐPVúÏîR0©²?*ˆ=€ÒètÞ‘."¥¶)“¹$y8V+"V_•ËH‹BtUñ2.ÙßÃ?-–B'fFÐÐÛï̽ä"]çT‘+l#c-­Û°º´hn·rÁéQÈ‘"`Q4èÑ¥ˆO§Þ°æ†cþ¥7’?ÄÒ¯ÜnßÞ-y°šc¤(êœÝÚ¡gD¶»ŽB^Ù„ b~*àÐé 9 E?Å<‘‡mðdŠ¶Hö1ŒÙL'A­iFˆ»¨áIß_ẘèîTÃC¤6ÖôÓVÖ[)ÒB,øXÛv¦bÑ‚ð®$ØœPÃŲ w£«µ˜#Œ}&ˆÞ K<9èª^Óù—:tj@ yw´d†¶cW§+ÓBúqDå‘ ?ß+˜j3ž ëK,dd î3GLÂ÷¸±Ì‹e„®M$â/Å¢ø"gÉ7Wë`q`a¼“¹¸(Ãl²8ܹœE;ÿÕŸ–£º8ÞMî·ÏR‰€LžXý2d¼(o‘‡ÿ"„µ‹z¹c„û…æã/Ͳs\z* üeýô7àÃ>^mÁqºYÀá‰FgÑ€ue‰ºP/6þD¾úYÿ½DPßq‹ûæº|2I¾ÿSƒ¶°ðöî¯HFÃ1`<ŠÏF¶¾_J‰ÍÞô[@kÆÄ|m˜A]±ërˆÑßв¹ÿ4ÇZ'Låaóý‘DÅ;–6†[à!uyçd9/¾ÜP€E²–½®…KŒ}!n|<¬­}±-P{ýš÷°K¯–‹8TzY†‚Æ²Ê ¾9%˜t¬ÿz0¨ÝΤ‰rƒ¿jWɃ>¾¸¼Ž‹üvÚq…þV?Ç›²ëŸúÔP9~D£üèXʘøyˆ¥*0u¹˜žê&$G>HR§spXÏÅÏØ3DBª K[ØíÒB8ãJ s+5³t•P“ÝäíÙ‡ÉIw]+Ý~dI± -75V¼$.Ùa®drTàªÂúéÀ§$g–ËÃÐ,~â·…™°m0íx;éYfíõ‚£ØQÊÜÖ%¦D"IGi“¯×™z{BSá³ry>•¥j7‘üw<›÷OŽó¥I ð‹^E+G¦ÍwSÊžº‰Hq¡kiÈæ艨!ÂÆÃfÒÔ³{üÃé>±úLRÁ„®È-ÁúŸCIQ·m.NNT·Ü×\ò²Êo£=ÓuÊì~šðªÿ †ÒŸf*Ö;óJ._Zp¨3¢T‘_Í»rЕÉFÎÅÎhŒ°((t\ÄîJH‚ûå´zˆ Þÿî2æÑV ðèéQ#Fþ©U18Š8ü¡“'¤ â¹´üú.ÃA2eLfÚÙÌ@¯_Ž`‡4âýp“å2èÏ¡º§qˆÆúÐ  3Ñ+Í~ð*ߣo‚9•(ï>Û”T‘­Ž=_k Jâ9¯®ÿ¿¤+ø^æ}Ðîm{•~,÷1ag`;7TS.ãßÄòcîØW(°DÒ,Ê7q3oiNZ'-É8ÂÜI”t"öÏœŽæbߺÿZUÇnôíâ¡I4Æ`0jš!íá=¼L?Ò„*¶|?aöÈ’¤ëkü&T¶þ§QØwÖAñ2NœÜq€ðvAÚþénxb‰²]¶-¶uº‰¥¥ áÈvÚÞ–§Vï>ÉûxE.MýOͪ°RУÍÚ.ÎþÌašÙÔLŽwj;ç¶Ü ¶Â73SqhTlM‚1¯ˆv•RBaUbäó«ß:›²HŒÓüª®SŒliD71nœMIr oÁñwÛçxø¼Sºí×Ì£é@m Ó·ýì_&m•­ÀE†g}•`”Tt AÕS¬ú¯òÀ(^²ŒÜªÏÀí=5ÚX­¾&„NNqƱ¼s7©„>ãT@H¤‰»Á7a‚ôó%ü}Ǥ¡l pMØ…¸\ÔÐDÁ· Ÿ0]æð²ÍAŽ¢Ò[°±îv¶: -A0µÝ¶KÈ•ô“E5“7X·¢Ã|ÙŠ^åÖ’=“W1iqÎSw¸”V½]ò6KøzbêÔŸêz){ ë‰G`õÈ;Ÿt7Òzw"81ÕbàøÜ Õ¨,Oسì×NÚ¢CBÜ ´[’[¦ç;-¼²´Ó\é{;,ŽBò‡¶e,-:¹)³*ì îCÜUDÃV"¨/ä<{ÝÕ]ê;u9㢩9’^ ì0ÉùZM‘ ËVR£t+>¨[qh­wƒ^fôó®ŽÃø‚¥”1šuÆû¤=<)bÖ8Y•÷6”²¿“ü;û=U¦I<ºu­C»±Nö_ÔaÁ•–PŽi9[€a\PUá¾)ž”M`ÉR})”¿ýe>ÚÝ2VÆÖì¸/¾ÖHÂ’~“궉­S@­ Swu¸¿'¥¬²{—/$0UŸFÑï:›U–³³Ë°‘[M¦öÁ‡…}ø†‹(©ñ<½mõ «b˜xáýÂ,7\0F&º0*7ßnW3©­îýÿ»´9âF`Ä…v~b…#á»Ü~éÉ$€é_d¡ Šª6/ÄúÛHJVñÁe J_Óš2ÎCCtœO¥÷ü> -õNX¬Æ ¸Õyö W¢ÐSÞòKb3i:˜0/\©Ϩ†äH½§[ß/7]Ýreç;‹B'§Ð×ãåNk¼ÃY%>€ô¥)û‹ý€#-•Ø°w#“ÍÃø1Š¼\;ÝAèÐØË'‰—Ûâ”D¼u¦sÅi”ûßZqêö¼w§›Û=à@w`€Ÿxí†àüÿ$D«œà#øS>å´Z T† -V~éžHMÑ™Bëg÷?º¿;žµ®Ð"^m -ýUÖ÷æ -x)Ýw"×+öêyêý™ü¾#àÆ\ˆ¹­Å1«æþ¸3…W£âç">¦LWrô¤DKñ9³Êc¡êEÈGoÐE¾µY h -ݼ~§)(÷D(Ñ’c{/ Z´X(’&³axÑe‡pû¹-,ΙåÀSÃ=ÎLŠÈ8Ù–/GUÙx^2xˆnÈsO:‹Òvºì5ÉÍ3ŽŒäδ +cvö,wúª:ôØ!ü¹S?Üp–‰G!ìr–{ðŽªàʇÊu€c<ƒþ{ É*F븙ᦪÓ]eTZqu\ý“ k~dáwjRÌX6€;+šþÓû“í¶b÷÷:V§ãÂ,¡÷£`£:G>mcF"!«Þ`Så1hç~ó±€Âæ¶iÏ’à1îoõªèNÏ E,iXn ï -xhΡÔÞíçh.²›mdI-SF3œ†¸hu*:|çÊB{ UusC´®õ©Ùb]= -Q, -ÇŠ¥ì´þl¶za5ñ8 µAÒphÞ(Wü\‚ÿ¾™2×t©·¢wdOe³%(¿2 ´EpD»¬ -Š0ão6ˆ–Ž±NÇÝÍñƒlA·q»qäã”NÎ@ i®é@ÓÔ†Ê0OØ8sË\–¯}šrÑ_ÀDÈ1ŒÏ£RaÈòY _4„%L=ì‰õ<—±¡ÔÎxgUÕÝ Y¹:¶GÇE–*ºõ£6no¿ÌÂívµõ»á•ˆ/Q§Ëÿ.3dwòøüR8uÞæ"´c¿IÇãŠGZÝ9‰Í´ô곺>ÃÿEJQP¿bTžë6‡Ý–:¥š<„nˆUYÚ–†½Â±£tW_eÐqÀ®’[,m+7?Ϻ¡Ê&]ÓDN½ú£³BåÝÝS…’Dçë“zî´Ñõ‡?@!}öDÎ#“/Tò‘€¢5ñ¹´ß ¦ï@†r°ŒöäKþ®mEŒpè1°ÖÑù.HòFo$[îgt‹µõMBU+™ÑQŠ€:áúúH§¢´H¤›¤ªðªáñÝ“Â{ÌbçpyĆºÓU|ŠYðÃñ”AwB™ªXÜ{™ÿý±³Ã(2ôœ‡±wP’ýᣓà¦Y™_|«w Bµ¾$ÅíxæI6›öUV{ܕ̧$clàêpjÄysÝÅñ!¯Z 猇m·ZÓµ os‡» ^/ nT˜L…¥aó‘ÔZò2’g³ú†ï &Cºx5÷KTZr,«goHBN “Ò­½äÛ)ÇèØþç»Ó®ÝkZã¡Øê"ð]@Êñ­ëœ¸¬ö( yfŽÕ…¼HgË©ö}·T«_RF]»»Ú´›º»5K©Q™^Ò8D2%¤ ø™ ÆCý…q`V;âD†x?gºâMAÙA.Ûp·ûÙF2Pm5ŒÀœ¬Çîë¦Á @¨rµ-39RR#Ð?X‹pKµK¿žì¥‘*äU¶Ä°‡àbôÂ<Ç´p{â­úŠ"^Å‹¹ÌîÒsúC Ÿ ”šá‹âÛjø››ü•¨OÃýcŸDȦ„æ6#©”˜ínË"€¿ }w_€^75lØŠèÒY$ÀôÚ¡ãh²Ç“ìK¥ c0U¡ã+‰ϘŒ÷ot_ƒ–änË“'0”À*ÒCè2†4lÊ[(äEa¼Ê4'yPÏÉWÐÞ$[‹d½ùš£ZqšsX¼ ù<·XHøêû‚“î4>ì­Ô‡ÒÅöZBééýœ©mè/Ÿ¯Ât·`ûÈ{«NwNåçý¿¦"ñ¤˜"Žu3{±³¦1уæó18 -û虫Í ÇDh­œ]Dü¢Ó~ ÐmPDŠveY5¹Qö>ý¢>®iØVrSí~/qùï>U¢ø)Ÿä,Àþ'¤këAqòU©º‰Ó\ªc.D¼4œg$›n=b˜,o®e©h 8FM¾¼‹±Åg1°‚/qÎ0ü -®òÙ­e€c[Ö.¶?Rj^âÏ„êeàNx“l0•1Û‰<YäóöÍs—ã7™ ‡‡ï~]ìg%ÂüO#AÛæ½Ò”¦çáyûŸ˜-©ÏüÛ…áÐåŽÕôñ™â~” ¬X0‡Ñyö²n¶æ×\‡ŸK³ŠbF¹ÂåT{ô-aè.[DÃü}ÜÁyaºÌ0-%Hç÷E±5ªÝKàרß}¯“¼£âÉùqŸ†Ò5yG¹hÈïTåW=0ËÒ}•t(÷‘o -üÝ8ÂU+Œ`3|ÁHŠ¿q*ˆåG!h5v*b>›¼ƒ FÌñçVÇÔz½Éˆµidvî]ÁŠÏÐ=Ü<¢Ð8i¸ÿ²™b yºœ1*ß 6&>‰1ÐÄ·J¸cì(` •2±ø&a§%&]‡€œ*T:vFÓoi5.{|—í¹®2Ðâ°¦ð“Ø©&Û|TcýYRíýçù¦M3W\†0y8Ž÷¢HZúb7}íÀEʈ"è| É!^Ü©lf‹º‹ZÛÂ`ñžÈp+nøöYfƒ/3@/À%ïzùüÓÕ’ªF:}Wˆ–Ãoÿ0U!†¶nŽŒWꜵ¯ªŒ³ξä[ñ~KS™çÔ[³iÜ)3ŸMâèƒ(¾]7—_üÎçp0ÊÞzþOùšËà맕8-÷ýMìåbћœLåü|¡¾ÍüIxÁcóKo{ðÍH±hÅ=¤œÌ&;0£ wÓY¸3 ¢1ãL™'Ÿ³f†de­ý-«o—åÆO݇^ñ%&¹_ ƒë•¯=íI&Q óÞÑÉݵ¹¹÷`½÷ísj;zø Yã•óžI4/Š! P17cß±9½÷:Î/ºðaSøèêÈÏ´ØžsE]2©€ÌSCð1×Zü5 þ¼~šc‹8܇QOñ¹õ€ÓZ_1/Jå‘Ú£{rokÃ'Ñ&ÀºO`ÕŒ4P;@Ï/QÕ£­¿`7(z«ž öèS ]þPÿr?^h¼ã|è ÕedÒMÞ–á=“«åˆÆù-3ž5Ð’¾×u¿s ‡…嘗Ÿ7 ö7Š¯&ë¾r¹æ"óåc.µ#mU'Ö¹áZÓÝ?Åy`Y†¡!ÛxÞKc /™Z (ì-2å³×i¤¯ °¤ý4¹¼¶›hYË -CC'{¿…i‘ýF<Þ<±"TåJPÿraW?ëæ׬ð¼ ô*òõ5|ãË ˜¾a*ÿÒ‘^~!ð^ìF¤ÒlwÔ´u8éÙÑßXZÌ\¿½tHHKXžTí&àë¸mçî<Ì\Hnh"8SÉ 2v|§þRµóq”Å‘'¸QìÝÜÉû™=:.I¹xy»ÏxµdŒ¸W¤*lü{u™u®µJrôÖ ¬ÙbD42vi‘"-Åì“7K¼ÐLw¥@Å lÔþ4²¸×%>ml (£«ÙSÃÐÁ“þbc‚jøVÙÄßÀ©g¸þÎ7hð‹|ýt÷Át܇[ßhÑ"îì‰àº Ž ®"oÖ˜ç õ'œ—ÍTÁKŠ¢EA­2I”›ð8Ó€PT -ö@ã {ûÃ߬o[wr1žà÷úªå…|l‹¯¾§q8Í¢ê/ÖYxIJê›Ã”ÇGT×]2d©·Fô_8Q‹A¢~Cü–ñ1·ºÛ}ðý‹s»„,XõíT›/ÆôµÛÞèW%)¼ÐÁhú‡þÙçg¯÷"“^úêYÿ°‹(NPåÚ鑸øÛ Üëu÷ñ)Æ£—;–^|/êñª>¼‹ò¥§â°£(ŸCdŧŸ”bïÊÃä³Æ|t¥f‹pmÏûË]ù£év½«ÓT‹³äØm‹H›¸ö¶3!ž¬–40èêï•uª²b\áÓðI>ÞíUµ›ë{l`ÉèpÙàùµ„‚ª)ÊZ«òñÙßÙêÐ%š’$ã7Á}zqQøÉScõáR¹Ÿ-ñ”’º)ÃÂô”©g׶ iR³pN -îG|yTC£¢½c^;™[6Í,#«Œ¹33$ b4 ѧτ…ƒÿêóX¾ÒLÈAÊ’,IÞ„†Ø{Іϭt¯ëÓâqŸC<ìÛê5?ÞßHš˜/+ÀÔõ‰Ü & -Zƒ¾ëkþ`¡bŽ²ƒZ}ˆú utËÂqš;¾z$,|ãÒu\ϧˆ îO?Ú"ÍnùÔâºY0 ö2»Å{,˜yc-Œ#à«:Æc©Éž{“t‹{“)üím}}Gá\c q·öÜõ2ÿqAÒà"‹}¾Ï$‚T&š] -§f‡Ë€ºl²kü ßwC·~//v—¡d9øñèÄêÄÀ]ñ%íïmšfß-}V]ùe¸…b縥Ñt+î0  ¾Âõ¦Ç{ÒÕ†/J%,Ò|9*«™yBuŸ=M«ë”ù­×Œ oƒÈ¡”Õí>Aµ‚òMwOÐ§Ü -« ó7;Þ_:d:>OªoDÖ -ß\4 ‹Îÿ³?¯yÈàÉÝùÁò€ˆ–Lèqœô9ê‚ÁŒÏuýÈ·ÅÛ  -”Ä^ùF‰v2¥Áp›X†ò«ô !‰,x?‘%øÞáJ;·I†ù¶ò ûWdøéÒKC¾”–Ú(Éc?•”É„Üû-¡Ëy” ý^°ÂÆtQ`§-o23b$FíÞ#o”®ÐuÙm]AUYßwŽäሴÐÝí¼Ô rDäºiÅû@ˆÝþŸxììë„Þߎ\S%Ù oOb¼ç÷EÚÿ¦ ¦ÞÅ: ›ÝÌ}*±—ùEBÒ™`%YúyX¬›¹>+G‘ d«LeuáÁ„…“¦³Å…eÔ✓®á¢ÜâÓ6Ü]6&™mÙ9€Œ=nP‘;ÔO¬,^µƒTÃ乸s¢^å ÏVÀTK -P¸^à0WÇÔݶŸ'ñ S¾>¨üáPúŠô £CÚÝòØï¯í#tp'çJyB¦…v†“;äËKó™ØWÖTëwÛg ›k¾íTÙz¢Ø7ÃÛŒPB„/zÒœ9*)ÎåC³yöÀ€J¥®ѽaXèáX1ø¿jÎm‹Ù࣬1ÑK‘¨Ÿ¡Ê‡zT겟j&Š¦–[TtOáu‰¹¥ûãÀ!Å…9‡cÚP,c”0°©‡9׿»¶¥º5ŠÔ­åpáéL+1ÃCƒ † Óº ªõ@¶ˆç-Gø‚QPŸï”H$W”D%ÞôÇ4ô¢Ä{ ¾ Œ$9ÕMª|_y"×=ƒ–ÛyÝÉ·1ë^[ˆïƒN"yMc޲ܡ_~#†–ÿ 5"XÕíq¢O]xevKr -endstream -endobj -2225 0 obj -<< -/Length1 1173 -/Length2 2916 -/Length3 0 -/Length 4089 ->> -stream -%!PS-AdobeFont-1.0: PazoMath 001.003 -%%CreationDate: Fri May 17 11:17:28 2002 -%%VMusage: 120000 150000 -11 dict begin -/FontInfo 14 dict dup begin -/version (001.003) readonly def -/Copyright ((c) Diego Puga, 2000, 2002.) readonly def -/Notice (Copyright (c) Diego Puga, 2000, 2002. Distributed under the GNU General Public License (http://www.gnu.org/copyleft/gpl.txt). As a special exception, permission is granted to include this font program in a PostScript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def -/FullName (Pazo Math) readonly def -/FamilyName (PazoMath) readonly def -/ItalicAngle 0 def -/isFixedPitch false def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -/Weight (Regular) readonly def -end readonly def -/FontName /IKXQUG+PazoMath def -/Encoding 256 array -0 1 255 {1 index exch /.notdef put} for -dup 165 /infinity put -dup 229 /summation put -readonly def /PaintType 0 def /FontType 1 def -/FontMatrix [0.00100 0 0 0.00100 0 0] readonly def -/FontBBox {-40 -283 878 946} readonly def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-451 -318 734 1016}readonly def currentdict end currentfile eexec -ÙÖoc;„j˜›™t°ŸÆÌD[Ï|<3322ãý¿ô9I¸fÈÂÜ"ýÇXXH`ì{¶Ú(Ìb$`á«2 GyµÀ<“»»É\ð&’ÌMê¨ÒêµÂæCté+Ë…B›®JvÀÆ·mo÷Ïš}^ß¼ éYTY½·ÞCÒ]Sü=ÚnðÂt9x¦Ð>ÌíJòêKÌ1¾‹ž'r6iÁ’Xï¯Ü'l±­é Š”6ÑŸ±Ã=ïvª1]Øöüö•mñO ÀCüQC>õ¨iÔÜOLÈU½O³*TšÊcàTÙm(™ß‘À߀ ãº1ƒí˜U -nƒAŽYXÖþUÙ1ñ2¨×èèÒrÕrúE^Oy¤"â;ôDmÒà{O0Ò {2ÆR¿BÊi³¯N´íÌMÃýÙöéufû–L´”o¬ñ¹nõÛâ>å‘Gl»_ëí1¶#<ðW”\’ŽïJºÄaâ½æ„_«òF˜bÁ†ŠÆ~˜ ”M÷PsRƒî“ÛŬK5B|«BuG‰ÓŒ8öer(ßVΫ0¶\òFÌ+£S“hdr=ÊéÊVd^ RUl^›0ŒpEÉTXÛ½‡Ç›í—ë]Æ%’ÁÒ[õ”áT8ݯœû%²Uæ~ë½25ƒì£‡µK÷±;¯Fª<´FS$ÆÕû²9dwY€-u&‡i$ªò¼(O´fvdz¦‹ hº¾Ó¹ -s;¯“Ï‘èB]¢rø„£œ* 3Ü”™ Ãæ_&+Æ ÿªñ‹œÍ4‡ôÇ9¡Ï^WJ òÐlÆ.ņä»Y¹~^à³¾¨“òXq¹=ÂË;agâ £CgïÏÃÄ _%‰Æ˜¾JéD­í>‘ÚYÎ> Ø‹’¢á½næׯ ,G#¢Ä²›‹5À.êFŒE)a"™¬²ó p‘˜ßqÖo-Ù4Ï`7µò}§Óº¤WÌt_ceuÅÐóJŒ—h>ËÌÿÍú¸µ>Wþ¾ªÎ! ôA5j¾-±v`òç-Š?R ïtl.ƒe)öµ1=Ìï–kdSã¤iõä!5Ëþ_lŒœ<Ÿ ô—²ß-^ó­ª+éÍš‘ŽôùbøގçÞ©ÈÜ–™nWÿ%yïg§õ͚™ý÷3¡N>âVpä9ÿeŠ.•¯oÀÖÛv’£ˆóý“€hÓ; ÂÍ‹ ÀfW´\¹ü^yÑ_øÜ“±š¢'Úî1︖œø]c¨*Îç–" Õ磺û©@A.Øê’æXïècǽD&Ê<¼r”—kö¢ ¾Õ|lØ\†Ù·[ne_@úÄ_TkϺ©åhô´.5G´ ô­­†ï[î‚\?… nä½;göƒà¦ŸÌ’\]%»ÐAí¥ÝVY 3ã”è×?¼ ³ßMhš•L”I•*nÁFçÕqÄ™ñu §Šeñ÷~ÈÀA‚õE Q€P+%[sÖÐVÔÓ$—G… YRœ}aâÕÉèüHງ´×÷õÃPœ  Ð1t'yˆwBî`©Ö~›× -‹å$óe1cww9üBî6$Ê—TĶ¨³;ÀZæ (4¤9g»ð6Èv¡¡f«´§æ4J Ÿ¿¨]5(§™Wð ø„šs¹Ô˜tàaȳÅ<[EŽ¬p­üT˧Uü¥!µ”JtОÄò ©ýª?,–ÎL?!w ˜×ÃçsŸðg0Môh¾BÅ1êä³Í$sÉyѼÏåÐÛç8G•º“Š(ûûbÏí>z¨uݬ£ÿ­·¾‡Do­eHÐòñz)Ž^0 -9JžÒW –©fÅ'ÈUâ€ÀIõ•c‡‡eiÒä+¡5¦¤ÄH,ârø73g0Ÿ3{#Y°«ò_‰E^n2ÖÝ¥»„ þš®Á¿xdR$;‡ž9ÎÒÌ(ìúû·ŒfôñÂ÷n*±Ï×eHýc¸«AóìI…#†y`©û÷µ%,tÅ¢\µ ì)ÿ_ãV£+µó7À\öMþ7AÙ4R‚ã"З…@ðOÉ!1í$¥¯*ϤóÐŒsp%>*¿È–, QÄíŸÉ¥H;ºEÖ×Ùº~b@Ž—näÿ¦ sשÌXZðÛ–…ì+ ”¥ì, -ƒy¡¯#ö/–!A¨§L‹9¦Ú8üøž¢¼üT»a/±äe£T´,‰ut/u¼¶ÀÌî6ü~¯® áº1ûQAÔ‚\ñSøâWä{N?2¤±â/ïóG½?ÚÆE£æ« 8¼Ó®chbK3¿qí$£þ;+^ÇB(ƒñ¬¼¨xšà¸‰k‹:wMá…XÕ,³Óƒ~õ‚Rçÿþ[&¡òh¬8AµS #L'ɧÑT‘p¤®ú#{Ûª½¹>É;+â­¼’MÞuU§”‰[¡ä˜%Vû0ºEÄ—‚Ù²ˆC9Ø…ÀS©×Èu^sIÓ2\ØþÄï’¢ÒÊÍ. ->Íj5ÃxBÄ’Úª]˸fóŠâ*Á÷‡g|Üšx*c·fr+µB!ÔÃH9Râ€2Hî4^ÁEìdöãöÄ>>ž›J1öu£¬•z¹&¿ÐMLLœ÷i¸Úo™Jè$!¸ótîFoµÜ¸§_µO>yù,ÞªôÎ+ø±#îD‰ Ùqµø¾ýWô2³û  tE"Q×IŸÕ…À§Ø.e]‚Çùl wÙú,•Ó*¡"ŒœMf]­—ÍÆàiðñ/FSóê‚’.÷ìsãç_.nÁ1 àÕ/€ßâ8,‰}̨ó ÍÊçðW˜“‹‡ HFPšsµWkn3‰áa¸9`6I¼åŒùÿ݉Wi¦>|ä =ÔÙÙ1u¬Åª4ÿ¤ÅÔ…X·’HAH -ƒ$ü-UÒÛÊ3¤³# -endstream -endobj -2227 0 obj -<< -/Length1 1188 -/Length2 2740 +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿Z+—¢TXêÀ{˜¸ñ«ÉÔ‰ß]_ŽöÝW_ÝÉù(~ÿF†àv¶œñáWfE@ñ»Qëc»¯¢PNêd¥‡qclµY¿„§†]¾Ošå¡æ®kÖÔÁ r‹ lÈú¥n€8e‹ &ãy~¦8qE”<£ÍDÏb´]„Þþ$Ÿ<%ª¼GÜ¡Ìd`øF òž‡]æÍ"Ãb¶ZÊ9 'Nnð à¯]Œ½w¯ù90dÿQY°NS+Ý{M´>d¯Õó¡*Ÿf$_ÇÔÏ{}äþÎE¾ÊN5´Úw0Æÿa™ëmLÿ@°o²Ÿ­½praè,¿Yuš½vT fEC/øî -^°Ì[XâO¨åß2þêÿ WEƃØKQÙB:”ŠÒAvÑøy™°ug¶¯ÙíÏ=q“p5­—/¸…1[úÕÜ‚!l2T˜'9HCª¾a—€®ĉ€üw™±Š¯‰Cî3^—Cyœ’ÔÙ}^µÿ–…À 2ŸA‚ú3>ÑG|ºÂà”ãH.øˆç]¶J%KV`à8ß?½†Ã(Ì0Í÷ÌÌg¨óôƒêt,×OT7 Üà$n"JaoÕ82*tß®ìû‹¼Àç±.ÙøÄüÌÕ„ñøeÙþÌ]'2õçb×v–ó€Úè«ëþÐi“ß)kŸ+ã÷šT½:CY#M5×7¯^ÓkjÈþrcÖTÑDd¤;y9±§/ªä£EêÍœŽ‡îåÐIœ7K| òŽ”Sö­ÁÝ¿[l6ƒáå†ê«÷¥ +K|ßOV`bàÂß'EÙp…Öœ§höº‚ÒõÛ(Ñuóèfè*Ž†—âéþ­ñ؈{£s’G´Â(\–h‡M9?Šª¬5 ›gÀ|Ü 9û@£?ä +â…;ßò^;Z]“‚Ý_·pßêXÛæÆêì¯ô5Mlî•pCŠbØÁMDþ ¢¼›†âC³õ ![]¡ÒzºŒ>¼¦¤§¨Òn®žÔ¥ç˜R>X5 ks‚•Æ¾l© +yHRëàv¸ú,#«á#ymí­B[Ü! ºY¸‡Ÿ|+á´ˆÉÖÌ@w˜1Èy0¼–+[åÇç3Þ€ º©;ð.f¥Ká›ÕÒ$¶w:¼É; ×0´6‘áI@œÛ3úñù€Ð]…3Š}A‡íEêŽ:l9éÙñ;fQIïÜé1Ë×YÚÿuK­YßEXÒÖúñfhÚêƼ½mxâÓg{šG9ÆzÏ‚ªÁi¨{š£$ŸÍ@í½”–vœÖß2BG™4õê˜#­=„Ì-*n=׬>¹‘paŸ¬\ßµ ¨wQS©RÔ9 ~Rã~{ãŠûµ“,VN…Q:”¾‰BÞÊ/¿o©,ûS[-gÍ<õïCI³ÝÞŒ½¥¯Û$d…oS¹×줇µÔQøfD®Vôc’ÙÏH9>S9¾ÛjÞCa7Y¶‘FÄC;™ +²gÓƒS°Þ“¼Ú$2hè¨HùýìÆ`N}HŽÏ+iI9âHŽu»õ—ÛIãô&Õ–ÁÙ‡‹J&HPn‡Í…èLTä|DŠ°îÌËÆ쎂j²º'ŒÏjÙÊ‘?k.&~§wÿÖ% ‡ÅRF¶‘ÕϺ·Á¸o>Üa­Cþ›ÁwÇÚÈÚD²e]ŽÉ¡>#_Œ(Àµ-ËŠ;]‡lƒ׈ ‚î©S \Z+Jøú CÛ<¹ø;ê–É’Z[ H£9•Z<(*@‰ðLî5À´GÄ:ŠÜx×›úã¨òÖ{y’PÊñ ‡0˜µ„DyQ®‡»µ/%¥¥ß¤]°ýù¹O 3ž¬5ø–Hwê"ÙåÔ%o14·ó¦,ýŸÝ¶Ç 4Ù¸–Õ?F)CžkŠ²·Rûõ­nò©¬kêô5‹o +ul¢—!ÖxJ†óæ†p$ ¬ÍÅKNgßò»ŠG–´¾Zs6^m„á¾ÞûYës8磩}‹2š² ŠH¡UëÕ +]Á‘ŸO¸í$ËÆõÞîH;ºCié_Œ'6Õ[$K¢\xË;í^zÙ8?‡!7•‚&þ¼†¦•–ØdD•¹§tÌÒrjpÞ—D+Ɇ‰Ð¾µ¦9ZôÉ€Åã“Ãó3HTÅ×IVxÞuJð®Ú'8Òü®/,ûHÕO@afP²C4ûCobž­*³¿l7Ñ#×±? f)ó'WgÿsJñ’«+) ë‚c&y½n2l°PÕŠ‡€¯ŒÅÊ¢‡ ©Èð– ðP€øz›`IMô ó”” N0LùÎV0ækëÈ7%¿ãHBh5>¼ZY@U4½òœh‰9jD‰ÐFÍÖ|HVÛx6ÑÕ‚çA ´{MùhÐŒB«ü—à€|Worr‘6•û˜ìúBÊî4·B°Rlk‡*Øô6@æÖ—‹1_r:~‚–¯¿î‰ ŸŸ—å«a-‡:5 ËÌÉ5hÖ» ;”ÄüãzíåŦêüì»á6`Õ69øŽÞɺ²Ý±§ }×LM¨a3ë1>pÑ°‘M°ªŽ´‚q#Ó=4ëùnU ß›a5|”F¦#<¤“œ,çÙÜ ‡;N`,mJV ÛClòª¡Ã¶Ö˯·fuLŽ#ûÖi¿>£4~HÂm\ì=c+µt$¬eÞ&«X\ Š>À" ‚#ˆªTräêéE«ïð’®² šx\¾ºÇ5/WÇÒänT×Òqéæ)AŽªP¶É¯mn®DöBÞ®âaÕ?,&ú íäB]ô­]PE³bÁÑ–áþµ-ÊÖ½w¾)¥×2­ K½(]äil°?~6‡Å÷F kóó¥å"µÈ}áÙu° ^«ÉŠ±DlÞ½g‰œG™—`§³LÐ&¤Rfê`…V'3‚EïMÊ)‰LÛ–;'#gtI¡W#Dú]˜ö'…rÜ£¬Ù‚”OCXmÿ¤ŠðÉ6 õq%QmË =Gdb¿ºI6«…!#[Ó»¨>ýB6V ¡Éh’ÍcÔU´øíIF&i ¬%fd@#íaù§QÕ#02o£2©Äv`윊Oí줎 r´3åó,iàÔðPfQfroü3é“NY³ ¨¢Ò«5À½¯%çÐ/¤F‚–§*=(îV$"Ûð3s¡¸8;M&”êúøFz-Yqɉ#ð²~`þ1ÒNõVüýcqúìôÜ™Éåæ¼o71aiAûâ<¦è„‹1ÎôèüÅHæU×qnÅ÷4ëMN…üØÝ ƒf>g¸Òœö3 ùŠ´6‘$Û¿7Ê!£y§{NŸp|jš.Ôß ?CÓVÓX Ëà¬=½OZ{ÛÚC³Š8L-¿Y+ ^©šUØÞluJaÅ[Ï*ßðöŸõ€[TN$‘èCÔ‚]™¦8…®«ûö‘ý} /¥‚Ú}ëºbµ¾ÌèOWÞµðìrrM-¼ò€å×ú÷¶f.+’ÐŽ‰ðæ+ÏæXÂUt×96{ðW +g»aØGrI4£×ÁÿHarCœ+2è)Ñ©kìÍ©HL‚õ EŸ6No­¸·8׊G²XÏ»˜ÕH0Áþ’wDÊÓ/¾ƒøÁòˆ+$jÞn…oš¹ÑEÉ+’:Æ„ øž³‹ÿ[ëÅG^žä—.•$Ü>°“¢y=U3Ef©Ì¯$|pZ„îm¹| +ñ—†ñté>=ùRŸpÒL»Ç‡Â”)ýÓPÈá­^»qí— ¢7k•ÀþšÀ:Ø \•¶‹ãyý˜ +ºFÈáèÓaî„…u¥õ"ôÈɀе¨sv_¬T*Ï;Ɉi”¼õJPØi±s4¤E›\''†>…œø*UP Z‹™(ÌÓ³ Þ# û·.Œ?^f£ª¯Êæ 04â(ì÷´GÌW`’3ýೌÔì©ø­½÷¦XŽjzñ•Ã€U*5<ÎPáãÐO! ß—ƒÃ—XòÞµ‡.² ^<Çç›ÝHãÚïôãv +‹AÇvŒ·˜ +ØÙ…'µí[~P÷< ²'žÏráìy ]9®SDRwÈ™þ†åK‹Û¬vÿ„àÚbÜ©Q]n‘’ w*ÉÌù‘gcšt²ú~ü¡¨c³Uì\"X”ƒ›OíWé6ÇÀEz•qµ æ“)O!Š¹ø§Ô^_Ëšpü¡¨c³Uì\"X”ƒ›OíWé6Ľ +Ö nÙ‰Ú*W^¤^Ú.Û´dôÅLï%1<׿/GPíô´dF@¬B‹ý20ˆÌGÈ"‘vp0ðBy¹BáÝ{ÅÑ µ+p“×…ô=~÷»©!XÃÍ6î‰ØŠjæ pAûÃœ\³È£fk¿‹¿½$Ó“w§èjĉm@ŠMÇØà& ŸiÒä(áDÇw%ü|Ï”‡-Ïdã¿A4 m•`ðƒA®g[ùéî—X¸è8]kRâËO1MC€n‚VýG®kÿdô28gû|}/U¼R¢ùæ¦0¬AéûêrÃÿ 4Ø3Îö"ºº:‡òP¯œûÁ‘Iª:¹'óÓ+BÃbÆ\ú÷-%éXýSmž¨gk\DDøÞy‚k?”Î* ò<‰àf=Ei$n“WÌ!ØC—‰ýRýjì%¶`UåmJ³ì´¾¸Ò”=3(ÿ¨1좎†,x91*¬ @®}É;øÔÏðl[”n!Ðõ§™ÍÿäÏàNIâ?øEÏR]qƒñ ¬„u¥ÂLaéRr'ÞŸùaG÷P%láßdAÛÏ#²ƒ<¡5’>¨&Ì…ÌÉ!Þ¤&’÷§¦éÓ¯ô'ÂüѳFÝv–C‘"Wü>¢B²QL$ÍtÒ¦n›‰ˆoPüÜ'½ÿê'6Ñè)ü¶»fÆoàSƒ„üÙ<5ìx¼²‡óÓâ¸ò™ìÏS•ƒÖÇ~΄q@¯0z6¿a‚ñWGMœî +åñ0Š#ºë$l:Åô+jé£CŸsäy$¢I¨ìàÞÍ­.l³2åÉ-5 aŽ +çT”Ÿb);4ÿ–ç$XìˆÀ/rÝTJ4A‚i¥Õ’”ÒvRû(ˆX`ð–V,Û‚aûÐZŒå.6û^Ïû©½ã€¶€äÈ7'ù=¢ÖQsG`¨’9\>KŽÇ¶TÓÖkIϹ·3Z¯±²Y§±À›Ç\\òæ€.¬F 5*„rî& ý»éÞ›tœ_RBÓË%<6¨LÈ7×k+ Þ _“ q)ýO:«E^·ØWrÖ$:‰©À°»˜êÓµ­«¸0Æ…¥ßª´RýëýEow„OÂt8öàÁU }bÕ$ܺvä›­ÙK–nñPfBËïš×QÝAØ@B{ÔE|0Š<¼ðüüTÈ°ù Ì@Ï–ï><:ìeªTŒxªxΕøn°à³Eˆ‹gÎH*DˆŠÐ8Øyn¨ÒE=e¨ÉU%oÒJ.B·fÁ_¹•‘)€pr™ +äóIwÛá·ý9ŸVº]·ü¶Ÿ1)ѵëX„Ãe>`91ðT-è9’—oÝWÑ|µ–Š8KR ýˆ•¸NM09RÃÖ:?DêwÏV5ñÞLÖæ-ù×ñê¦Å§k¦³Bì’¢ûÏŸÚמ*t(4‰Ó4"¹ü2¢×Ú£ÈÇ;xñ÷<«L -[™Ý²QöáÊ.l´ý”ðB@ŒÆ#Vˆn›'¯±«þbŠegvÓ ¢ªïð’®² šx\¾ºÇ5/WÇÒäeØ–A/‚r:[úê +#X—å}Ÿ¦øºoDØ£  ëþß1—¥yj#¹'"~Që÷ü%à´Ë’ àSÊ‹p«ræ¾gk7cŽtåKÜdÄë¤ý"Æ}|:÷§®âÞ]éMëÐ|õ +^{sd{ÿpa6L +ë +¯ÏŸ%«tÔYFÄÔ Ÿ£Uêbì÷m'Áe³»+3Ú?¤A¢9£W½JF½ +†ªÏì|-LÏóŸ¬~µkâyãP4Iê­Ç‘E?8$Öû¸_ܵtjÀÓÊš…¸‹·²é%Û3eÈ VàstÍ"Ê~¥æ‘ÿײé;¿CVÊ›¨£Ow²üŒ÷=‡}f PVÿÕÄäáóàn>mðÒóB@G¥ËÛˆŒ„­*Âí®©7%J'ÛÙ¢  úk‘—§Æ4æ­k)ÉD‘©Lú›C€Ó*yÆÌi€$^ÿÚö$µ&…Ä+KÔŒ#¥Ç™1ÂYÏL÷_ö~pÑB`õ*ç<ÎìÀyè7ÜöGEñ¹F•Ô]…‚®Ç©•«=pºÊ@ÀØf£ž}¸ÉRA€é â®ìd"cÙ” D5…àBŠªÁÊà#å¦ý•Œòü/®õ"IPa]¢N³±Œ‡¼!ØšŒv’eÀ< +<¾æ$ éFç«s^ àÄÞ{Âs’סâ€Í–êÍ‹¥=!\j‹×—·Y€çõ@V\5be|Éì²Ñ3i³lã¦"¥Æ­øÆû+Ù4x5ßåãvÚy¢>‡v9-m€ÿ\ž®.2óÅ.P‚¢3·Ù ;ÃKq}}ÚÈ̪‰YKýziÃÞ3—åîíL@©±ÚßÆŨù…~NÄè9æirú|6Tœ—m¯9ŽßªÍu¥$) ‚Z­ŠìÀ.y3áœÚ–Z‰y¹¤ÕèÒtNþQv:k½A§­ƒ²'Íõa©AExGÅì À"À‰«<²¥Â}e$ÍÚ•óy)îU°2¥ûþY‘ºÂUjéš¹¬ºù¼ãØ,Äwr{·‚ ^üI”Ã:‡ÃÞY7ÔTY7Qí×òmÏæ¦Ëeµ5»FÌò8ó”ø½r'›Ã°ÒŽúO2è·”åÂ~Ū„Pr­ñ¸¹üæ‚B^Ì@Ô!;ñéãó´–¬w`C‘i$qE4F?%×Ë(áÃåhÐèõÝNTÌu²Ó“ºy¢)†,ÂôUð0»Üœè“rˆÛÄùfaÈÓý½l_ƒá'»à;ꆴSVøÞë{é]»øýp‰åÌÔ6‹ïUj(mEåÒ=×ûfG&!:×»$ çß½ü¸¢Ñ1AãXØõ>4× õÃ$Þµb?ÇÙîop†…ƒ)ªÓDZÛÝ­áŒÉJC¼´â…ç̨ê+½2D@ŸÉS~Z^è`Ú7G©ò}ÿ@ï6@É MmS_MÀªªŠrhçIÛ¬±HAo`œ'©¨C†%tÔ‹‹dKö!á·wWé.¢õðz(^&¤RôÒtm–~-M˜._§ õ~Çß|óž¨%æÝŽ§¤•ÜLã‡2`è(x—øÆR†Ù?À±¯‘ð$˜)7¦°¥£› +4«+mÖ1›Ø‹§(áDZ^x8p +š¡"‘UgŸa†o6Yf¯, µóôi¾8?G»Ž$“¤â¦`V»øÄ{×1ñrË/ª ý­@½:çÖG°ÿÉ+d{xš›Ô{] ÌÏÏòÃNpYÙk.vTe%¬Ÿ~æêt÷ÀyÇ·Vž`…ßPŽ½ bƒ‰EmNÎmpV­ÄŽã–u×Y³}n¦~S±ÔÝŽ™Î¶'vø¤\)@~½ð09ñ²ÏF&` ŸRD4½Ü»ÓTééÚÃG3 Þi}ó'’V!¬N値–±/Û5Ma£IðSb¶Žq(´ 9Uò•K!rdz~vˆ"?ÊÂä1î?a†øW²û/@;êÇ ôrËO>‹…$…ùt¢Ò[àZ燸*¦±6U•Ë¦Û‘‚$¯£ƒ{/ýÃqAª7Rï£xSq5ÛºDÌaö®EÜÇç…ë 6 œÐÙ¸D&ðêS^+õ +¢ü}KM*×¢úŒGt81Uÿ\ rö@K¦›ÊÏŠ³öuø%Oέ_˜3{sRu7¸úXøo2ŸM´4ÌÓE,Tþ")yï‡@ù"ñ ª:ÓÍs²²¨ +·¹$ñ¾«Ÿ`Ú,@ï–=R%­ìL1-dz\òÙðô’£ÈÛ#cÇ9¬ê´dït§Xyôo¦CDž>™?ßÜ"ÌÀ2ã‹^Äuí*a”š_/¡^°k¬)Wq}®ó˜mEZÛ¼_Ï÷¹XžñŽJ5ðG0 `tw½°þÅJৗ%ÚcUç×/ê¥Èp„gp‹ò£`£:G>mcÓW{,™¥ô] ûqý-´ßÇ—P=ö|´sw©ž»Ï;ï¥BM$:¨¼~‡gs#c%^p@õÞyÿ˪fè’²ä:¥¶É."#©ÖSÒ»iÓ_?Ÿu[ M]¨?áy˜!’AÀÚgå¾À@©X±K³×‚¢'›‘ŽŠè²ï.9R’¶^ue¬Zð"–th%Ë'¾(^g2–¾W|à làöPòXŸæ‹)š>—£ð³Z?èß[$Ð@ÖUG ìqÔžÌZšñ\6L ËˆÄïP›eðú‚ZáÕ^‘§ŸžýÎÃ/‰rßF _c™ +)‡~ÑÛ0zqÀaHÄøÄ=`àÖzC#ñ}‘xÓ$‚¹à“ò<ªÈ÷^¦q%Ç*N2FÑïô.#{B4£×ÁÿHarCœ+2è)Ñ©i«*v,¬ÞãƒaMVM>±¤ÝLF¹ÛÈŒÛkÅþìYgrô¾ºv‘H ~5 ˆI©n÷‡ISÂ$Ÿç a2E5‹ÓŠRóI#çÆ>œ¹â„Ús<§²‰¦h_8½Ô]7ÛâsëÔÕ%pÐÐ5‰[´©‘ÄV4´wÓ»72å¡+ò¡¤äÕþçL„æ=è†ÁÖýÖÓðÀçîRìZŠ +¹¥tš[ ž eüM£V©Z;Î;Ha)X³‰öC©âû&+ýw>†rWƒ#!CøIq;ô¨“2!ù]蔨ðû9 FI‡+”[yÕç1ê7%r²A®Å]4F L° y!vô‡ûö%ñ{Ú$¬S¹PÞØèò(Ü)Ì–•§p èeHn^á1™hü G™Oo; û‰ª£q{óÍRçü†Èþ>Ó?Dµ£…7ê•Fê<Ǽ”i:F‡,0@ÈàSÃñî °ª«Dh“œƒ«tD~š†…؇Š$émnX?©¹¢É}"+OÕé¸Kš&F¢h•qˆ8§|æ\¹¢ÿ´Ï@º¿šû©* Q® †”àƒáÓ}›ÑG¢×ÀÖ2xþŠL;q`Ð(¦¯ÅZdž»TQ¿¶ÅìÚ׺¨ÎÍÕƒ°($Þäí¯— +³™!e}²µQÿD*H½IüpÊöÿ¾· +èx¹ýFt;6à³Â¾v&R =[ZkoÏ¿þóN*4q&êÍ7rVГy8Uó6IRÆ“ò´•³Þ|p…K;ÿ:"Ù6Ö3Pôyló[ã‹û6ËòæÐ×0CDáu1ÐÆ" öKaS±«FÚ,AµÒXk'"ÈprüsG€ËÅA=dÖQs¬ríOô°åºÛnV÷´³¹Q;¼Ž:SG²šY²§x€ñíÜ7?S™ƒÎÂ$‡K·Ê˜¨pÁ=¼å Âe‘±ù¡+sÖmOcÌSW÷UÄŽIôñJ†E¦Cl@±¬ ųv‘}ÖªÄùXÛa´­L}¬êÏ%½ùÛÄ+è–Ð +5ZŒéÍ7ãшa⸅jÛÛõRTA[¼wdÈkÇ3xLjW|4#øi€GR²šaDFÎâ%Ñ~ +ÖãÖªXîá7e¬Å¸—m^=º£cýMø"dŸ²× Jü `ÕD&•K_:¥£çý þ .;.S%3e"PR쬅F9pþѬê[t³ÁFƺáeXYûX“KýÕîmïæn¤ ãš9·üm b+’^ÉN‘I7j}>%,&÷ŽCö;gˆ4åÔ~â*6vUû'ç¶ç'öþ°ñ¾&ú&Â÷æMY@Ôìn(Ž›¹ždYX麾vãÀ2}!ýèˆR„ÁÆH‘â^˜ƒ‘ÄT>½zÎa «Iš֢ýÉʽn! ¡ÝU4¢Lš ñ¡¡ÀÂ7 –®;%®ßr´’úˆîŽ‡à‹Ä8–œ.¸ÄdnHOzS¢¾v¸ï$øŒiB7Ê·w qp´ëº¥I¼ƒ*SEÃùæy§ Í[ ©>¾‡ã÷$Ø”¸H»ex(ßB]ÎrfÖ©Àñn«J2AòŒôYn¤qtä +¥t‹ÕÝmçN8®t ^gõt„¦4úü¹©¦ŠrÏÂ#ϪNZJþÖ@·À0Œä†=‰ÜbüXoàF±•–\󃧖,0VâüÊ9­x¶ ðíS¸ã壘^›)Ñwß ˜q8`ZdžK¤Ž{¹Ðð‹úXª7é }J=tëÛä(“Á/“}çÍׯ®gtô;‰­ÇäLìF³Ö~Ìj*ü•¢ŸÏcÛÇ6×8ñši+Q.afo¾Þ¢P(dÂÎ!m"Ì‚¶dºiIkÒ÷P!ÊCÓŸ¶3I"±ÍWY¿ã6›A‘?ýŸx›ž'Ä;qQËî_²QDj.‡ÔåÁ³på¶ÚÒÃ…ý´Þ‹9wðy*K©!Ï8K’Bk¿;Õdè!%"òi«gOæà‡þ ÿ}‰:t™MWgd©Y.y¿—øÌÎ68¡ò2ºîpŒž&––uõó¹Ã:±‹ïzM-­©Â–u¿Q?ÌÔWQ ’ý§¡yf±£ƒ¼Âý¼÷þ°O‹‡¤ŒcËü/B3Kþs5ÉÛ4õ>{ Â8öè"Ýyj‡¢ø©ï' øŸBØêŒIÐ\Û<ÄÌv§)†WUVÔOª'A¥ü@Aßzo †c poÕH³Èpúéy&ǰ󷣞ö0'Üs9Ð÷úét”]÷ƒ¦wÛ‹Þ°V½Víõú›MË÷YÉÿ¨è“ÂŽõIÊ<|nÈLÀNÙüF¦€ÞjÓлõïdJ:*4ÔOýp#Ö¬¦#ªÃYife†÷Ì’õQTi.ÔS¸1F<òÂQDʢŪàÿ©í$FÿCœ Öøú1çb´Ü·‡øËùC¹È~®€T>Òq˼'JôÈ°³­¿®’´)°n«Ð‡ÊhlÓ­ÕëªDòZü·Œ5ž»&ë2BIà«´Ô½¿Vbü¾i†@¯kABh!9ø_ÔOE §Á¢LïÈn ˆNà0M/ˆ×ºæ- Eø&µ'¼G!7çJ©ÀB¶šº{ךƒ=yü=Ð_×´xʹv:íˆÿ"Ž¥0€ÆH(MUËx0uϲÀ‹OЊSë†yCv#Ÿå@Ô¶1¢\I\ÞýK +Å:±7‘/³ˆùY&›±´·ˆ¾‚®½Ás0}wß?fPX·½Å„¦f_ŒYÁ¢ñOP­Ìn!I£ùåË¢<˧Äl~0ˆôÑÉ–¨2ë®`ªCfÂà ã³©0<É'f>F–2'.ÚõâaÇ}A0¾?Ðb€íë©vñÝP!^ pgñæ••r`2¤ØWÈÜÍ#l²†À£á \é°ÛP{Tغ,ß…žÜSÐ邘M’¥Ùf‘_Š©E§0Z4ïDÈ4¦zá1VŽX±àÌ‚¢B7¢õZÖw·6a„ùÓø\:CAvdS§ÅÐ7[Ñ£±H¯ +éR)vX*ê5¼@õ£î!¼–í€Hu¿ éI}Ï„Sç`¥gM0U#Ó>AUPƒ$wŽZJÝX¼Ç,¯®Ñë‡RrEÜñ­¯ ÷ÌÔÙ«P nî7ýûÈdùj@¢Ä³f÷§óaùGƒà^Ä‘ºä»¸„JPÄLš Ô6&Pn¤'‡uýÕâ– /Ã@õþVbð-`•Ñ+©5*Z–“³9TÃŒF _iži°¦É°…DòÓ¦ÚßÓ…ð°Ùº»À¿a[›«é}'•GŒáh;C …RÙNn5~„:¤Ò§ÜD«°™üäÚe¤ÊåÏ8ßLÐKK= _r Û«Þ÷—Q먖‘’÷Þ“31pWÿ1VÔ&êŠÜˆÅ=*t·ÖÄ“»RJÎ…AõUõòdK5^žqʯZÕYŽ‹·rïsäM¼}H<•Þ§e_‡…•tk$°ºEØá9ÇAFøaˆÑ†%z:jooÚ¯ NJvÔè½!¥Èá(“Yçò08ÖÞRm¾((?اG’n|õ¸‘Š,°$?gn âÇf…OèT€˜Þ+ÔÄ‘åœ3ä>;Âfö;*¢M‰½Ö"Mú| G©lÄC~_ÉŒD1ÎøkØ<\Sùéç{i‚çfùž…ÊT.Ëÿ²=zuÞ[l‹P d}îÃX‹t¡>/k«« +0sàÑu'âŒ#Çuȹ9‚§í¢„à·£köŸªÉênó92ײ( Q¸Ê­ƒ^®¡+0íߨ2•[è÷TÕ$RH¬ ƒ_I†l™,´DðÇÒw”ú¸,çäãçgÊkü +4¢Ñ—(ú³‚?¦«¶ÛcÙ`‰¾ŽÝrE®d­Ç!êãC ã°™³\AíÐWdö5Ù7=óZOI\ùÄÝyô‡Š:PŽRfã…Ù/ë\½ãñÉû8AëÏäw¬Î/à®ùlrsfçfƒ¸Sñ9éˆ<˜+K7\Þ<¦Ýq@Nm”QezÀr< íp%¤èàšÞýGÒgr,hó$_àq"3›ë&É\ž ™dh7Ýe²ìæÁbU\:‘ô÷æA=pÿŠ'*qq}¬ïzãùæI/à˜ˆ¨1§¤Ëa0C"\håÿ,ÔxÅ>q ÇäÇvõÙÎU'½}¯ÞŠÊ„MÓJ.ÁõÅÇrÿê§ÂÆþÕ„ÌöásÕo=<ÎÁÎøÀ e8ÎÒz¯á—©–|‰‚lËçq¥4¿Ö.}n<ˆjoQ¿²‡‘»õŸ°½ìc~—-:ÛÅ–Z¼g¸¥5U¬rHßýª·ÿ±wu#§ª&è¨ìSM¿ÔXòúƾLEÿ7—5KëZ-gÍ<õïCI³ÝÞŒ½¥¯Û$d…bË3”Áh‡ÞâËÃxìJÇk©ÛÚË¢xýéï°2u½JL …³¡m¨%ØõýI‹P H°3Éìh:°Í–Äg'Ã`ñQ*ЦÇgž¨_ïw$wÆ’Ï%Éšù5-0I†æt)‰ 5Þ¥_)(A1…'MÚ¤À@tC¿º~s¸T!u®qx'(¾Þ"·q\t­®Ñ ÂEÓ^¡™#ú»KcF`ù‰ü.o¶ŠC¡15-u#Ó0Ž%•âqŽ$/Ö}iå·Ñ‰ ”ÎβæåRÑö19=£ÌD¥ÁvÝõïÁäÓ¤ÖO‚Î\žu}'èvrº^/EGœ†ÿà~<Ø©BÖægvýí µÔB3ªÜóðÙËÙhh²?(ˆ¶X½òžøeØÏÿöcô&HUF×"»£&ÔãñÜ\Ý˱%JÒE9*ˆ#ÎðI©C““¬K©ò´¸éYè‡|‘²¢GWJÄ0Æ*ÝlÆû} .wù„ˆ¸åãVCA,zƒaÔ*Üð¥ùO$Œ( »Lƒ†¬èB?¶,öý’“‚4LÛ¥ _¯E±¡Ú®”ÍÀ=9ó‹5ç±àR'»öˆÃEøö·c~Ì h™¢þ¹ð4+Ä•(@ó[b¦.´££ZáølâðÀ R/l³¿µ  Ë¢†6°œÊq=_è_gƒù?š"(cí žd襒ï—Ö ßüÒÏðÛ.`¹; q÷?˵ ‘"áaù0UÃC,9ç²È„9¼OT±mh ¼†ECl?¯ª!Èo·´¹Öþ1˜|žÞÖ°ãEóiL”ÔKàZÀ µFd”:Æž#UÑœÍâ'qöÛµ7+¿È­Y³öcJ³,$U?zð×hÐãWÊ:GWB##ŒVS£¿Ÿ(ʽÎhÐ&ÁéŒ0$ !Jõ¢ð6ô%pã_8|@e.´ä7c«qìojæñpN*Øm¦s¿U~˜3Kººûý!(]=Ëú²ô¾f|jOÊÖiC𰽘¡¨P(¥Ù†¢µs˜TòFYCÒ~Ò%óa¼†)ù~£ä¨d¬šþðÜØTʔѕ<ûl,k–éíuoÊ€Q?¾bÓñ“uÖå¼%!¼°Ãž6ã3m®¡¢)ÞØÚ]=$‰£¶˜fãnC:)½èn5jˆ ¼à ioŽ{PÏZ¦àÀm^L:Ö ™WdõëƒÝôjt¬” `áÅH-.!)\†Ë`Äyu Å-‚t +Ó5žˆFcðBçj¥˜º£+¬2¾¥®ç°«É;AWÞ¸þ¦ ]Fô?½X¸Œ€Ë U!¤Ž#Î:‰¨íÕæF»ì·vR„ÜŽæ7Bܱ²å(Bø áì/4Â’=* U ”ø€q[‰³+q.*5rÝ E솬X Õ?§²C³Â‘½ÞC-¾Eœ<ÇšáÒ¯­¿]bZú)’cªpÈ9nŸ{:ÌdÛ®óýÍ:€À«”·¦sT¹VÃìº|øn +Å’3{Lé »`œe2ùÓ®sf;¤ &àå~—A!®„ûwCµ÷>ÝÏÞ(êaÇImؤð«ì§-m®, +eäæJÚ±ÝÕ7»UqlÝç>D¬qN‡i¥Ð\ù×Õf¤<`¾U"ý2.R>Q;Ù¨ÄEù8¸Ôa;ÐÛ*påð~À]mƒ÷¯ŽdIî…É•l¶ÎÉzT»ø#)DX-ߟ`$.înÀ­Ý‘'HøîT‹K‡±ê{`æC‡¢¶16d£\âìOÇ&ý“ì/®ÀúB$±´ªz^£òâÅz Ãk¶f}¯½ ì_(̘:®3X>«ž²MkG¥¡æP4ÐÌŸŸfÞ+ä\-oÒ†DEmc#(lë=ÌÓqKð0˜®Ó†‘Æ—™}vš2³Ès”}Ùß<ÓdŸ÷È-ö¢¡Ô\ƒ\°–ñ÷§¡^¬¡ôsÚ¡7õLÛ ØÐp›¹^Ù1jŽ äó%!¶_ƒñR‡#O ÃûÛºÐ=ªNé]5<ÞÒ^®÷1b¦—iŽÝ,6~Õr¼‘ }ȧÈ÷êèP*[ mÒ¡ú4XÓŒÒèQ”vƒàWaó­§/Ÿ%}‰ãÖ¥™Ì˜1l”pø‘›ïBl1.Œ Ë? ¦”ÚGˆ›áW ¢«ŒÎ…R)“aÖ*+ÂÀÙ›hh¹yiïƒÂí=bˆ¨ÓÕq æ×KjO¿¯rEØðuít—úMÇr {¼8Èz¬õmX4†èÝWã^HåŽÜ¨A]p½TcÜdеÚS$“•”´¼}öë:Õ¡þEzŒdÄŸS")´›ŒùãáVêÑ9?¡Sûñùãã.WÏmÝ?«!=Ú +Ssš¢&åJÇ.:äCeÓ1œ!4´c"÷ëp+âÊ …^SœÚ¹ŸH—q9‚˜jÇ<Þ³´v`I`ÂËrCU‰X½lGÍ)¾Åª3«…(Úm8µ‘ L@nÙ2„¦1é +ti}3vyU6óåeJûf2ÓÏÕ*ª4œÉªÊ7Ö—Ìþ¥ö.}Ý“wHt–·QéhVc ªÛ®ÿ04‚‡ÉÃú‹4§fTÝzÖ’p*ñì›^VÛ=ºœC«T +h/õo£í-ÇËw‚:$iݶTÝ;Ë6Åû·ÙôŠjŒ3Í€2lm°óݵí•nQ±_sKž\Iß(eó.ð‡~µw£\äðEå/ì`—Ä?5À5ÙÖ¹âVqù¬Yš@Ã{$9Àoۉ㘯câHàzÖÑ *[ˆ$£Ó\×T ÇaؼHÌr{qX“B7Š6n³eJÙ[ç^9¥y½æázÔUÓ´æJ (Jèù¸:±zy^+àÛBäfSDžésæû¦Ðü0R&Š”÷‡ãç&â=^¬”ós\ß8­Ñú@áÇ‹ŒvW7¢ Dòå ˜/OØ:h. t6@ˆ(»BP{RKb/¬‡±œèª'_†ŸÂG•…A6°íPêŽË¿|‘×KýÌKl¸ÃtàÄ[å¯ê +EbRÑDfHÁ'¿^Lu×¹BoAš1½ž>î ôµ(Ng GM¢ããúI¼ó,˜€¾e Ýä‘”uø‰(Ô¹¶‚þœÜéí¤´6‰ÑgÔÔ5Ì:(ÜѲù$ø˜<œûBýmK×wÈýYºLþÃ3¼ýZ ;«Dº†þ¿Ì¿ã{žqÒ§±øþ »0'þ«à@À—$aõ“Ô}à¢<<„Np÷&|Õ F¬Aɤ…àMÊsOÂ>O@ѯЭéoþw} iÒ~7¦ë{´QFŽú ý}ÌNX0J¬-šÒŠgçŠGÏ‹tEÛhÎ2ªÍo÷ìEò–¯}ÍRØÛù§RêˆU ;Røˆ]ƒrÁÁ¹hy¬g’èBj3Íx¨D [`Nlf£j]½ßn/jTLÏóoŠè™r*ÓÖ&GLjÊ~ÞCfÌtí ;è¢2ÜþK pª:BìÁb=25<×hJ5"mI)î|Æ>¨ùxc艱?²»Z¿…±7w(XѨšÒM­6{!Ô·KI“"V6™Ô' œo‘ƒÄã~™U¨; B*|ji’¥ +N$MäO2¸EJë­S©3éâìjxT5‚®ØúÞ´¹Õò/¯BB{¦ò•vë]XAÄ1^, eòôb‘AÏuoJHLSS¤$z $Ñ Â{ñðjæÿEªBùäR"‡+kzþe á9IÍq^HØæ‘Y6KirÔ’2Ò)£¤:˜v +WŸˆõžˆ¯éæimä㜠Z‘šLlÑ1&¿ÿ¶„C€ætD¤é9_Èÿó¡BqM`Ä»)èìɘñú2á")œ\è¯GÓ¢¹Vúì÷ Ø'7@VˆÏ’I6­hVý€†Ð[M\ˆØ‹ì>Ñ0ST½0—ùý”[oyÀaß¿½¾/¸¿K¦:6?XH±äïÒš½Ê7t„¾(D¸`½’œÌñ ¡*½õÆ<[‚;¿ÀÀ;zB„:̦,u5 œTm’&§òæ)²`cƒ’|ÂsE)é$Ïe"oôÃO &¯îíú¬^×3»Û‰RäMgwÙå|Z¸ëÉsÍ9Få SWå|ØðP÷`“ôJIsÝౕiÈ?ù9r=k–¾×áP°«Á”WÌl `6±RnyîžÎqÙNèpLí•4to³3 å8þç³J §W‹¡ÝÒ¸nHeñã+#VºCʘ Íèy‹ÏKÜmflÑóàÄ^rYóL.­àÁ+2O²K[¼wdÈkÇ3xLjW|4#ýkaì€|¾‰ˆ2<ÇN×ÍbÍ°ã¶BùŒÈ@lXÿ XȆÒ€P=zyhsÃ7TÊ&g¹‡)ï²€¨]í/l±æs'½“û = ø±ûàðÏ„lå‡ßüsFÓõqŸyAì‘#_Ÿ’v™*0Ö1²- t·l8Eî¡kR‚§7[é—ý êŸ<îò iò÷ÓRcX<žòï*  ±É>AÛ{ý´Ü\ã5ZP­Ë½DJÝ<ˆ3–àÍ/¼ºÇ‡ ‰É:uo‘·WªíŒç–$i@F>-CX­³ïeHÎÄ4‘ Šê¢ôˆ8uêD.8võ'nÃeòQ@cøbW+¬š^ÑéÐ1zƒ|<{b¿/Ëk(Å'Ã/1ûG“¤ˆ„QMgíD±L!Šg+Oà]å}ä`Ÿï¡u_FçHÍR¯Ÿ~TojþbÜ•H¡‚z²atñÄÁ¸f¦G±¥ì›³)äÐtMÂóïhÞYBS=°í¦Fì ¯mÍ•ç8¥õêZx-x ã{g=>ÉÖÁŽòýµìPAÑôô2PvDK£˜7²@]Œ¾¬ÅÜãq΂Ze².h/xS‹HÝIW~òàèÙÀö@%ÇnP¹5À&ñvT÷1;­ÉÔ‰´Ϩj`M,*Ó©IÔ”ÂÝÄRj%qTù* +F¼½uv2 Í…°wÚ3p\àxýå6ú2i#;~sTw½:*LÓ¯D3Ÿx8tÔ™ýaMèZS·:©nH—¼œ íNy +yØ­g#J,“`yIg4éaÐëßdÛåb _ª—‘Hfó×Ù9À±¯É(iºÁêH’éâÍjºCÉ“$!Öî(ÑPƒäÜSî9f’þW ÐÉ»[3*Øø?o:eEgÓã|f$âÒþ´„¢ù*iÊÓÿ©®U0C׃Ñ>_íy5ÞÉd®ÛG!ñwü‰¥«¤¼il™*ŽÐà^D¿9O~ë‹8m8 ¢óWæλ£Õ6jªÂ§p¢¿P΂1hpBfá5jdô¼%c ¸0z–î2aS"G¨W !ÔÕ â©þøTÍb8j 2>n†¾:W)ŠLüŸhÁШ6¥J£Æôä©®à±|i R©ÆA5ÿ=OgêÍCl’hÕð¦q›fª>L÷RÑ ˜ƒ¨ä€g¥ý”»“E±U×Z9 •ü{'vÜ÷¡¹::£®“Q&ç.‡k;7uøä¼@ºx¼Á 4R¬£ubcþð8”Ó;0Pö]¦4oiŸ×Q]vø‹>ÇÔ‹Ûd/ â 3Ñ-W7 ›´½ñ] «C‘ý}ügÎ},¬ášt›/‡·*È}Fª@¢7 )јö•ø3Ï,ýÊšþáÒtJRå“W nÈä›ÔÙy1OÊÑ{,±Á$C`l—±z„F¦ÖÀ ÆìÀèóPÃOE~®§þNc{EžÐEÃýä‡Þ$CøïÒ2Njü¿¦6^RŠbÃÆ »è…O‡I`‚Hédûû™ÌCìþá|Y+O’%­Ð¾vÊ%­îÇS{}LháÓ0û+ýÙ¥r¬g·§¨KUåèiB«]´v× ¼Õ²[šJ'Ÿ»Ë¤]K?€Ù=‚ ÉNˆuØØæ÷n‚«te|‰êÛšü2%A€ta_t*é?µd@ß™æÜIäÐ’†á…ÖŽ¼2Ô“Üqy>„»óâ¾âë;i íõMV!¿Cw9~‰W^o #Ãkp<Ñö+ÞjÎöҕݑϬmUFýBž† ÆÎ@“âñ¨Åm®…œžþEÿˆKV@vGã~>²á©›Á1›w/{ØÉS*g¡òemsÅ'[É7TÈ¡ OPÆŸtד††4kË“»jMý‹ýP¡“…B U\;Óm¾v/2‘WJy&$IÍwE'd”…UÂG¡Þ¥âÛ“ÃjQ:x£ÚoS¨ 2¦æ0. ¨‹‘êÂÑø–»ÒQ è‚ëíièˆû¡â+m[RIûzŠ¶†[Ž?ÝØÏC¢±\o·Û–*¼­³0\>xƒ1-²!Ìu3︒c»‡¨sоäÀSƒ`#+±9ƒL”ˆ²xŠüPN‰=íÎGR%ˆ=· Q‡ šÝ¼SWî_XUV¶¶”¢‹µ"K>1Sdydæ¦ÝÜ:‹øFô̈KfyÚêX}6¢…B´M=:$ô¿ôü |~ìá,ˆÜìèŠ3¸×]º³gGb^!oŠö,†€v]K|ÓtäÉf®KïMsÊMßãkêlm¦ž+}w~ŠÛ’†ûî°ò8÷£„ €hÇáúªŒ»›]Ã.ÚÌ^4à˜êÉÚ!˜í÷Ò®›¾ØF§€G’×ð|âÖ»8Jìòëáµ8áéclUéÅÕ6J¥–ôï/ÀW\ÎM>ëô»Á‹èQsµVï@d Ô]:JÍ4ƒtQ·mçæš3#F#| J¡®˜ÿ‰~~&&;Ie-“]÷‘³š0f»*§$R}â}ç¬g=ª,âÙÃÖzC–¯#«aß1ºhoÜ"®êÓXgÌf\÷¢çõ ¡oìkÅCÉ/‚X…@œ€YWŽ}Jc¯§[‘ZN\]íõ³ÍŠÚ|ëÓS¼fÞ±­ç‰õ¡-IÁ¾%4ËdH&s†b(0ØïnÈãFÕïÕ Ûˆê<1¯J˜N×KÁIÕÏe9W¸ì²–&‰HðžP°vtâ}þ€­ŠTÓ£Ü'Š‡aUÆu½Î+ª*7RI‚EÝ"ÕÆ]µ}šÞM,"gß($ ‹ãKWJ$[¹. >èø­0-ÞËóÀQD}è5’‡A»œjNsbéª j)ì@PÑ™»ÈIªÅ’Þ-m´Épök`àkƒØ© 2¥j_mª¤%¿¯Î<Û©ˆn¡ïôšyîä•¢Áq…ØâìHÿ±ÆíR0<1Û¡-ïš/§Ê鬙Ñ";€XP¢3bØc ¿[WSé<4S78@^›z¶ay}7»Ñ!HÝ’ ½Žzd8NÞg,Ÿ`Sn¢Þ+`Sé%φx\1’8å‰rVï“å•b¥ÅŒ8A#¬Éß>0Hì£ò&©r‰ˆ¾‡ã WrØ1w-#±\¹ÐKÇ£$‡9ßáè“,ßiŽ¬L«zAŒï f§Ÿ3‘—+ûë£g‘à'¤žÁ¬*”#îoí0¸pµ6p_ M_ÆšžË‰#…=´Iÿ+³jT$.(¿”óæ²Zø`‚KòAM Áp‡ˆ§x|Ñ +»š•?s9ЄÜ8owÊí>Í<7ààò⇚ê÷ú|¢#dÃIY¶ÁDá²áAÀ1çàãsFF|ý|¼EœvîðKº?t”ã#bù·Š¶}ª’% Y 3ô#»…¥´:ÐqÒl9Wo«‰ jýäûˆøÇi¼àòs‹Làý©àâ­zx¡bxEr·“™Ø(¡å6T¤À-ÉØ¿ ö23ÝÚÁ¤€Úô¹„uÚB¦Ùyƒ dª @ˆû,Š¡ø¼ Å.I¯ +ÍèR‘ÌŸ–Ô£ÁÇo_e‡œ¥´úõcç¼ï”4JÎjÇ +†PЗ¬ÙG—h±† ëû¶ûe§T£lù½þ-ÄpͽƲ v,…{gï]ÌÕ¥\üÔ¯UÝ9N&ˆôx_²ºÞ‹¸¶ÒK§m‚p=‚ŸØÁHÄ‘wþþ˜)],í´T÷—Œvê@?àó#ýþù››‹¬"F–d‚ ){`ÊÆ!}ǡЦ2š‰¼…–uåF‚ $9 --G,AO¬7³}#ÕÖ_ê,‹màËG”­¢îÎÊ&}Ð õë°Àz}¸%6šk¯tºÄG @Ñ%¸#º† +¯Ä™Äp¡ÂÀœ:ƈk d–w¢p_H?ÿ4lþ Wz””æÿŠêÂ׆ËNu¹Îzk:ói8ÀŒ’™üR™àûï=Ð#*õ@Å>x™öÖÑ›sa =÷2h…¨í«%êÛZEµCù—¤(«ÜX[åÔr—ÖòÅs,3æýÉgçUWiØ´È8HÑvŽb+YÉý‹ñ7§©¢TIUº³ô Þ#Ø£¢€P?E`ýD&&6×({ *ÐëFÎ\ÍŠà_±¼FŒfxPS¯ ‚ôn²—ä– +¹/Ä)³Àà[‡…\¨êˆàt4 ëlbé餄¡Ÿçbï‰Aþ‡©ø×ÀÔXÑC£F€\d#›ºDèoº’³q‰Q¸l~vóøtF½]ö ïÞÄÚ r–l˜Úe6I‚*uäs‹.+àÑ«¸àUÒzù·ZÖ †e¦¿¸u|Mëñg259#\°Ó§”{´ºË¿a19ž:žÓCK\Jxß„ ¥‰™•Ò°†Tökû¤ï.b´=ã¾%0ûƒ`bóJœðí ƒ"Sý9Qšy;a^BœàÁ…á{,w</Žë~¢Oåx3†,ÿÞ%í„øµôrüoX6~=Ë„ºº@x!ñ#Àd^æŸ -õGFDeºØ¼&a¬n97‚}?`šÄ©¿wWâ÷«SS___°6š°¤~cè¹·°€ê·Ø²^²nèŒï[{ 3±<*v—Ô5kç”T+'ׂipщ>zÞ[Ì&ÆX;$- x}:lŠ0eÐ<({.Dö‹ªSßãÿWöl ‘eqRžrëò⚧;#/U÷Ãì[K¦åíìÕ¾È9/ï—ìz¦H¢ éÂ-$VñÊLÚF¤ÜEãƒAÀê{ü†›æ­›Àlµ X­==…¥Ñ ¥– X2¦I{“̺è3Mª„cú0÷Ç–â +¿SúGß. FËþRãš””‘eNè Q’dWRË-ßFIÈU;ÿ{)¹8™=3}ºnyu~‰ôCT·¦·^<ö]ˆõ\j‚mElDÆÄÂh¯—‹´{€ÄÞÿ³‰ÆM5Ëóìx¢Õ%—y:'|‹p¿ ™5ÿÍžg^)`Bò¡ƒÔ|îÌÙ¾1ä™O¥ílwv1'°rñ§‡ÞöLàrM©S¸-ëKÏù¤1i+a"•$®$ˆ +¼Ì|°Ûù;žÌ¸ô´XáºÙš“J#/•Ò1¿äÔA_Hù–ž.3hLâÚ ÓЫAM'¤Èbh€ÞNe— BµŠ'ƒ{Àt¡9,å4†¥ÁS–·2fü?;™ÒmÜPßÁŠ³ª½àﶵeÆ»ú²¬ZvÏFÁõÑñç#Ì1ä\Æ9ËI-¤ + 7%LMå‹n—ŸšzÅl¥½ýHݘU¸a´Š¡ûQtBK)®&¬Í盹j€‘@æAÿ&YwZp@mŽØ3ÑØä¡à éº¢O•*2n‚ÑÊÉÒTz÷dz¬±-‰à ÉE +?ZÌŽÊÿ,k¡|…-‚ýœ*ÿë6×÷õZöð öŠZ©w…Þœ¸]Öjñ?ñKÐNOƱãÞcƱstÑz%”ï¾KÇ<ûÓ-|Z¸ëÉsÍ— æ6µ¿+DWF§¶ˆ.†«[“OÝÞ†;'ü$«:vöÎk[¯hуÉ­¡ÀPjÄ%Aýû*YS—.q~üûQÓûÀ±QO­ˆz~¦Ãº> Ì6iû´à7}^äFò•CQ²šF'ãË}Áš§ôŠ3$½Zœö÷Ñ4ch¦‘np“ ǧ³LŒ‰’ó`¢x—R HêI<Ý>_¶ªud?Ÿ)9®Œxšc9÷ L´b(Å07DpGªœÄé¡+FQÃ(´Ie'0é’§ “B#õ +$Œ;Ô÷²õMÊg˜qÆÝäÑqZ$cI%å_ÂEñ/JZô Øž¸ÿZ1¬,Ì+_I3AäB%^¯‡GôæF†-6«øA¡úÐWL~=6Á>4ªê®æ¶P@æx +Ub]>Í̦ì¬*¢×ú4æüxmFû.›ÿLé!86Æ»ITÁé"P\¬Á¯ð) +endstream +endobj +2230 0 obj +<< +/Length1 2210 +/Length2 15959 /Length3 0 -/Length 3928 ->> -stream -%!PS-AdobeFont-1.0: PazoMath-Italic 001.003 -%%CreationDate: Fri May 17 11:17:28 2002 -%%VMusage: 120000 150000 -11 dict begin -/FontInfo 14 dict dup begin -/version (001.003) readonly def -/Copyright ((c) Diego Puga, 2000, 2002.) readonly def -/Notice (Copyright (c) Diego Puga, 2000, 2002. Distributed under the GNU General Public License (http://www.gnu.org/copyleft/gpl.txt). As a special exception, permission is granted to include this font program in a PostScript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def -/FullName (Pazo Math Italic) readonly def -/FamilyName (PazoMath) readonly def -/ItalicAngle -9.50 def -/isFixedPitch false def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -/Weight (Regular) readonly def +/Length 18169 +>> +stream +%!PS-AdobeFont-1.0: LMMono10-Italic 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 778 222 0 +%%EndComments +FontDirectory/LMMono10-Italic known{/LMMono10-Italic findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse +{save true}{false}ifelse}{false}ifelse +17 dict begin +/FontInfo 9 dict dup begin +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMMono10-Italic)readonly def +/FamilyName(LMMono10)readonly def +/Weight(Normal)readonly def +/isFixedPitch true def +/ItalicAngle -14.0362 def +/UnderlinePosition -167 def +/UnderlineThickness 69 def end readonly def -/FontName /DUJUUF+PazoMath-Italic def +/FontName /XVBOSG+LMMono10-Italic def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for -dup 97 /alpha put -dup 98 /beta put +dup 65 /A put +dup 67 /C put +dup 68 /D put +dup 69 /E put +dup 72 /H put +dup 73 /I put +dup 75 /K put +dup 76 /L put +dup 77 /M put +dup 80 /P put +dup 84 /T put +dup 86 /V put +dup 97 /a put +dup 99 /c put +dup 44 /comma put +dup 100 /d put +dup 101 /e put +dup 33 /exclam put +dup 102 /f put +dup 103 /g put +dup 104 /h put +dup 45 /hyphen put +dup 105 /i put +dup 107 /k put +dup 109 /m put +dup 110 /n put +dup 111 /o put +dup 112 /p put +dup 40 /parenleft put +dup 41 /parenright put +dup 46 /period put +dup 114 /r put +dup 115 /s put +dup 47 /slash put +dup 116 /t put +dup 119 /w put +dup 120 /x put +dup 121 /y put readonly def /PaintType 0 def /FontType 1 def -/FontMatrix [0.00100 0 0 0.00100 0 0] readonly def -/FontBBox {-70 -277 902 733} readonly def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-491 -316 834 1016}readonly def currentdict end currentfile eexec -ÙÖoc;„j˜›™t°ŸÆÌD[Ï|<3322ãý¿ô9I¸fÈÂÜ"ýÇXXH`ì{¶Ú(Ìb$`á«2 GyµÀ<“»»É\ð&’ÌMê¨ÒêµÂæCté+Ë…B›®JvÀÆ·mo÷Ïš}^ß¼ éYTY½·ÞCÒ]Sü=ÚnðÂt9x¦Ð>ÌíJòêKÌ1¾‹ž'r6iÁ’Xï¯Ü'l±­é Š”6ÑŸ±Ã=ïvª1]Û -…Ä‘gåŠ@ìFÌ7žªàpqûÓÇíõ')øjjOh%SÂÊösq™·=ÓðŠ{ãÑ.] ˆZ§O7Ñ”î‘éEÕ„|Ûª´u±BwLà Ñ`'ª´&ÏÄ0C¶žg†)X0F?nÂY¼›«&‰Ñ8 ˹r¼]lpK¹GC°ÓpŒŒoÄŸ³g\´t/íþ–bj©ê}t SòEî š²£¾cûqR7¡VÀ“L%ìŽY'1³¼b¦êÇî ˆJ71Ù®SU3ÿ=ºg¡OŒä>b¸H”t ÈkK‚¤Ëc:ƒhíý׳A‹:FÁ.ÞxW˜(úäÿáüLÆ^œÓN1K °Ø@_»Pÿ̹>ýnéæn¹ïÂoùf³µ7-Uš EøßØÙlÇ#©+¢ÞöA5FÔ”¼kèyêî¾tçÚ:÷âR¶R¼ª2\þ¤3©Ìøí’ l ªvÕ"‘Úª> VÓûœ³ ù”íb¤…/¼¬þ™÷“&’¥íµæÝ"@Ñ4 -¹nD5¤tvÀñ_\…ž$ÎS„|Kp°ÔCOÂÈ1vF^É(âByÒ@ˆ‰`gˆFú—szVŽÖ‘‡ûŒ0‡ôÎQnÌýÙ .ßÔÒÄT=>>‡rÕYA,{ÓøÉ ÉQðƒ rçFä!Þs߸ߟ0×€ߎ5¥Ë¢Vô¼<æ4d(íênL¤Ø³[PCŠ©ì²;üŠ+qÞ¥œK V²:¾d£§&>J3)éÅ›ôQ6Põš|ÞB5 e í£ñ)\£@ˆ~7mÛknË‹ª$Ô¶úÜ–P±+‡‰™C}èòèo×*Ú(¨ã ·-ßè.4‰ÓÆ7§ŽWá?ø8"½N[.½^7/ÆŸéÌ3§þ/3±j(ĦwÿNÿË º×þ¸ wLjŽC; ìf&ŠŒcv^%€:ÆÃS,ƒ’yr8ºN@K}m>KW¿÷Kn­9×è}ê%F¢S–GhpŠx³‘„CŒå(¹€@òr XÕ#?FäÓØ0éʃº¾’#«w^øcuÖ»¢Î€…y’þ‡"¨-ÚlEƒ£#»~»Áõë¸%UŠôŠ²Í1b[ ^¨Â•1€›€@‹¹¶*°ßr«¹ÖEŒPâW‰ðS/õÆÛ?ÉûN!™õö< eâÙ* ‡ðWxaÏx"(äZ„°ÿ5Å…Á‹ÆQÎu?ÅéÊ -UªgHß{@^‘Κ2¤]Ʀyý±Î1°QÕÇ'Ç,êX§L káx2 ´ß]mÅÈŒÊ0Ó`픈âpº³:ì*:_k£Ãp -‚º“ÏŠÁ÷×%¼þ,‹•™véÈVíb]2q2嶖ćVas ¾|HzÊNê@=Ü«ˆ7ëik¬K‘\É}ŒpYöNsÏS)#â èôŠ„¡cÏì,áE¿×€ÝçZ†üí=D=0»Måaú‰‘fü~nÄľ»<ë“vž­QI¼q¿Š[ ‚]¹@²\ù‚$‹C{ª¸ÇŽžÏÝÞBŽ®0ôh”u‰9íp?{U¶ ~sU>í4ߧšïô2º¸ËB †o´b‘ ÜÎêbþObww»wªÏ>Â|ÎåGkh´6°–´„p rÎ[#õPŽ±"7à7vôÑÍEá8Ê–óõ›Á$;E£þ6×zóдR]}îâ¤#¤#ì+µëè0É"i%uCjíÀ,±|¯÷ÁšaÖSVMŒx›ËßÉ5àê‹Š ÿýp‰åÌÔ6‹ïUj(mEä*Üè~†mø™YÕdÇ¡æ—µàÁ¤w#ö`–9­Ôk?ÏÁ)€Â픥…Z©ÑÜçËW|Mü%H*'¢¤ýÝa>˜"VÑ›ÿ¸w5t¯’0QËPG“R%‘à®"ò*¸½¸“ý„B…u+Ö8®Î~®³j Xí< @œ±cêNÂnâøg¬YGй34ª¥ VÐ])¹Œ-l ÉŠ÷5þ¢•‘iþmÕ™UV#YÌ‚€¹{Û­õls»1å¶weØË(\‹OL‚õ EŸ¶É»Rþó¦Ô~–—[шP»så1\èŠzLê¦ÊòþDÿžg•SÂS[OÝu’p6æ(@1…Àâ£R›sP1í ?õ=ç¨úEìØœÆneM6N5‚†åw<3dÏq^cF‹‹òðž)ÿUÏIÂ2M õØFÅÖÆ¿äë~ãq -1йmدNIà B}ýfÈ·/ZºåMYöŽE(Y´Ö‹'á²¹¢ ú -T;U·Åì‡÷F7®<·{¬ÈÆLN-^×(¥£U'UAf0GivÄOð0P$Ý 4ÛªZ@º/³ˆ¯¤Ž.â}·L‡© ŠdlI/_Øfá°>k[عW‰¢SʪÇ1<" Ì£Çó—UÇÞ3 ±ÃØlÕûÆP.ý+\ÐõBÀl{½É‚ÞÄH%Õ¹éú'%œ’@-ÜƺüŒ˜¼žRÛSöÄëÊÂð覟yNuE—Ê|ž÷Ïù£ÝÝvæ‰^Ó+¾‹ÖåZúûR·kÚœ[ÙÈȈRÆÄýûÔ¶0ŠZHç: -wÆFe@g¬`!$à ûp±eš„pdwíé,Õ,#ã µ„Œž<€öx4¬¦«=u^‹Ur± ^‰ ÷´º×hIo Ç–mÝÐΧõ -AÖª^§Î±:™µd¸5âDѺL•‚:ÊÔU¨Ôœú-¾L::V -6» f žÑrº^‚7£ö1ë5aj³à¡Ašº£¨nWÉ©!µp© cílov®>ÕªI8©3!ÕÜ/ª˜ JÝÚ?|iî$dØ_ìñ(°/;Åb÷üÒ49Ãï•Û%X¾eÊal¥Øw픋v_Õ̹ûœ@dr®Ëp.álä§<çêÉJt~½ŸM«÷[£†yœ ä™û3¹î[Î{ -endstream -endobj -2229 0 obj +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿Z+—¢TXì]› >:|s‡¬m’ºÍeaëŒØ„Œl¤·½v+¼¿:É‹£²D¾Ý([žÕˆÕÂG–ðúJJ¸£O´½Ä™I¦ªzÌ&ÀEnqšJâýEî†Ýðz€é¦&»CÍýªçx›h“+ ‘r‘nú«n¨»DœgúÃýEôŽ¼aw'À}ô”Ä|Ü ýꜱà$Ý%É—šš ד‘Z—VûgHTC(ûŠI_H‹áæÕ7}$¹%Ž\»yOaÛ‹'Oî¹ÀiÑÈ‘Ëi'Ç%Ïo2ê{´ô«PÉQÝwµåŒCÆX7%SëÃ?sê¯Íß~Õøu'iiqøúì³€þ€šN ÒÌ3w Q÷EŸ:šuÛ廕=šÓÛL•)M¿o¨i\ÉÑd·!Š®Ò<’ž0´ÎZEÑáR’Ë%qa‡´` ³õ°›Çkûâ)V&ìþ‰#û‚ÑÇRÇIœT´xMù‰k'ËeœkðŒ) ø}ƒÄIŽ¶³\ßqG¹#OÚëðv1ƒ;ÒdæpV5Ñ <Ô^¬c× ./#Q…E§UhÌÞZ»µµO÷¦GU˜c‡èeÿ¾5E«Ž„´»I©rô&÷¥¦v1Uçü4˜ÌSNä7èÌpk¤ÃÑøÁVe^qïÉT€†"šÚÄ•>î÷UѪ¯j "*ÚÈÉåá¯}”Iç’˜Ç]‹K•ÐÍÿŽ‡}Ø_p¸MVãÂ|~Ímv¤Ò5Ñ°¸žQ+-0"ô&'ƒ˜OºÖ`¶Âï iåÛmš¦h›¾)6¦k‹Ö¼eyˆj/ÿÛˆ…¨jú»Ýåo<“LÛÌC¹8ž&n°5Ì9È(àBüš,Á]©š;ˆ¼¯A‡4ÉXDÂÿ1Gv³èËJCò2ŒXÜ:.'I˜|ØÐ7ß©OŽãÖ—‹1_r:~‚–¯¿î‰ ŸŸ›8U ÐpÖ¡ýaErºÕ}Có±wú ¦{˱‚…½œ ƒ‡§l[]WóVëz,UŸ+gR訑²£hiLº¥][ƒcm>ǘ™b‰T08›°ÞÙ©ÿVAýx`üéðÇ!øä­ æŒÎw +.rmÏ){ìûz“´´"’ªý‰²5îxÂKå„0è9Ðu¤«@”ÅQ‚jS¾›_‰{•|Û'«¤€/å VF.¸ÄZ¤Á!z‹;ƒj”p.H‘¼R{ˆhBAŽ¡‡+Áiù*‰Ð?IŒ¢P„¸óøòaH‚È…Ár!ã‚~¢oë 2ÚßA9 SOœ}ÏÄž~4'±F§g»Cp’Ý|ºx{—ZÌ(HK›êÕ a[•…ï„,~m€3,£ÇÇñéTi”=¥EXò˜åa†©´}åÓÀËb9û-[Jã"O{R‡…ó1à*¤ã +–èÄÅl¢ë7H¯³.Ü×Hͺ¯ãァÝãÿ+2¾yÞºÊŦßÕkPG…v„´Õ”ƒ‘‹|¤q›+D€µè@i㻽IhJeþßH‘ìüõÍ=oÌu¦?ò6Ð0û+ýÙ¥r¨)têðÓlj»žñùÖR¤¾h§o1Wˆð”ÿLdO(üµb=¡5ÒâèN‡e@³cñ£¶H&Í•ð!™ïüôT¤ +Â)¿ˆWz½‚"˜_®×¶¡Ì‰VfíIfygñEŸYŽ<ºáÝ Q«JU÷»®œÄ$âwx‰SGZë…Éc¶µT* èOÁT¶ÿc bGn5ÀµÞ¦«¤ÞÖÞ2ñTMehXÖ=,Ðp8‚ W.©^ F¸`¸äz@}¶¤4奞3å¦Ò<ñT¿;0"eëø™Dï€Ï_9QÅŸ>  Y©cpìþéÅý‘ô³c÷†Qþa×馦î½m®+—rf]¸kLfzÆ/ÇZ=]n—Ì}Ùx·Ë8ƒìH’þ È›Q;}Ø7¦½ÿÓhG¼ñ šéZÈÇžE×£ÿjLÑ*m]Û´¿gN>ï2\s5ñ¶ÏŠ]k³âi'llG^}žU?fMUJÐìf&ŠŒcvRm¦a0$UnåÆÑ“IàjV;9ÌÛÏIº7¤®B›y`‡C‹Ù–ìgRËjQò‡bž»¢ºá6½0™þ” ·þyñµ› ý®,>vÏèîúÌ}J²b¾¦Xªg¸üPx|<ÂÍ’U%©ÀŸ`çÕ¬Uò.`ÒÙ +Ÿ´×½Yé)ìÕü “C Ã’ÁòýNþÖ‡0FñN… 0¼ý+jI¢‘&Ì +ØË{e~îØ_'ýX^®jRT´:3€Â䘙Ð7Ù9·rÖ—[X0%ŸlOX?VÁ'ãfmøGmZŠlÀ¥±B Òë⺵ˑ1§mêÁf§ÄI¥IêQõ¿yÛçwH(xtçÛY8fûRëžò±äb5j¼¼t‘ ë‹®‚ÉñNŸ;tKQÜ×Ò–_׃HÎ<‹c8GÞ`ŽdP]èj¢|é´Ô‡­j +ŽýÏ&»É¶©Ð¤c¼·•>(_Ö&áæ;óy¦¶ úÔã]À¡Ñè +‘±•·àòçÔª])J×b> w«’33ŠÀÚž¦ÙåŒÎ¾ç`ŠÅ¿„´¥Sçáævžmß¿LÆàoN"¡uÓoq”ÁðÓ–[˜‰cg† †7˜'î÷B–·wGɈ«é +üòUÝ|€$]žŸÔ4b€“©šak½ÌЃÙÄ Õ6ȆÒ[qÄz¶B³ªùv_÷ÿ.ÂÂAvÜô$ϱ€WäÍBÈ®á¼Ì¡ ÂÂÏmûœ÷~ð|Döôä^ʉÃ.ÈçÅ8Ì.Lõ¬ì´‚ü6¢2`dÆ…tû|Á¡]ÁE04ÿê$?ë¬kÔöJ!ÃËXg ^ûÞbÀÓ£JÛˆ ¦ä}3œfü$‰4P-dÔªÃuAz)] ±{'a[—Ï` Bwî~¶v9+„ÏA ^9Wƒ°fÕ¦[®J°8~@%|æñüF,ÈThÕ›Ú +rÆø<÷HÈ“ÙBÁE Lü֥ƟïÂ[`äÌëe—`€ÉÉ;Ì.‚sî4äï{8ùâ«=3bþ©¸1âiÄü‹,uáh¡1¯Çgs+Ã2¯Tpe:e$äýÏÜ­Ä +U;(©Ü3UŒIi”w>gª¤Å3î‹ý +ôîº,RŽâpØ‹¥4eÝH?6«j™Q¨xÔ[ßöÓ-ÆÄ-ظÜÞ÷y‡È¿’'˜„("î‘Ê_TÛ´Ú$Ñ4—ëæ3Ùà/×ò|…,—Îác +©šû“ÓTƒXЄá<±¯©©m5å­žÓ,ßð„ÑÀä‚…ë\×qÐE)CdW ©è0Àe„…Êcm|0Ü@ÂئïM…5^Yn&¶uå´6M—Xÿ¼%òÖ`Èñyƒp'ˆ°iÚffd¢ÜßCVì¿3%¢IK#ôsyöGð´ÞCh> Mú[’ÃèJÙC~Û}Pq_ýyóL+ ™ðŠ¬†Ý¢N~–׈Tr¡ó +]uÍÄ°Eþ)”‚bâçƒE±{‚–†Â=”ëêlžeÎݶ áø— +—;S I×W4Zi*I.á+ìNyqW· .U´>u}²’ ©­˜`l$ŽDSØë­ÁŸì3ÝoFo€3,£ÇÇñéTi”=¥EXò˜ã†tÐ94E%$â^FàÚé!‘}ÕØ7êØS7†ÐkDs#©ŽÛ+€6'1rf*gÒõ¦øoû¥Æ&z* ¸Ì{Nl'{{¬iªÁì5¯,>gJS³¥V~ó]X;©¼”ñ1íÉFÔ!´!üuÁ>ïìc+&Z¼Oqd4IGRiÎŒI‘ Șzw»ü úš’‰†ko|øÂÑi¶{2 [‰¡ÆÜ0B(Þ-àT¶ý½n¨¥/æµÃ¦e /´ÉçÕäôoèJò™–Ú~†ÐúP„IÒ!|‹Ñ!s©ÑdUg~2ØM¢CK”¬àöK…"Kƒî”…&”==~[÷Ëï†Û*±Ó¡ø™$‘Òvbж#šÉþǯhÓR|-‚âàu2 À¦SøôyÿHÌÃîº+uFXÞ±ø°{ž†A'âP7öò&O1UÔÝa´Ì´âOƒ_ˆ†ê.÷éÁesgO3LI¾ìõ×RÛÒÀUMDY’IãZEn&ñrÒ–?”sÝö%Lb?Ñ3Q*Bh{KfrØ岿QŠÊd Ô `uʼn³é~qvb©}´^Z– 0G>Ð…®"ʸœáìiXì»1($j2þjÈïn”›¼GưƃgNEÒÓ^•ÓÄ<ÜSàû“ê[á¸V îŸAý y¹º‡jÿ§hóàäÑ)E=̇ÒKî¹Ü£ñ½æ·ˆØÒ\rê_²9p«o%˜àaØ2áTue)ÝÒÁ$) òs6à ˜Ñ5àÖ×}ÉýTÏ!òÿ3ó'[Wýk"}!,ñ³Å\½·ðž³|Œ¨lœ‰Ù¹+4¢á²c« +Ãm‘»;qà@¿å#$Ï…Ðç n•x'á,Ë~"wEÛá\çš‘Á3+9|×¾õš ØèzŽ‚X‚À) -{¹•˜µt¹|@`qR"ÐÛ¡Œçæ˜#}Õ³|¡Ü®ˆ%ôÆG"hæo’p?OKÊx-"Ø¥ƒwõ±–äÑ\pîbÝ5™ƒyMõixÏ;ˆ°ó³ü77eìϘߒ’ßy^X5Ó‚~Œ9wgpÿ?¶ú^( Þœ±}A§’Hj0UE‰N + +ì8Dǧ›®ª©? Ix ù^{Ûi¨#ЛÒá Á»œ†BûÀ…F\ô%sÈxåáð©/&P\CXg¶ºJ;ѸI ½Þ9ÄYíx–*Њ wóx®¢“‘ ÊYé<`X’ÿ×$ÁöR%½OØKÙ89k3›ØgÓl˜õÄâÙD§¯iöÜ‹Lyp¤À:r Û©ÿá×I·6Ùjƒ9iûW2ÕÚøõ®®b›y/¥D=w3<‰(î¾ë1)Xqr¨}b0ûÇ™AÑDÎG9០+ß +&M›øì°kQ—æ¾4T¸Ê¶ÕòScS/½²O“â«+¤RRf‘%£è"Ž½Mƒ<.è¿b{ùôs¿À|î¿Iñ±/Oe“‹Å&{ú¿¤Íµ; UëC%N†X˜cL·hè]*¹Œ'jL‡pÓæ,~ê—YÓ7¨C~ Y®ž¡öl/ˆÀcUÆ#Lš¼zTË—!Ù±œ£ñÅ‹pÓ†Îñ_‚bzIš—NDÃ5¯ãÜÚ·-þÝl+›&æT.ƒáÈè¯E2U|§£+… +(éïy×òžy?oØõ;!Ÿ/1Ùã”F|Õƒ¯ngý6,¸/±Ì1nZïÈ.,xy°Ùj¦¸@>xl5Ûvðæß²ðɾä"“Ï”asÂÏRW‰Ô”’¿1õSbן6,0]8eÅ +Ã!ZË=µ‚Qo·$l{GáÈk‡Ø„ýyÅ|ž©”èù-ŠzW³‰þÌÜ¿·°ÍnE~#¶Ïs'¨úFpŽÛ^œ?”JÛàm7‚Eùψ™P—:n¬¾s/á€}Èò•úÙÏ묬Z†bÖ|QùðOdÝ¢}xnù­úÔæadWþ¾ûÐÉ/Œé×™UëhNo&›H}n¾fnW¸£übü8…`¬š+ s}³+R‹¥F5P÷§ªâwd˜ +åâ-4çLÇþ3²÷Ä<3väøjSfÃ}2³ÞdLs¾1$…ù}Û´XŽ=Ôøäþ“ëÑ\™ ÔÏV0'Þ– yÿŒçT-·Éåd(3k¸Ô%ÇR`ǺŠZGfîk‡˜ò‹‚º´'Ò³%ŽËxr +Ýžý*dúDâúì)!ºûØhü^Av΋Š˜l;«ê¸À¤â,X#R¢hœåæìÐJ ¦àH›ù!5£”8+È¢©Ky K]ÈR^BÕóï ¨¸<>ÄãÁ5(nÓ²bz){yîÍC¼á&ËF´ÝÍoõòÛs%›6) +–̱ÊOÇÝ|ú›%ã×ù†°#ºŒæ8|õlà%žYšÃniŸÕ7?Àb#@IF"MŒXúù¦ž ‘³êÝó}¶Ækxõ£z¾øÜáºAå)ê&¼#¤iá:»ÒA˜Löàëkip«¦?å? ã)ò` ¯hX)&ªÒ­ÃÐ"k[Œº+ÚÓñ^—pDÂGh61ÞøxÞQ™€—wŠ‰šn,/,·éñlØ Æ–þ[©˜OÇ1’2¹8j“#mGÄ'$¾zptr,ë•4q‘`=vsý»ñ´p’E”…Ú7=p¦HV^VÌF¢Ã[ˆà"áӯϑt¹+R÷õÚ_:9ýï­—è8›æ¶bw=EW ¼`â·q:³âP1qOL- !í˜1«=ë‘ÍO;R²ù‰‹=ÉdCƒ¥õ®6kº°t¯^X­¥›ÒÐPž¥ë‡}W¡yÿ4š8#?RóÊÐèCâºèT§v‰ód‚Ü7G«2Â"õÂ=´àX¶[¬ +%ÃܧRÖK{Ý©I}ˆÕ$óY/eÅÎT˜Èœ9ó{—CÓÕâÕƒ¿[*zQÙ ž(b%¢bÚ>³\†åBÿ#!¶mì/fß=_ŠL+¦C¿êÁZ\…=ÅŠ;à¶8áfÚ6,Çx‰T!ñ¦`=.¯·ÜPkœêxøy„ùp§yq>åFhRyß\Þ˜úÑ{ó t4<ô]lâØŽ'%¹*ÎÖÿ‰€ã}A` p Áz¹·ÛŠñ̤PR‹ iieDö±ñ)¨¤’~€Ÿ¶ÔÂ÷ú†&£Ú´o—Y0¿/ãx¿ê¬HÈØèì,`i¯:…òL¹"ç€eJ™é¹ ç¼w…ì< Ê GG"Ä°c¾]g;qdZ‘*Í<›¯Í³.Û¢óåÛŽi÷‚ÊÌUnMÙ’joƒÞ³áq—‘×ã£èo-ö¹Älÿ$Nc/·Ú7Úäò%ñÍÂÊàÏêiö#“î„;Ÿ‡¿T½’—[‰©}´׫„{¦Ê¾Îæ~B¸ôžÐh”~€­£ÄÊÓP¬67.Åp›Rà½ú Ó%ü”{™Ž¬=gnñ˜d´#ï$(ˆvå´š…æ '`¥HÇtRâù† Iž‘ƒ¸0ùFVÁ¤ð¥Ò²&GÙÁyL‰–FÛbÊ OpäûÁÅxMÊa‹ÁƒÐ6ÅêÎ<½™Où#'g‹P'JÐßr#R—Ì´|ý»bkTVrfê/Êe¼2ÂvyiÖ«â•/™€krÚ|ÃúüŠ÷‡Ž°«1PªLOmx,Á‰ï+Uv­TÉV냗Äâ‹ûÀlÉ!ä•DYÎ;üzªº1u.ÿÜÃÒq>î‚W·;²ItM8M‹ˆ»¡êúTFc÷: +Ø¥`Þó°í­žẆá<ŒV_“Ê>Òe!þSžKЯB '™==dÐHÎ4V ¿Äa⤿wÁ01tÙ%°zÌÌÐüˆÊú™ƒ–*ô€ÃÄÃQÞæsM à âh,ÏdÌl‚šJ®d…&e¼¦´Áƒþ•ïªÞ6vYƒQCBH»„%+"NÛn‹bÕ ædÍN„´Õ*É6Ça6é}°Q¾ý9-o :Õ,Š™GH€™£r±9’9[Âø U/°‡`lñóMTÝÄåt¥Ü[øWB«Õ†ècˆq™¬÷6ý‰¢s<‡ÐÜ@Nþ Aè‰ P.Δ–'F8QHëĆJã–7©M•j÷­¯o2jœðxŠrCÃF!Ë>eH{r¥N N̆Öýz]Òã<üÁJ 1üŠzo}D‘^)¾Ûïõí•Ó æÅN š93yõ2Hèöýb÷¾æÌ™¹½²~»ŒO=R{“oÖñïÿ˾J´ÃÍ3_>‹€Êô ·åYøòÇô­`É8÷s=¸5¯8òÚ­Ž§vUtÜÚýR«þÂ>gúÒi’‚tL(Ë @ññ;²ÄðïZ5Øbëð> +±»¡\‡˜l2ùô)ËíüøÙ/ µ*Jµ[X¾ã¥mц +sÒê‰(>†Ž]7ùc¡[ëdž3»àjŸVcwÒÞ læp¯0d¼œsȹ«@¼ŠžêÁ¹:äëoVÓôo.^ ÄЯ¾Ñ©¹ó%ÏÁ¡“´•–³»ºîšÿâôež: à}F*.Qé׌ «‚sž{–R ÕVbú¡8ŽßÃÌ+p¹sQØßë7ÿ˜ñq–ëYK¨—èªWÔ£ŽéèKðÚâõ®¶šŽë{£Rs¢@Iƒn\‹ÚïYõyµˆÞï¿ yÉAÃ-„JUÞÎ%XInkùGEt +í¥Cn0fœÚet.ÐŽFTÿsŠoX­Û‡Ý ˆ:öUu8P¦7V]Œ¢äœœ¡Í®Ìû,;.ð9l¾nEC¡L4ô×(éè ’y9)Ì=A—l£ð£<Ũ0µÅÕíøRÜûˆ¦ÉlOâï»RCs†ãéIbÍnYÃ$KÈœ)bØ¥0m¡Î\Ò A‰0˜ÜXíÏ9¬Î,{+H±Ú¢®ë¼DlF-¯õ­óxEN¶ñ4ªÉmÁÞÒ4ÃsP=5/0ŒwcТC¦Ï3ÑüW–®iüçn¡‘rÍ÷R͉˜P?‰( r’NAe‹½sÅ=«&Œ¤^5=_≽éZbõÖˆÒU5·ÏÜ¿Â6½Î›×u!“<¶?Œ)%ù‘o‹‚J‘÷ã‘FŠ–+ª|nÆ9¡p€3/èøFÆN7"$ÕP%køÚTé)qÎþè¹$û1œÙûy=ñ}<ù‹=ëÜÇFù®«´A#ðúÈY'ãºÑÌmk2o±h’~I%S&„¤)öSkyö”Á*ë{¬¯CW_²#í~¯[Ü2¯Øû1w¿+n«Ò [·}ÓTX¶b„Õ¢ätSŠRL;…³{h‘< ûíù¾,â¼jA }íЂ, qîYóíÆÉš´ÌP ÑEc‡¥"÷ÂU&ãÕàìŽï¼’™‰33˜'í„OàwóŸm¬£zÙ +£oÕ‚íÎiîÞ% ÅŒœâ©ŠŽæÓ8`Éê»xOç–TçÞ¶Õ–ý¢A61Yˆàw2ö™ó‚¶ÕC×´áN¦\`(ÎÍóË´n(DÕ< +µS‚EòÆ0Ô‹Ï8tV¾î›µÈ} µ¾ —‰ƒ¯ê¯o¶xob÷Biî½&ÿE=ÁM™ð$‘5JþÓ|ÒŠ¦¶¡ÐXkä„…«:ékº´,#±0礧V‘ËO£\Ái¯Öœ„¡é[;¤î.Õ@Í% ±,ܲÿ R& +ÕPoñìT8¯†€×µiY³ÃVy¤Ÿ³˜’Ôéú^ÇÁ,2¹ÿ“Jó ÖàŠo5áÙV¡ŽL¬oíšúsØÕ,°í‘AÑ`¼ Ó<êéYýS°N&îFŸ^I–¹k;ø¿òsCüšN3:…=À3åŸì€„>òEºC…¸Þ¼âPÚ2Ôý¶j¹MÝbè‰}¹Ìé£qpðÀ*œqC×"ÏÀ†wb'¯–fý‘ƒß…ºOKÖ§pDvu9Áý'ëÔ0ôf1be†ýw’yõuþ3>/¥/eË’[Ú\˜éŒ{¥â^áRþuocÀe "ò ¼“ˆð7{CåÜù„¢¸Ýk£JCq¦6#®eÓÊë€A:`D“±ÂMYâ¦õ…XÊ'N@q~šõ={Ó‡¸ýA6q#ς؈½ZÃCUŸ¼rÐSçטA¶Êxñ&³úk'%Y~æ.ø taŒÁÜZU˜CÓèlöÞ¼ð‰ÿ‘3`¶¾a쀅ómq’O¼zD#vÕdÊû_¶*Óî\^æÚ*8¿èÚô¬¼ÆQuÿ2'k¾”Aô¿Ýx¹L=ÍÝpãÃÎ7R¦h„l\YöðAn AÓkHe""] dÄsH )…-Î}mÿ‚ÅæcyýÑKqIrEÌ‘e#•Ä,Û™Gs;Û°K;ÀÔÞ6”o¿ÄÑ„Ë4qùXù‹]A3ÕÌ¢ByŒ«ìˆþ"v$‘yn•Ç“Àð•$B€­‡ eÓù´LŸ7[œù‚Ð<KÌTµÐµ)Ë®JÙ†ˆ?Ùâ³·Ø”À²:—ЀÂd¿ˆzKįÀ ¡6ãEÚ-æXƒOS—CÇÝÁ9˜®ƒÇcæ×E~¨ÍÏ.ªÙ|mšl'tS4àtTbv~ŽŽ¥<˜…>Ú§,)ćB·ËRDz_ÓÂ&_(e;1z‡´‡ßg)|Õ¸Ýò§M K­GÊÕî(ð¯U¤¸d¸"ä½Ë¾)ìS$‡e£Š¨ÅîÝ;GH%XHu\‡$æIùü‡t\,ôF^ñK¨2¶ÞÁ½…˜m ]Eu‚é ·£ëû¨p°øЗÌlŽ¿ê8H Ïe/`.$¥€0 Qɾ|ºb—惇¡a÷,‡PHÆa 1, +“S†}Ú Zâ}è9@+É)“Ý,8HÖUªè`qV)“ª;°kª€5‹ÓŠRóI#çÆ>œ¹â„Ús<¦Ž/ ýeÝÇ/è7\‘ûÙXAvrX¸&Äc«²z¶çFοœè³IÎ~/9Ó½ó“«Õý´ý(¦íAÕù¢XfPé.—jB`á ¸{»~Êh ¦Ä9µ§™^”½ª K’ðÉÆ"r<¬UôÕ#f(s–ØïZÍUÖxÿO‚páÀã6PâO5>B:’TÜNk^¥vŠ©â& +ùQf%Q§½oÇmW Öª¶Ý†rooØŠ¦yã>>ÜD²ŒÎu°ŽÈ]÷í–ú >2ëq<Êê8™]ñ04CÜdÅjæ¸þ¡P9QEO篢œÝï–éQª©¸î³ù·¢®-£iƒ¢—%\Ú™Z0¸JÆ´Iè gº5Æúûý˜Â–‚u4^«»é‡pm"§ 6Žyq*]Ø®’ß÷áá³”ôv×dE´å~ˆ(¶ j°Š=þ˜•'ùè2%–Âyz_†¢í‚Ñ_Ì?Ú—ÕÛ 99.5æ[ó+¥HjU +† å†y“—$=±òüZ‘Z᧲rÐ.=xâSQÀ”K*ßÁ'qÆÐáJRÙFR#³íž&¥Þ·¸“’—N ÂA>œQ"¡p1\9Q¥®ÅÖ¼÷Âé³ +b8v,¡LeØã/sÑ]¨zžQêÒÙ¤ÉÆ´žË¥MÑÄv®µÚûsš+C¿¢6<§O§&hüv¹À¾·MÃ@*¡ÞEü—„»i=¦ÖyÇ ©Ç=ðÇôQïÂ&Ë;ÄAQ‚ôR1ÛA–ÑyÚL\{äïõU³í€M%ïÙr—ņ$ŸãŸAo³üh¹îQüOjLß5/ßÔŽvÅsrSû]*㫽‹ZýC¢õÿ¤G h¥œát<Å=–¹@&¡>,R:ï™}5-i6T SÙçˆö×& J]°À^i¢9õ/XþA‡)=ê–¼p¹œÚ™Y^=f®ÜC3ùç U¶ ~sU>í4ߧšî=ý˜-.†³ËcÉhC•t˜Ì1SU3_¡ñ®éA××H3ËB÷—d+Ë{°½€j”¾nû(s<ãœóØ«ÞJ±Tì‡n +é§$£1 <è4Ç¿ ÈSª´|æÅl bÀª¹2ʾF%6r.•ÿ<)Øp–óNTvZó’øöšé—ØÂ+Uúí#¬Áêó™ßÆu|òÝ„ýRþëqP«›_~YÿC–æ7ÓC`0‡"ÞNÁBJ6RzZñÒý§¸Qoó)Wd] gå7À½aºfMÕǾ¹…. ö+²*:Q¾È»˜ßþ q[Lzk8dfåG`ÿ¥©üÖ{a‡´` ³õ°›Çkûâ)V&í©h +„X%†æ认?ÕEtde +Š_wRƒÎ<ž œ:Õ`»É§*P³Ÿ6z¶ká®RŒu&.öŶ Ø'p”]=ll;ï ŒÑXæû0/Û‡½ëÇqÔêbžTU\úæ„6™äÃ9O­4yºÿ¶†FêªÙúA–bÙZWš¾Vfz›uÈvµçÄò…ù½aJÞh%W!‰¡“­>|CºŒ[–„£Í—m!@¬Æú?`^éåXy´¡ÁG»F‚¦)õ;Â-ŽÁÄÕa³•µT¤J@ßp+E •ËÜ!õ”50XÛÛ‹‘±ÂH¥¿óû³Íèÿ³È!_¡^)LB†[ÏT“PË£ºè`bîÕjÔt™í_¡˜!–@&Ô€QWñÂ¥³kŠ l­©¹ó$jì ú_;«Rĵ’Ÿ#õ«O§@Åèg¿/pÕ³àélqGŸã¤¥ÌØgå5zœ!Ž ܾÅiÙøæÁÂÆ4è;epëŲZf>ŠeÞ¨ì7‘c˜ûŒéUá-Š‰­ë¤tØ™G^SÞu@P×ã6•FêÓ´²ásèFÞB”\¡õ¨ +gÒË_Ó‚™R  +)G1Ÿ±dërsizè0z V݈„H§éŠxù]ìXØGÜeéË7£û´ÔŸ?ÀA€šÍ¤UÆ*vµs Û=;@#Žiî›Îü‚Õ½‹ÆQ½-~èD¹¨îPäÄF«nÊìTQ˜ Òuè—hgGW1¿Ë÷y“²#ĹÀUÔäðÏÔ¥ýÉ„o+Sï{ó¹ Yd|­/|QíÃÏwRoõŸý`a#¤²ž  R}û•d믴)%e{ +©[”8N_¨ß?g£½on/ãÐ</…^é2´FlP›µ¼}f–c>µ÷<ÜC3xÍß›ÿ}7IŠn/ŠÂäÎó?hyÙç:”È*–R 2=ïMÈaùèÐà–¿8˜ê¤Á}SäÏÆ¡}ǘ™ë3Œ˜Àh©–'­Ø3¼Äü£RñA¹৻¹ …un»–µêN„Tˆ·–„„á‹ µª}±÷Œ^R_SRéTŒj3%w],·p¶Fj ¤v_]•ÌYûŒÌ&¨¼ ãZNdšwÉÓB²8§{Ú€ˆ‹¤HLq‘¼ 2ö ÜTnØM#ßèÄOãî€ÀØ•APÛ>,«!J’çÍš¢Çôɉv#µ&’°#ÆYíþ9б¹dZ<XL{ÚðùlÕpÕ™ Ù´Ê—&ÊÅ"L…?")ð³5t°L˜_f›Tl€ÛÉáÝçÄ£4ª»Òde£·ÄP ¶ÅìÚ׺¨ÎÍÕŒôjZÐ"å=ËuÚ#*¸¹ÚB2àå=«V½uŸªv¨ç¯I/cKµDçäºh†ODÅΠ¹Élw,}wQy~D f% 9ã97]`£FŸ÷£ê¾™o^vŸ„îÑXárˆG˜‰[Y.{•ñ÷Ñ9PàÊ…jÛ5·öcŽÅììcônÁF–`V|ÐEGõbDÜh8¹éÈ3‡×øš®æWh&2˜|=™YåŠêï}Tž!¤11Ă}Õ˜ÑÛé}ŽdÐéé/¶ú!”;©ÃªEéRWÔ´A5dXRFV›ÎEÒèº*%•&/Ü$±ç¾Ó…º Í­úŸ-¾Ö Jèä°\äF–‰m?L \vª@ŃÚÆ8<ÓÄÇç5£¬Ëk@⢒MeokÞþyš} ?ÏÖjƒý¡ BE“FÈžá+w¸b¹ñæcçS’z‡Ãü¹r˜Ê=Λެ /6Ào§²b—[1¾Ë¯+có ™B£c ýo&»Ö˜î“:É£ƒ„:8—ÙÒC!Õ°ˆ$Q8–Ÿõ3ˆ}<3`rÛ«8>§C¸æ¿ösIÁã&Ab ¦ÈJÎÙà-žÆæ.iüm³hbÚàË™‡ºqsʼnÚaV¸\Íuu#2eˆ8¹ñ(Ï÷Ÿe‹Äß–¢ØÉ.æõíDà!7ø€Fx‚cÿn  ãU›Rn" f™f´â$d ¾7š¶Ûž›Ýë³ßª]LòÚ-aú®þ ‡¦‡Y LÏU`ɼì"‹>F}?Œî2Z¶ÕiØ£²8ía4CúÈ^&èp…aa”釱çiÁ"Î +&¥–eIï*äXi®ŠÆfKJ™ÛÁ¼ËT‘Ë@9’tûÈÁ×Fžì(Ö±g·qôõÀÞØœ¬ï1Y2בY^þn+1n—È;þïyü¨Þ´9¬#¬~I 9ç ×.I/áב™¤.LÝ¥¾¤ý‚°èJAéæÿÃá-eÑ¢žwùAžN˜šçˆ¢Òø‡ƒpjïo¤5µ‡àµúÚR},D¢ë‚ƒ}cÚG¦K‰ŸO͈]Ká&îæ¸îQ„Ð!éeSV´¼›= +DÎG@|Skµ÷Šl;¸cêÒr Ùgý•He£úÅ<SZù&¡˜ÜBë–Vž€Ü×"—{Ò骴?>0¬“Àw¶ûòÎâ.”ÉàW›ØYŒuKa®ÿË„u×»)S÷pçõª *-³Iföv¢å´9õ÷&–¥ŒZMúPwÂvtê)Ч‹Õ÷R5°Ä6 ¾!?¡{o@«ñ½ŽŽjšâÚ3C†œÃáQtc%;¢¯^œsÏL‡_È{:“ç7û.éRÌêÆj–PyPê!3À±ÍÂ>šËr«f3þ×Y‚éh?ÄN” ¾¨' +ÛOò KDJøä1®ì*C¦÷%\lŸ1ÙHëJÌ3¯Â\’ÆklˆxJ¦ŒÆÅŸPLCô,ùþ­˜Ðêy ÛBa?ÒjÃ’'trú÷ÁGf̉‰-x’Úѽª¡J¸6æ wêeñGÖò1´a"]¾IR³›ëi¹úf¢É=afÙÊõwJ]Ü;¢Þˆ’·CîÒ©±e^S«BëäN}Ô^Mô+Ú‹[$ãäšXõÕJKÒ<Ïä웊ûMGÑzÄTظ'û j—è@61†™¿s¶?–(Úº˜oúü<ù¯4 å¶Wp VÈà.ƒÖxÔú^T­ãÙ@„¡Ü)¢|)È$w}ß}?½)ÁÜ\‚$Z·÷9ïùý­i•†ºMœÆûƽ’M[¤vÒ•¹Ã"8šR}J1·3,–÷C®@ +Ò/ëëZFRÞ¬ NËtZB=‹ü›ô¡Õ&%ßX1Cê €–ú†ñêœwwʳ&Ušã)Òc ‚’á2Ððž¹60ñô*­Þ´! k†Ñ¹OJ³KèÆ£wÊG4ÇKyã´ì5áP&¹=‘Ä£F1¾£º/Ñ2JãÆP…ciÐnýÆ—f"MƒFÌun§¾8VÁí.®¼%uy[i3vNµ#œ;2DZӈàž¾ÈùÎ1ÆoB8Š{áköJºãf€È­¥lÖž&A©Iñ`Ì)›–­ÂV{Ž[6í_Û$J©.¨Ýd®®ËöÇdÎAÉLá;Òy<]Àö eé¹×‘)Í6¸È~8AøÌmÇ9ÆGš¶CT3j[ëÁn7ìGïùÒa¶€º¿.îCD¿^’Ï5Zžóð(\A8æØ'_|Š¹˜@¤–Gù8gj\®Bé-? +"›‘§|gò3› p·B¸!ôÁ”°Ú§›èá)ÆéÆGØ%÷š°ÈJã9œÆlMoÙêœ ‚Z/ÞĮܑӦèÂZeÅ gM<¥ñí›gýâ}ÇU|Áð†”ìlr䳫~¦YÚd}âR•÷Ót”ÎùÝR—(¿ü1§›í•¢_*J§\Ñ-ÄÍyºu[J¥jrA%Z’׋=ÕÐïÿLês5'.ÒΕp\ +çýª–Ç´†ôlÛ\*’ŠO³ä_gŽ7·Jýbû ;lžE˦Oíú~7¼,'ù12¢…w?ÁÛö–d}ò³Æ5÷\ÂY…Ê¢²I(½ˆpmŒ –+_uÍu”­¦9—£cæ1¨{¶æ¨ÓQ·Zê%‚11áA´Ð\JëŠV;`öÿfsæXÌÆ;Î:M4ŠÑ/Þ  }‹íÞ¬œºj~Vè=kj•‘=ЊÉ‚gäÎéž$Ê'Åo»€*C2™_’ž.´´ +ý2ßÎ2&‚0DÙÜŸžæ¹ußb#”Š‹Y©Ó?&žOÄG'‘Ò7¯øE€´iØÍ”ÆÓ'Ð +z-Z)ÏÛýZpâ)¼ÒšrJ䱚jïc…û‚0ñí°D¬˜¡›#+‘kÉÔèÌo{.^›Éì[4Œ—‘âÿîœÔM)ñôv£Z(»ýSªÔã¹é$ksFUñmØØÖn8µpÎfŸeq Qx‘P‰R®ªe™ždÖ•ÿ +endstream +endobj +2232 0 obj << /Length1 1614 /Length2 22531 @@ -32538,7 +32784,7 @@ s ÿK=ö² ’á(ÛHfUïÎãrÒø¾IQ$W0LÇòÅÕµ8ŽÅX‘k¯ß‰ô¹­JÏMÐz{äPãÀ÷–¢Ô“‹¾>$ yò§ÑéÚ"Éjrº½¾‹Xf¯yÓLEœtnÖy“ÉŽœ‰ö•ƒQ]Ex.ò¢B‚…;®úˆ[„çðÂXánÍUîOýÅ›1Nd É¸&÷ïÎŒþ endstream endobj -2231 0 obj +2234 0 obj << /Length1 1616 /Length2 24418 @@ -32663,7 +32909,7 @@ c51 AÌø2C0ˆ¯5§Œƒñ¸ûoÔ]}‰I(&*㤠½;Ã@ar½’§×@ ž\-@óˆ…Ô|†5J¾ÏZ¬¬Ò¿ÖTÛÆâ¼ .áç1åT–€"aõJk:‘¹bÄ–D»| endstream endobj -2233 0 obj +2236 0 obj << /Length1 1620 /Length2 18673 @@ -32795,7 +33041,7 @@ ST Æ€{qK—ÿýðêT°%ÏÄÁç4 ~T’è:sãì# þýu B«K²¢Þ@§wÕI5­³=ð¡•öu³Îm9¶.tÑž2•5ÈĬa”Åpˆ5EÕûÚ4D(á€ÁXmè‡ù{>Ìì•o‹:=T¡AŒB[’œin«­áQ¦Õƒ‚:©ûþÀmôÆö¨.¸åê8¢U…E%x~˹>í£ñ:¾Ö@CQBYÑ: endstream endobj -2235 0 obj +2238 0 obj << /Length 1007 >> @@ -32865,16 +33111,16 @@ end endstream endobj -2236 0 obj +2239 0 obj << -/Length 1577 +/Length 1153 >> stream %!PS-Adobe-3.0 Resource-CMap %%DocumentNeededResources: ProcSet (CIDInit) %%IncludeResource: ProcSet (CIDInit) -%%BeginResource: CMap (TeX-cmitt10-builtin-0) -%%Title: (TeX-cmitt10-builtin-0 TeX cmitt10-builtin 0) +%%BeginResource: CMap (TeX-cmitt10-lm-rep-cmitt-0) +%%Title: (TeX-cmitt10-lm-rep-cmitt-0 TeX cmitt10-lm-rep-cmitt 0) %%Version: 1.000 %%EndComments /CIDInit /ProcSet findresource begin @@ -32882,23 +33128,22 @@ stream begincmap /CIDSystemInfo << /Registry (TeX) -/Ordering (cmitt10-builtin) +/Ordering (cmitt10-lm-rep-cmitt) /Supplement 0 >> def -/CMapName /TeX-cmitt10-builtin-0 def +/CMapName /TeX-cmitt10-lm-rep-cmitt-0 def /CMapType 2 def 1 begincodespacerange <00> endcodespacerange -6 beginbfrange +5 beginbfrange <07> <08> <03A5> <21> <23> <0021> <25> <26> <0025> <28> <5F> <0028> <61> <7E> <0061> - <03A5> endbfrange -71 beginbfchar +35 beginbfchar <00> <0393> <01> <2206> <02> <0398> @@ -32934,42 +33179,6 @@ endbfrange <27> <2019> <60> <2018> <7F> <00A8> -<80> <2423> - <0020> - <0393> - <2206> - <0398> - <039B> - <039E> - <03A0> - <03A3> - <03A8> - <00AD> - <00A0> - <2126> - <2191> - <2193> - <0027> - <00A1> - <00BF> - <0131> - <0237> - <0060> - <00B4> - <02C7> - <02D8> - <00AF> - <02DA> - <00B8> - <00DF> - <00E6> - <0153> - <00F8> - <00C6> - <0152> - <00D8> - <2423> - <00A8> endbfchar endcmap CMapName currentdict /CMap defineresource pop @@ -32980,7 +33189,7 @@ end endstream endobj -2237 0 obj +2240 0 obj << /Length 1535 >> @@ -33086,16 +33295,16 @@ end endstream endobj -2238 0 obj +2241 0 obj << -/Length 1724 +/Length 1291 >> stream %!PS-Adobe-3.0 Resource-CMap %%DocumentNeededResources: ProcSet (CIDInit) %%IncludeResource: ProcSet (CIDInit) -%%BeginResource: CMap (TeX-cmr10-builtin-0) -%%Title: (TeX-cmr10-builtin-0 TeX cmr10-builtin 0) +%%BeginResource: CMap (TeX-cmr10-lm-rep-cmrm-0) +%%Title: (TeX-cmr10-lm-rep-cmrm-0 TeX cmr10-lm-rep-cmrm 0) %%Version: 1.000 %%EndComments /CIDInit /ProcSet findresource begin @@ -33103,24 +33312,23 @@ stream begincmap /CIDSystemInfo << /Registry (TeX) -/Ordering (cmr10-builtin) +/Ordering (cmr10-lm-rep-cmrm) /Supplement 0 >> def -/CMapName /TeX-cmr10-builtin-0 def +/CMapName /TeX-cmr10-lm-rep-cmrm-0 def /CMapType 2 def 1 begincodespacerange <00> endcodespacerange -7 beginbfrange +6 beginbfrange <07> <08> <03A5> <23> <26> <0023> <28> <3B> <0028> <3F> <5B> <003F> <61> <7A> <0061> <7B> <7C> <2013> - <03A5> endbfrange -78 beginbfchar +44 beginbfchar <00> <0393> <01> <2206> <02> <0398> @@ -33165,40 +33373,6 @@ endbfrange <7D> <02DD> <7E> <02DC> <7F> <00A8> - <0020> - <0393> - <2206> - <0398> - <039B> - <039E> - <03A0> - <03A3> - <03A8> - <00AD> - <00A0> - <2126> - <00660066> - <00660069> - <0066006C> - <006600660069> - <00660066006C> - <0131> - <0237> - <0060> - <00B4> - <02C7> - <02D8> - <00AF> - <02DA> - <00B8> - <00DF> - <00E6> - <0153> - <00F8> - <00C6> - <0152> - <00D8> - <00A8> endbfchar endcmap CMapName currentdict /CMap defineresource pop @@ -33209,7 +33383,7 @@ end endstream endobj -2239 0 obj +2242 0 obj << /Length 2050 >> @@ -33355,16 +33529,16 @@ end endstream endobj -2240 0 obj +2243 0 obj << -/Length 1543 +/Length 1114 >> stream %!PS-Adobe-3.0 Resource-CMap %%DocumentNeededResources: ProcSet (CIDInit) %%IncludeResource: ProcSet (CIDInit) -%%BeginResource: CMap (TeX-cmtt10-builtin-0) -%%Title: (TeX-cmtt10-builtin-0 TeX cmtt10-builtin 0) +%%BeginResource: CMap (TeX-cmtt10-lm-rep-cmtt-0) +%%Title: (TeX-cmtt10-lm-rep-cmtt-0 TeX cmtt10-lm-rep-cmtt 0) %%Version: 1.000 %%EndComments /CIDInit /ProcSet findresource begin @@ -33372,22 +33546,21 @@ stream begincmap /CIDSystemInfo << /Registry (TeX) -/Ordering (cmtt10-builtin) +/Ordering (cmtt10-lm-rep-cmtt) /Supplement 0 >> def -/CMapName /TeX-cmtt10-builtin-0 def +/CMapName /TeX-cmtt10-lm-rep-cmtt-0 def /CMapType 2 def 1 begincodespacerange <00> endcodespacerange -5 beginbfrange +4 beginbfrange <07> <08> <03A5> <21> <26> <0021> <28> <5F> <0028> <61> <7E> <0061> - <03A5> endbfrange -70 beginbfchar +34 beginbfchar <00> <0393> <01> <2206> <02> <0398> @@ -33422,42 +33595,6 @@ endbfrange <27> <2019> <60> <2018> <7F> <00A8> -<80> <2423> - <0020> - <0393> - <2206> - <0398> - <039B> - <039E> - <03A0> - <03A3> - <03A8> - <00AD> - <00A0> - <2126> - <2191> - <2193> - <0027> - <00A1> - <00BF> - <0131> - <0237> - <0060> - <00B4> - <02C7> - <02D8> - <00AF> - <02DA> - <00B8> - <00DF> - <00E6> - <0153> - <00F8> - <00C6> - <0152> - <00D8> - <2423> - <00A8> endbfchar endcmap CMapName currentdict /CMap defineresource pop @@ -33468,16 +33605,16 @@ end endstream endobj -2241 0 obj +2244 0 obj << -/Length 1538 +/Length 1109 >> stream %!PS-Adobe-3.0 Resource-CMap %%DocumentNeededResources: ProcSet (CIDInit) %%IncludeResource: ProcSet (CIDInit) -%%BeginResource: CMap (TeX-cmtt8-builtin-0) -%%Title: (TeX-cmtt8-builtin-0 TeX cmtt8-builtin 0) +%%BeginResource: CMap (TeX-cmtt8-lm-rep-cmtt-0) +%%Title: (TeX-cmtt8-lm-rep-cmtt-0 TeX cmtt8-lm-rep-cmtt 0) %%Version: 1.000 %%EndComments /CIDInit /ProcSet findresource begin @@ -33485,22 +33622,21 @@ stream begincmap /CIDSystemInfo << /Registry (TeX) -/Ordering (cmtt8-builtin) +/Ordering (cmtt8-lm-rep-cmtt) /Supplement 0 >> def -/CMapName /TeX-cmtt8-builtin-0 def +/CMapName /TeX-cmtt8-lm-rep-cmtt-0 def /CMapType 2 def 1 begincodespacerange <00> endcodespacerange -5 beginbfrange +4 beginbfrange <07> <08> <03A5> <21> <26> <0021> <28> <5F> <0028> <61> <7E> <0061> - <03A5> endbfrange -70 beginbfchar +34 beginbfchar <00> <0393> <01> <2206> <02> <0398> @@ -33535,42 +33671,6 @@ endbfrange <27> <2019> <60> <2018> <7F> <00A8> -<80> <2423> - <0020> - <0393> - <2206> - <0398> - <039B> - <039E> - <03A0> - <03A3> - <03A8> - <00AD> - <00A0> - <2126> - <2191> - <2193> - <0027> - <00A1> - <00BF> - <0131> - <0237> - <0060> - <00B4> - <02C7> - <02D8> - <00AF> - <02DA> - <00B8> - <00DF> - <00E6> - <0153> - <00F8> - <00C6> - <0152> - <00D8> - <2423> - <00A8> endbfchar endcmap CMapName currentdict /CMap defineresource pop @@ -33581,16 +33681,16 @@ end endstream endobj -2242 0 obj +2245 0 obj << -/Length 1538 +/Length 1109 >> stream %!PS-Adobe-3.0 Resource-CMap %%DocumentNeededResources: ProcSet (CIDInit) %%IncludeResource: ProcSet (CIDInit) -%%BeginResource: CMap (TeX-cmtt9-builtin-0) -%%Title: (TeX-cmtt9-builtin-0 TeX cmtt9-builtin 0) +%%BeginResource: CMap (TeX-cmtt9-lm-rep-cmtt-0) +%%Title: (TeX-cmtt9-lm-rep-cmtt-0 TeX cmtt9-lm-rep-cmtt 0) %%Version: 1.000 %%EndComments /CIDInit /ProcSet findresource begin @@ -33598,22 +33698,21 @@ stream begincmap /CIDSystemInfo << /Registry (TeX) -/Ordering (cmtt9-builtin) +/Ordering (cmtt9-lm-rep-cmtt) /Supplement 0 >> def -/CMapName /TeX-cmtt9-builtin-0 def +/CMapName /TeX-cmtt9-lm-rep-cmtt-0 def /CMapType 2 def 1 begincodespacerange <00> endcodespacerange -5 beginbfrange +4 beginbfrange <07> <08> <03A5> <21> <26> <0021> <28> <5F> <0028> <61> <7E> <0061> - <03A5> endbfrange -70 beginbfchar +34 beginbfchar <00> <0393> <01> <2206> <02> <0398> @@ -33648,42 +33747,6 @@ endbfrange <27> <2019> <60> <2018> <7F> <00A8> -<80> <2423> - <0020> - <0393> - <2206> - <0398> - <039B> - <039E> - <03A0> - <03A3> - <03A8> - <00AD> - <00A0> - <2126> - <2191> - <2193> - <0027> - <00A1> - <00BF> - <0131> - <0237> - <0060> - <00B4> - <02C7> - <02D8> - <00AF> - <02DA> - <00B8> - <00DF> - <00E6> - <0153> - <00F8> - <00C6> - <0152> - <00D8> - <2423> - <00A8> endbfchar endcmap CMapName currentdict /CMap defineresource pop @@ -33694,7 +33757,7 @@ end endstream endobj -2243 0 obj +2246 0 obj << /Length 853 >> @@ -33752,7 +33815,7 @@ end endstream endobj -2244 0 obj +2247 0 obj << /Length 1113 >> @@ -33830,7 +33893,7 @@ end endstream endobj -2245 0 obj +2248 0 obj << /Length 1477 >> @@ -33933,7 +33996,7 @@ end endstream endobj -2246 0 obj +2249 0 obj << /Length 1477 >> @@ -34036,7 +34099,7 @@ end endstream endobj -2247 0 obj +2250 0 obj << /Length 1482 >> @@ -34143,8 +34206,8 @@ endobj << /Type /ObjStm /N 100 -/First 974 -/Length 16950 +/First 973 +/Length 17544 >> stream 606 0 2163 57 2170 151 2172 269 610 328 614 386 618 444 622 502 626 560 630 618 @@ -34152,11 +34215,11 @@ stream 2178 1384 2180 1502 2177 1561 2182 1629 2184 1747 2185 1805 2186 1863 931 1921 930 1977 890 2034 891 2091 906 2148 887 2205 888 2262 2187 2319 883 2377 2188 2434 1046 2492 2181 2550 2191 2644 2193 2762 918 2821 889 2879 886 2937 882 2995 2058 3053 885 3112 2194 3170 884 3229 2042 3287 -2043 3345 2190 3404 2195 3498 2196 3518 2197 3889 2198 3992 2199 4151 2200 4174 2201 4629 2202 4758 -2203 5056 2204 5702 2206 6173 2207 6804 2208 7275 2210 7850 2212 8075 2214 8407 2216 8651 2218 8922 -2220 9270 2222 9782 2224 10014 2226 10460 2228 10686 2230 10917 2232 11396 2234 11972 2205 12401 1851 12842 -1782 13005 1460 13168 915 13329 914 13488 913 13648 970 13809 1016 13969 1254 14130 1127 14295 665 14465 -667 14655 666 14845 668 15035 881 15148 971 15261 1028 15376 1062 15496 1092 15616 1134 15736 1180 15856 +2043 3345 2190 3404 2195 3498 2197 3518 2198 3889 2199 3992 2200 4151 2202 4174 2203 4629 2205 4758 +2206 5056 2207 5702 2209 6173 2210 6804 2211 7275 2213 7850 2215 8075 2217 8319 2219 8667 2221 8893 +2223 9124 2225 9409 2227 9934 2229 10179 2231 10638 2233 10981 2235 11460 2237 12036 2208 12465 2196 12906 +2204 13124 2201 13246 1851 13652 1782 13815 1460 14005 915 14166 914 14356 913 14516 970 14706 1016 14895 +1254 15085 1127 15250 665 15420 667 15610 666 15800 668 15990 881 16103 971 16216 1028 16331 1062 16451 % 606 0 obj << /D [2164 0 R /XYZ 99.895 284.171 null] @@ -34393,31 +34456,31 @@ stream >> % 2195 0 obj [1000] -% 2196 0 obj -[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] % 2197 0 obj -[277.8 277.8 500 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 777.8 500 777.8] +[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] % 2198 0 obj -[853 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 666 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 747 0 0 0 0 0 0 0 0 0 0 0 0 0 0 881 0 0 0 0 0 0 0 0 0 0 0 0 234 0 881 767] +[277.8 277.8 500 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 777.8 500 777.8] % 2199 0 obj -[528 542] +[853 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 666 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 747 0 0 0 0 0 0 0 0 0 0 0 0 0 0 881 0 0 0 0 0 0 0 0 0 0 0 0 234 0 881 767] % 2200 0 obj +[528 542] +% 2202 0 obj [525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 2201 0 obj +% 2203 0 obj [531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3] -% 2202 0 obj +% 2205 0 obj [388.9 388.9 500 777.8 277.8 333.3 277.8 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 277.8 777.8 472.2 472.2 777.8 750 708.3 722.2 763.9 680.6 652.8 784.7 750 361.1 513.9 777.8 625 916.7 750 777.8 680.6 777.8 736.1 555.6 722.2 750 750 1027.8 750 750 611.1 277.8 500 277.8] -% 2203 0 obj +% 2206 0 obj [777.8 277.8 777.8 500 777.8 500 777.8 777.8 777.8 777.8 777.8 777.8 777.8 1000 500 500 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 1000 1000 777.8 777.8 1000 1000 500 500 1000 1000 1000 777.8 1000 1000 611.1 611.1 1000 1000 1000 777.8 275 1000 666.7 666.7 888.9 888.9 0 0 555.6 555.6 666.7 500 722.2 722.2 777.8 777.8 611.1 798.5 656.8 526.5 771.4 527.8 718.7 594.9 844.5 544.5 677.8 761.9 689.7 1200.9 820.5 796.1 695.6 816.7 847.5 605.6 544.6 625.8 612.8 987.8 713.3 668.3 724.7 666.7 666.7 666.7 666.7 666.7 611.1 611.1 444.4 444.4 444.4 444.4 500 500 388.9 388.9 277.8 500 500 611.1 500 277.8 833.3] -% 2204 0 obj +% 2207 0 obj [525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 2206 0 obj +% 2209 0 obj [605 608 167 380 611 291 313 333 0 333 606 0 667 500 333 287 0 0 0 0 0 0 0 0 0 0 0 0 333 208 250 278 371 500 500 840 778 278 333 333 389 606 250 333 250 606 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 444 747 778 611 709 774 611 556 763 832 337 333 726 611 946 831 786 604 786 668 525 613 778 722 1000 667 667 667 333 606 333 606 500 278 500 553 444 611 479 333 556 582 291 234 556 291 883 582 546 601 560 395 424 326 603 565 834 516 556 500 333 606 333 606 0 0 0 278 500 500 1000 500 500 333 1144 525 331 998 0 0 0 0 0 0 500 500 606 500 1000 333 979 424 331 827 0 0 667 0 278 500 500 500 500 606 500] -% 2207 0 obj +% 2210 0 obj [528 545 167 333 556 278 333 333 0 333 606 0 667 444 333 278 0 0 0 0 0 0 0 0 0 0 0 0 333 333 250 333 500 500 500 889 778 278 333 333 389 606 250 333 250 296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 500 747 722 611 667 778 611 556 722 778 333 333 667 556 944 778 778 611 778 667 556 611 778 722 944 722 667 667 333 606 333 606 500 278 444 463 407 500 389 278 500 500 278 278 444 278 778 556 444 500 463 389 389 333 556 500 722 500 500 444] -% 2208 0 obj +% 2211 0 obj [611 611 167 333 611 333 333 333 0 333 606 0 667 500 333 333 0 0 0 0 0 0 0 0 0 0 0 0 333 227 250 278 402 500 500 889 833 278 333 333 444 606 250 333 250 296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 444 747 778 667 722 833 611 556 833 833 389 389 778 611 1000 833 833 611 833 722 611 667 778 778 1000 667 667 667 333 606 333 606 500 278 500 611 444 611 500 389 556 611 333 333 611 333 889 611 556 611 611 389 444 333 611 556 833 500 556 500 310 606 310 606 0 0 0 333 500 500 1000 500 500 333 1000 611 389 1000 0 0 0 0 0 0 500 500 606 500 1000] -% 2210 0 obj +% 2213 0 obj << /Type /FontDescriptor /FontName /MNPEHI+CMEX10 @@ -34430,24 +34493,9 @@ stream /StemV 47 /XHeight 431 /CharSet (/radicalbigg) -/FontFile 2209 0 R ->> -% 2212 0 obj -<< -/Type /FontDescriptor -/FontName /SFGIZH+CMITT10 -/Flags 4 -/FontBBox [11 -233 669 696] -/Ascent 611 -/CapHeight 611 -/Descent -222 -/ItalicAngle -14 -/StemV 69 -/XHeight 431 -/CharSet (/A/C/D/E/H/I/K/L/M/P/T/V/a/c/comma/d/e/exclam/f/g/h/hyphen/i/k/m/n/o/p/parenleft/parenright/period/r/s/slash/t/w/x/y) -/FontFile 2211 0 R +/FontFile 2212 0 R >> -% 2214 0 obj +% 2215 0 obj << /Type /FontDescriptor /FontName /TPELEW+CMMI10 @@ -34460,44 +34508,74 @@ stream /StemV 72 /XHeight 431 /CharSet (/arrowhookleft/greater/less) -/FontFile 2213 0 R +/FontFile 2214 0 R >> -% 2216 0 obj +% 2217 0 obj << /Type /FontDescriptor -/FontName /SOSTRQ+CMR10 +/FontName /VKSUEJ+CMSY10 /Flags 4 -/FontBBox [-40 -250 1009 750] -/Ascent 694 +/FontBBox [-29 -960 1116 775] +/Ascent 750 /CapHeight 683 /Descent -194 -/ItalicAngle 0 -/StemV 69 +/ItalicAngle -14 +/StemV 40 /XHeight 431 -/CharSet (/bracketleft/bracketright/equal/parenleft/parenright/plus) -/FontFile 2215 0 R +/CharSet (/B/H/I/arrowleft/arrowright/asteriskmath/bar/bardbl/braceleft/braceright/element/greaterequal/lessequal/minus/negationslash/radical) +/FontFile 2216 0 R >> -% 2218 0 obj +% 2219 0 obj << /Type /FontDescriptor -/FontName /VKSUEJ+CMSY10 +/FontName /IKXQUG+PazoMath /Flags 4 -/FontBBox [-29 -960 1116 775] -/Ascent 750 +/FontBBox [-40 -283 878 946] +/Ascent 0 +/CapHeight 0 +/Descent 0 +/ItalicAngle 0 +/StemV 95 +/XHeight 0 +/CharSet (/infinity/summation) +/FontFile 2218 0 R +>> +% 2221 0 obj +<< +/Type /FontDescriptor +/FontName /DUJUUF+PazoMath-Italic +/Flags 4 +/FontBBox [-70 -277 902 733] +/Ascent 482 +/CapHeight 0 +/Descent -276 +/ItalicAngle -9 +/StemV 65 +/XHeight 0 +/CharSet (/alpha/beta) +/FontFile 2220 0 R +>> +% 2223 0 obj +<< +/Type /FontDescriptor +/FontName /NCCVYE+LMRoman10-Regular +/Flags 4 +/FontBBox [-430 -290 1417 1127] +/Ascent 694 /CapHeight 683 /Descent -194 -/ItalicAngle -14 -/StemV 40 +/ItalicAngle 0 +/StemV 69 /XHeight 431 -/CharSet (/B/H/I/arrowleft/arrowright/asteriskmath/bar/bardbl/braceleft/braceright/element/greaterequal/lessequal/minus/negationslash/radical) -/FontFile 2217 0 R +/CharSet (/bracketleft/bracketright/equal/parenleft/parenright/plus) +/FontFile 2222 0 R >> -% 2220 0 obj +% 2225 0 obj << /Type /FontDescriptor -/FontName /QGKXNM+CMTT10 +/FontName /XKYJEW+LMMono10-Regular /Flags 4 -/FontBBox [-4 -233 537 696] +/FontBBox [-451 -316 731 1016] /Ascent 611 /CapHeight 611 /Descent -222 @@ -34505,14 +34583,14 @@ stream /StemV 69 /XHeight 431 /CharSet (/A/B/C/D/E/F/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/Z/a/ampersand/asciitilde/asterisk/b/backslash/bracketleft/bracketright/c/colon/comma/d/e/eight/equal/f/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/six/slash/t/three/two/u/underscore/v/w/x/y/z/zero) -/FontFile 2219 0 R +/FontFile 2224 0 R >> -% 2222 0 obj +% 2227 0 obj << /Type /FontDescriptor -/FontName /HZGQIC+CMTT8 +/FontName /XHVBMR+LMMono8-Regular /Flags 4 -/FontBBox [-5 -232 545 699] +/FontBBox [-456 -320 743 1014] /Ascent 611 /CapHeight 611 /Descent -222 @@ -34520,14 +34598,14 @@ stream /StemV 76 /XHeight 431 /CharSet (/b/c/e/i/l/n/p/r/s/t) -/FontFile 2221 0 R +/FontFile 2226 0 R >> -% 2224 0 obj +% 2229 0 obj << /Type /FontDescriptor -/FontName /RQJPKO+CMTT9 +/FontName /TWMFXI+LMMono9-Regular /Flags 4 -/FontBBox [-6 -233 542 698] +/FontBBox [-451 -318 734 1016] /Ascent 611 /CapHeight 611 /Descent -222 @@ -34535,39 +34613,24 @@ stream /StemV 74 /XHeight 431 /CharSet (/D/E/I/K/N/P/S/T/Y/a/ampersand/asterisk/b/c/colon/comma/d/e/equal/f/four/g/greater/h/hyphen/i/j/k/l/less/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/semicolon/six/slash/t/two/u/underscore/v/w/x/y/z/zero) -/FontFile 2223 0 R ->> -% 2226 0 obj -<< -/Type /FontDescriptor -/FontName /IKXQUG+PazoMath -/Flags 4 -/FontBBox [-40 -283 878 946] -/Ascent 0 -/CapHeight 0 -/Descent 0 -/ItalicAngle 0 -/StemV 95 -/XHeight 0 -/CharSet (/infinity/summation) -/FontFile 2225 0 R +/FontFile 2228 0 R >> -% 2228 0 obj +% 2231 0 obj << /Type /FontDescriptor -/FontName /DUJUUF+PazoMath-Italic +/FontName /XVBOSG+LMMono10-Italic /Flags 4 -/FontBBox [-70 -277 902 733] -/Ascent 482 -/CapHeight 0 -/Descent -276 -/ItalicAngle -9 -/StemV 65 -/XHeight 0 -/CharSet (/alpha/beta) -/FontFile 2227 0 R +/FontBBox [-491 -316 834 1016] +/Ascent 611 +/CapHeight 611 +/Descent -222 +/ItalicAngle -14 +/StemV 69 +/XHeight 431 +/CharSet (/A/C/D/E/H/I/K/L/M/P/T/V/a/c/comma/d/e/exclam/f/g/h/hyphen/i/k/m/n/o/p/parenleft/parenright/period/r/s/slash/t/w/x/y) +/FontFile 2230 0 R >> -% 2230 0 obj +% 2233 0 obj << /Type /FontDescriptor /FontName /BDDEWM+URWPalladioL-Bold @@ -34580,9 +34643,9 @@ stream /StemV 123 /XHeight 471 /CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/Y/Z/a/b/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/question/quoteright/r/s/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 2229 0 R +/FontFile 2232 0 R >> -% 2232 0 obj +% 2235 0 obj << /Type /FontDescriptor /FontName /GLTUCO+URWPalladioL-Roma @@ -34595,9 +34658,9 @@ stream /StemV 84 /XHeight 469 /CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/Z/a/ampersand/asterisk/b/bracketleft/bracketright/bullet/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/grave/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/plus/q/quotedblleft/quotedblright/quoteright/r/s/section/semicolon/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 2231 0 R +/FontFile 2234 0 R >> -% 2234 0 obj +% 2237 0 obj << /Type /FontDescriptor /FontName /LHHPET+URWPalladioL-Ital @@ -34610,446 +34673,466 @@ stream /StemV 78 /XHeight 482 /CharSet (/A/B/C/D/E/F/G/H/I/K/L/M/N/O/P/Q/R/S/T/U/V/X/a/b/bracketleft/bracketright/c/colon/comma/d/e/f/fi/five/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/period/plus/q/quoteright/r/s/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 2233 0 R +/FontFile 2236 0 R >> -% 2205 0 obj +% 2208 0 obj << /Type /Encoding /Differences [2/fi/fl 30/grave 38/ampersand/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 61/equal 63/question 65/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft 93/bracketright 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 147/quotedblleft/quotedblright/bullet/endash/emdash 167/section] >> +% 2196 0 obj +<< +/Type /Encoding +/Differences [33/exclam 40/parenleft/parenright 44/comma/hyphen/period/slash 65/A 67/C/D/E 72/H/I 75/K/L/M 80/P 84/T 86/V 97/a 99/c/d/e/f/g/h/i 107/k 109/m/n/o/p 114/r/s/t 119/w/x/y] +>> +% 2204 0 obj +<< +/Type /Encoding +/Differences [40/parenleft/parenright 43/plus 61/equal 91/bracketleft 93/bracketright] +>> +% 2201 0 obj +<< +/Type /Encoding +/Differences [13/quotesingle 37/percent/ampersand 40/parenleft/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four 54/six 56/eight/nine/colon/semicolon/less/equal/greater 65/A/B/C/D/E/F 72/H/I/J/K/L/M/N/O/P 82/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash/bracketright 95/underscore 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 126/asciitilde] +>> % 1851 0 obj << /Type /Font /Subtype /Type1 /BaseFont /MNPEHI+CMEX10 -/FontDescriptor 2210 0 R +/FontDescriptor 2213 0 R /FirstChar 114 /LastChar 114 /Widths 2195 0 R -/ToUnicode 2235 0 R +/ToUnicode 2238 0 R >> % 1782 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /SFGIZH+CMITT10 -/FontDescriptor 2212 0 R +/BaseFont /XVBOSG+LMMono10-Italic +/FontDescriptor 2231 0 R /FirstChar 33 /LastChar 121 -/Widths 2196 0 R -/ToUnicode 2236 0 R +/Widths 2197 0 R +/Encoding 2196 0 R +/ToUnicode 2239 0 R >> % 1460 0 obj << /Type /Font /Subtype /Type1 /BaseFont /TPELEW+CMMI10 -/FontDescriptor 2214 0 R +/FontDescriptor 2215 0 R /FirstChar 44 /LastChar 62 -/Widths 2197 0 R -/ToUnicode 2237 0 R +/Widths 2198 0 R +/ToUnicode 2240 0 R >> % 915 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /SOSTRQ+CMR10 -/FontDescriptor 2216 0 R +/BaseFont /NCCVYE+LMRoman10-Regular +/FontDescriptor 2223 0 R /FirstChar 40 /LastChar 93 -/Widths 2202 0 R -/ToUnicode 2238 0 R +/Widths 2205 0 R +/Encoding 2204 0 R +/ToUnicode 2241 0 R >> % 914 0 obj << /Type /Font /Subtype /Type1 /BaseFont /VKSUEJ+CMSY10 -/FontDescriptor 2218 0 R +/FontDescriptor 2217 0 R /FirstChar 0 /LastChar 112 -/Widths 2203 0 R -/ToUnicode 2239 0 R +/Widths 2206 0 R +/ToUnicode 2242 0 R >> % 913 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /QGKXNM+CMTT10 -/FontDescriptor 2220 0 R +/BaseFont /XKYJEW+LMMono10-Regular +/FontDescriptor 2225 0 R /FirstChar 13 /LastChar 126 -/Widths 2204 0 R -/ToUnicode 2240 0 R +/Widths 2207 0 R +/Encoding 2201 0 R +/ToUnicode 2243 0 R >> % 970 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /HZGQIC+CMTT8 -/FontDescriptor 2222 0 R +/BaseFont /XHVBMR+LMMono8-Regular +/FontDescriptor 2227 0 R /FirstChar 98 /LastChar 116 -/Widths 2201 0 R -/ToUnicode 2241 0 R +/Widths 2203 0 R +/Encoding 2201 0 R +/ToUnicode 2244 0 R >> % 1016 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /RQJPKO+CMTT9 -/FontDescriptor 2224 0 R +/BaseFont /TWMFXI+LMMono9-Regular +/FontDescriptor 2229 0 R /FirstChar 13 /LastChar 122 -/Widths 2200 0 R -/ToUnicode 2242 0 R +/Widths 2202 0 R +/Encoding 2201 0 R +/ToUnicode 2245 0 R >> % 1254 0 obj << /Type /Font /Subtype /Type1 /BaseFont /IKXQUG+PazoMath -/FontDescriptor 2226 0 R +/FontDescriptor 2219 0 R /FirstChar 165 /LastChar 229 -/Widths 2198 0 R -/ToUnicode 2243 0 R +/Widths 2199 0 R +/ToUnicode 2246 0 R >> % 1127 0 obj << /Type /Font /Subtype /Type1 /BaseFont /DUJUUF+PazoMath-Italic -/FontDescriptor 2228 0 R +/FontDescriptor 2221 0 R /FirstChar 97 /LastChar 98 -/Widths 2199 0 R -/ToUnicode 2244 0 R +/Widths 2200 0 R +/ToUnicode 2247 0 R >> % 665 0 obj << /Type /Font /Subtype /Type1 /BaseFont /BDDEWM+URWPalladioL-Bold -/FontDescriptor 2230 0 R +/FontDescriptor 2233 0 R /FirstChar 2 /LastChar 151 -/Widths 2208 0 R -/Encoding 2205 0 R -/ToUnicode 2245 0 R +/Widths 2211 0 R +/Encoding 2208 0 R +/ToUnicode 2248 0 R >> % 667 0 obj << /Type /Font /Subtype /Type1 /BaseFont /GLTUCO+URWPalladioL-Roma -/FontDescriptor 2232 0 R +/FontDescriptor 2235 0 R /FirstChar 2 /LastChar 167 -/Widths 2206 0 R -/Encoding 2205 0 R -/ToUnicode 2246 0 R +/Widths 2209 0 R +/Encoding 2208 0 R +/ToUnicode 2249 0 R >> % 666 0 obj << /Type /Font /Subtype /Type1 /BaseFont /LHHPET+URWPalladioL-Ital -/FontDescriptor 2234 0 R +/FontDescriptor 2237 0 R /FirstChar 2 /LastChar 122 -/Widths 2207 0 R -/Encoding 2205 0 R -/ToUnicode 2247 0 R +/Widths 2210 0 R +/Encoding 2208 0 R +/ToUnicode 2250 0 R >> % 668 0 obj << /Type /Pages /Count 6 -/Parent 2248 0 R +/Parent 2251 0 R /Kids [658 0 R 671 0 R 717 0 R 774 0 R 821 0 R 861 0 R] >> % 881 0 obj << /Type /Pages /Count 6 -/Parent 2248 0 R +/Parent 2251 0 R /Kids [879 0 R 898 0 R 910 0 R 923 0 R 935 0 R 940 0 R] >> % 971 0 obj << /Type /Pages /Count 6 -/Parent 2248 0 R +/Parent 2251 0 R /Kids [953 0 R 975 0 R 986 0 R 994 0 R 1005 0 R 1021 0 R] >> % 1028 0 obj << /Type /Pages /Count 6 -/Parent 2248 0 R +/Parent 2251 0 R /Kids [1025 0 R 1030 0 R 1035 0 R 1042 0 R 1049 0 R 1054 0 R] >> % 1062 0 obj << /Type /Pages /Count 6 -/Parent 2248 0 R +/Parent 2251 0 R /Kids [1059 0 R 1065 0 R 1069 0 R 1073 0 R 1077 0 R 1083 0 R] >> + +endstream +endobj +2252 0 obj +<< +/Type /ObjStm +/N 100 +/First 923 +/Length 10703 +>> +stream +1092 0 1134 120 1180 240 1222 360 1281 480 1333 600 1382 720 1431 840 1471 960 1510 1080 +1557 1200 1600 1320 1638 1440 1674 1560 1713 1680 1754 1800 1783 1920 1817 2040 1855 2160 1890 2280 +1928 2400 1966 2520 2002 2640 2057 2760 2158 2880 2189 3000 2251 3084 2253 3202 2254 3323 2255 3444 +2256 3565 2257 3686 2258 3761 2259 3866 655 3935 651 4009 647 4097 643 4185 639 4273 635 4361 +631 4449 627 4537 623 4625 619 4713 615 4801 611 4889 607 4977 602 5065 598 5139 594 5251 +590 5325 586 5413 582 5501 578 5575 574 5700 570 5760 566 5885 562 5959 558 6047 554 6135 +550 6223 546 6311 542 6385 538 6510 534 6584 530 6672 526 6760 522 6848 518 6936 514 7010 +510 7135 506 7209 502 7297 498 7385 494 7459 490 7584 486 7658 482 7746 478 7834 474 7922 +470 8010 466 8098 462 8186 458 8274 454 8362 450 8450 446 8538 442 8626 438 8714 434 8802 +430 8890 426 8978 422 9052 418 9178 414 9252 410 9340 406 9428 401 9516 397 9604 393 9692 % 1092 0 obj << /Type /Pages /Count 6 -/Parent 2248 0 R +/Parent 2251 0 R /Kids [1089 0 R 1096 0 R 1103 0 R 1109 0 R 1113 0 R 1124 0 R] >> % 1134 0 obj << /Type /Pages /Count 6 -/Parent 2249 0 R +/Parent 2253 0 R /Kids [1131 0 R 1141 0 R 1147 0 R 1158 0 R 1164 0 R 1171 0 R] >> % 1180 0 obj << /Type /Pages /Count 6 -/Parent 2249 0 R +/Parent 2253 0 R /Kids [1176 0 R 1185 0 R 1193 0 R 1198 0 R 1206 0 R 1211 0 R] >> - -endstream -endobj -2250 0 obj -<< -/Type /ObjStm -/N 100 -/First 920 -/Length 10604 ->> -stream -1222 0 1281 120 1333 240 1382 360 1431 480 1471 600 1510 720 1557 840 1600 960 1638 1080 -1674 1200 1713 1320 1754 1440 1783 1560 1817 1680 1855 1800 1890 1920 1928 2040 1966 2160 2002 2280 -2057 2400 2158 2520 2189 2640 2248 2724 2249 2842 2251 2963 2252 3084 2253 3205 2254 3326 2255 3401 -2256 3506 655 3575 651 3649 647 3737 643 3825 639 3913 635 4001 631 4089 627 4177 623 4265 -619 4353 615 4441 611 4529 607 4617 602 4705 598 4779 594 4891 590 4965 586 5053 582 5141 -578 5215 574 5340 570 5400 566 5525 562 5599 558 5687 554 5775 550 5863 546 5951 542 6025 -538 6150 534 6224 530 6312 526 6400 522 6488 518 6576 514 6650 510 6775 506 6849 502 6937 -498 7025 494 7099 490 7224 486 7298 482 7386 478 7474 474 7562 470 7650 466 7738 462 7826 -458 7914 454 8002 450 8090 446 8178 442 8266 438 8354 434 8442 430 8530 426 8618 422 8692 -418 8818 414 8892 410 8980 406 9068 401 9156 397 9244 393 9332 389 9420 385 9508 381 9596 % 1222 0 obj << /Type /Pages /Count 6 -/Parent 2249 0 R +/Parent 2253 0 R /Kids [1218 0 R 1224 0 R 1237 0 R 1244 0 R 1251 0 R 1262 0 R] >> % 1281 0 obj << /Type /Pages /Count 6 -/Parent 2249 0 R +/Parent 2253 0 R /Kids [1278 0 R 1285 0 R 1296 0 R 1302 0 R 1313 0 R 1318 0 R] >> % 1333 0 obj << /Type /Pages /Count 6 -/Parent 2249 0 R +/Parent 2253 0 R /Kids [1329 0 R 1335 0 R 1344 0 R 1350 0 R 1358 0 R 1365 0 R] >> % 1382 0 obj << /Type /Pages /Count 6 -/Parent 2249 0 R +/Parent 2253 0 R /Kids [1379 0 R 1387 0 R 1396 0 R 1404 0 R 1408 0 R 1423 0 R] >> % 1431 0 obj << /Type /Pages /Count 6 -/Parent 2251 0 R +/Parent 2254 0 R /Kids [1428 0 R 1435 0 R 1442 0 R 1446 0 R 1451 0 R 1457 0 R] >> % 1471 0 obj << /Type /Pages /Count 6 -/Parent 2251 0 R +/Parent 2254 0 R /Kids [1463 0 R 1474 0 R 1479 0 R 1488 0 R 1496 0 R 1501 0 R] >> % 1510 0 obj << /Type /Pages /Count 6 -/Parent 2251 0 R +/Parent 2254 0 R /Kids [1507 0 R 1512 0 R 1520 0 R 1525 0 R 1533 0 R 1539 0 R] >> % 1557 0 obj << /Type /Pages /Count 6 -/Parent 2251 0 R +/Parent 2254 0 R /Kids [1548 0 R 1562 0 R 1566 0 R 1579 0 R 1585 0 R 1592 0 R] >> % 1600 0 obj << /Type /Pages /Count 6 -/Parent 2251 0 R +/Parent 2254 0 R /Kids [1596 0 R 1604 0 R 1609 0 R 1618 0 R 1626 0 R 1630 0 R] >> % 1638 0 obj << /Type /Pages /Count 6 -/Parent 2251 0 R +/Parent 2254 0 R /Kids [1635 0 R 1640 0 R 1647 0 R 1652 0 R 1658 0 R 1664 0 R] >> % 1674 0 obj << /Type /Pages /Count 6 -/Parent 2252 0 R +/Parent 2255 0 R /Kids [1670 0 R 1677 0 R 1684 0 R 1691 0 R 1695 0 R 1705 0 R] >> % 1713 0 obj << /Type /Pages /Count 6 -/Parent 2252 0 R +/Parent 2255 0 R /Kids [1710 0 R 1715 0 R 1728 0 R 1732 0 R 1738 0 R 1744 0 R] >> % 1754 0 obj << /Type /Pages /Count 6 -/Parent 2252 0 R +/Parent 2255 0 R /Kids [1751 0 R 1756 0 R 1760 0 R 1764 0 R 1768 0 R 1772 0 R] >> % 1783 0 obj << /Type /Pages /Count 6 -/Parent 2252 0 R +/Parent 2255 0 R /Kids [1776 0 R 1785 0 R 1789 0 R 1796 0 R 1800 0 R 1807 0 R] >> % 1817 0 obj << /Type /Pages /Count 6 -/Parent 2252 0 R +/Parent 2255 0 R /Kids [1811 0 R 1819 0 R 1824 0 R 1831 0 R 1835 0 R 1842 0 R] >> % 1855 0 obj << /Type /Pages /Count 6 -/Parent 2252 0 R +/Parent 2255 0 R /Kids [1846 0 R 1857 0 R 1862 0 R 1869 0 R 1875 0 R 1879 0 R] >> % 1890 0 obj << /Type /Pages /Count 6 -/Parent 2253 0 R +/Parent 2256 0 R /Kids [1885 0 R 1892 0 R 1898 0 R 1904 0 R 1909 0 R 1916 0 R] >> % 1928 0 obj << /Type /Pages /Count 6 -/Parent 2253 0 R +/Parent 2256 0 R /Kids [1923 0 R 1932 0 R 1939 0 R 1946 0 R 1952 0 R 1956 0 R] >> % 1966 0 obj << /Type /Pages /Count 6 -/Parent 2253 0 R +/Parent 2256 0 R /Kids [1962 0 R 1972 0 R 1976 0 R 1984 0 R 1989 0 R 1993 0 R] >> % 2002 0 obj << /Type /Pages /Count 6 -/Parent 2253 0 R +/Parent 2256 0 R /Kids [1999 0 R 2004 0 R 2011 0 R 2022 0 R 2027 0 R 2037 0 R] >> % 2057 0 obj << /Type /Pages /Count 6 -/Parent 2253 0 R +/Parent 2256 0 R /Kids [2052 0 R 2062 0 R 2073 0 R 2101 0 R 2121 0 R 2139 0 R] >> % 2158 0 obj << /Type /Pages /Count 6 -/Parent 2253 0 R +/Parent 2256 0 R /Kids [2155 0 R 2160 0 R 2164 0 R 2170 0 R 2174 0 R 2178 0 R] >> % 2189 0 obj << /Type /Pages /Count 2 -/Parent 2254 0 R +/Parent 2257 0 R /Kids [2182 0 R 2191 0 R] >> -% 2248 0 obj +% 2251 0 obj << /Type /Pages /Count 36 -/Parent 2255 0 R +/Parent 2258 0 R /Kids [668 0 R 881 0 R 971 0 R 1028 0 R 1062 0 R 1092 0 R] >> -% 2249 0 obj +% 2253 0 obj << /Type /Pages /Count 36 -/Parent 2255 0 R +/Parent 2258 0 R /Kids [1134 0 R 1180 0 R 1222 0 R 1281 0 R 1333 0 R 1382 0 R] >> -% 2251 0 obj +% 2254 0 obj << /Type /Pages /Count 36 -/Parent 2255 0 R +/Parent 2258 0 R /Kids [1431 0 R 1471 0 R 1510 0 R 1557 0 R 1600 0 R 1638 0 R] >> -% 2252 0 obj +% 2255 0 obj << /Type /Pages /Count 36 -/Parent 2255 0 R +/Parent 2258 0 R /Kids [1674 0 R 1713 0 R 1754 0 R 1783 0 R 1817 0 R 1855 0 R] >> -% 2253 0 obj +% 2256 0 obj << /Type /Pages /Count 36 -/Parent 2255 0 R +/Parent 2258 0 R /Kids [1890 0 R 1928 0 R 1966 0 R 2002 0 R 2057 0 R 2158 0 R] >> -% 2254 0 obj +% 2257 0 obj << /Type /Pages /Count 2 -/Parent 2255 0 R +/Parent 2258 0 R /Kids [2189 0 R] >> -% 2255 0 obj +% 2258 0 obj << /Type /Pages /Count 182 -/Kids [2248 0 R 2249 0 R 2251 0 R 2252 0 R 2253 0 R 2254 0 R] +/Kids [2251 0 R 2253 0 R 2254 0 R 2255 0 R 2256 0 R 2257 0 R] >> -% 2256 0 obj +% 2259 0 obj << /Type /Outlines /First 4 0 R @@ -35589,6 +35672,27 @@ stream /Prev 389 0 R /Next 397 0 R >> + +endstream +endobj +2260 0 obj +<< +/Type /ObjStm +/N 100 +/First 862 +/Length 9899 +>> +stream +389 0 385 88 381 176 377 264 373 352 369 440 365 528 361 616 357 704 353 792 +349 880 345 968 341 1056 337 1144 333 1232 329 1320 325 1408 321 1496 317 1584 313 1672 +309 1746 305 1872 301 1946 297 2034 293 2122 289 2196 285 2321 281 2395 277 2483 273 2571 +269 2659 265 2747 261 2835 257 2923 253 3011 249 3099 245 3187 241 3275 237 3363 233 3451 +229 3539 225 3627 221 3701 217 3826 213 3899 209 3986 205 4060 200 4148 196 4236 192 4324 +188 4412 184 4486 180 4612 176 4686 172 4774 168 4862 164 4950 160 5038 156 5126 152 5214 +148 5302 144 5390 140 5478 136 5566 132 5654 128 5742 124 5830 120 5918 116 6006 112 6094 +108 6182 104 6256 100 6382 96 6453 92 6536 88 6618 84 6700 80 6782 76 6864 72 6946 +68 7028 64 7110 60 7192 56 7274 52 7356 48 7438 44 7520 40 7589 36 7698 32 7818 +28 7887 24 7943 20 8062 16 8144 12 8213 8 8330 4 8395 2261 8488 2262 8684 2263 8857 % 389 0 obj << /Title 390 0 R @@ -35613,27 +35717,6 @@ stream /Prev 377 0 R /Next 385 0 R >> - -endstream -endobj -2257 0 obj -<< -/Type /ObjStm -/N 100 -/First 865 -/Length 10172 ->> -stream -377 0 373 88 369 176 365 264 361 352 357 440 353 528 349 616 345 704 341 792 -337 880 333 968 329 1056 325 1144 321 1232 317 1320 313 1408 309 1482 305 1608 301 1682 -297 1770 293 1858 289 1932 285 2057 281 2131 277 2219 273 2307 269 2395 265 2483 261 2571 -257 2659 253 2747 249 2835 245 2923 241 3011 237 3099 233 3187 229 3275 225 3363 221 3437 -217 3562 213 3635 209 3722 205 3796 200 3884 196 3972 192 4060 188 4148 184 4222 180 4348 -176 4422 172 4510 168 4598 164 4686 160 4774 156 4862 152 4950 148 5038 144 5126 140 5214 -136 5302 132 5390 128 5478 124 5566 120 5654 116 5742 112 5830 108 5918 104 5992 100 6118 -96 6189 92 6272 88 6354 84 6436 80 6518 76 6600 72 6682 68 6764 64 6846 60 6928 -56 7010 52 7092 48 7174 44 7256 40 7325 36 7434 32 7554 28 7623 24 7679 20 7798 -16 7880 12 7949 8 8066 4 8131 2258 8224 2259 8420 2260 8593 2261 8773 2262 8950 2263 9127 % 377 0 obj << /Title 378 0 R @@ -36391,570 +36474,555 @@ stream << /Title 5 0 R /A 1 0 R -/Parent 2256 0 R +/Parent 2259 0 R /First 8 0 R /Last 598 0 R /Count -13 >> -% 2258 0 obj +% 2261 0 obj << /Names [(Doc-Start) 664 0 R (Hfootnote.1) 916 0 R (Hfootnote.2) 917 0 R (Hfootnote.3) 969 0 R (Hfootnote.4) 1965 0 R (Hfootnote.5) 2017 0 R] /Limits [(Doc-Start) (Hfootnote.5)] >> -% 2259 0 obj +% 2262 0 obj << /Names [(Item.1) 943 0 R (Item.10) 957 0 R (Item.100) 1698 0 R (Item.101) 1699 0 R (Item.102) 1700 0 R (Item.103) 1718 0 R] /Limits [(Item.1) (Item.103)] >> -% 2260 0 obj +% 2263 0 obj << /Names [(Item.104) 1719 0 R (Item.105) 1720 0 R (Item.106) 1721 0 R (Item.107) 1722 0 R (Item.108) 1723 0 R (Item.109) 1724 0 R] /Limits [(Item.104) (Item.109)] >> -% 2261 0 obj + +endstream +endobj +2265 0 obj +<< +/Type /ObjStm +/N 100 +/First 1037 +/Length 20320 +>> +stream +2264 0 2266 177 2267 354 2268 534 2269 712 2270 892 2271 1070 2272 1247 2273 1412 2274 1578 +2275 1744 2276 1916 2277 2086 2278 2258 2279 2428 2280 2600 2281 2769 2282 2938 2283 3110 2284 3280 +2285 3452 2286 3622 2287 3794 2288 3983 2289 4173 2290 4396 2291 4617 2292 4822 2293 5012 2294 5194 +2295 5390 2296 5615 2297 5835 2298 6066 2299 6302 2300 6534 2301 6745 2302 6922 2303 7100 2304 7280 +2305 7459 2306 7639 2307 7818 2308 7998 2309 8177 2310 8357 2311 8536 2312 8716 2313 8894 2314 9072 +2315 9252 2316 9424 2317 9596 2318 9766 2319 9938 2320 10108 2321 10280 2322 10449 2323 10618 2324 10790 +2325 10960 2326 11132 2327 11302 2328 11474 2329 11644 2330 11816 2331 11986 2332 12182 2333 12379 2334 12575 +2335 12762 2336 12944 2337 13138 2338 13368 2339 13593 2340 13812 2341 14039 2342 14265 2343 14487 2344 14712 +2345 14942 2346 15171 2347 15399 2348 15621 2349 15849 2350 16075 2351 16297 2352 16519 2353 16760 2354 17022 +2355 17278 2356 17544 2357 17814 2358 18076 2359 18338 2360 18548 2361 18728 2362 18904 2363 19073 2364 19169 +% 2264 0 obj << /Names [(Item.11) 958 0 R (Item.110) 1725 0 R (Item.111) 1726 0 R (Item.112) 1735 0 R (Item.113) 1736 0 R (Item.114) 1741 0 R] /Limits [(Item.11) (Item.114)] >> -% 2262 0 obj +% 2266 0 obj << /Names [(Item.115) 1742 0 R (Item.116) 1747 0 R (Item.117) 1748 0 R (Item.118) 1749 0 R (Item.119) 1779 0 R (Item.12) 959 0 R] /Limits [(Item.115) (Item.12)] >> -% 2263 0 obj -<< -/Names [(Item.120) 1780 0 R (Item.121) 1781 0 R (Item.122) 1792 0 R (Item.123) 1793 0 R (Item.124) 1794 0 R (Item.125) 1803 0 R] -/Limits [(Item.120) (Item.125)] ->> - -endstream -endobj -2265 0 obj +% 2267 0 obj << -/Type /ObjStm -/N 100 -/First 1037 -/Length 20119 ->> -stream -2264 0 2266 178 2267 358 2268 536 2269 713 2270 878 2271 1044 2272 1210 2273 1382 2274 1552 -2275 1724 2276 1894 2277 2066 2278 2235 2279 2404 2280 2576 2281 2746 2282 2918 2283 3088 2284 3260 -2285 3449 2286 3639 2287 3862 2288 4083 2289 4288 2290 4478 2291 4660 2292 4856 2293 5081 2294 5301 -2295 5532 2296 5768 2297 6000 2298 6211 2299 6388 2300 6566 2301 6746 2302 6925 2303 7105 2304 7284 -2305 7464 2306 7643 2307 7823 2308 8002 2309 8182 2310 8360 2311 8538 2312 8718 2313 8890 2314 9062 -2315 9232 2316 9404 2317 9574 2318 9746 2319 9915 2320 10084 2321 10256 2322 10426 2323 10598 2324 10768 -2325 10940 2326 11110 2327 11282 2328 11452 2329 11648 2330 11845 2331 12041 2332 12228 2333 12410 2334 12604 -2335 12834 2336 13059 2337 13278 2338 13505 2339 13731 2340 13953 2341 14178 2342 14408 2343 14637 2344 14865 -2345 15087 2346 15315 2347 15541 2348 15763 2349 15985 2350 16226 2351 16488 2352 16744 2353 17010 2354 17280 -2355 17542 2356 17804 2357 18014 2358 18194 2359 18370 2360 18539 2361 18635 2362 18749 2363 18861 2364 18972 -% 2264 0 obj +/Names [(Item.120) 1780 0 R (Item.121) 1781 0 R (Item.122) 1792 0 R (Item.123) 1793 0 R (Item.124) 1794 0 R (Item.125) 1803 0 R] +/Limits [(Item.120) (Item.125)] +>> +% 2268 0 obj << /Names [(Item.126) 1804 0 R (Item.127) 1805 0 R (Item.128) 1814 0 R (Item.129) 1815 0 R (Item.13) 960 0 R (Item.130) 1816 0 R] /Limits [(Item.126) (Item.130)] >> -% 2266 0 obj +% 2269 0 obj << /Names [(Item.131) 1827 0 R (Item.132) 1828 0 R (Item.133) 1829 0 R (Item.134) 1838 0 R (Item.135) 1839 0 R (Item.136) 1840 0 R] /Limits [(Item.131) (Item.136)] >> -% 2267 0 obj +% 2270 0 obj << /Names [(Item.137) 1849 0 R (Item.138) 1850 0 R (Item.139) 1852 0 R (Item.14) 961 0 R (Item.140) 1853 0 R (Item.141) 1854 0 R] /Limits [(Item.137) (Item.141)] >> -% 2268 0 obj +% 2271 0 obj << /Names [(Item.142) 1860 0 R (Item.143) 1865 0 R (Item.144) 2066 0 R (Item.145) 2067 0 R (Item.146) 2167 0 R (Item.15) 962 0 R] /Limits [(Item.142) (Item.15)] >> -% 2269 0 obj +% 2272 0 obj << /Names [(Item.16) 963 0 R (Item.17) 964 0 R (Item.18) 965 0 R (Item.19) 966 0 R (Item.2) 944 0 R (Item.20) 967 0 R] /Limits [(Item.16) (Item.20)] >> -% 2270 0 obj +% 2273 0 obj << /Names [(Item.21) 968 0 R (Item.22) 978 0 R (Item.23) 979 0 R (Item.24) 980 0 R (Item.25) 981 0 R (Item.26) 982 0 R] /Limits [(Item.21) (Item.26)] >> -% 2271 0 obj +% 2274 0 obj << /Names [(Item.27) 983 0 R (Item.28) 997 0 R (Item.29) 998 0 R (Item.3) 945 0 R (Item.30) 999 0 R (Item.31) 1000 0 R] /Limits [(Item.27) (Item.31)] >> -% 2272 0 obj +% 2275 0 obj << /Names [(Item.32) 1001 0 R (Item.33) 1008 0 R (Item.34) 1009 0 R (Item.35) 1010 0 R (Item.36) 1011 0 R (Item.37) 1012 0 R] /Limits [(Item.32) (Item.37)] >> -% 2273 0 obj +% 2276 0 obj << /Names [(Item.38) 1013 0 R (Item.39) 1014 0 R (Item.4) 946 0 R (Item.40) 1015 0 R (Item.41) 1057 0 R (Item.42) 1150 0 R] /Limits [(Item.38) (Item.42)] >> -% 2274 0 obj +% 2277 0 obj << /Names [(Item.43) 1179 0 R (Item.44) 1201 0 R (Item.45) 1227 0 R (Item.46) 1399 0 R (Item.47) 1400 0 R (Item.48) 1401 0 R] /Limits [(Item.43) (Item.48)] >> -% 2275 0 obj +% 2278 0 obj << /Names [(Item.49) 1454 0 R (Item.5) 947 0 R (Item.50) 1461 0 R (Item.51) 1466 0 R (Item.52) 1467 0 R (Item.53) 1468 0 R] /Limits [(Item.49) (Item.53)] >> -% 2276 0 obj +% 2279 0 obj << /Names [(Item.54) 1469 0 R (Item.55) 1470 0 R (Item.56) 1482 0 R (Item.57) 1483 0 R (Item.58) 1484 0 R (Item.59) 1491 0 R] /Limits [(Item.54) (Item.59)] >> -% 2277 0 obj +% 2280 0 obj << /Names [(Item.6) 948 0 R (Item.60) 1515 0 R (Item.61) 1516 0 R (Item.62) 1523 0 R (Item.63) 1528 0 R (Item.64) 1529 0 R] /Limits [(Item.6) (Item.64)] >> -% 2278 0 obj +% 2281 0 obj << /Names [(Item.65) 1530 0 R (Item.66) 1542 0 R (Item.67) 1543 0 R (Item.68) 1544 0 R (Item.69) 1545 0 R (Item.7) 949 0 R] /Limits [(Item.65) (Item.7)] >> -% 2279 0 obj +% 2282 0 obj << /Names [(Item.70) 1546 0 R (Item.71) 1551 0 R (Item.72) 1552 0 R (Item.73) 1553 0 R (Item.74) 1554 0 R (Item.75) 1555 0 R] /Limits [(Item.70) (Item.75)] >> -% 2280 0 obj +% 2283 0 obj << /Names [(Item.76) 1556 0 R (Item.77) 1569 0 R (Item.78) 1570 0 R (Item.79) 1571 0 R (Item.8) 950 0 R (Item.80) 1572 0 R] /Limits [(Item.76) (Item.80)] >> -% 2281 0 obj +% 2284 0 obj << /Names [(Item.81) 1573 0 R (Item.82) 1574 0 R (Item.83) 1575 0 R (Item.84) 1588 0 R (Item.85) 1599 0 R (Item.86) 1612 0 R] /Limits [(Item.81) (Item.86)] >> -% 2282 0 obj +% 2285 0 obj << /Names [(Item.87) 1613 0 R (Item.88) 1621 0 R (Item.89) 1622 0 R (Item.9) 956 0 R (Item.90) 1643 0 R (Item.91) 1644 0 R] /Limits [(Item.87) (Item.91)] >> -% 2283 0 obj +% 2286 0 obj << /Names [(Item.92) 1655 0 R (Item.93) 1661 0 R (Item.94) 1667 0 R (Item.95) 1673 0 R (Item.96) 1680 0 R (Item.97) 1681 0 R] /Limits [(Item.92) (Item.97)] >> -% 2284 0 obj +% 2287 0 obj << /Names [(Item.98) 1687 0 R (Item.99) 1688 0 R (algocf.1) 2071 0 R (algocf.2) 2117 0 R (algocfline.1) 2056 0 R (algocfline.2) 2124 0 R] /Limits [(Item.98) (algocfline.2)] >> -% 2285 0 obj +% 2288 0 obj << /Names [(cite.2007c) 930 0 R (cite.2007d) 931 0 R (cite.BLACS) 906 0 R (cite.BLAS1) 889 0 R (cite.BLAS2) 890 0 R (cite.BLAS3) 891 0 R] /Limits [(cite.2007c) (cite.BLAS3)] >> -% 2286 0 obj +% 2289 0 obj << /Names [(cite.CaFiRo:2014) 2042 0 R (cite.DesPat:11) 884 0 R (cite.DesignPatterns) 1046 0 R (cite.KIVA3PSBLAS) 2188 0 R (cite.METIS) 918 0 R (cite.MPI1) 2194 0 R] /Limits [(cite.CaFiRo:2014) (cite.MPI1)] >> -% 2287 0 obj +% 2290 0 obj << /Names [(cite.MRC:11) 2058 0 R (cite.OurTechRep) 2043 0 R (cite.PARA04FOREST) 2186 0 R (cite.PSBLAS) 2187 0 R (cite.RouXiaXu:11) 885 0 R (cite.Sparse03) 883 0 R] /Limits [(cite.MRC:11) (cite.Sparse03)] >> -% 2288 0 obj +% 2291 0 obj << /Names [(cite.machiels) 886 0 R (cite.metcalf) 882 0 R (cite.sblas02) 888 0 R (cite.sblas97) 887 0 R (descdata) 989 0 R (equation.4.1) 1265 0 R] /Limits [(cite.machiels) (equation.4.1)] >> -% 2289 0 obj +% 2292 0 obj << /Names [(equation.4.2) 1266 0 R (equation.4.3) 1267 0 R (figure.1) 900 0 R (figure.2) 926 0 R (figure.3) 1368 0 R (figure.4) 1402 0 R] /Limits [(equation.4.2) (figure.4)] >> -% 2290 0 obj +% 2293 0 obj << /Names [(figure.5) 2065 0 R (figure.6) 2060 0 R (figure.7) 2104 0 R (figure.8) 2116 0 R (figure.9) 2142 0 R (listing.1) 1017 0 R] /Limits [(figure.5) (listing.1)] >> -% 2291 0 obj +% 2294 0 obj << /Names [(listing.2) 1047 0 R (listing.3) 1087 0 R (listing.4) 1107 0 R (listing.5) 1872 0 R (listing.6) 1873 0 R (lstlisting.-1) 1228 0 R] /Limits [(listing.2) (lstlisting.-1)] >> -% 2292 0 obj +% 2295 0 obj << /Names [(lstlisting.-10) 1942 0 R (lstlisting.-11) 1949 0 R (lstlisting.-12) 2014 0 R (lstlisting.-13) 2076 0 R (lstlisting.-2) 1882 0 R (lstlisting.-3) 1888 0 R] /Limits [(lstlisting.-10) (lstlisting.-3)] >> -% 2293 0 obj +% 2296 0 obj << /Names [(lstlisting.-4) 1895 0 R (lstlisting.-5) 1901 0 R (lstlisting.-6) 1912 0 R (lstlisting.-7) 1919 0 R (lstlisting.-8) 1926 0 R (lstlisting.-9) 1935 0 R] /Limits [(lstlisting.-4) (lstlisting.-9)] >> -% 2294 0 obj +% 2297 0 obj << /Names [(lstnumber.-1.1) 1229 0 R (lstnumber.-1.2) 1230 0 R (lstnumber.-1.3) 1231 0 R (lstnumber.-1.4) 1232 0 R (lstnumber.-10.1) 1943 0 R (lstnumber.-11.1) 1950 0 R] /Limits [(lstnumber.-1.1) (lstnumber.-11.1)] >> -% 2295 0 obj +% 2298 0 obj << /Names [(lstnumber.-12.1) 2015 0 R (lstnumber.-12.2) 2016 0 R (lstnumber.-13.1) 2077 0 R (lstnumber.-13.2) 2078 0 R (lstnumber.-13.3) 2079 0 R (lstnumber.-13.4) 2080 0 R] /Limits [(lstnumber.-12.1) (lstnumber.-13.4)] >> -% 2296 0 obj +% 2299 0 obj << /Names [(lstnumber.-13.5) 2081 0 R (lstnumber.-13.6) 2082 0 R (lstnumber.-13.7) 2083 0 R (lstnumber.-2.1) 1883 0 R (lstnumber.-3.1) 1889 0 R (lstnumber.-4.1) 1896 0 R] /Limits [(lstnumber.-13.5) (lstnumber.-4.1)] >> -% 2297 0 obj +% 2300 0 obj << /Names [(lstnumber.-5.1) 1902 0 R (lstnumber.-6.1) 1913 0 R (lstnumber.-7.1) 1920 0 R (lstnumber.-8.1) 1927 0 R (lstnumber.-9.1) 1936 0 R (page.1) 663 0 R] /Limits [(lstnumber.-5.1) (page.1)] >> -% 2298 0 obj +% 2301 0 obj << /Names [(page.10) 996 0 R (page.100) 1654 0 R (page.101) 1660 0 R (page.102) 1666 0 R (page.103) 1672 0 R (page.104) 1679 0 R] /Limits [(page.10) (page.104)] >> -% 2299 0 obj +% 2302 0 obj << /Names [(page.105) 1686 0 R (page.106) 1693 0 R (page.107) 1697 0 R (page.108) 1707 0 R (page.109) 1712 0 R (page.11) 1007 0 R] /Limits [(page.105) (page.11)] >> -% 2300 0 obj +% 2303 0 obj << /Names [(page.110) 1717 0 R (page.111) 1730 0 R (page.112) 1734 0 R (page.113) 1740 0 R (page.114) 1746 0 R (page.115) 1753 0 R] /Limits [(page.110) (page.115)] >> -% 2301 0 obj +% 2304 0 obj << /Names [(page.116) 1758 0 R (page.117) 1762 0 R (page.118) 1766 0 R (page.119) 1770 0 R (page.12) 1023 0 R (page.120) 1774 0 R] /Limits [(page.116) (page.120)] >> -% 2302 0 obj +% 2305 0 obj << /Names [(page.121) 1778 0 R (page.122) 1787 0 R (page.123) 1791 0 R (page.124) 1798 0 R (page.125) 1802 0 R (page.126) 1809 0 R] /Limits [(page.121) (page.126)] >> -% 2303 0 obj +% 2306 0 obj << /Names [(page.127) 1813 0 R (page.128) 1821 0 R (page.129) 1826 0 R (page.13) 1027 0 R (page.130) 1833 0 R (page.131) 1837 0 R] /Limits [(page.127) (page.131)] >> -% 2304 0 obj +% 2307 0 obj << /Names [(page.132) 1844 0 R (page.133) 1848 0 R (page.134) 1859 0 R (page.135) 1864 0 R (page.136) 1871 0 R (page.137) 1877 0 R] /Limits [(page.132) (page.137)] >> -% 2305 0 obj +% 2308 0 obj << /Names [(page.138) 1881 0 R (page.139) 1887 0 R (page.14) 1032 0 R (page.140) 1894 0 R (page.141) 1900 0 R (page.142) 1906 0 R] /Limits [(page.138) (page.142)] >> -% 2306 0 obj +% 2309 0 obj << /Names [(page.143) 1911 0 R (page.144) 1918 0 R (page.145) 1925 0 R (page.146) 1934 0 R (page.147) 1941 0 R (page.148) 1948 0 R] /Limits [(page.143) (page.148)] >> -% 2307 0 obj +% 2310 0 obj << /Names [(page.149) 1954 0 R (page.15) 1037 0 R (page.150) 1958 0 R (page.151) 1964 0 R (page.152) 1974 0 R (page.153) 1978 0 R] /Limits [(page.149) (page.153)] >> -% 2308 0 obj +% 2311 0 obj << /Names [(page.154) 1986 0 R (page.155) 1991 0 R (page.156) 1995 0 R (page.157) 2001 0 R (page.158) 2006 0 R (page.159) 2013 0 R] /Limits [(page.154) (page.159)] >> -% 2309 0 obj +% 2312 0 obj << /Names [(page.16) 1044 0 R (page.160) 2024 0 R (page.161) 2029 0 R (page.162) 2039 0 R (page.163) 2054 0 R (page.164) 2064 0 R] /Limits [(page.16) (page.164)] >> -% 2310 0 obj +% 2313 0 obj << /Names [(page.165) 2075 0 R (page.166) 2103 0 R (page.167) 2123 0 R (page.168) 2141 0 R (page.169) 2157 0 R (page.17) 1051 0 R] /Limits [(page.165) (page.17)] >> -% 2311 0 obj +% 2314 0 obj << /Names [(page.170) 2162 0 R (page.171) 2166 0 R (page.172) 2172 0 R (page.173) 2176 0 R (page.174) 2180 0 R (page.175) 2184 0 R] /Limits [(page.170) (page.175)] >> -% 2312 0 obj +% 2315 0 obj << /Names [(page.176) 2193 0 R (page.18) 1056 0 R (page.19) 1061 0 R (page.2) 673 0 R (page.20) 1067 0 R (page.21) 1071 0 R] /Limits [(page.176) (page.21)] >> -% 2313 0 obj +% 2316 0 obj << /Names [(page.22) 1075 0 R (page.23) 1079 0 R (page.24) 1085 0 R (page.25) 1091 0 R (page.26) 1098 0 R (page.27) 1105 0 R] /Limits [(page.22) (page.27)] >> -% 2314 0 obj +% 2317 0 obj << /Names [(page.28) 1111 0 R (page.29) 1115 0 R (page.3) 912 0 R (page.30) 1126 0 R (page.31) 1133 0 R (page.32) 1143 0 R] /Limits [(page.28) (page.32)] >> -% 2315 0 obj +% 2318 0 obj << /Names [(page.33) 1149 0 R (page.34) 1160 0 R (page.35) 1166 0 R (page.36) 1173 0 R (page.37) 1178 0 R (page.38) 1187 0 R] /Limits [(page.33) (page.38)] >> -% 2316 0 obj +% 2319 0 obj << /Names [(page.39) 1195 0 R (page.4) 925 0 R (page.40) 1200 0 R (page.41) 1208 0 R (page.42) 1213 0 R (page.43) 1220 0 R] /Limits [(page.39) (page.43)] >> -% 2317 0 obj +% 2320 0 obj << /Names [(page.44) 1226 0 R (page.45) 1239 0 R (page.46) 1246 0 R (page.47) 1253 0 R (page.48) 1264 0 R (page.49) 1280 0 R] /Limits [(page.44) (page.49)] >> -% 2318 0 obj +% 2321 0 obj << /Names [(page.5) 937 0 R (page.50) 1287 0 R (page.51) 1298 0 R (page.52) 1304 0 R (page.53) 1315 0 R (page.54) 1320 0 R] /Limits [(page.5) (page.54)] >> -% 2319 0 obj +% 2322 0 obj << /Names [(page.55) 1331 0 R (page.56) 1337 0 R (page.57) 1346 0 R (page.58) 1352 0 R (page.59) 1360 0 R (page.6) 942 0 R] /Limits [(page.55) (page.6)] >> -% 2320 0 obj +% 2323 0 obj << /Names [(page.60) 1367 0 R (page.61) 1381 0 R (page.62) 1389 0 R (page.63) 1398 0 R (page.64) 1406 0 R (page.65) 1410 0 R] /Limits [(page.60) (page.65)] >> -% 2321 0 obj +% 2324 0 obj << /Names [(page.66) 1425 0 R (page.67) 1430 0 R (page.68) 1437 0 R (page.69) 1444 0 R (page.7) 955 0 R (page.70) 1448 0 R] /Limits [(page.66) (page.70)] >> -% 2322 0 obj +% 2325 0 obj << /Names [(page.71) 1453 0 R (page.72) 1459 0 R (page.73) 1465 0 R (page.74) 1476 0 R (page.75) 1481 0 R (page.76) 1490 0 R] /Limits [(page.71) (page.76)] >> -% 2323 0 obj +% 2326 0 obj << /Names [(page.77) 1498 0 R (page.78) 1503 0 R (page.79) 1509 0 R (page.8) 977 0 R (page.80) 1514 0 R (page.81) 1522 0 R] /Limits [(page.77) (page.81)] >> -% 2324 0 obj +% 2327 0 obj << /Names [(page.82) 1527 0 R (page.83) 1535 0 R (page.84) 1541 0 R (page.85) 1550 0 R (page.86) 1564 0 R (page.87) 1568 0 R] /Limits [(page.82) (page.87)] >> -% 2325 0 obj +% 2328 0 obj << /Names [(page.88) 1581 0 R (page.89) 1587 0 R (page.9) 988 0 R (page.90) 1594 0 R (page.91) 1598 0 R (page.92) 1606 0 R] /Limits [(page.88) (page.92)] >> -% 2326 0 obj +% 2329 0 obj << /Names [(page.93) 1611 0 R (page.94) 1620 0 R (page.95) 1628 0 R (page.96) 1632 0 R (page.97) 1637 0 R (page.98) 1642 0 R] /Limits [(page.93) (page.98)] >> -% 2327 0 obj +% 2330 0 obj << /Names [(page.99) 1649 0 R (page.i) 719 0 R (page.ii) 776 0 R (page.iii) 823 0 R (page.iv) 863 0 R (precdata) 1106 0 R] /Limits [(page.99) (precdata)] >> -% 2328 0 obj +% 2331 0 obj << /Names [(section*.1) 720 0 R (section*.10) 618 0 R (section*.11) 622 0 R (section*.12) 626 0 R (section*.13) 630 0 R (section*.14) 634 0 R] /Limits [(section*.1) (section*.14)] >> -% 2329 0 obj +% 2332 0 obj << /Names [(section*.15) 638 0 R (section*.16) 642 0 R (section*.17) 646 0 R (section*.18) 650 0 R (section*.19) 654 0 R (section*.2) 2055 0 R] /Limits [(section*.15) (section*.2)] >> -% 2330 0 obj +% 2333 0 obj << /Names [(section*.20) 2185 0 R (section*.3) 2084 0 R (section*.4) 2105 0 R (section*.5) 2125 0 R (section*.6) 601 0 R (section*.7) 606 0 R] /Limits [(section*.20) (section*.7)] >> -% 2331 0 obj +% 2334 0 obj << /Names [(section*.8) 610 0 R (section*.9) 614 0 R (section.1) 7 0 R (section.10) 541 0 R (section.11) 569 0 R (section.12) 577 0 R] /Limits [(section*.8) (section.12)] >> -% 2332 0 obj +% 2335 0 obj << /Names [(section.13) 597 0 R (section.2) 11 0 R (section.3) 35 0 R (section.4) 220 0 R (section.5) 288 0 R (section.6) 308 0 R] /Limits [(section.13) (section.6)] >> -% 2333 0 obj +% 2336 0 obj << /Names [(section.7) 421 0 R (section.8) 493 0 R (section.9) 513 0 R (spbasedata) 1052 0 R (spdata) 1045 0 R (subsection.10.1) 545 0 R] /Limits [(section.7) (subsection.10.1)] >> -% 2334 0 obj +% 2337 0 obj << /Names [(subsection.10.2) 549 0 R (subsection.10.3) 553 0 R (subsection.10.4) 557 0 R (subsection.10.5) 561 0 R (subsection.10.6) 565 0 R (subsection.11.1) 573 0 R] /Limits [(subsection.10.2) (subsection.11.1)] >> -% 2335 0 obj +% 2338 0 obj << /Names [(subsection.12.1) 581 0 R (subsection.12.2) 585 0 R (subsection.12.3) 589 0 R (subsection.12.4) 593 0 R (subsection.2.1) 15 0 R (subsection.2.2) 19 0 R] /Limits [(subsection.12.1) (subsection.2.2)] >> -% 2336 0 obj +% 2339 0 obj << /Names [(subsection.2.3) 23 0 R (subsection.2.4) 31 0 R (subsection.3.1) 39 0 R (subsection.3.2) 103 0 R (subsection.3.3) 183 0 R (subsection.3.4) 212 0 R] /Limits [(subsection.2.3) (subsection.3.4)] >> -% 2337 0 obj +% 2340 0 obj << /Names [(subsection.3.5) 216 0 R (subsection.4.1) 224 0 R (subsection.4.10) 260 0 R (subsection.4.11) 264 0 R (subsection.4.12) 268 0 R (subsection.4.13) 272 0 R] /Limits [(subsection.3.5) (subsection.4.13)] >> -% 2338 0 obj +% 2341 0 obj << /Names [(subsection.4.14) 276 0 R (subsection.4.15) 280 0 R (subsection.4.16) 284 0 R (subsection.4.2) 228 0 R (subsection.4.3) 232 0 R (subsection.4.4) 236 0 R] /Limits [(subsection.4.14) (subsection.4.4)] >> -% 2339 0 obj +% 2342 0 obj << /Names [(subsection.4.5) 240 0 R (subsection.4.6) 244 0 R (subsection.4.7) 248 0 R (subsection.4.8) 252 0 R (subsection.4.9) 256 0 R (subsection.5.1) 292 0 R] /Limits [(subsection.4.5) (subsection.5.1)] >> -% 2340 0 obj +% 2343 0 obj << /Names [(subsection.5.2) 296 0 R (subsection.5.3) 300 0 R (subsection.5.4) 304 0 R (subsection.6.1) 312 0 R (subsection.6.10) 348 0 R (subsection.6.11) 352 0 R] /Limits [(subsection.5.2) (subsection.6.11)] >> -% 2341 0 obj +% 2344 0 obj << /Names [(subsection.6.12) 356 0 R (subsection.6.13) 360 0 R (subsection.6.14) 364 0 R (subsection.6.15) 368 0 R (subsection.6.16) 372 0 R (subsection.6.17) 376 0 R] /Limits [(subsection.6.12) (subsection.6.17)] >> -% 2342 0 obj +% 2345 0 obj << /Names [(subsection.6.18) 380 0 R (subsection.6.19) 384 0 R (subsection.6.2) 316 0 R (subsection.6.20) 388 0 R (subsection.6.21) 392 0 R (subsection.6.22) 396 0 R] /Limits [(subsection.6.18) (subsection.6.22)] >> -% 2343 0 obj +% 2346 0 obj << /Names [(subsection.6.23) 400 0 R (subsection.6.24) 405 0 R (subsection.6.25) 409 0 R (subsection.6.26) 413 0 R (subsection.6.27) 417 0 R (subsection.6.3) 320 0 R] /Limits [(subsection.6.23) (subsection.6.3)] >> -% 2344 0 obj +% 2347 0 obj << /Names [(subsection.6.4) 324 0 R (subsection.6.5) 328 0 R (subsection.6.6) 332 0 R (subsection.6.7) 336 0 R (subsection.6.8) 340 0 R (subsection.6.9) 344 0 R] /Limits [(subsection.6.4) (subsection.6.9)] >> -% 2345 0 obj +% 2348 0 obj << /Names [(subsection.7.1) 425 0 R (subsection.7.10) 461 0 R (subsection.7.11) 465 0 R (subsection.7.12) 469 0 R (subsection.7.13) 473 0 R (subsection.7.14) 477 0 R] /Limits [(subsection.7.1) (subsection.7.14)] >> -% 2346 0 obj +% 2349 0 obj << /Names [(subsection.7.15) 481 0 R (subsection.7.16) 485 0 R (subsection.7.17) 489 0 R (subsection.7.2) 429 0 R (subsection.7.3) 433 0 R (subsection.7.4) 437 0 R] /Limits [(subsection.7.15) (subsection.7.4)] >> -% 2347 0 obj +% 2350 0 obj << /Names [(subsection.7.5) 441 0 R (subsection.7.6) 445 0 R (subsection.7.7) 449 0 R (subsection.7.8) 453 0 R (subsection.7.9) 457 0 R (subsection.8.1) 497 0 R] /Limits [(subsection.7.5) (subsection.8.1)] >> -% 2348 0 obj +% 2351 0 obj << /Names [(subsection.8.2) 501 0 R (subsection.8.3) 505 0 R (subsection.8.4) 509 0 R (subsection.9.1) 517 0 R (subsection.9.2) 521 0 R (subsection.9.3) 525 0 R] /Limits [(subsection.8.2) (subsection.9.3)] >> -% 2349 0 obj +% 2352 0 obj << /Names [(subsection.9.4) 529 0 R (subsection.9.5) 533 0 R (subsection.9.6) 537 0 R (subsubsection.2.3.1) 27 0 R (subsubsection.3.1.1) 43 0 R (subsubsection.3.1.10) 79 0 R] /Limits [(subsection.9.4) (subsubsection.3.1.10)] >> -% 2350 0 obj +% 2353 0 obj << /Names [(subsubsection.3.1.11) 83 0 R (subsubsection.3.1.12) 87 0 R (subsubsection.3.1.13) 91 0 R (subsubsection.3.1.14) 95 0 R (subsubsection.3.1.15) 99 0 R (subsubsection.3.1.2) 47 0 R] /Limits [(subsubsection.3.1.11) (subsubsection.3.1.2)] >> -% 2351 0 obj +% 2354 0 obj << /Names [(subsubsection.3.1.3) 51 0 R (subsubsection.3.1.4) 55 0 R (subsubsection.3.1.5) 59 0 R (subsubsection.3.1.6) 63 0 R (subsubsection.3.1.7) 67 0 R (subsubsection.3.1.8) 71 0 R] /Limits [(subsubsection.3.1.3) (subsubsection.3.1.8)] >> -% 2352 0 obj +% 2355 0 obj << /Names [(subsubsection.3.1.9) 75 0 R (subsubsection.3.2.1) 107 0 R (subsubsection.3.2.10) 143 0 R (subsubsection.3.2.11) 147 0 R (subsubsection.3.2.12) 151 0 R (subsubsection.3.2.13) 155 0 R] /Limits [(subsubsection.3.1.9) (subsubsection.3.2.13)] >> -% 2353 0 obj +% 2356 0 obj << /Names [(subsubsection.3.2.14) 159 0 R (subsubsection.3.2.15) 163 0 R (subsubsection.3.2.16) 167 0 R (subsubsection.3.2.17) 171 0 R (subsubsection.3.2.18) 175 0 R (subsubsection.3.2.19) 179 0 R] /Limits [(subsubsection.3.2.14) (subsubsection.3.2.19)] >> -% 2354 0 obj +% 2357 0 obj << /Names [(subsubsection.3.2.2) 111 0 R (subsubsection.3.2.3) 115 0 R (subsubsection.3.2.4) 119 0 R (subsubsection.3.2.5) 123 0 R (subsubsection.3.2.6) 127 0 R (subsubsection.3.2.7) 131 0 R] /Limits [(subsubsection.3.2.2) (subsubsection.3.2.7)] >> -% 2355 0 obj +% 2358 0 obj << /Names [(subsubsection.3.2.8) 135 0 R (subsubsection.3.2.9) 139 0 R (subsubsection.3.3.1) 187 0 R (subsubsection.3.3.2) 191 0 R (subsubsection.3.3.3) 195 0 R (subsubsection.3.3.4) 199 0 R] /Limits [(subsubsection.3.2.8) (subsubsection.3.3.4)] >> -% 2356 0 obj +% 2359 0 obj << /Names [(subsubsection.3.3.5) 204 0 R (subsubsection.3.3.6) 208 0 R (table.1) 1099 0 R (table.10) 1247 0 R (table.11) 1255 0 R (table.12) 1268 0 R] /Limits [(subsubsection.3.3.5) (table.12)] >> -% 2357 0 obj +% 2360 0 obj << /Names [(table.13) 1288 0 R (table.14) 1316 0 R (table.15) 1332 0 R (table.16) 1347 0 R (table.17) 1361 0 R (table.18) 1390 0 R] /Limits [(table.13) (table.18)] >> -% 2358 0 obj +% 2361 0 obj << /Names [(table.19) 1426 0 R (table.2) 1144 0 R (table.20) 1438 0 R (table.21) 2059 0 R (table.3) 1161 0 R (table.4) 1174 0 R] /Limits [(table.19) (table.4)] >> -% 2359 0 obj +% 2362 0 obj << /Names [(table.5) 1188 0 R (table.6) 1196 0 R (table.7) 1209 0 R (table.8) 1221 0 R (table.9) 1240 0 R (title.0) 3 0 R] /Limits [(table.5) (title.0)] >> -% 2360 0 obj +% 2363 0 obj << /Names [(vbasedata) 1033 0 R (vdata) 1086 0 R] /Limits [(vbasedata) (vdata)] >> -% 2361 0 obj -<< -/Kids [2258 0 R 2259 0 R 2260 0 R 2261 0 R 2262 0 R 2263 0 R] -/Limits [(Doc-Start) (Item.125)] ->> -% 2362 0 obj -<< -/Kids [2264 0 R 2266 0 R 2267 0 R 2268 0 R 2269 0 R 2270 0 R] -/Limits [(Item.126) (Item.26)] ->> -% 2363 0 obj -<< -/Kids [2271 0 R 2272 0 R 2273 0 R 2274 0 R 2275 0 R 2276 0 R] -/Limits [(Item.27) (Item.59)] ->> % 2364 0 obj << -/Kids [2277 0 R 2278 0 R 2279 0 R 2280 0 R 2281 0 R 2282 0 R] -/Limits [(Item.6) (Item.91)] +/Kids [2261 0 R 2262 0 R 2263 0 R 2264 0 R 2266 0 R 2267 0 R] +/Limits [(Doc-Start) (Item.125)] >> endstream endobj -2385 0 obj +2388 0 obj << /Title (Parallel Sparse BLAS V. 3.9.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() -/CreationDate (D:20240711120826+02'00') -/ModDate (D:20240711120826+02'00') +/CreationDate (D:20240711130925+02'00') +/ModDate (D:20240711130925+02'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40) kpathsea version 6.3.5) >> @@ -36962,153 +37030,169 @@ endobj 2366 0 obj << /Type /ObjStm -/N 19 -/First 179 -/Length 2402 +/N 22 +/First 209 +/Length 2765 >> stream -2365 0 2367 116 2368 240 2369 360 2370 473 2371 585 2372 696 2373 807 2374 921 2375 1042 -2376 1168 2377 1294 2378 1424 2379 1545 2380 1666 2381 1788 2382 1891 2383 1975 2384 2010 +2365 0 2367 112 2368 223 2369 333 2370 449 2371 573 2372 693 2373 806 2374 918 2375 1029 +2376 1140 2377 1254 2378 1375 2379 1501 2380 1627 2381 1757 2382 1878 2383 1999 2384 2121 2385 2224 +2386 2308 2387 2343 % 2365 0 obj << -/Kids [2283 0 R 2284 0 R 2285 0 R 2286 0 R 2287 0 R 2288 0 R] -/Limits [(Item.92) (equation.4.1)] +/Kids [2268 0 R 2269 0 R 2270 0 R 2271 0 R 2272 0 R 2273 0 R] +/Limits [(Item.126) (Item.26)] >> % 2367 0 obj << -/Kids [2289 0 R 2290 0 R 2291 0 R 2292 0 R 2293 0 R 2294 0 R] -/Limits [(equation.4.2) (lstnumber.-11.1)] +/Kids [2274 0 R 2275 0 R 2276 0 R 2277 0 R 2278 0 R 2279 0 R] +/Limits [(Item.27) (Item.59)] >> % 2368 0 obj << -/Kids [2295 0 R 2296 0 R 2297 0 R 2298 0 R 2299 0 R 2300 0 R] -/Limits [(lstnumber.-12.1) (page.115)] +/Kids [2280 0 R 2281 0 R 2282 0 R 2283 0 R 2284 0 R 2285 0 R] +/Limits [(Item.6) (Item.91)] >> % 2369 0 obj << -/Kids [2301 0 R 2302 0 R 2303 0 R 2304 0 R 2305 0 R 2306 0 R] -/Limits [(page.116) (page.148)] +/Kids [2286 0 R 2287 0 R 2288 0 R 2289 0 R 2290 0 R 2291 0 R] +/Limits [(Item.92) (equation.4.1)] >> % 2370 0 obj << -/Kids [2307 0 R 2308 0 R 2309 0 R 2310 0 R 2311 0 R 2312 0 R] -/Limits [(page.149) (page.21)] +/Kids [2292 0 R 2293 0 R 2294 0 R 2295 0 R 2296 0 R 2297 0 R] +/Limits [(equation.4.2) (lstnumber.-11.1)] >> % 2371 0 obj << -/Kids [2313 0 R 2314 0 R 2315 0 R 2316 0 R 2317 0 R 2318 0 R] -/Limits [(page.22) (page.54)] +/Kids [2298 0 R 2299 0 R 2300 0 R 2301 0 R 2302 0 R 2303 0 R] +/Limits [(lstnumber.-12.1) (page.115)] >> % 2372 0 obj << -/Kids [2319 0 R 2320 0 R 2321 0 R 2322 0 R 2323 0 R 2324 0 R] -/Limits [(page.55) (page.87)] +/Kids [2304 0 R 2305 0 R 2306 0 R 2307 0 R 2308 0 R 2309 0 R] +/Limits [(page.116) (page.148)] >> % 2373 0 obj << -/Kids [2325 0 R 2326 0 R 2327 0 R 2328 0 R 2329 0 R 2330 0 R] -/Limits [(page.88) (section*.7)] +/Kids [2310 0 R 2311 0 R 2312 0 R 2313 0 R 2314 0 R 2315 0 R] +/Limits [(page.149) (page.21)] >> % 2374 0 obj << -/Kids [2331 0 R 2332 0 R 2333 0 R 2334 0 R 2335 0 R 2336 0 R] -/Limits [(section*.8) (subsection.3.4)] +/Kids [2316 0 R 2317 0 R 2318 0 R 2319 0 R 2320 0 R 2321 0 R] +/Limits [(page.22) (page.54)] >> % 2375 0 obj << -/Kids [2337 0 R 2338 0 R 2339 0 R 2340 0 R 2341 0 R 2342 0 R] -/Limits [(subsection.3.5) (subsection.6.22)] +/Kids [2322 0 R 2323 0 R 2324 0 R 2325 0 R 2326 0 R 2327 0 R] +/Limits [(page.55) (page.87)] >> % 2376 0 obj << -/Kids [2343 0 R 2344 0 R 2345 0 R 2346 0 R 2347 0 R 2348 0 R] -/Limits [(subsection.6.23) (subsection.9.3)] +/Kids [2328 0 R 2329 0 R 2330 0 R 2331 0 R 2332 0 R 2333 0 R] +/Limits [(page.88) (section*.7)] >> % 2377 0 obj << -/Kids [2349 0 R 2350 0 R 2351 0 R 2352 0 R 2353 0 R 2354 0 R] -/Limits [(subsection.9.4) (subsubsection.3.2.7)] +/Kids [2334 0 R 2335 0 R 2336 0 R 2337 0 R 2338 0 R 2339 0 R] +/Limits [(section*.8) (subsection.3.4)] >> % 2378 0 obj << -/Kids [2355 0 R 2356 0 R 2357 0 R 2358 0 R 2359 0 R 2360 0 R] -/Limits [(subsubsection.3.2.8) (vdata)] +/Kids [2340 0 R 2341 0 R 2342 0 R 2343 0 R 2344 0 R 2345 0 R] +/Limits [(subsection.3.5) (subsection.6.22)] >> % 2379 0 obj << -/Kids [2361 0 R 2362 0 R 2363 0 R 2364 0 R 2365 0 R 2367 0 R] -/Limits [(Doc-Start) (lstnumber.-11.1)] +/Kids [2346 0 R 2347 0 R 2348 0 R 2349 0 R 2350 0 R 2351 0 R] +/Limits [(subsection.6.23) (subsection.9.3)] >> % 2380 0 obj << -/Kids [2368 0 R 2369 0 R 2370 0 R 2371 0 R 2372 0 R 2373 0 R] -/Limits [(lstnumber.-12.1) (section*.7)] +/Kids [2352 0 R 2353 0 R 2354 0 R 2355 0 R 2356 0 R 2357 0 R] +/Limits [(subsection.9.4) (subsubsection.3.2.7)] >> % 2381 0 obj << -/Kids [2374 0 R 2375 0 R 2376 0 R 2377 0 R 2378 0 R] -/Limits [(section*.8) (vdata)] +/Kids [2358 0 R 2359 0 R 2360 0 R 2361 0 R 2362 0 R 2363 0 R] +/Limits [(subsubsection.3.2.8) (vdata)] >> % 2382 0 obj << -/Kids [2379 0 R 2380 0 R 2381 0 R] -/Limits [(Doc-Start) (vdata)] +/Kids [2364 0 R 2365 0 R 2367 0 R 2368 0 R 2369 0 R 2370 0 R] +/Limits [(Doc-Start) (lstnumber.-11.1)] >> % 2383 0 obj << -/Dests 2382 0 R +/Kids [2371 0 R 2372 0 R 2373 0 R 2374 0 R 2375 0 R 2376 0 R] +/Limits [(lstnumber.-12.1) (section*.7)] >> % 2384 0 obj << +/Kids [2377 0 R 2378 0 R 2379 0 R 2380 0 R 2381 0 R] +/Limits [(section*.8) (vdata)] +>> +% 2385 0 obj +<< +/Kids [2382 0 R 2383 0 R 2384 0 R] +/Limits [(Doc-Start) (vdata)] +>> +% 2386 0 obj +<< +/Dests 2385 0 R +>> +% 2387 0 obj +<< /Type /Catalog -/Pages 2255 0 R -/Outlines 2256 0 R -/Names 2383 0 R +/Pages 2258 0 R +/Outlines 2259 0 R +/Names 2386 0 R /URI (http://ce.uniroma2.it/psblas) /PageMode/UseOutlines/PageLabels<>2<>6<>]>> /OpenAction 657 0 R >> endstream endobj -2386 0 obj +2389 0 obj << /Type /XRef -/Index [0 2387] -/Size 2387 +/Index [0 2390] +/Size 2390 /W [1 3 1] -/Root 2384 0 R -/Info 2385 0 R -/ID [ ] -/Length 11935 ->> -stream -ÿ]Ñ]c#Ñ\c-Ñ[c5ÑZcAÑY  -cJÑX  ÌÑW Ì ÑVÌÑUÌÑTÌ3ÑSÌ4ÑRÌ5ÑQÌ6ÑPÌ:ÑOÌ;ÑN Ì<ÑM!"Ì@ÑL#$ÌAÑK%&ÌCÑJ'(ÌDÑI)*ÌHÑH+,ÌIÑG-.ÌJÑF/0ÌQÑE12ÌRÑD34ÌYÑC56ÌZÑB78Ì^ÑA9:Ì_Ñ@;<ÌaÑ?=>'Ñ>?@'Ñ=AB'Ñ<CD'Ñ;EF'Ñ:GH' Ñ9IJ' Ñ8KL'Ñ7MN'Ñ6OP'Ñ5QR'Ñ4ST'Ñ3UV'Ñ2WX'Ñ1YZ' Ñ0[\'&Ñ/]^''Ñ._`'(Ñ-ab'.Ñ,cË?%'3Ñ+ËË'4Ñ*ËË'5Ñ)ËË'<Ñ(ËË'@Ñ'Ë Ë -'JÑ&Ë Ë 'ZÑ%Ë ËŠÑ$ËËŠÑ#ËËŠÑ"ËËŠ%Ñ!ËËŠ1Ñ ËËŠ<ÑËËŠMÑËËŠTÑËËŠ[ÑËË õÑË!Ë"õÑË#Ë$õ/ÑË%Ë&õ<ÑË'Ë(õKÑË)Ë*õQÑË+Ë,õXÑË-Ë.a ÑË/Ë0a)ÑË1Ë2a2ÑË3Ë4a=ÑË5Ë6a>ÑË7Ë8aUÑË9Ë:abÑË;Ë<ÔÑ Ë=Ë>Ô -Ñ Ë?Ë@ÔÑ ËAËBÔÑ -ËCËDÔ&Ñ ËEËFÔAÑËGËHÔQÑËIËJÔWÑËKËLÔ^ÑËMËNGÑËOËPGÑËQËRGÑËSËTGÑËUËVG!ÑËWËXG+ÊcËYËZG0ÊbË[Ë\G6ÊaË]Ë^G<Ê`Ë_Ë`GBÊ_ËaËbGHÊ^Ëc”}ƒGOÊ]””GVÊ\””GcÊ[””¬ÊZ””¬ÊY” ” -¬ÊX” ” ¬ÊW” ”¬#ÊV””¬*ÊU””¬.ÊT””¬2ÊS””¬6ÊR””¬:ÊQ””¬>ÊP””¬HÊO””¬RÊN”” ¬\ÊM”!”"ÊL”#”$ ÊK”%”&ÊJ”'”("ÊI”)”*'ÊH”+”,.ÊG”-”.7ÊF”/”0=ÊE”1”2CÊD”3”4IÊC”5”6OÊB”7”8TÊA”9”:[Ê@”;”<bÊ?”=”>‰Ê>”?”@‰ Ê=”A”B‰Ê<”C”D‰Ê;”E”F‰"Ê:”G”H‰+Ê9”I”J‰6Ê8”K”L‰;Ê7”M”N‰?Ê6”O”P‰EÊ5”Q”R‰IÊ4”S”T‰OÊ3”U”VùÊ2”W”XùÊ1”Y”Zù Ê0”[”\ùÊ/”]”^ù]Ê.”_”`ùaÊ-”a”bùbÊ,”c]üöxÊ+]]xÊ*]]xÊ)]]xÊ(]]xÊ'] ] -xÊ&] ] x Ê%] ]x -Ê$]]x Ê#]]xÊ"]]xÊ!]]xÊ ]]xÊ]]]À£]] ½Ž]]xYx[xZx\ÑO]#]!í]"]%]&]'](])]*]+],]-].]/]0]1]2]3]4]5]6]7]9];]<]=]>]?]@]A]B]C]E]G]I]J]K]L]M]N]O]P]Q]R]W]U]$‚w]S]T]8]:]D]F]H]X]Y]Z][]\]]]^]_]`]a]b]c           +/Root 2387 0 R +/Info 2388 0 R +/ID [ ] +/Length 11950 +>> +stream +ÿ]Ô`c#Ô_c-Ô^c5Ô]cAÔ\  +cJÔ[  ÌÔZ Ì ÔYÌÔXÌÔWÌ3ÔVÌ4ÔUÌ5ÔTÌ6ÔSÌ:ÔRÌ;ÔQ Ì<ÔP!"Ì@ÔO#$ÌAÔN%&ÌCÔM'(ÌDÔL)*ÌHÔK+,ÌIÔJ-.ÌJÔI/0ÌQÔH12ÌRÔG34ÌYÔF56ÌZÔE78Ì^ÔD9:Ì_ÔC;<ÌaÔB=>'ÔA?@'Ô@AB'Ô?CD'Ô>EF'Ô=GH' Ô<IJ' Ô;KL'Ô:MN'Ô9OP'Ô8QR'Ô7ST'Ô6UV'Ô5WX'Ô4YZ' Ô3[\'&Ô2]^''Ô1_`'(Ô0ab'.Ô/cË?%'3Ô.ËË'4Ô-ËË'5Ô,ËË'<Ô+ËË'@Ô*Ë Ë +'JÔ)Ë Ë 'ZÔ(Ë ËŠÔ'ËËŠÔ&ËËŠÔ%ËËŠ%Ô$ËËŠ1Ô#ËËŠ<Ô"ËËŠMÔ!ËËŠTÔ ËËŠ[ÔËË õÔË!Ë"õÔË#Ë$õ/ÔË%Ë&õ<ÔË'Ë(õKÔË)Ë*õQÔË+Ë,õXÔË-Ë.a ÔË/Ë0a)ÔË1Ë2a2ÔË3Ë4a=ÔË5Ë6a>ÔË7Ë8aUÔË9Ë:abÔË;Ë<ÔÔË=Ë>Ô +ÔË?Ë@ÔÔËAËBÔÔ ËCËDÔ&Ô ËEËFÔAÔ ËGËHÔQÔ +ËIËJÔWÔ ËKËLÔ^ÔËMËNGÔËOËPGÔËQËRGÔËSËTGÔËUËVG!ÔËWËXG+ÔËYËZG0ÔË[Ë\G6ÔË]Ë^G<ÌcË_Ë`GBÌbËaËbGHÌaËc”}ƒGOÌ`””GVÌ_””GcÌ^””¬Ì]””¬Ì\” ” +¬Ì[” ” ¬ÌZ” ”¬#ÌY””¬*ÌX””¬.ÌW””¬2ÌV””¬6ÌU””¬:ÌT””¬>ÌS””¬HÌR””¬RÌQ”” ¬\ÌP”!”"ÌO”#”$ ÌN”%”&ÌM”'”("ÌL”)”*'ÌK”+”,.ÌJ”-”.7ÌI”/”0=ÌH”1”2CÌG”3”4IÌF”5”6OÌE”7”8TÌD”9”:[ÌC”;”<bÌB”=”>‰ÌA”?”@‰ Ì@”A”B‰Ì?”C”D‰Ì>”E”F‰"Ì=”G”H‰+Ì<”I”J‰6Ì;”K”L‰;Ì:”M”N‰?Ì9”O”P‰EÌ8”Q”R‰IÌ7”S”T‰OÌ6”U”VùÌ5”W”XùÌ4”Y”Zù Ì3”[”\ùÌ2”]”^ù]Ì1”_”`ùaÌ0”a”bùbÌ/”c]üöxÌ.]]xÌ-]]xÌ,]]xÌ+]]xÌ*] ] +xÌ)] ] x Ì(] ]x +Ì']]x Ì&]]xÌ%]]xÌ$]]xÌ#]]xÌ"]]]À£]] ½Ž]]x\x^x]x_ÑO]#]!í]"]%]&]'](])]*]+],]-].]/]0]1]2]3]4]5]6]7]9];]<]=]>]?]@]A]B]C]E]G]I]J]K]L]M]N]O]P]Q]R]W]U]$‚w]S]T]8]:]D]F]H]X]Y]Z][]\]]]^]_]`]a]b]c                                  ! % #]V¾¢ "°Â & ' ( ) * + , - . / 0 1 2 3 4 5 6 8 9 : ; < = > ? @ A B C D E F G H I J K M N P R W U $5¡ T 7 L O Q S Y [ ] ^ _ ` a b ccccccccccc c -c c c ccccccccc V}c X Z \ cccccccc c!c"c$cñx]x,x#x0x.x+x x!x*xxc*c+c,%Žc1c/c%½c.c&c'c(c)*Dxc2c3c8c05çc4xTxSxRc6c7x)c=c>iycBc9V_c?c@c:c;c<xxcDcEcGcCp#cFcUcScHƒžcIcKcLcMcNcOcPcQcRÌÌcT¤”cVcWcXcYcZc[c\c]c^c_c`cacbccÌxUx^ ÌÌÌò²ÌÌÌÌ Ì -Ì Ì ÌÌÌóÌÌÌÌÌÌ Ì/*ÌÌÌÌÌÌÌ"Ì#Ì.Ì!GÌ$Ì%Ì&Ì'Ì(Ì)Ì*Ì+Ì,xVÌ-Ì0Ì1Ì7Ì/bÞÌ2Ì=Ì8vxÌ9x_ÌEÌ>‡ÄÌ?ÌBÌKÌFš“ÌGÌMÌNÌOÌTÌL°ØÌPÌSx%ÌWÌ[ÌUÏÌVÌXÌbÌ\ì'Ì]Ì`'Ìcûx'x`bž' '1¢''' -HÚ' ''['''mo''''"'†`''!'%')'#¤`'$xa'+','/'*¸e'-'K'1'9'7'0ǽ'2'6';'='8Ùj':'A'>ê1'?'C'D'E'F'G'H'N'L'Bë 'IxX'O'P'R'Mr'Qxb'T'U'V'W'X'\'S'Y'['^'a']0/'_'`'cŠŠŠŠŠŠ'bAŠŠª{Š Š ŒfŠ -Š ŠŠŠŠ ŽéŠŠŠŠ­‡ŠŠxcŠŠŠŠŠ¹§ŠŠŠ!Š"Š#Š'Š ÓŠ$Š&Š+Š(ð-Š)Š*Š-Š.Š/Š3Š,üPŠ0Š2Š6Š4ÛŠ5Š8Š9Š:Š>Š7_Š;Š=ÊŠGŠ?6»Š@ŠAŠBŠCŠDŠEŠFŠIŠJŠKŠOŠHGÚŠLŠNŠQŠRŠVŠP`{ŠSŠUŠXŠYŠ]ŠWuÁŠZxWŠ\Š_Š`ŠaŠbõõŠ^‹Šcõõõõ7¿õõ õ -õ õ õ õõõ×ÅõÊõõõõñ†õõõõõõõõõ õ_õõ"õ#õ%õ!.õ$õ'õ(õ)õ*õ+õ,õ-õ1õ&@õ.õ0õ4õ2]Ðõ3õ6õ7õ8õ9õ:õ@õAõ>õ5_Gõ;õ=ÊõCõ?|âõBõEõFõGõHõIõMõD‚'õJõLõOõRõNŸnõPõTõUõVõbõZõS¢õWõYÈ]õcaõ[¼Aaaõ\õ]õ^õ_õ`õaÖ¸ -'"Óaal`aÊaaa a ax…a -a a¼kaaaa—Maaaaa"aa¬Ñaa#aºía!aaaaaa Ë þxa%a&a'a+a$,a(a*a.a,Mza-Êa0a6a4a/S@a1a3a7a8a:a5pa9a?a;€Na<aAaDa@™:aBaCaFaIaE²8aGxQaHaQaJÙvaKaLaMaNaOaPÊaSaVaRã®aTaXa]aWÿIaYaZa[a\a_a`Ôa^ Eaaac=ŠÔÔÔÔOŒÔÔÔ Ô\)Ô Ô ÔÔÔ dÝÔÊÔÔ{hÔÔÔÔÔÔÔqÔÔÔ#Ô—ñÔÔ Ô!Ô"Ô)Ô'Ô$œ£Ô%Ô*Ô+Ô2Ô(±ßÔ,Ô-Ô.Ô/Ô0Ô1Ô;Ô3ÌéÔ4Ô5Ô6Ô7Ô8Ô9Ô:ÊÔ=Ô>Ô?ÔBÔ<ÞÝÔ@ÔLÔCùŸÔDÔEÔFÔGÔHÔIÔJÔKÔNÔOÔRÔM&ÔPÔTÔUÔYÔSÔVÔXÔ[Ô\Ô_ÔZ!]Ô]ÔcÔ`9°ÔaÔbÊGGGGf(GsG -G%GGG G G GGG }GGGGGGG™¢GGG¦wGGG"G³FG Ê G'G#ËáG$G%G&G)G,G(ξG*G.G2G-äMG/G1G4G8G3ðëG5G7G:G>G9ãG;G=G@GDG?kGAGCÊ -GFGKGE#YGGGIGJGMGRGL1ÕGNGPGQGTGWGS?”GUG]GXU9GYGZG[G\G_G`Ga¬G^c?Gb ÙÚ¬¬œ’¬Ê ¬¬²É¬¬¬¬ ¬ -¬ ¬ ¬ ¬¬¬¬ÏŠ¬¬¬Ðr¬¬¬¬ ¬æ5¬¬¬¬'¬!÷Û¬"¬$¬%¬&¬+¬( l¬)Ê ¬/¬, U¬-¬3¬0 ›¬1¬7¬4 $[¬5¬;¬8 *¬9¬?¬< /D¬=¬E¬@ E¬A¬B¬C¬DxPÊ ¬I¬F Z%¬G¬O¬J q'¬K¬L¬M¬N¬S¬P „ȬQ¬Y¬T š±¬U¬V¬W¬X¬]¬Z ¯Y¬[¬c¬^ ÆK¬_¬`¬a¬bÊ þ0"F­ !]  -!) !?3!Sþ!jËxOÊ$ !ƒt!#)%!˜™&(+,/*!­Â-2340!Æ`1:5!îÛ689@;!ý -<>?ÊFA"{BDELG"®HJKPM"ËNRWQ"SUVY^X"% Z\]`‰_"8šac‰Ê#’Ú‰‰‰"kƉ‰‰‰ -‰‰ "|>‰ ‰ ‰‰‰‰"˜º‰‰‰‰‰"´˜‰‰‰"·d‰‰‰ ‰$‰"»ö‰!‰#ʉ&‰'‰(‰)‰,‰%"Ïˉ*‰/‰-"휉.‰1‰2‰3‰4‰7‰0"ñÖ‰5‰9‰<‰8#<‰:‰@‰=#´‰>‰B‰C‰F‰A#¾‰DʉJ‰G#$Š‰H‰L‰M‰V‰T‰K#&ë‰N‰P‰Q‰R‰S‰W‰X‰Y‰[‰U#G^‰Z‰]‰_‰\#c)‰^‰a‰cùùùùù‰`#tïù‰b%•Žx1x2ù ù -#ôù $!0ù#ùùù#¿Ìù ùùÊx-ùù%ùù#âùùùùùù$7ù.ù0ù$´ù$ù&ù'ù(ù)ù*ù+ù,ù-ù/ùù$+ùù ù!$,‡ù"$-}$.‡$Ma$u£ù8ù9ù:ù>ù1$V“ù;ù<ù=ù2ù3ù4ù5ù6$‚'ù7$ƒH$„>$‘úùHùJ$ÐUùFùLù?$°ÐùGùIùKù@ùAùBùCùD$Û^ùE$ÜZ$Ýj$÷%&žùTùWùM%:ùUùVùNùO%3ÙùPùQùR%5JùS%6F%7f%TqùZùX%a²ùYÊù^ù[%diù\xù_%€ ù`ùc(Þx x%È}xxx %ÝÈxxx%è¦xx&x%é4xxxx"x$Êx3x'&Bx(x/x4x5x6x7x8x9x:x;x<x=xNx>x?x@& xA&0êxB&TÚxC&rçxD&’xE&º xF' ¢xG'×xH'\}xI'lÚxJ'|–xK'ÛLxL(AcxM(‘ (•9(›Ÿ(¡Û(¨Ô(±(·W(½–(ÃÕ(Çg(Ëý(Ñÿ(ØÊÊ) ›ÊÊÊÊÊÊ)JdÑ^Ñ_Ñ`ÑaÑbÑcÙ)r}ÙÙÙÙÙÙÙÙÙ Ù -Ù Ù Ù ÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙ Ù!Ù"Ù#Ù$Ù%Ù&Ù'Ù(Ù)Ù*Ù+Ù,Ù-Ù.Ù/Ù0Ù1Ù2Ù3Ù4Ù5Ù6Ù7Ù8Ù9Ù:Ù;Ù<Ù=Ù>Ù?Ù@ÙAÙBÙCÙDÙEÙFÙGÙHÙIÙJÙKÙLÙMÙNÙOÙPÙQÙRÙSÙTÙUÙVÙWÙXÙYÙZÙ[Ù\Ù]Ù^Ù_Ù`ÙaÙbÙc >)à > > > > > > > > >  > - >  >  >  > > > > >)Ár)Í[ +c c c ccccccccc V}c X Z \ cccccccc c!c"c$cñx`x,x#x0x.x+x x!x*xxc*c+c,%Žc1c/c%½c.c&c'c(c)*Dxc2c3c8c05çc4xWxVxUc6c7x)c=c>iycBc9V_c?c@c:c;c<xxcDcEcGcCp#cFcUcScHƒžcIcKcLcMcNcOcPcQcRÌÌcT¤”cVcWcXcYcZc[c\c]c^c_c`cacbccÌxXxa ÌÌÌò²ÌÌÌÌ Ì +Ì Ì ÌÌÌóÌÌÌÌÌÌ Ì/*ÌÌÌÌÌÌÌ"Ì#Ì.Ì!GÌ$Ì%Ì&Ì'Ì(Ì)Ì*Ì+Ì,xYÌ-Ì0Ì1Ì7Ì/bÞÌ2Ì=Ì8vxÌ9xbÌEÌ>‡ÄÌ?ÌBÌKÌFš“ÌGÌMÌNÌOÌTÌL°ØÌPÌSx%ÌWÌ[ÌUÏÌVÌXÌbÌ\ì'Ì]Ì`'Ìcûx'xcbž' '1¢''' +HÚ' ''['''mo''''"'†`''!'%')'#¤`'$Ì'+','/'*¸e'-'K'1'9'7'0ǽ'2'6';'='8Ùj':'A'>ê1'?'C'D'E'F'G'H'N'L'Bë 'Ix['O'P'R'Mr'QÌ'T'U'V'W'X'\'S'Y'['^'a']0/'_'`'cŠŠŠŠŠŠ'bAŠŠª{Š Š ŒfŠ +Š ŠŠŠŠ ŽéŠŠŠŠ­‡ŠŠÌŠŠŠŠŠ¹§ŠŠŠ!Š"Š#Š'Š ÓŠ$Š&Š+Š(ð-Š)Š*Š-Š.Š/Š3Š,üPŠ0Š2Š6Š4ÛŠ5Š8Š9Š:Š>Š7_Š;Š=ÌŠGŠ?6»Š@ŠAŠBŠCŠDŠEŠFŠIŠJŠKŠOŠHGÚŠLŠNŠQŠRŠVŠP`{ŠSŠUŠXŠYŠ]ŠWuÁŠZxZŠ\Š_Š`ŠaŠbõõŠ^‹Šcõõõõ7¿õõ õ +õ õ õ õõõ×ÅõÌõõõõñ†õõõõõõõõõ õ_õõ"õ#õ%õ!.õ$õ'õ(õ)õ*õ+õ,õ-õ1õ&@õ.õ0õ4õ2]Ðõ3õ6õ7õ8õ9õ:õ@õAõ>õ5_Gõ;õ=ÌõCõ?|âõBõEõFõGõHõIõMõD‚'õJõLõOõRõNŸnõPõTõUõVõbõZõS¢õWõYÈ]õcaõ[¼Aaaõ\õ]õ^õ_õ`õaÖ¸ +'"Óaal`aÌaaa a ax…a +a a¼kaaaa—Maaaaa"aa¬Ñaa#aºía!aaaaaa Ë þxa%a&a'a+a$,a(a*a.a,Mza-Ìa0a6a4a/S@a1a3a7a8a:a5pa9a?a;€Na<aAaDa@™:aBaCaFaIaE²8aGxTaHaQaJÙvaKaLaMaNaOaPÌaSaVaRã®aTaXa]aWÿIaYaZa[a\a_a`Ôa^ Eaaac=ŠÔÔÔÔOŒÔÔÔ Ô\)Ô Ô ÔÔÔ dÝÔÌ ÔÔ{hÔÔÔÔÔÔÔqÔÔÔ#Ô—ñÔÔ Ô!Ô"Ô)Ô'Ô$œ£Ô%Ô*Ô+Ô2Ô(±ßÔ,Ô-Ô.Ô/Ô0Ô1Ô;Ô3ÌéÔ4Ô5Ô6Ô7Ô8Ô9Ô:Ì +Ô=Ô>Ô?ÔBÔ<ÞÝÔ@ÔLÔCùŸÔDÔEÔFÔGÔHÔIÔJÔKÔNÔOÔRÔM&ÔPÔTÔUÔYÔSÔVÔXÔ[Ô\Ô_ÔZ!]Ô]ÔcÔ`9°ÔaÔbÌ GGGGf(GsG +G%GGG G G GGG }GGGGGGG™¢GGG¦wGGG"G³FG Ì G'G#ËáG$G%G&G)G,G(ξG*G.G2G-äMG/G1G4G8G3ðëG5G7G:G>G9ãG;G=G@GDG?kGAGCÌ GFGKGE#YGGGIGJGMGRGL1ÕGNGPGQGTGWGS?”GUG]GXU9GYGZG[G\G_G`Ga¬G^c?Gb ÙÚ¬¬œ’¬Ì¬¬²É¬¬¬¬ ¬ +¬ ¬ ¬ ¬¬¬¬ÏŠ¬¬¬Ðr¬¬¬¬ ¬æ5¬¬¬¬'¬!÷Û¬"¬$¬%¬&¬+¬( l¬)̬/¬, U¬-¬3¬0 ›¬1¬7¬4 $[¬5¬;¬8 *¬9¬?¬< /D¬=¬E¬@ E¬A¬B¬C¬DxS̬I¬F Z%¬G¬O¬J q'¬K¬L¬M¬N¬S¬P „ȬQ¬Y¬T š±¬U¬V¬W¬X¬]¬Z ¯Y¬[¬c¬^ ÆK¬_¬`¬a¬bÌ þ0"F­ !]  +!) !?3!Sþ!jËxRÌ$ !ƒt!#)%!˜™&(+,/*!­Â-2340!Æ`1:5!îÛ689@;!ý +<>?ÌFA"{BDELG"®HJKPM"ËNRWQ"SUVY^X"% Z\]`‰_"8šac‰Ì#’Ú‰‰‰"kƉ‰‰‰ +‰‰ "|>‰ ‰ ‰‰‰‰"˜º‰‰‰‰‰"´˜‰‰‰"·d‰‰‰ ‰$‰"»ö‰!‰#̉&‰'‰(‰)‰,‰%"Ïˉ*‰/‰-"휉.‰1‰2‰3‰4‰7‰0"ñÖ‰5‰9‰<‰8#<‰:‰@‰=#´‰>‰B‰C‰F‰A#¾‰D̉J‰G#$Š‰H‰L‰M‰V‰T‰K#&ë‰N‰P‰Q‰R‰S‰W‰X‰Y‰[‰U#G^‰Z‰]‰_‰\#c)‰^‰a‰cùùùùù‰`#tïù‰b%•lx1x2ù ù +#óûù $!ù#ùùù#¿Ìù ùùÌx-ùù%ùù#áàùùùùùù$ù.ù0ù$’ù$ù&ù'ù(ù)ù*ù+ù,ù-ù/ùù$+]ùù ù!$,eù"$-[$.e$M?$uù8ù9ù:ù>ù1$Vqù;ù<ù=ù2ù3ù4ù5ù6$‚ù7$ƒ&$„$‘ØùHùJ$Ð3ùFùLù?$°®ùGùIùKù@ùAùBùCùD$Û<ùE$Ü8$ÝH$öå%&|ùTùWùM%ùUùVùNùO%3·ùPùQùR%5(ùS%6$%7D%TOùZùX%aùYÌù^ù[%dGù\xù_%êù`ùc)É,x x%È[xxx %ݦxxx%è„xx&x%éxxxx"x$Ìx3x'& x(x/x4xOx5x6x7x8xQx9x:xPx;x<x=xNx>x?x@&éxA&0ÈxB&NÕxC&vÍxD&‡*xE&–æxF&êÅxG'kVxH'¼+xI((¼xJ(pxK(ÎÐxL)4çxM)„‘)ˆ½){)“·)˜ÿ)¡>)¥Õ)ªg)®ù)²‹)·!)½#)Ã%Ì*ÌÌÌÌÌÌ Ì!*8=ÔaÔbÔcÙ*_EÙÙÙÙÙÙÙÙÙ Ù +Ù Ù Ù ÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙ Ù!Ù"Ù#Ù$Ù%Ù&Ù'Ù(Ù)Ù*Ù+Ù,Ù-Ù.Ù/Ù0Ù1Ù2Ù3Ù4Ù5Ù6Ù7Ù8Ù9Ù:Ù;Ù<Ù=Ù>Ù?Ù@ÙAÙBÙCÙDÙEÙFÙGÙHÙIÙJÙKÙLÙMÙNÙOÙPÙQÙRÙSÙTÙUÙVÙWÙXÙYÙZÙ[Ù\Ù]Ù^Ù_Ù`ÙaÙbÙc >*±. > > > > > > > > >  > + >  >  >  > > > > > > > >*¯*¼W endstream endobj startxref -2739547 +2800727 %%EOF diff --git a/docs/src/cuda.tex b/docs/src/cuda.tex index c6ee7dfa..0ac435ef 100644 --- a/docs/src/cuda.tex +++ b/docs/src/cuda.tex @@ -31,9 +31,19 @@ routine in the library is built according to the following principles: In this way, data items are put on the {GPU} memory ``on demand'' and remain there as long as ``normal'' computations are carried out. As an example, the following call to a matrix-vector product +\ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} call psb_spmm(alpha,a,x,beta,y,desc_a,info) \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + call psb_spmm(alpha,a,x,beta,y,desc_a,info) +\end{verbatim} + \end{minipage} + \end{center} +\fi will transparently and automatically be performed on the {GPU} whenever all three data inputs \fortinline|a|, \fortinline|x| and \fortinline|y| are {GPU}-enabled. If a program makes many such calls @@ -81,9 +91,19 @@ and \fortinline|beta|, and the return code \fortinline|info|. environment} \addcontentsline{toc}{subsection}{psb\_cuda\_init} +\ifpdf \begin{minted}[breaklines=true]{fortran} call psb_cuda_init(ctxt [, device]) \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_init(ctxt [, device]) +\end{verbatim} + \end{minipage} + \end{center} +\fi This subroutine initializes the PSBLAS-CUDA environment. \begin{description} @@ -109,9 +129,19 @@ available on the current node. environment} \addcontentsline{toc}{subsection}{psb\_cuda\_exit} +\ifpdf \begin{minted}[breaklines=true]{fortran} call psb_cuda_exit(ctxt) \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_exit(ctxt) +\end{verbatim} + \end{minipage} + \end{center} +\fi This subroutine exits from the PSBLAS CUDA context. \begin{description} @@ -131,9 +161,19 @@ Specified as: an integer variable. \subsection*{psb\_cuda\_DeviceSync --- Synchronize CUDA device} \addcontentsline{toc}{subsection}{psb\_cuda\_DeviceSync} +\ifpdf \begin{minted}[breaklines=true]{fortran} call psb_cuda_DeviceSync() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_DeviceSync() +\end{verbatim} + \end{minipage} + \end{center} +\fi This subroutine ensures that all previosly invoked kernels, i.e. all invocation of CUDA-side code, have completed. @@ -142,45 +182,96 @@ invocation of CUDA-side code, have completed. \subsection*{psb\_cuda\_getDeviceCount } \addcontentsline{toc}{subsection}{psb\_cuda\_getDeviceCount} +\ifpdf \begin{minted}[breaklines=true]{fortran} ngpus = psb_cuda_getDeviceCount() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +ngpus = psb_cuda_getDeviceCount() +\end{verbatim} + \end{minipage} + \end{center} +\fi Get number of devices available on current computing node. \subsection*{psb\_cuda\_getDevice } \addcontentsline{toc}{subsection}{psb\_cuda\_getDevice} +\ifpdf \begin{minted}[breaklines=true]{fortran} ngpus = psb_cuda_getDevice() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +ngpus = psb_cuda_getDevice() +\end{verbatim} + \end{minipage} + \end{center} +\fi Get device in use by current process. \subsection*{psb\_cuda\_setDevice } \addcontentsline{toc}{subsection}{psb\_cuda\_setDevice} +\ifpdf \begin{minted}[breaklines=true]{fortran} info = psb_cuda_setDevice(dev) \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +info = psb_cuda_setDevice(dev) +\end{verbatim} + \end{minipage} + \end{center} +\fi Set device to be used by current process. \subsection*{psb\_cuda\_DeviceHasUVA } \addcontentsline{toc}{subsection}{psb\_cuda\_DeviceHasUVA} +\ifpdf \begin{minted}[breaklines=true]{fortran} hasUva = psb_cuda_DeviceHasUVA() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +hasUva = psb_cuda_DeviceHasUVA() +\end{verbatim} + \end{minipage} + \end{center} +\fi -Returns true if device currently in use supports UVA (Unified Virtual Addressing). +Returns true if device currently in use supports UVA +(Unified Virtual Addressing). \subsection*{psb\_cuda\_WarpSize } \addcontentsline{toc}{subsection}{psb\_cuda\_WarpSize} +\ifpdf \begin{minted}[breaklines=true]{fortran} nw = psb_cuda_WarpSize() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nw = psb_cuda_WarpSize() +\end{verbatim} + \end{minipage} + \end{center} +\fi Returns the warp size. @@ -188,18 +279,38 @@ Returns the warp size. \subsection*{psb\_cuda\_MultiProcessors } \addcontentsline{toc}{subsection}{psb\_cuda\_MultiProcessors} +\ifpdf \begin{minted}[breaklines=true]{fortran} nmp = psb_cuda_MultiProcessors() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nmp = psb_cuda_MultiProcessors() +\end{verbatim} + \end{minipage} + \end{center} +\fi Returns the number of multiprocessors in the CUDA device. \subsection*{psb\_cuda\_MaxThreadsPerMP } \addcontentsline{toc}{subsection}{psb\_cuda\_MaxThreadsPerMP} +\ifpdf \begin{minted}[breaklines=true]{fortran} nt = psb_cuda_MaxThreadsPerMP() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nt = psb_cuda_MaxThreadsPerMP() +\end{verbatim} + \end{minipage} + \end{center} +\fi Returns the maximum number of threads per multiprocessor. @@ -207,9 +318,19 @@ Returns the maximum number of threads per multiprocessor. \subsection*{psb\_cuda\_MaxRegistersPerBlock } \addcontentsline{toc}{subsection}{psb\_cuda\_MaxRegisterPerBlock} +\ifpdf \begin{minted}[breaklines=true]{fortran} nr = psb_cuda_MaxRegistersPerBlock() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nr = psb_cuda_MaxRegistersPerBlock() +\end{verbatim} + \end{minipage} + \end{center} +\fi Returns the maximum number of register per thread block. @@ -217,27 +338,57 @@ Returns the maximum number of register per thread block. \subsection*{psb\_cuda\_MemoryClockRate } \addcontentsline{toc}{subsection}{psb\_cuda\_MemoryClockRate} +\ifpdf \begin{minted}[breaklines=true]{fortran} cl = psb_cuda_MemoryClockRate() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +cl = psb_cuda_MemoryClockRate() +\end{verbatim} + \end{minipage} + \end{center} +\fi Returns the memory clock rate in KHz, as an integer. \subsection*{psb\_cuda\_MemoryBusWidth } \addcontentsline{toc}{subsection}{psb\_cuda\_MemoryBusWidth} +\ifpdf \begin{minted}[breaklines=true]{fortran} nb = psb_cuda_MemoryBusWidth() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nb = psb_cuda_MemoryBusWidth() +\end{verbatim} + \end{minipage} + \end{center} +\fi Returns the memory bus width in bits. \subsection*{psb\_cuda\_MemoryPeakBandwidth } \addcontentsline{toc}{subsection}{psb\_cuda\_MemoryPeakBandwidth} +\ifpdf \begin{minted}[breaklines=true]{fortran} bw = psb_cuda_MemoryPeakBandwidth() \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +bw = psb_cuda_MemoryPeakBandwidth() +\end{verbatim} + \end{minipage} + \end{center} +\fi Returns the peak memory bandwidth in MB/s (real double precision). diff --git a/docs/src/ext-intro.tex b/docs/src/ext-intro.tex index ef9b882d..44e9915c 100644 --- a/docs/src/ext-intro.tex +++ b/docs/src/ext-intro.tex @@ -32,6 +32,7 @@ following steps: Suppose you want to use the CUDA-enabled ELLPACK data structure; you would use a piece of code like this (and don't forget, you need CUDA-side vectors along with the matrices): +\ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} program my_cuda_test use psb_base_mod @@ -80,6 +81,61 @@ program my_cuda_test stop end program my_cuda_test \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +program my_cuda_test + use psb_base_mod + use psb_util_mod + use psb_ext_mod + use psb_cuda_mod + type(psb_dspmat_type) :: a, agpu + type(psb_d_vect_type) :: x, xg, bg + + real(psb_dpk_), allocatable :: xtmp(:) + type(psb_d_vect_cuda) :: vmold + type(psb_d_elg_sparse_mat) :: aelg + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + call psb_cuda_init(ctxt, iam) + + + ! My own home-grown matrix generator + call gen_matrix(ctxt,idim,desc_a,a,x,info) + if (info /= 0) goto 9999 + + call a%cscnv(agpu,info,mold=aelg) + if (info /= 0) goto 9999 + xtmp = x%get_vect() + call xg%bld(xtmp,mold=vmold) + call bg%bld(size(xtmp),mold=vmold) + + ! Do sparse MV + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + + +9999 continue + if (info == 0) then + write(*,*) '42' + else + write(*,*) 'Something went wrong ',info + end if + + + call psb_cuda_exit() + call psb_exit(ctxt) + stop +end program my_cuda_test +\end{verbatim} + \end{minipage} + \end{center} +\fi + A full example of this strategy can be seen in the \texttt{test/ext/kernel} and \texttt{test/\-cuda/\-kernel} subdirectories, where we provide sample programs @@ -132,7 +188,11 @@ OFFSET & Offset for diagonals \\ \begin{figure}[ht] \centering % \includegraphics[width=5.2cm]{figures/mat.eps} - \includegraphics[width=5.2cm]{figures/mat.pdf} +\ifcase\pdfoutput + \includegraphics[width=5.2cm]{mat.png} +\or + \includegraphics[width=5.2cm]{figures/mat.pdf} +\fi \caption{Example of sparse matrix} \label{fig:dense} \end{figure} @@ -155,7 +215,11 @@ indices, e.g. the last valid one found in the same row. \begin{figure}[ht] \centering % \includegraphics[width=8.2cm]{figures/ell.eps} - \includegraphics[width=8.2cm]{figures/ell.pdf} +\ifcase\pdfoutput + \includegraphics[width=8.2cm]{ell.png} +\or + \includegraphics[width=8.2cm]{figures/ell.pdf} +\fi \caption{ELLPACK compression of matrix in Figure~\ref{fig:dense}} \label{fig:ell} \end{figure} @@ -199,6 +263,7 @@ computing days; in modern CPUs it is not quite as popular, but it is the basis for many GPU formats. The relevant data type is \verb|psb_T_ell_sparse_mat|: +\ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat ! @@ -212,6 +277,25 @@ The relevant data type is \verb|psb_T_ell_sparse_mat|: .... end type psb_d_ell_sparse_mat \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! + + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + .... + end type psb_d_ell_sparse_mat +\end{verbatim} + \end{minipage} + \end{center} +\fi \subsubsection*{Hacked ELLPACK} @@ -251,13 +335,18 @@ submatrix inside the stacked \textit{cM}/\textit{rP} buffers, plus an additional element pointing past the end of the last block, where the next one would begin. We thus have the property that -the elements of the $k$-th \textit{hack} are stored between \verb|hackOffsets[k]| and +the elements of the $k$-th \textit{hack} are stored between +\verb|hackOffsets[k]| and \verb|hackOffsets[k+1]|, similarly to what happens in the CSR format. \begin{figure}[ht] \centering % \includegraphics[width=8.2cm]{../figures/hll.eps} - \includegraphics[width=.72\textwidth]{../figures/hll.pdf} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{hll.png} +\or + \includegraphics[width=.72\textwidth]{../figures/hll.pdf} +\fi \caption{Hacked ELLPACK compression of matrix in Figure~\ref{fig:dense}} \label{fig:hll} \end{figure} @@ -267,6 +356,7 @@ therefore the additional memory is limited to the hack in which the row appears. The relevant data type is \verb|psb_T_hll_sparse_mat|: +\ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat ! @@ -280,6 +370,25 @@ The relevant data type is \verb|psb_T_hll_sparse_mat|: .... end type \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! + integer(psb_ipk_) :: hksz + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + .... + end type +\end{verbatim} + \end{minipage} + \end{center} +\fi \subsubsection*{Diagonal storage} @@ -300,13 +409,18 @@ addressing is required. \begin{figure}[ht] \centering % \includegraphics[width=8.2cm]{figures/dia.eps} - \includegraphics[width=.72\textwidth]{figures/dia.pdf} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{dia.png} +\or + \includegraphics[width=.72\textwidth]{figures/dia.pdf} +\fi \caption{DIA compression of matrix in Figure~\ref{fig:dense}} \label{fig:dia} \end{figure} \begin{algorithm} +\ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} do j=1,ndiag if (offset(j) > 0) then @@ -319,11 +433,30 @@ addressing is required. end do end do \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + do j=1,ndiag + if (offset(j) > 0) then + ir1 = 1; ir2 = m - offset(j); + else + ir1 = 1 - offset(j); ir2 = m; + end if + do i=ir1,ir2 + y(i) = y(i) + alpha*as(i,j)*x(i+offset(j)) + end do + end do +\end{verbatim} + \end{minipage} + \end{center} +\fi \caption{\label{alg:dia} Matrix-Vector product in DIA format} \end{algorithm} The relevant data type is \verb|psb_T_dia_sparse_mat|: +\ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat ! @@ -336,6 +469,24 @@ The relevant data type is \verb|psb_T_dia_sparse_mat|: end type \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + end type +\end{verbatim} + \end{minipage} + \end{center} +\fi @@ -378,12 +529,17 @@ the number of diagonals of the $k$-th \textit{hack} is given by \begin{figure}[ht] \centering % \includegraphics[width=8.2cm]{../figures/hdia.eps} - \includegraphics[width=.72\textwidth]{../figures/hdia.pdf} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{hdia.png} +\or + \includegraphics[width=.72\textwidth]{../figures/hdia.pdf} +\fi \caption{Hacked DIA compression of matrix in Figure~\ref{fig:dense}} \label{fig:hdia} \end{figure} The relevant data type is \verb|psb_T_hdia_sparse_mat|: +\ifpdf \begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} type pm real(psb_dpk_), allocatable :: data(:,:) @@ -408,5 +564,35 @@ The relevant data type is \verb|psb_T_hdia_sparse_mat|: .... end type \end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type pm + real(psb_dpk_), allocatable :: data(:,:) + end type pm + + type po + integer(psb_ipk_), allocatable :: off(:) + end type po + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format, extended. + ! + + type(pm), allocatable :: hdia(:) + type(po), allocatable :: offset(:) + integer(psb_ipk_) :: nblocks, nzeros + integer(psb_ipk_) :: hack = 64 + integer(psb_long_int_k_) :: dim=0 + + contains + .... + end type +\end{verbatim} + \end{minipage} + \end{center} +\fi diff --git a/docs/src/figures/dia.png b/docs/src/figures/dia.png new file mode 100644 index 0000000000000000000000000000000000000000..de7db9197139e8b592815942792d983f23f3f93e GIT binary patch literal 50686 zcmc%wWmr`27e0y)As`Y0(v2`mcejYbNW;)wDlqgAl7fmf(t?B{HFOP~3L_x{DBU@9 zcb<)(@A?1Vob%%M>TIsLnAp$W&$HrQYu)SF;aVEci3#ZlK_C#Zs>)Lx5Qu;o1j2D4 zzyrR?%AaEa{$Y9OJb!{U-pvyPe897Q@%$<1=JqeUIX?mT=B}%Xkp~FG_we?G<&!J# z1$@cksch(}>tgTeZRKtUdak9$^8A&Hho^^)`)gNE7C`|%0UjgALSD)hQ5Hy_9BVG@VXoVhNaWXjKdA)HqaFuM=RWG8o%4fb}Z z7&D(?J_MS5rrsj^cbRPSynQSiD#fd+#G#jvHIxim{z5J7SkN^Ml#Iy$ z^8iYk`OUqsc}xDB6iTjTsIPBRj=qVdpG=ac-Iz- zms#{pUfjB}Wyr&6rYau4kGP-mNCHjiTN0{&2ate3mRy99kO(MVR^%cEZ>PeTeK#I+ zwm2U{1p<-00G3{D#lMe{LG$}bd{49+|C5%~6YHB9G`-)fAKI^eQDQ+N>Dm~{YCZ}m z+u3^f1Z;9ADChglw|T;o3$tF#D8mnSbP=)At5_2-i>JHhugYS?VtI8lHV9-xZL=4t z!{R$#x57bQV?k&Vqweq)w4rQb^i$L9$&H54+1yso|G%WQS-F8FmlT@ zc4h%7QA#`=$l8n1iymscaJw=$8R-(-(($hOjl|0NFdwtXN}k4#juqV+?Xx~RjYC}F zTqWK=^WT_X&_Vp77JVFc8T0taB}b+5kYmhiM{1z;GYw$Hj+{YJfF!#PM6+> z{*EiTj64Eo^Jn&JM*W0Q5PsXBAyK%>!J0y zo`ed1&Rj)~7{G@qjFK5Umu}FXEE`=G6eHZu%^+(So?fPJ3_QHziOJvCLh(C8dwCz)0Gj~4FE&s{FE_8*}gp#M6qyUAeuf`H-X@0{POTr)~J?|4j2?AIF9 z2THCX)}s4Avv*DAW94#Shx(szK_IU|*X45?E9v)`j#gPNr(V|)7B^qOe_*ejEup#d zdF?*Aj_LaKvHNwA4qUo{UGPA@nxv{%kEn?!y7`et-BKev`pco)z|u<+ZI9A0XR)$O zXouc$v!aiHb0S=~DP*quAE4Lb&R-?C@wZO04LU9h=X#nH8~@pN%jk52LEf3lIwtED zyungSU&4827YQ53cQjrH6(WW{#GAm|&_+ z{qj2CMY8Cm7SX~RERcdOnWxW~ma400#cW-K73)}aVc9jX6z8I>7Uhr=2fO7BzPAyl zEint2ru!;om&FZ|5ucb(8enHkixVTrl(DW-)%doN@NJt5XTXsiXf92DD>peCGMb8O zm^+`=PU?N3f$b~eiUs`#9e!rA&B~KkJbE;zro8VydPs9ucURSAs)yW^?J-sxE87D0 ziENp)O|~O7(KnVjb%UL142hxXf?4DI+X|cp1yB19E^8SXhVrm_o#JtG5+vZH|gyf2rypf;JOb0^M9yDNg^PTt1qG>V~N zX)N}R?Y(0S+odaZB~i_o6dClT(Dx`y++T3GD}5Wk5A%-ZxZoo!*QXrXy3TX zi3k0qZ*;b-x%k+AE7GgWczaH2U8hrie==`OuCiPABfJR+5C2EdJ3vom-Wifq?`0M@s$4# zyIL^pK?#re))l4MFn6$Jh1N{JN>L9viY(9{NaoDcHNhJMFERy=3*5COQ>6&g%S8EA zoZ{1P0})QOVzDBMq$6)g?`4H`E&p>~yqkzXNeqdUtQpNDeCtVq#tx0}HpqezU7cE~`qV*7TWC8Ygx>D^lZ%Cgv?01a@AZL+O zG1^(11W-^B^jR<42dys_mr+ZV z$VC4$e=}0VJYxSVF@po~RnxN(rZRdWffG8KP6f2rJrD>8=P}|(VOga0%wFs)PINDL zx>`ysqxOzTI=o?#i{`c2OSvV%TE&)p_%qL?N2_eo4M_ysMl1z8<~psz!HQGbvyo{Z zH^!GT<r#p+k&Omly8<+OfWigOQImLvZQdZr4bVA`9mGNmD@#CW0xjJ6PG zLQrN+bF2QB13f8brIyk@^s5OQB7yYcxbc^kdM(~Zl{;V%$cY=gzkw86Wve(w+V{^n zW7|xECpU<-6Bf-=a0k4yF9Ro=XYrjU#RQi?H*v8g;duvGp?5Wybuuxr#DSyFj?AKV zzu=97AJSHI`(MmJpK(tmQYjTm5k_S$;o$M>b3efMUyccyR+t;`Ga2^UoiP6$hdBe_ zv+7qLLoS)AfxT_imY`5}I?*#E6HTXVk>U2%a$Hdd2$XH-Ey~R;ty&!YeO^7_I5gR9 z`z-nK`YsHZ-(?+5`d>%I;j#`lEG3lZ?1%U(z1wUDcUvE1pTRF9szvgjs`zX|J z)_rg(O7+|}fVo$Uj_IUfDRd;%XHY|5u}m8ccNzzG3DgRX;HEDGxI$&!bS>e0k^X6> z=!)qCl)FZ#bAk!a{%enm_xqQV&4`?~2P$%w18fBPo2kVcpuZ*T61H=#Yvx|0M#)Li z52{4sHzxAI6tp&*A4&F`zynO_?08PX0&gCxdyT&&0R;;d-qV*k7O1cgcd;zBG>sui z6xe^!*<8qnED-q6cD+vHXCc{=yTH&X#HHqhSofjoHqjkjx|T<7xpo1QaY?9{R!peB zZmzZXD51F5V%>PhQV2-9JeMn#HkhtsML);m-y`h$uKcHcNIlt3Q<&~)sqpE>L=_lk zLXc>n_rn|W9Oqb=bL;)kh6VPtEg+YqiC#sI+~GQ>@ett*mmS{Q(qm+^`cCtX-Xz=4 zwLrn!p^ zPbi?rf94k56XY_718PhCdxVnDX$r_^Ycm!f`TU~$KycjicYh_1BW!l(5CvM83*CeL zI=J*w1A?yf%uw@giUa>%YkK_GQnts0Fnaa$sI%5GV)F*{@x5gX%H7GM8I%qvuAJ?o z!KUe7_4|LXuG){05&i9AiVZo2?=MAaHLg-i9W|ZlY#+u};EuAO9j&)2RLTG5y&Y6v zYozJqdH@2c1eq!urteM-&g&J@zxZNjBz82sZKF#~U|gLMocW*U7{W+RA}>_-%yn8$ z{!T}-KN&;_BD=+kqLMp3atw1-hqW!;0Rh=|s)kNb4iXD-@RLZ!p${(I!sgZxgdNjS zNNbv5_?FI>=LuJ;&qePF?jEG6`9;}GzP8kJi%x5y27!o_6Q0h=p>))b9eI*Hoy?8r9o4)7U?l=E#<$YaDmy%Sj5T&OS~<@-7F zT~n0bkJcp2^A|?6^EefAb`>+p?_WFJ(XjOhs8_P_GW_p%cfh&V_#LiW4BmfV)`qf7 zE!L&8M}1dOc#NN5Z&eJlMzEC|eMvB^EwKD!9lGzQrk?um)QkLHIXf6t=F$OVa#XF%vT?4Q64;P28^Jx!}Y`>WWNkym@St zTyh-lj~Gv$32kIxfN|uv#k|MXf-pNz==Vi72XhfczX&OYct>U@1rTz7m?8vg$DI51gi>$fuUq;NHr7 zpkybGxpE^4$G4Fx;5p0D*J`4&(n!GW>`Ot+z3oZb7?=t8SYc!Ad04&cdmp z07VX$tw@giuKk(ik~S)lZY^D!dJmz~$bpHwRpl)Bp`vBVZLrxmG#eiDin(v;0~puS z$PUO_L(KD}<@-V$IhSWCvS0~k9f&-!|L_knu$x&rP&df*Rz{xIhug;I19b%R+p|Wz z1V|a#mm8_XpP_d%M!!$mOa-lTNdjUHcOjz$(ysfjK5YC8g@!IhzUZ;QhAZ}4ZblAty$LA-RkP_evytlw4t*3;WgbQ zh$#I!&H!qp2?(sdkENtXG$HFuPY^lkLfF6vPvZQHIeU?bvG_*V!H3mcrJ|79G4nQ| zzLZ|Z?v@Ejey(-G8Myr|!FSM7A#OpY6{!6e6?$eh_nKA)m}-r_COFlCKy4pIX=904 zyOFBdRs~oaHQZA8&O3`SB_~MYnGytx8UkV0tsXi$kYPPg<(2Dqd6nlI9eJ1cBZM-E zxkx7FwYe=WM&z?tyZ-bVY_Qpd_XxH&P&}IU81gLq&NQ9JT?zj;~bN>qfA-yk&E%4 z1M1RVe$D#xVA8g4XQ%S2!+VnVHf&Cl1~7vX6NTBl(-;6`6z#3V5aWEU;_2mU=(~*4 z=p0>TklQRzo9BwTaDT^`*lj*a#d9FT{Cm#+o|0VIcr`!`F$2YmlUCZc*Six+5Xg|$ zp(ePpl!B$yn#7vAXp8Xt)EVD*3P_g$9e5E0G6EjRMs|(FNJa*&Rk_*X+$kKW`Gypt-=&cGLscwi!R;UWbp;iWJA`X?>Qu+y6p_{&-$M6n5^Y-@4Vm1RWgv60-;}=Cdz$@# zd2U-g6bN|0IHaDy6v@+TzArNiKMJ4*dzcG5#&VAm$CEyj`~*-pRu8)IZ>xdbH#t1L z{l`p~dO&y} zM@?MhAwv?taX!}nx(6n-qP#;dgTu67bQ|rGD`n6Za(F6hq>KrT9XMyA=mTaGA9R)@ zm^E-y*iT%_rpRd+7JonUK&Dk@?rV)bWE4V(HjTXfY!|h>>vw(2;$Op zh@Bi;rUL??p~&h^-q%ClNtuxkJ8%NO->OkZb&=L;m*vkXgGUpGMvRO4|185oQOsZo=SD?%&z;OUqB_(`pa6L1z%gjY=HXm zr{2U8xZ+4{`bk!$>TDz^;+FkTIJe3FmKrcU(y|NxoYcJ2&2)hmI9_?Vpp2`H4=Ww5 zc?>2`vt5+yRIqu_>~eshe!)i+dOpaYs;-Y^fzY0;C>t82jn;U=f7ft}Bx-k|5^WyxctNb+4@$2-J^D zAsL0~P>f?%79K}jDI3dpks)P3y&Ww=IdPh<(#T>oY3xmQY4M15;u2k-+bHe4@9S8= z>%W+;_-4W`DDJWI*))wl=V%;c@Xo}XaON_fs5rL*{HaNpO_y2gB(tA0g&ts?2)%H( z(GvcR6|b(Qf(4G2lrqlTSpZM~H6p2U4(-Y@A}M0DcgaVIEv{b!>UIX;^+5ofric5h z9|iP&Y3*th!_oAhgXQrkoP*2suU%bceqsyUQ>BDv{J$B|95$Tz6t4&MA6$y*pI96R zsHq>qrv$SOMCDvkcuz{L?;Khti~!{;Z$Pa@*8tc)+wl0xcN8XLt^GHzKI7Jby?l2v zf$I4N;Q!#67?ZL#<^li!oF;79{Y6yI>lo+Y*bk|AZ!B#$O?R3|1moCvo3Xpb*q_6GnQhF4ipyGuuSfjeXlH zLExD)o!&PQ91#;_?QGlFXgMJE&6Kw{I&PSMV{T{UMa4aIu}gkenG+^XeRSVLI~1}` zs4JacU(-D-y0Ni{g$FRG%@I+*tWmbK+&83qk_PnV?ltNHLPCS+oQLo@5)GmSK8F}- z(NxQ|DZ?^H>V26Yz$Z3;M(Q9;-YsztT1B@bte zFZ!s#i%lqKhra31B-u`>sU-=||33|zBq(Dk75ER>GKehkvg460O(?pc81L!x)iQ*U zct!puw1H9BctacVZ`%ty7rGQn>J>x&1SdllB3~k^rHbeq7noehSHFE)@-{Q&m;eO& zGr|s%dgpd9#?2h{O~~TCd8{;`bs2OSJ$NJk)gzliumGSyuYj7^{qpR)8lK0$wf8+q2)??Hh!dr~MOiG4F zLNr z1ppk^T1AEFYTuT8<-T`^S6GbpFZo`7wi_QM=x~v`J^(mg>)fAn#GC(_NF6{e0kO5@ zUZAa!*UZ87m^wMj2h+1)*=66BzDgd^F|;hLbmg;(Zwr{W$mFT&h^t$-aUvF=G3it0 zt>p2eo!B#=9h7{X0J@A=!g)hR`>At_kmsJdklQ&Z{s`lHk8>57{r*x38ljTS6|3l10Zuw&rZ8 z#3Hw_(7KQ^{HaP8+QnfYk$dOBNTj0oC1Z8@zWjLc^>;BVQBNaGR4dk>N*ib+J#+TQZbt8Pr5 zIlw(zfT}BEw&HD8g4o?QZ+7N^`d@lHUU=pWTz)iJy5gy`dR>JDU$BB|7p-7_En{01 z)il&r8;;p3L()xq!zzekbYlOzb<}c^Ls2?g71USoB`B-04J}mZ|cSL>sw`Of6v*?&P?harMLf ziO7_is9s*L(7{WGL7vH}w@K0zxLt%Qelve2ZBfA&LxxFtrZ<^6^CK*gto%M>fEAL# z;JY8(Z@~(v^0S4>;`mAHF6dr~((a0^WtaG68@|)toUlAH?1w=qIeXGmuHSN!seH9tjS8W zlp!-#oAt#`0Ey$zo|8|dT)BX`YikbM(oI_Pt(WH3PleL|x^>lJjPrD zeuSN6ilOqS*C9$g`fl=to(O}s>!!go_bKQ-b?0N1bBmXMPB-d{l77M>-IkTsVoBru z1>%pZt5WZA);5;*OqPk<<=HuE9PETo0XD(2Vt)ekLmWQK1Ca{O-4v{)?)`MZBRF&-c!UTVL}&9<%1`T-Ym- zBI+dR8|?rcO6Gbphjtgql9QP~Oh0JZalp6Y8?7lhF*iGid@|?nNcKv8a-l8)quVE! zIpO;|SJvz7j5D6PCkgD;(hmX+Kk|Ev@4RD+RzsBfK#PfBdkEM&9hPuiAZ9!OVwg*S+AhUuOG*jq!`F+p9lz8@)h#~yGh{K4m4i&9}dr7F4I*9t>aKY6H_|vo|G9XmiN2u5?*_HA>K54sdTHTzk1u_B+K$ zKWRrR`giGRbLfCb%CR&a*H=Y!HtAjNQyWyl#mM4DpxqTXDIV?RUOUf_b@J+=dP{Aj z7=<*Uhgl>s7E;NXd9Ygc$XM0$IW(LhfLG0d+|u0E=(ujt)INVy5PdL7@O>1otO{`E zK(7dlNWR$kU&GATpp42b?8C=W@5^us9e|n+lt5{-8%F##xTIDl%5opd&RBe}CGc6z zPqEOJR+l;lWD*Q}LMR!aQ?}Oi$>d#hb6I*kTp%(&;rml2XfkE#%ldim#;9P-%Y+?? z89#X6^Y?c|sfi5^nU;*5-TJe%0|P`nT<8%h!1j)7I|Y>PG2N(N(N4pBz`XEGV^Y%b zU|^ubzz6xo`f@1zOJT;JsR`R3HWI*QmzMSyCe8gI8b1vV2u@XQ3DI zWS+~FjJp}l2J&MH$4{TCtxvM{zy3jGG7#{rrXD31T4p1?&5LbP_}4tJFv^HvB1?QM z#sjk@LB}q7xF(D~m_=UYRWx`1rouGAt+koc^tIO9{YF$K-b`3W26w57NKaAgQ`(!< z^E>%E#}0lF!;8!J((rIhFk$%d4ZK#qPU9S=zf3cy#r|(Ii~u(oi}7D})=eAQ_S9wr zsb`f>1>>5FFa+zZ&&n^div&<_rq*JyuJTXOP}kiWE4FqqFTx0vcOxW(c0X`I`VsU& z=ULtplL?GB^59tS5%%XICd>>7&Nw! zCG&jJ*9c0Ix+?iQPu=yX+K?fE4PIHmXTmL{@E+FPg^Z}BzEmh^; zK+r4IP1S2I);pl!`cD9t_TRC&EDjJ~|2qVn<4OPbHn{nJqkzxHZ2!#V_Ls59C7l1A zVafl0bh7+mz`ub}qwe)G5YCLzrcJvFLw_$%6{276^-Tr7-5Bc%pS4w2nz&_m-((%B z$-}fx>%FOIvO#L}r!&`*@uyi>bJ*))D`)B`Hu!RE)(leU%&;*orI|4u#KA1G_~Cg5IFutJ@2s%Vdn!NCSz3zf zeEv*OzEsi^uz452Wu|&;8yd1+C5q`eia7ET*9~Va#9Oi|sWS@+AAU{I@)FVCemD zd~y8iDEX~9VVZgw^o`prs%`Ev^wg?FMt%h)n%~m`Tae6U|IdO~hZ$~X8T@&yCjM!% zT~Jx6%vXrM-dKM8uzQ7J6VhMiR&34x+(WHfBZ)n2GqNC_lJdCF_UMaF{J#~befUqa zJ?2QCEf-x#Q0t254PWlzg=}oN+k~MgAlw_E4?OGXEkX1vt8>INUB>(WZ(x#oS@Rwz zO4vw_{~*}Hh=;0H-Gbg^NM^Wc)4sxxfUSSPy?)Bj{4x5V75Fh_nvqP2%!C?NY@7@0 z3&s5MuN!gth6erlhvBL5W{(Ps+rIX6C5dz9-bm*j25p_r=+6SH79UQY5oVT2^AY$1l+C#I<@=Y+{0GqDk%owbmXvSsZG79+%U&r5tE>%S-;*nSeJHD!-Ew>Dj|mv1tsh>QAh!3$0^V@`2?;%&wg{T}sn4C=COB~f$=CAP8s z@!v2KrRD$cyx{)@wf+Z0L_fOs){G#pj}BsWAVH10qc?)Nhh;$V&Y7EX5?hOAA_a;& zQ1IJ-wjiMN2$hfF%#A#i1fWa&^yN*!{4CIwdI4Dz&OQ91^Tz+i$4_=u@piEShW{MF zo{gNObOBTRzSfk3$Mk#RwnmJm1z|V=KW~%#t!y5$o`Hh(DSJfAa?}pgo(69e9HPqf zJFz`CmQD}3;&Dp3wtHdHv03Q}i+=#d+6J+&dEoKB75u z9Z!6LYXz?XcCBEeJv_%1(re(?VpgGcz{PT9_;|QjszafZaN>Xp;zgbCp!Az*EwmEk z7lAqYL8Xdcws(0Ib#p_fYIOtbjMXG8yC4M&5RvHPLzSg#pgqr)dw2jySSZ(%|L$$T zw0v_rWo;{$)yTXg;^Xp(&~sLO6*&`Pc_Lsokx$--DkX37mBwF1J$-kpfMqOs3QKaf zwK;HZ^{d_eln#$$yWbZ9ColKZ%sIFpSomp=O<3t#Pq5`yOGwH!$Fk#ge*kU+qPUeS z!~{=(#ESA~T_iNV7W=_;l8%bR-LJM01RIC087z^tU33755*YXIV*Di-D&$)#lGPDwT`;g zC!GeaM2F;eu9g)cN6wKB)LCSIxk@w5I0%ceO^a|pSkvwvJXzaT@7IB7YLhLRlVi6r z|Ma-{YA^j=V<$sfb)IARQ3a!NP0j(Z?hU>epwkKp{s9=C0wjpYd`iSn^fTMQB~5!2 z2HXvLFZ3IlUG8XrcjiZ4!la<`0k+^n+UdiG8$a%^UiE!UIvF4kO%ALhr0 zixtNyZa+W!u9y4|@6J1`Gux11+&SCt1NX3O0#p(Q2%(J4r-E~BPL!O)d`5nTY4UuN zK!|7YC7h?|={v-tQAvDLcnOWMbh2b5dz=e#JV}J=qmraezLI!ZZXmgl&V-VO@Ja?7 zMc7ujIA$hgxw$Z?eVe&JP3{Ax?56Z>(HCfI0Jd62Ofdo67DPh>h*a3y-Tz#xs`E*H z5%}TfIl){{v#OvYpaEi{r&zbMFqX%qpDs7do+Xo?SUNfLunFSd6#!XlQRMib%+5-n zk5xqm#bp|~JhDuvTu61+1T?u=uUk4+K4Fb9c4rmvdjD=>!ONrQ`q-hjau^9mFT*yn z3rq`luNHYXE>g$LzwR&k&Of3=3iwC&gy1uK3(Ewe^VM5HsT8-9qYO<$gx@3q`=Iv$ zuR|^>U33EL;Zks;kcV!6!;$}`&^w@fC%loEe+}*zdOC$kV}A!NchD;G$0GR(e!bg* z#(3%OHC{c$m(eGtV{ZHmlB%hOBeB7`AYmpxyKqQc&6zHx{re4Djj&`d+`V`AuM`y=+F1 zE+r)?0$d`m3$X~>oto(amIsHdz4>h+|5INLlye_C85nbYln43&HgLPSxB~1yBXG+< zgCQ$ZIerFBkH8Xg72Iy&Hbv#}S;s7LV*gQ#n>~2_hd)>&RhR~7r+|bhD9$t;rCQKM z-$TDb$$S8WjArRfd91AOm8o+sNzOesceryoEgy|F&_FV=r4>)ob8CFF@-u;IoF(_l zd4ZNYN)lV`nO<<=gu}dOeCM-zb;aU^`Y7CpN_;fjauu*#G@$sP2xtWC&K>>S+MtLF zd0}m`DBJIZk#Y!x1mDAoYm_1DnGL&bGHQ<6jd_3y{B!55dW~8ke@Y1-vX9jk^)&p- z#ZjuYmq3E_jY~TqrW67ly*pj8*6`pM6kUE0X6O2KCgvtko!$r7`RIG&)3`G~fFwuN z@fpGsF=wqQ!5}qdHS?gN)1s6*7Xyyv=x6MWX8P>ZrAW*24t$#LG~P}VJosqQbP!ugqbCY zR3L%{sy9(HUl_8OmVK{V%T#4lICF!9yFmI32RGF#WjFeO?D7_suwPBqMkEPmapsoU zG`YBt-zBi%zFzNGq7z`JM|Mblf1ecd#|G5<2E+l`p3{6-yYmilmL`4j*6P4d zjvo+LBrEbFrLh0c=0e!5tNs{|^C>qLlnf8}2pG0GV6FT%nw{T<=Ym8`APi4%9oMnN zTElVQ9Rf}~%E&ljP8g04?@qi8&isPDI0A&9x-Dudx4hO*ssOXRkhs~e+StBDR0m4t z;6Wn;oipGejHsdYq?5wa1I%fCbW*&yNpf@m5Do*p_!&wXpD5>`2~q)xNb`r}Z7=58ftrv+DScd3~FfT25?pK}7Qld;|&=9oSh+ z4H6|y00jqjQ4v>FSsiuWe9}2bzX%db!+n?bGZf(=EQu4P4qa%!Maq^io3*K1D3SRQ zag~~LYHoj~hJQ}2_;w#sgszi!32=#U;74meYkpl*3XQ9BNT>U-BZ#dNOWFOdw&M4| z{YQVtH|>8<^}Z~AHPa1TX9K_;o6dljaOS$Hqh`s+tr+327NF_K*+%$qK-Rw2_!1P*GIIp0c@)y4c zYAp*?WloC{hl0#jElWi>L-7R&1XKCse^|Vv@w_pvC>iQVhA71Lgj*GrY#ukrKerxF z;f3&7bXJ9Yx&?ut8=s4Y(ul#^wRTp;VsRJil~#In9#ADaUSC?r95C3-o%rh?-plrb zqFd~MM~7|!7d0W|&0FX#k z-Yjxxh3gmms;w$vQ5Os41^`VU;5p4jPx=-)at}G;4@pYx8}YmGIe;V@43}u_nT`^k+Tt3FOjGE| z(>wxHkaw$sv+c-XHlW|hxDiikFKbZ+w8Vn%hWERiDW1}0{FO{|EL_PrCSKI55r4?w8q~H{3$3$U)e@pn} zo(7k1U*~AzG6Famc(^N_A|_*D0i-E?+T(z68HV~7hxPS&smS*JVB4P7uynt)ys{s?lL--FF%obx*Up!MNRqD^86oE z+*6M4WJ|w|_MqG}dd?tb-6@KSDcbWX@5yLBiu)ucr3P@w{$73VdDPY`NUGzR>&4HMogN zzyorrK5<+v2|0kz-z89(?Lp@lp!XbveKK(_zqKDrgJr+^!LMSJuFXe|NTXp^9l)GD z5KNoEmch1PoWo5?`zP+AUv+-ttIMh2Lljme_E<{JG}Hz_0oZWQgsx` z-m#@J1IlyIWrij(teVd*c7Z))bFvmeBSQcdjk#Nq0>06uS-A4y8+n`eTTCWNzB&}+ zI^b2ViTwl}j{jO+%Y|?Ylp3Ryypa*Iv%N@cz=j?5JI^oOjoGtorw3pC{A0Y3RRbVB z97klmdByS*oZBm++uWsfryiX*wm+v{&hk=Kpix**d~@RigdU=9?<7(^fr;?z`vS@v zAw=j*N&&-w6l*c$`!!%v&c!8fTtC8M@^3Zqk#l$|f|I0AdIJ}h5G?TSXDD&r7)h|3 zP+Um`IJ_94i7Q|X;L3CQZP*DTGs!N#O+hs~D+O5PfAU)3Bfc6q!2%6PDMtQo>BW_k zl#^xl{opF;Iw7?MBa)kV{q4}82>0D^5Gm*0+@J460$MhKOjrY%aPHE~QFbe= zKcn8zJK6F8n7sw^vLa%SE~NNtm1Ue@H(i9UAGDj+hV;ov;3b^$z7FK(Aefv~N7>bd zh}#gT-r2YHFW?PR{=$4o!&MD(ofE)GzA8?gtPA-+m9u-uW zumd3VJmoMCaU13@DG86>ym&Gm7pyhny`XsJZMO*nG+6QICsuhYBEVjLYmEq0^&JQV zhPBHbmV@@I9r33xo(+aSYkG{Dezv+2hQ$ z?GdYQg4MOYqQz1fpWbYxb&6J(Sycbc9!WRb~`l?UV> zt=qf15j}5ZyYbuN2ufX!rybs%#EK`8O4I=lL=y&l1ZXOtldNAW+grJrH_+918U@tP z0h3wn&LhRnmuj45C!Q)G12UzI7zZLwql(1jLz}0d?Ae?}L{3V`}>b`b%O1u!-p*pmNNeo+qS^CinIer2Q09Y&|tBevAzd8xf@Chr8hT{vs`y8Cls zSqT@Zl4gVV=E@fGosxnFP*gc9@)`C$%OhqSHHF7%*3zJ^ z#YXm-g~QGNdoO@cQWOFx)6Ai2P3c5)1M4BOu*;T_=I7K|tOdijl%+c~0qFgF_-!h{7tObo zQZhLKFz!T_X+$e&XPzGz2p*TLMn$O3C5yMB%?en>LNQpzv|F8iDG ziWWF={HuHpgq=t}@%q%*cXV34zFB~vX0ZzpTjX7*ErLUZ);Sg?arWaNq= zg#!85UJ;>8QJ_}Iy5DEW^Kbr4_?)BD&lcJt%y$XMesR}^7FJ^cww;MVnxEDnc7Diy z(Eys;qPd5nAty!Mt;~~OR(lUFrH&7__Jy|>g^P!_#()G!eXRuvQ12$A{WCO)2_SI{Bm`74G0}-xi)9pVVuZ0P+}E=c zv_9dw4z#FJXSLf44jN6_+ninRc}*{}V*M6T#oMTHPvT{^^gp~u;=&cMsRGeo&X#p7 zVgfu)b!9TN=Qa!59y8?}zI1eih-$vpG8ZMAF}@R}#LiUgX^N$|k0oFWMSBcKUGFj& zkGTU>4o!+b_vI%LPSo>+V01fD!D)8}n?pbdh=N7%Zv2&q6QIFmN$1q8|%e;2gLQtn&yJc#i-Z zh4_ z%VnH^hctJApgG;(OBKmrd<$&YY!@w;mZpIJBtKez$r&QHta$!FnjDJpG}wC??fKcK z+<~(7g>n;O`xHwE%HQ0eK~V zqJx~7o}R4Fj2MkP+X-b5ayW240L)a(kd6HWiVYd|z9WF^H`|-f!u5Yq_Z~n|HPO20 zAR>xNP?Dk`ppqo%FF8jEk|bv_AX&0x6wpybkc?y%2}2Z+90gI*CJ`3k^+qU4kJY=}kNA!g>>XPzs*|c;!cO(cfcwtp z>$99j)uko1%K#|Dy~2@NB;Av9(I(-7g0lUHlZFjhznm=O$M))<0btdkdr0PDN+g^0 zhXnsaFO3J1n{~+BErMDT#4Nty|J}g+sWw~X0VUt##{m28H?ll) zjiL}!F#_JrT!v-YI4R8G+@qv7UoZ1eY{y(XDzX$Ac=bJ;spwOb_oMs)*zNMM2E0!s z_!A~v_x#X*bop;NrYMTTs^+z8$IbZ!?8=rqRti+3k<6|vce5}n z92emRQ`OeY5^iIDK=qAAr;8%1IyCag8_1C$U_LxE8H-FHdoj=R{vepbT!t&^GGck0 z$s?Di{rM@){jGw3H+(=fGSm9oDoZ@lJwx_Xr3Q6GzAJznN*`iluRor`WgOyTbHB#h zRmo$%J{Z^-cA7#L;%BsvfDXtyhcE-9aYE;< z!Ts??`yFXHuF)!i^*xwN3d}{w!qj^Z=hEq%NQiGfG=JQQ*R57px@Z`QT?a$j-^uK1 zfAdr`&`m}w6klLVK0*wt+Ww2{TBz!wfQe(S6IE4n(p$ILah z$@t*#Fv3)a+W1D!ZLiVePRtb7W>4sD(CN?Eb=MWj+$(8x6%48^__z><- z$wiqJ{soHZV#LqWO5G99v#ku~|9&y)7~heVovMOu>0qW3D+bc2RC9lraF}h`cPb?F zbgj6bHeQl>n6sIs;4LR+gI`$>t9;j1H*up4YCN1eONEJZ{;429ouScs?UQGCWSR5zw@h`iHg^HKO&!H5a-5<_ z;yg00tGb_hUR$Rw1j1!|7S#9H>lmNPg96(P;(ivcp$PGeP&R~x zF%Ue=z4*2#AIUK5ge~Z96c)?wRbR2WXukZ6lin`^(A@6by$slI1a5@z^bHW=1(TN! z91=upFM<&>0RvZ^+k1Z+L7%UrxrmNidZij_cI`JNJMK0!9txm6rj1c*ok(4*;vW)z zk&G*K*A4L*5M*P~E$^wCeDD>!eWpBw`+YMH!q~Xyj8ogCay}lp)fM0f$hr7737@eh zhtr_$jZ?1VNREC2#^iqIY}mdms?M>8uvxAz?}fY@Th33_E^KW?!d*}JAuoto_F?r$ z&C*DI-)i+F_TZNCj2V44@hu$qVu&kru(k54F2fYu{!GA|9Y7U->&cgO}%r=`{895cQ0jP~J~CrFy4#o)x&wYCp9_qRDuX=Ys(I&$gNO8WIU z%qa?g1GLoM#8V<1XWepU-o?OQerr=19r^a*t`OMZ8($Ece`QhH$V?L-=wIXb$|rpD z%bftG!nU6aZyLT=*nR2c7raH&YOD}WJT|rAJr1Y-91BrCXWZPatHUFRL~%!3^NzO< zuboNBfaszejZ)~kunX4C>VoMK95p``j=d7iq4IgLLptuV*-t19Cfr{?(x7SZRN~lk z2l2{)Kiq*(uR@i>&U4>{I-JrTlqo*DsIM1nhkx4A9=!f z$>FA=YM7vbbOrrvsZaZ;dqLLKF3q(@fg>F6D2j8-hpQwy|Jf90lH5B)ALY8C`J9w= zjgSRzz(I&E&~xefL_()YyDWU@P*sfNtGe#Nr)qmD^B%u;3SP<2D{WoslPWunZ%G;e z$KMh=KJEIu!#sJJI#I#P>`)=FI&KUb^YJGm;^ zMz#)Vuq`^M6Z81ljk+|j_`A~H84Nyodi);79Dk?ivEJCoZDiVFP~VVEZ^M)pS>0&d za>r2}e?z(SSN`|WzS-6lK+w721Tpd_SMsc2quWT4NbmzpAnADBR94D9yf`3TX1UBd zYFAqR*?#VQb9{oYh1?k(UVnpN%XL?= zin*lZDa@4L#fuV6<9L-3znaq<6S!C@@nA1p7j1+?)jAi-?VoYfXC!Q?vR0f10LNh_ zNB6nk4Cr)>By3lUp{Jjl#o@0L=IWL!=D1<&EKik;s zk9j~Gulv1PVf8sVs}$O6^j<)-G?(Q`m70Ae<(Ump#%^;?P9j^Y45FuZU0F{UeQws) zNxD-lOG))qu?F?!&R}8BQZqq_vacxzXNg|KpTu0$Q#zW=Pw8uK!&GqepS+I;f{Tyo z8Cm^rcimNad+JN*b)6&aEa+m+p1pI&IC50catxIZ^gr$t=6iN?Qu>oQ`t576PgL9t zdLmFsyM6pJ_Oa&2kCC$9b0^GxeyekhSdY~6{~SXriJ*RIX>)tNe7>aPntJ)U zX~m-utf%$_3iX@9Vz<1g=^xZN(~GN0w6o6vLh`*wlLG0siQFj}6RFL&#@v9mbFm#g zXZQN92L5am`Q1xyDPe)%i^Rq?yKKn3sHz)m@F$%HRbUWxW~vc^@VZ>G=jIgy>3T|* zSlwBq`ll5shvbC(m>URtCe{q^V%gE!@&$%ZEA;pFa!yQ3r;3edovP2BD0japqD1xG z^W}SDB5r?L`Fl^yblBLn*+sw;K!sb4$>!Watk!nVYwFF}YADD@Nxg=iyI;^uZg8aS zfTw#`zaOYay8Trr#iJOTO0!6uuPlR>3k;Si4)zHr6Y${3PSN?ukRt1d^)s<^{kgFM zeW3Ul0kV7WzEtw|MHcFS?>mv5W+ZRSY%zS^kPTol+}AB!Z5@}EeZ;=(NEj(06<8?L z7$775Ir1#^*4XLDYTiz7>Vq2nw+GHT9}LPSt;^~Tbq_nJyX3{<%6oQkQ0V)kdxf9F?~^gl(ze7=&lQR})0BU! z9OtciBt9c$nT)?5i4-<^92f*IH3qu49+*JMB=nu!*WWzKif?$)vpXgLg-P3; z@G{6@(mHc1hHJkGHLYHK#n|Npt+TGH(k)n7Z1up7uk zP`_IKYh`L~I8A?FVR%=IMZu=+Rv7XTPf$2OvBMKNl``%&^5C=Yx}J$rL4I?2*yVr|cwmv8w^QxKOTtXThAym!tovDa^X2cUyoCmKIa)>-gh$(k_4Ig}W z6Zy<;9V|gKN16{?z%0IY)7^C^g5S;{dJl(G^|sk$N5&`1=0@H*M_9rsy*F!AZYG&# z%VkqFgd_5+e>{SMHn`D7dBzp4cI1=IZs=CD_9M0BULjFVxI1a*2!+JAy5EPU5C%lu zBGgnNW5Yu)_`mn4O(1XsJpP(HsvP2ik51yWdjw2HM97~`6-)1!Gm0rrelEU$+UM!s z)+_z=!ks*@WAM0r0*%sAj)B_j&qexIicWHJ(+1yDw(8w*{4V()h48SqpO)^J$Cg(f z^tt_O0RAtl^ZgH!t9F7zxGHcB`>biF8oSNhN}8@`J}FLNW4Kty8LLAqYGAuD{+R5e zzva-yU$JrNZCb?r>hY~wR-2CIXvT4?LA8eY$ws&PAGS`t6g4n!eAaMMbw8ynm}Zj0 z;B=>g&Iem}!Ar##U%b6BS@-O!$DHnKi`SJKx;_&A-v>)xR4Ureu1+T`ZSA3sun;P! zk!hZggV#;{dFav0|4x99a^zntXnvvoe_B4NK*CDG$Ne88Z@c~T5L5lR*8t!BHywYS zc}t4YR(Us+@B(c^#h>VVGtKXQj$DJa+!e(J>xxc8PV>^*W7a?8dpfS&zHG9MWf>K` z>agQZwNi60iK2z%1RY@{A(?toDYFnyXAu_r_qBB)L=xEc8XkoPEx9Y7M02`W8Q33= zo?V|*B|AGXO(oNeQ;^%OYMpvct#7D`J#rgf_6Sb~{RnZ2ib$0=s~xAQ=%~6M>t1<( zhG2{v36<`vO^I46l_+&LQt}V4mOIrI8;s>zug|u1C8lVBkYe>-gf37`ux9AZt`DOF zTuJ7PKie-)CMr~wB?vIkY0LRLDbbMMC(ZO-eet1V@BS^y+Z^HyMGl08^$$NK;ZIe< z<-+In^;T<3rn^`78rM_$+%z-i5Ko*N3?bB@@6Q@3L+Z?N(S`m%U2b=Q*m%y;I^9A! zE80mVjoN=KB5CiIl_@@1u3^H2R}zi(iD}2iF`rFoerRFseowqVCeKwXG&~-YmW(wOeM58Q%o+zR!K1YQSjn@PzLRm&d z<2LqjZt3f6KdS0n!?L)pQLRM4{3xk#GDlGXO4yM^0VC3rZ}#YBEam>0ZM)&zFtKJ+ zFlfciG?#+Rlbw5nm|mL0h0onTq&h0Gw(hnps)9DfEa|1q&v-%d@7H`qSccTu5~mWg z1IA)FQwh>?>=}yL9yRYto@+#xCdyCh@)W7*upg`{s@6H z4*hon#~f>w*;Qt+i=&Ra2CB?Yx6rZbSWMytnW1ZQ_gZ2Vsyteb&`CMWoPnXpv=wiQ z1U1}eSf8BDy?}`}L^fa6zH*uf#l~7Jxv7-=yUx0AZO5siv)o7Pl5>AzH)u)uc`7rt z)MuvmSw~I#!VbUEcsEpen#z6WFhw-$s3-h>Xt9V^jpgvq#UkC#s$9i}9PQ_qu-aMS z9bC1C`931s_ikuc$@5FH-0}q*&bE<7f!Q`M4j1G-)3srU{)DMj49*o6lu}YTq{f=3qhyX&8D23~*&Eu4xoM)N z4-Y$Bxefg_Ba;@~H>?V1sHo0NX{k4KkFw{9R`8o}nQ*nc96uC?a}Kj)&Wt$+cG5l4haFKC6ZLKAQ$& zCSKt^Y9lw&+zgL%+~}J#sJfG@=`eH?r-w|FQ*{&dv@=nH?agqX4rj?JEsfC}#;PI9 z#MpoUrTCSqL!vm?bX}S!q@r3hfu=NCZikL;zaW<767TWrz_itEZpqi9euU(bLqgtjXJ+lZ0~hNfEM6 zkqI!5=Mob?>G~6Oho1jdRwfEG5~iU{c=(hNj@uwQ_M6Ad{jR?JwFqBc51MmDmPAh# zJU>hY-5jfXN4oO+M?eJAPcCIUXM6mE_|qKPvt`J=01bYywuiSPZ<#G>zNqYQBcc4r zmXLmH>-GhubVZz6*pR)up$f@~8U3`=#UAkn&T1Wpbb2sV-QC@~yQ784F`xRRc`N!R z$91Mw2ar`j{1QERRdeQWmM132wBy)RyfCkcvlh8;rn-D&kXw4!T#?m8T3dFd(~$ew z_4{~(V;aY-SgSFkbzi@RgsKL4z#BWQj_u^j!|T6%i%ndX!135$r&mCVP4k zTVh{3W}eS7fQp5Mg%@2YrkbO_J>8XbsYF{?DlYBhX0GKnq84j)Yz1XoZ(|>yfBlc; zo>S-)Ufd3;yFg`0Z=$kz9h_IX`J!)OBh*^B$1Cp!ynfxy8e0^u7DHa8y*_cjU8&C? z{lQ~SC5EMqj|;^D0sv933If$k+B2ta$z@Qea&1 zn;pZ;@0WOEUA**-3dXJ64Y_8v!qToE*SKU-7m$;avuTS4Wk#0TiHmS|M}w!HRKw0A zedpPyP%mDq)V?FE7k!>61(Tf2>8X8O1DuTo#~6P zP;jmXL%o3hsyjb(os^*XhXW~6-rO&&@?*7NUKeCC@DZSQp#J9~`xA4m>VeZmBrl20 z%`%-jlV)$m$McSYGB%#fqAy{wxbAGnqZ5RzV9H&ji;wX$7y&x7=6>^^o{wLD&k)Qz zc6ma@S43j1Acx<|e>+>3S0tJ;~Jh2DrykTm19*SPc@oT^h6Q|r+fco`uoGI*It`$pe|7W=mt&#>2Oo|beOK@{|E z9>Zhxq=TzTaY+~`7?lD3v90~EWw#v|vxQMo`B2)9b z@6B(1S}QWwa_F53uq4(fRjBG+zjAin?YQtq$^H+;rZ6Ci9b866bZ%RtTY9nKcYS5< z#LaeJyDMtfvRAV_k20-wuNQ;Q$YMgt&v}xI8<}z4zc`WRi5MWM=f{|&ith?yqqLWJ zch=)LADnwQP%dEVSw#0cW;H1h=J*4zEursM%?Pf$G2}y_R;kM+XM^>#(AYnmQo{em zgP-vp40n1P_3+L1d?II>GBl{tLW?}2(yo$e#A^H8!y2o%1SNeODYwboL+&Y|NKQ-m z&wlNvKP@qhJ#yupj83FpjE`@YH;Ouw>p~e`wIzsg)%Q!LFY;N9LvS|Fd$56Z-<~r} z=Xi%jw)!K?DfywIuCq{6Z)Dx!@M7j|Kvbpn+bJaLVOP^@@kfki+zHR$iDUP*D+jHn?5CXlqjxG%^!#m3D-jp8e z&0(bS!%Rz~?%LT`vwI#*>sc&{W&j(%s%k1vNL$oifoe51^&b`=@VNQRqCeJQ=iI-uh`G^qVGyZxPzzqIaELs)#qAfAaTS3J2s5Za(X#{-pPSasB z=-S~*f31wfTpDr0RD`IZPulo9dG|$d!}X*mFu6+$3RQ7?`%`+W^=AOyNaK@O%zLmi z^TRV6BYy@NgRP~RR$Il*d0s(!jJ?X`qk(}d7^VC#xYUx$n_MFoQ`k-7Cr?{mesM;r z_}C0$0jME1%DPk`BtDoS7^R1eDOut4uGO#xmnss1 zk-HfgN=O>CmfK_EZlZmAObL6HeyhfBxw|zrmRcl~Zb}R6A5*!pM^gAa;wL$oDz6fe zLlgf~aF)oFf;)RV%hUv?TAp6D<+(Z^7(sq{$!2s5Mcp>Jh57dME3oSDuoDqX+~wY4 zm++YPusn7=lXk83eBX8$>C|SUeI&9uyG};dCQW@G@BM~oxXFSu*;zsYUICtbU1+ef zrt+j7Tl^Vo#C+$i%SeCbNc;IzriCQsB|eZsEm7orv0)BaCjHQ`OiE<=q>dpW09fC& zgpSI3Ql&oQR$uYx&M#L^RM-kxWkI9{8hTZWhJ+oHd*Nq8Tp{>|-9S}>JyeEe)Ao=B})9#IA9j2M%DeHxzE%aBx6?LpOdS-19zi+7QH zMS~m`^QK4qJ(GCrcb8$NiL0N=zTUVI&)XZZ+nL2Aw|4Y0H}yZX{$gH}T*ZdmAiN3K z0wj1G7i-bUM?_#h1`C$*)3ff#os!D7MhLu%JI40-UfNF?S47kcvu3LESylUwpw>Pn zRPlM}t_PgpCV&OXhz7JA8RJ3Vg7nJWtTuUto1BA*;y)y>k1o(1j_~hLu`|aPQN#*e z(;V<`=ID32-CKxUz)leZNBGv3&+Xfe5AQTlE4umkVCyRjAtkM=xB= ztnYD|{1{CL1TH}W+I2r*MQ_iq@KTpvU!tz5>rr~o8R`f4ExxRg_oBi*Fy6;m>#B$Q z13_RaSgh>h8UEomC!C-)q2}Bt1nGWV;w;)jVEUV)A$Tb zY2rz+@TZqh{!Wlmk2n>oKVMHhdI5B~l%>&UEADhc__E%TD?NPVo$EsZm< zerlmkvn|b;vVJ~&g{_d$hh_~tVUC)prlaz(yKfH}W#gfzBz$4odnt2eKAaIdqYTv9-YiY7?S@bnmEL1Mi;w9m9JgyQ z3r2@#=`&vn*?O1IS@2rfS(N#>wr<`O9%Ox|i=nxeaVNUZW}HBe|lgPgB7~g32Z7zco zF4v&$nef2$6}sdw#qTaKPZ0FEiX-u;uK{yfD^Z&E$qBX4lUX;??-W}QJ(bB?bh##) z&hQB(^Co6!?uV&LEW$da9C~^ujqGcbz~pdqQnZR&bN2Afy$%&{JngWVUgESVtWiwW zNuJEa&%&|}t!0OWCsCwfo@RZXbhLw)gR;KdqS^ zhDy#-2^RMt-X!A)=&@4;Ee5A1tY~ey9C0c4P~+`tT#a-Y@bjyVHT4J!`qKAN)(OiZD?mHRd;edA>oM+8!| zzfJBjV)60yACPtHVg^j>LX<%b4HeF3NI*~f=xx1Y>&fr=lr2l!PPE!1 zsKN)h;-W$RNk!2EJBGPcqoen2McZJyJjit^@ps*ex(K2{uq|g zPa*O`jNo|EG#yOU{r0`=4zIm7`3o3xOIw6#2wcuZO zN}f9{Q~KI>u6IILa2;EKYF6 z3cqqA+HpGxGW~XJ+7HiF4M%A_3%`-BmX}U@)SX;EXZ!*-=wLNW)=(LHBuWDn4!-hD z-{inIjJ`9he5v<}ld+Jy1Omta7^Cj}a!0$rkAHZv$UgaZJ~D>T5r~Zi0`n`@wv_a5 zvJJW^mmEJY_56L?eu-9pWTM-ZJ%O-S9jm#~yvh%W$8R{N8-a5z*1PxkR=Qdf!!3s| zfIwtR`qA3V*%TBMINIJ(t}Bis-fh)a>asmzJ&5eFlrZoHylIVdL1_IO6ONXq1*ALD zIRNH=Y`mZGv}mg);fZo*#gn6?ndoKCXg3n_B3+bF3QWY|I>s=E7J`KK`{=%J8J?kCJTG`HH!8Lo%{CEqDY-UF=oA9T1}ESon$icR*M>+ zY-5^6t+!lj`h|K`PsVvrufFJe9O6v6ZZb*&sVH&>z3Z4DYN=`K#{99LnVpVcj?w5+ zm)l$drnUgG;LvD7Qq^x%#uPIdSTJYcw?NbH*mfLAL{KN#Mu>k)Y0a$Xn$BfXdkxKe zqDhawy_qICb-Z^j)3ez@v7si}W9A?PeK7dpbnS`~b`sGa%bJI^U`16E;?SU7Iy4BK%aY)GS&kT6O2{{_#3+iNayIA>9V)Mu}kG^$k+Ci%r=dflS#d{ul zT<4t?2%lv`7#_ZcjxPsJ*5C3wFOPi5brV*!veNr46-78~R0Ix)j|GU2Nr1T5aw+S7 z=K3-IwB9!6nn@1sn}znMmplj$y;NFAe*es$m8=4f~qgzJcd!7JB(tcX#Q<_me}De5Q^nE{hjj6&0=E`#nWi z5&Ey+pHXNnn=i&Lt(2(!X4D?QV+X<>NA{BRUjSl;v`Nu; zM~S4L3JtfplwO8}nDh<}4K0at8oa)K;}RyQx3@Qnl@ZBCrEViTf-U2pi^~%5S=c^oKnD){tEPeU7|c0LRCt8cn|Cn5Wnj5(n47Y*yTbGtPv`e!p5XU`U{Td{v#cN9>DjfX^m;-Db{?R@Tgxs$^W~6lZXp|(0UU7( z!s+%FUl8|cNcNp_9tNzc1*~_AqBkI7zQQH(YPfQ}V-bvBK=KcMH4s?Pxj#vlNk_Ko zvpRuAeY<9C7?~uJ+P@gNnFzrQ*UbBY%3Drw$*=!t5ReCT&tJ;Tv zK~cphFSVzc<=wThAsFSjzBqaY1}~4j<|y9Am|WxM6n?uS81M3!;4zTAZuB2i?Z~N}!)nFySk@+Dq+Wr;vn}@3#XD%KR>TCrX+)oC7r1`qbQt ztq(T{i{BeyU|^u)$jI-{yp?`ub^h*8wH^!jQlriv#c4WtL;4m@421J%Mp#fJb^dU2 zf5vyI|JN_{`V`i}&Szo50*5JgnbE4;YwxGxh~zt9!>x>jN)qJpUVcY0Iu`2JHz%8Z zEAMw1y0i#Pm78z0##uIx~Qb3{K4~oNaR4w^rdQrQD#wq*Yo3dX zY1Dg?8aj*7D=UhQ`^7jUg!W|)^n!>ANc1<7_&P*CA7(r0`TLImhz}=6vFZsa(hJad zE5TS_s?cO_U)#EXY>Zysmx^Ie!Tr#K9o{j9;a$&~lJ@tFGf8#FpkM?}T{>u9m1$i- z;?^Kma}VxsyLQ|yHMf6^DU!}^c%9=h zDW_iYog3V2Ihf-e+c@~W;aRm{NOqw$VSXAiuS+CXm(u+2#{fw{mOQ)wf$tliIeCRmGsElwB#H31%7mM4*=MO+CF5#sKAgwqw}rBbH}6@FwxEtd|gkVW-y zhy#ABH-hOrN@_)m5@%{Ej{$sgvzJXW7`l#xQ*Z~_k4I}4Buw_+>qX_*P_4XHCIn7h z7hwo%y)#3(H@6czErItCD!`NF%`UhPy^{1gP#+s~sOq2Nc8{7OK`GoMGaqWfJXi-= zyth-PzW5TTOEYhLFr0;?ja3)2H$y^OuaTYL1FuCdlWA;ce`W}wae!V5TH6zU>EMl< zbKLOTTpH_0Q}N)%LSDR~?yC)eAFtilsi(Y5;=bC=)}&Ropn>Ta@u~N>I38pjLd=&` z5UlsZ@_x*TCD;`MTU%S|Ygw|}%}d^w_H9?x$G{FuRCz-dI3&EKNrXyBD54ss5kr=@ z>sfLms4v~Me4+*Iv1Wc6s-<)dZe)RJ0EdyiY}eo(mFZKCzv#Qti{)}Y%QH9Ig<0V} z=&E@Mx-wZM7Nbl_j5?86M0DW$sjGf+GMK=Y;catD>&`z$$ezwF_0`GD&dfIi_eSXT zqFpWaUY?}ysz=`Gdr?j^9ch~}2eW=5!v_JwEnWszrYLj$K^2A-UgWG)n^&Q{x#MyRxlJh8h zIEJ#_Yu4?J%U09oCwjGM4xv)4%Vi?Lq@3{4!DPdtReCH$hH*e!DPiDP(p*R`3LSSs*i3|nUzQ_2} zY+YwDTfx3}RVfwCG-3osL8*FMt|6bIac1*MZ91}h1DKE>m~H8vo@3C`-vUM^NsBNo z?43)NJkxgCQFLgp+b_a>Yw=s?Vbt%gwD03dzwd^&ZM&UM_O6Q@T3M2ki z>N%jr*B1;P4EFV%?UkrCAgnTvO%)Ub)FlZIK~($o)6Zs4&^^=dx`dcMMGnTa(DLZD zsmKG0iUDbb*mz`jy?X2afnXzZqCTlv7xKc!lNo_dPw0D!Op)~=uwO~Jw|3PU7S^MN z_muG++5l2UAK0V(wWi;U<%pk__?6R?pcll2tO-I|&Gr`7mKPV_yDc)tRnti$akLBG z@Dx60T^81P-~4@!cuNi*(j<5^P67DL4DO$^f^Sb%c!))yirzIRL9RF3Pj zU5L%@ZK3@@IyE)@IYPAjUFcL^l%40rC_5iBVwi8)fGRNhpT5I)7`8fpXs1_ulglXVwoWUWg3 z#qgN%cpgPPocC;yvwxYv*EQe>r^OeX=Ja z7Mt8KtU{Llu`BghEaJVWZFf;iXG;BS{Para~<`eQUOM9S;)g zypcnUQnqLF4l4;OIYm|SYV>k{zkYj|=)bi9{Uu%lch&tUi7ahZKTdXyTy@?{O<fi5Rusxazz4`M^qj)BMhuhnVMX#&>>sIe<{U9%tJ1;D1NtKXy;T z%mBDSfQ_IE<;9tsBriilrc4;){1JLe%3vb550&;)=jrBWCO`ew8=P;OzSTuVFdc)J zV?Zl$fgUC6;%g>72%*!0P<<%1Fdgm;^7JxOO;hZ+TRLKCA{B-{cnnR7)LfMi&|M?Y zuc3udnO>Iy~n|c(?CV2{( zAs{&^hUK)nlAFO4z^8tjxbiFSlPLG`PL$Nld*SA!ZlZe9t;<-qAc|;UoR#-0ki#LQ zgxFfE+pgHrSyIVU5?{|UF=35j7ezFWqBK~%WWpmX*>HWwUluVgu(uK}dMMXh(`sXr zf@VS`_pQ7^z+i@($JT{79^5{QVlT|aYh*FaUxzgqx)(*jl9Xj20!Xh|e=V4ui~rv1tpg_ zNpSa%ta3lH`7cA(P$@vnYBky__tUy~{@;<18W1aN{o3Bn#u(}F!EMKP9ItVCiP*W& zdAV~a4H6UQ=;KrEbl1Yeg+ZbJr+`SaOqpR8uXSsQ&k;aXXY0Z`=RxJ-ndhs$!c8>S3z&} z)&pRZS&jC7|Ha?u@97YNF(jno72`{lDR^7+}ykR(GJ9--Y@E5wr3C=a2uzvava z1yAwPTnj%G5*pgH)#HR=bn)>)MrBWexTfD*WbmMaqWa+T=H^&3t>D{32zm)1c5e&p zhDsN_0>`N%b6=co2bh(-{E6@7kM`gGsiLKUT$QF9vt7ruKPGuD4u3pdE%8;RO5Rk} z#YYflz{XC3xE_yNQeE~3NVo~5XPgSKz_ z)@^|0NO{!@+?rR^`RYN)o^cS{WQ0>+t%}NVaz!YQvDNs-lk>5NuoK5`dDOj_K$jY{ zW*{pm#15&^shNxVdR9XY2P-9nb9!Pnwo<@@2axWtjgCtA3*WVTN~z7cAsc{z$M8<- z9?f_UI*oK&m1Q`nER7IhD1`8Sssh~ry6{EM!JAuRcjZ}Q$n4Q70gv1snVq*B z?RVJX9vtU|`P1E35?knaH?khkQ1K@}K?)*K+JNPr$kcwb;VI{0*Xp-9MlpsFtfoZ* zB8wyLQh7I%=6Vj?g;}bzN%>HG)*WAV7b<&rl4|H3RIAQFk%^kGIVqpxeU|FYNjl_! zd+_EdGytC+wc~~8Sjqsx{-%X&m2@cVF>Wjkhaz%%uTS^;BzLg~x4m~lYt93J zCHA8LT(k&}S>-u1Z(Q&mZJHkI=!dJ7C{#_jz&yajT6r6Y-ufR_$xuzPQT7lv*_*bA*<% zuLkIC4oIAXUM!ptpv6P`W()yaG);S3?<6-MP=o9wy#%DtZ;^ryxhnwul@$qzcg0x!!PK58+uWC?FK=~4Prs(kFsoB0JAdWeD8Q3^ z9j5n^=rZMHA)XtdO9n|bJ zLeLsT4>ej+>Y1nNEV5qYpO!&W--s14pBYN~sdoF)o-;qUH!(^h%mkV&cmRNTTFsmW_}KU7VqY^_?@OY2)^Y2)O!KfjePB84djCv z7ec63x{-0mK`d{r>*kld$s=vOz-bGH8 z31??&23!|HaKSB+J9?i=N;bT(4m?+`B>akHuYr7%E9pEdmOxi+p@qMm8#!9drfX?w z8AG~z4p}V}@tN&qX}MN~h5`QJu_c9usyo%Bmp%=DknzO2tKH?({(=x+tfLtD&;Xe~ zL=O``X&mdhx(&GFWz4jURO|r7dY2kmky^Bn?WN~1>ag~=7kbytFJUHt5$a-PMB>>0 z%zJSfIMNgAek@Poj3G1+T7lbWe;>r){r&HMx$}mUaKaE&<-V~LXBilF+92y$n#U#i zl~+~mIgpEA&6+l?p#utG-xjhcvvaG?yiBWONKgNX3sZTvRSGPY&u}53(au)phrMcE zST;$#I5O(83b1(kXtXq4z;=?`b2L^G5>E~IaK^FR}S$XE9a6i5LL_sK5ag> z>hovk{g;Wp>%r%M!$m`x%TuSMxYnr`MI)8eRKf7svMEI^>uSJ&kmnb$*werYIL!Cy zCyBuALGHab2xmoAKu@KjRuf}JG)&h5jaJZqZ;{vXgQ;WjEi3TmW|{2xvZ@rPwvhlW zFjk*Es=mgX_O^H5O%32-A&RpLEOP{M}PBW(Z7 z6mp%d4UNnE=P7`@!pQ>hdS)A(zW&DUq`i-lRN~tp^qRvzc4(;eHv2R2hx8!V6Ob#><&AgN z@D2h_1K^_X^$D*W-)sOH%PTqjt|{~_^fumbS2aPH0m@K&p507yJ0aL%wGku>7I(#m zsP+Imw0F7qwY}F{(^FsR1NeXCa*AAdt6u7LDBVOh|Jm>IshyD8Yo&O~W^rP!QQw*p zvA|VI0Za81!5@N$STd?wNP_CYMcW_I;vV^|v z{Y4XL3=?w1%9k# z*e_VUN%0OBAB6}E^7&op3YMa#apXsnrrCy-+%7jqq_fI4UVe7Y`rpN*fpnSJ1~Zoz z{+v{HY^`E&v4mwQee9hDZEW_~@8#bgUB#ZG%_8jGoj(3myi#sx;s{L~SVyOdn%aX~ zNS~3Oo_kQE>X*K^GBs9|o_%o)f2)Qq_x{i7O&9OG{|?1nD9?+c4=wImkIHu3MnvA< zWmT(Ape`)s5)u`?*qNkGf${+g+ERM z+z?G9wb?=R%Xc*?oih#lO;yt_B8^uT-$-!}O@KRRUG3!w7Z=6l-9 zwbg&r=K@6&kJR#ySxnxvzfo)Su~f z!3IBL=b7d$L;4Qx*}3{mmp&IUGBWYAHf!MhHuq;XwXuNyl3XO&FgudxE|RG1@+o~Q z|G+CDF_GNVREeyOt1;WU{Nm1NPu<-0K{|(pze~6RpotUq6kX*<>dVmG~gO=C+E7 z(mZWyT8sAO# z{da>bV1taGz9OfTxZ$2Kv}M-yj_&D;smo07TmEBd$H6E}tiC@GgdKXCcoO?!cfI zLEDp0n5uB(cBXf3U44}4)WnmWlS@IuIUoE(tNu(>|6k`8aT!MErLclzGaz_Qj z8!p2hQnuNj!tP8K;^)P~T#Bq!e!cN}dBya2)9Z(+W;y@%i&N-Ca6`rR;x4r>QJ$yg zWnOiCqlCeIS88AETYu>wBCR){Xs^Eu@JvqH7(Vo=(rxN0+jr_V`p;1gLy1aT`E&D1 zvf-|)GF@4c3r9X!*`Nbn?IvY({Sko#3^8)MuO;_%a>L)x+1^Sn^u>DF@62vL|0MEl z@rDBuQ1xZ4A69y~Jl*AKFC6V4E}qw9y;B>NVg0<|s* zuf5u*$_+n!mW7i`r@wN7J-wPK)^pyNrCI?nOTUyR z&ru}>iZBH&@49-3^L|{xpZeM>Y;)mSTKj`EU$&m9NgFa}UQ*qqPOc#HCC$8ctr$<; zhJ zn3B02)v zZ9f>#^sPB`TogS{d_8sp?b*#G%yt-7Gxyo7b>ERu=C1c9v7?9Y?oX_E(r;ryFsZD> zHOyUCs+U;S>jR4_D`jqmU*E1P6)8v65E*me8UGbuBF^z-<;!&|Mi*rQLL5^3M? zO6$Rw`343m$Ab_x?eydQR&EK5CpLP0oaxi1>Gd%D>*7xeIK}bQuLHs-t`<4V*7&{S2kj!jCKDi z)#7RKwEqwlDRhIwn7-EQ-`kXPhhfkd4PlAi7&K1pOR(lOD;v;6_a|~vt97beX==Hr z^Tu344PFfr+GF<+W+XXGhtg0{Yn*N1NgsUfB9_d#8aWPO^qI*-FY4Oq35JDpKF*%}k#%g$`#djCTrIsxii%YFe zeW$a!NSgjgT098RDoEj6EA>_tJNrg&6fb}U4Y)u&3qJ{JRWbRsBhz22p{3l}zMBTe zm#a+ePl-aji`H?mXubU>T8@FI1mFp@nzE(+M`D)e_v&BK(d}7Ion)1&IgsI^GMAzK zpAi?pZg^`x!Eb9(k~Z`%!m{sD{|p-iApIi<+yt{-F|iX9E3+z z5HugePA!+riMietnI<);f<0OOT<$cE52y?|a4}E^nB>TMBd2z??m$Vpl`|E1dr+@6Z z>*;T%7Sl5hL;}QSj>->|tLiOy9OWIb(ns)B*JQ$PK1QgyFSg#1aB;NSriJKDbi znBfJ;sQm8?t@&RuBNtc+^Y*AzoxeSw9{lH4eYlki)+C~)Um3p5psmrrp%Id^hwYQX zCvCe2Ml7`cMEI(Z_n+bC9*JRRhj8<9=&iZ1J6q^sDZgc{577;Ju^x3J?!>$jf>aUN zr_*vntEYkLDhy58Lx@{6`p(jee$XQ>L_GYbNgx&rce2)}P9QKkktr|JkwUHJl(+O1#zhY{VIcPAvZoi>`hIq-mX(~=^Op+ec?Ym+RrJ^2j88mMP#CZEASGt4a`$_Y@VP}$ z2Ou*tl0k4Op`GN#5QgqGN=Zu2mZ8Hhm2{U)(oYlZ>4-1)JZmz)NuaNelhwNj-lR6T zc0jL^e!`teD-K(tbit2xZbXgd)>wDrGNbdUhR+cUPj}Ch3-$fj*6WAO`=xC=rar10 z%u^LQrWTtIVkRe`s;j}ooDCk>Kn8{OkMOaCz1tUlH?+0>6dqXs6LlMsHrzL`vi|~u zA^x}FYj2@P*iXS8ZoaoU*Ti|(3j0UWV@1x_3?ic~?jPwX>90jPuf>lb!ii^~-XOkr zXb|3?2b;Y^(+M3ve-_>=e7xJbpp&2(KTEfH@SrsQ8`L?YP-({8mCPp6#R;UqNtb&P z2YB2GJ-v>Yo%K^h*ezl0#}e;7=WP9EQ-BS^8Ai%#r<*p6hJX6^`3>Ree>=I6Zh&Np*3U6O zx&(EG$S>sjpZxLbUVww%-hAjxyRQ6BxvF{SdvkdDLjhu0Fb-Lq6^b~|*dUICcR+o% z2hT_Mu;{zz!JgYn?*&(Ea6wZb9nyt}GD->Ei>C!kCPOG^o5t8A%OI0ceI21bgZ?9jrR|LaF~c8W857( zddKeVg~OM_=;qM4Z5;Dej#U3hV7dcmdU`KaZX>U3I-zulp*wzvuNBkoR90lAj7CXn zcpV~b{yX<$aT%Q2)am!KT0N-c=Oxx}1^dr^o=w4DTq`}l7I#}l;5f^{ha661qV_Zm zmRJ`LCG|CG)dTJ$HF>QbMPZ^em^q(xmBuBz`WFRMIa6sax*}WgUa> zGi#2$pI7foFFTxZ%5Jw(&1x+srH_$=uw3kT?W%nVf+Y}uX6F$7wo&AfwvkQhD7y*- zdb{dbn%`N}IaLEI-Ds>!QuFfeZgVM_I4Jl}Znaj)YYHej?{xwFmq1>8>s+ z(rMH+*p%muBdSgi2`8QG%$;P{VH4Z=52VekBXKiW%!8WlqUFM7>uUIQh5I`gS{qmt zh4p0t9zQ{AwP>7jlCk%6I0OkaUeD>U(_zyWOU2B9xj=kX-4$0NrXYoFcX}FFwyW~s z(Wjm{2hPc`MLX%7mnx$)l9%VuF%5XiT31{V1X)t!JrFiDhI-H!z<7TJGT3fZ`A-mR zi|JLnHH#K28en07gAFVVEop@9%$Zo7!^#BVeg-d>p#5J$bo*X@sB~B6=CxORTSpVr zoCyicI`RpJ&@%0P0y1$r0BxL;U0Hg_TZ21NSun^1%Esa>7%Hv6R;6yo z!8*RD_Y<@m(MY>GY33W9$_UTQO)>>AO^H{17dqNxWmopmR|XSuvHvJo7guV$ zXBLV3QI3XT^IWH33OY}%_9!F-h4i;}j2C+_oY)ONL6ru}oTsp|rPGZBSE8*J#(6j&0xCY1gdGG+ni)A#D?TdY$)_hErk6*bA^<0G z>{;Uvso`6n5wLWM(wjEh(@UKTes@+5W+zt|nj3i&iUu>&ItAZM18lf4eTSBH)rxZ* z^DzeCSGt?sHf%uX*gSWESc_QCm=i16ohKFMS4IB?KvKQ*#8zyJ`$aXTY3TQ9rm?-$ z=G`*;oK~0JTEtEYv(NY^OEF&I2Z*|^Xms2(&mtz_tqA1`7%4!k3KRW?HRQVpMEdHi z1AS?3CL?K0Pnu2rPQVcM)*(7NRDBmwwd$LbH`pGcx6~pE1tbm#=0tyjZ}58C;HGr< zs9%u(&`kTZ_SvyD{d=3^7+u{>|Qg znykMsCVvJSipXh}YiMsD%c$Gw7lK{3_=idWkv^x*9x4#4-$1N8E?#b4#B%fPK|K@j zMw+EPp?&YAj$AW2vuPU<+@C`9>UuspuDOIttPHKBc2-OrN9(+osn*Pd=3u-joJS3L z2O?!$%WBcx0YBRg*4@`Vc%8EL=y#$JX|sgj1=e+ETK*Ikay`8t zQ8b8h+rf#v|R&*vQq!!3P?(&rS-qS8PKK?*#OX^O3^gWt`E-Ss1Hp9}TC zS~UW)eU%6Q(n1+^^sG0-pv&E0BIy~9N6`W{dGQBqGmy0qUA8COzKuBv zopn*rtT9Kc$85U7U+47vjR)&xirLzZv*2DOH3Lt;$w+&G=3B-i~0^M;jLPVCL zxjiw~mDpB+6XZF_$@XT4^D>!7Pr=DaWN^fE_#%VtL^T6ClXweWE%lc_3FIpzXQ|Kt z=5Nryd~ZcfPsbXSI^IpDsRj)2yxw+YYaIc5)X08zm`@mY?K&3Ib-8oIIo6Yr6r8>S zAYQAuo>Pv=?<1ikN;0YEAtetAt9uthz1L+IQ5jZe~;Ook_a!xm714O5J+T_rR_n zU3Qs-W)RhCAgcbK!N%Hqz4W}H4?={Y9-b9*`NQ0M^AZJhM>ZZ%fixu}XlGpGCsmGk zG)G=i?mE>hNTb;mal@_@(Sr<`|i^Xra~Mlkz*N9wr= zAsqUmT5`J-mxXSxd};A+AM-g3iECm#1l;<2|po_Q5tmm}+mp`3N1^UxImq z${byd3==&LmobOpuDb|7%`Y0qYc_YnAvSaOs}(a7Tz+q1A-MO(ze8r(A=DsT)Z?i8XwT*L zi4QKTk46_V>Z=&Aly@e8>$*EHZfyzoG0CS9g)rCN5X2Qw|GCh4i%~YFhdL=@@oYAJ1Fxd8N$+x23F{kgU0b#TbS#7LK z<_<45nZCSKf+R#J7O@z+n+-n9?Pttm-fGD)UJ%6!T^V+LgL*VO>}W46_?R^I?aCJY zeFPCtyPa{(qW2=d6taoDgHSEMFd|&BUhazTy2{F1fR#Rxn=k&QgkUb_FZe>|ZpLrZ znjp%BPt@{xf2}x@E{eb20B3Gu#&xbJE!<9Z|K@djyoNZcu4k+vg4NC|dJ?hrH+fDd zp>sA}+r&Y_XC|rTtdD0WjK|@ikYap%9A{o==%wv<0l_!Cxl$no^peQ1yr9WZcdJI$ zxPVuSc>qz+)Q>WD6*1~{ll2f#C?9NLR}>I74b&>wl{edRi(wY%HqXAUq#6%YwF?t9LG=}6od zqedGwxV}vU>F?0LMGweF-J{v<22Gk#?hTgXJYF7QXqF*|n~jnO*EC$C4Zbw0A&+?5 z&x*F9I76=(rC(N6yzMwET%>QJBAWQX($bPylpt!!FXhrNXJ|x>^=2Sy zx}$|H`T4Ij)iH1O9X(&2ox&%yt7SiHY|0j)(J&ClUDuKaZy4B}cc%Ud_7QVN(VL`4 zTm9bNj*wgi%(did7@Or?OFt8Jb=dawKU;WSEd@aT{VI~Y1JkC~7s}@Q zp8&W3-4-@y|C zd{)0r7+&RK%>=OeP{m8$W3ktM@Xr4x*bb@toEveHQOfO;)STYNGpy z@dEswzad2{2^d^#-ocq9E+E%ptZH8t&cv}tGzW4813|lWfByjt>}|i&(T1gkn~*g$ z3owPhAB{zj{*0)%yW~y|pKaIlTe7eisOCSTbj||9I0zP%7K)Ag3tU-c9=I8kZi&#jIVD?)pZ zJTFV9ZuhT#cc)$=rlF-RiI@BLMK#fL4ue_M9+^~P%Z4)VA#ok|MU@u7RN)mWfGHbL zu$+D*F21lsA{++5OK2-x^7{sv)04=~>UY0>?Bs0b;+p0HGUSZ7c+8^Ar2fo?Cx!Dv zP@F&@(aifF8P&8rkpeK-r23uJq&b^;Au(j-KuZk{RUcD_|FsC2bOAG&2C3YK1kX^h zg=DjeK{&1wt~4btVRjS}8r)SLapd|uZ{4*lt9OGSMb!PWOT%o_34sLD{QRXOQ?oG#HZnKgOf=`F z#1)SC`#sCCPV?v``IrXFQx{L{@22bTOx&R@f8E&hZ8t#yFxGXNLL(@h(YqU3;|usE z)Y%YyHvsAj3stm@^4bHQBV2@DI%z~o{W)FAe>>%VD+|^Za}@AuHrT>~;(s`+4!1E= zBK=KTo;CmwtFb~*;P}dDvd;wx*P3E^s*Cf+0TAag%ZKc zCT#fhkeDUWY{7sqS-r#(kTYT2D+eGAi{w?<7pG6CK%gcm^Wn?VuNp zD&$mWcNjgiG%7pP-1EftX(?;k-u|<}Jea%vf0rg>T=p?kQO%&FIBD%9Z(4ibvF&Y(BT?jFQum zc1Z)81aF`DG{~dM&GHSFJxoqB%^mzs?;Y*RbJ%VoNb@VQa6k5fDUID;b7!)SkTU0G zm9!bFUMZc~pM1&Zc(20ipk0;AhduLW-C7Py3^OL;4GjkPa0Ww9$GL?qy5+U68sH8! zS;$L{Bria--4@(SqG2K!n}SA{t$md$*p!+Lk*ZqAW^rqg?Aau{bKcB?m)6-+FXU8af)BeXJPxGcZ%5 zN54-S5UHXFTintnqhNU<-9tycY3-4Lm`v!da$o_2$%ipKn*N%%ck%*t#pgClvoIsS ze0n#ai};u?KkN}X;qu{8BBj>C($ZzLho^$TkmzaOvD5rp7MATvy7Pec1u_gN_8CXz z(n}uZU=A>O5N%IR&UeZ~#aQUX1cm&Veb3)3AHuwjIE8!yb1Q*ID0N;$d%#@!FqBTJ zmkDKUwQE)X(@B%M{3jHtwIs$=4hRHxpci460MfAy7E+FnA)ZfgQWAy2s24W)gK?xt z9PXv>=$!Y3Ea&5|1AjToj8Z~xIgX35cLk+H(VGFk%DclO`^9}4VbFoWuJ)xiO}QjH zcD#UW55PrxcbC18phWXX0kR0oB>2GOvz^5xi9ibbOb7e+g5xalr8+*xN0?Id8Z753 z30Mr2jiHk|C%mZpLl*6`+wm0lEa#ChT@fOf0%c!MTb1y{XXdK&)sKE;Q4TU>k&%dCSYf5;spQV>)*um z3ZOG^6CE zB|O2L{C;HBqoh0j=j$j;pz0uTrTvP>+G++5`B8)EFnL?SKgSn=e3Ep;Fj6vaQz0IC z94-*&cJ>#328u%3kfwfGsQ3a;=*a8;MgrI^ta25WgYY-&aAWYwOnlo{AhhhZdklCf z%Qh}cGvw@$2^2PLz<3d(6KW-KfNm4Y*%sj#+WHF)Am}+Iiky*dW{xD$#bSNXmAJQB`)0aoSY|9J>`rkyN2Um1V*7FN-R0JAIOIhy#9n zm%Nplau0ULf%d80>_C&^9WXmJ-ZfdRCV*TOL>>j{6s_#Pn|^4xk0A-i3PcjkIy8BY z|HAEk=_I8vy@)xuM3HjXH(WKvpYcw8<05JMNzPjY5mw|J}ve?FE?8}&l#r+XqI zf$KArg>GE`-9Dx^O)U7CGd8$E4|Q(|{5PN<*9`=fhE8t!iNmF{&+zd|9vjfcw{;4; zn;~-4dajCxx92?ND`D6m;DzGr3Y{WgABsUpB;WzIb@D3|QRZ20I9{MGvaAUj7Q?~GUU?f*tImpHIf zg#~p2pB)>F$X0hV%$T8tdDcd7V$e z=hi&2u;^#bC?;rr^B>c`1{1$D5EuCPiwFBmY1_oZz4<2*$)q8>U4n+ocYS!$=f?WPHU9tO3QA6UvRzHgg|W~`fv z;NH!f?xPZi4g}Wps%k0z9A9v!KdjW;gtEDw+~mdy3}vs4igHQYgoQ7%Qwx;=$4#Rg zKz=~Kwc(8RyIG0yV&I_~v32$9MM3$+=Tl?ut75zz>U}pvrFmBVr1u958w~O!Ic%EU3C=VnK4dF@wFm2GyS4CgR`Ni`pj^dN@n@ zP)E|i&j|5iNN#Szych{4#_ZXUACPsDyyiP(=jF$6dLa<4S;OLm!yP{sKO0tl6cII2 z-=fH0s=;zt`OMb-hfJ{H-KH_4?*ddtuw*0B_7;TbM^U7%<%uCYmDFTqYLE~1*=DgP z;G;l3VmE8_#p%SqZXUsH8hy7#;>cs;@;!U4nYLdl-(*y-#J?jl)Gjf#Zg=98N4kJx zyNM`v4Yeu;{j=6?ci{k^vn&(zwRr6-L-E4EZ7607b_S6p1(>|)t*TYdH(WkzH6Rv< zQm*@oCNgt5^Ut+u8L$#wec5D0`s z_0dCJ2!ucd0>RTK#0T$$Ex(op|GV@;S4H8{L@#eJ_y*rvUF9L<0{2gLOJO2-=bGCi zqZbeeKRfQfOTKVfZ}29or;?%PljruHK2{!f5EbpmtSXMrUwFQ-@p$Iu$tonsFDT3} z)M@ekE%+!z^`ZPz-;Zn4T8_LEnRoYZ#=Sav(_TDbn>ewn=+m}gMLo>-lAxJZkb<`P zenP;YIc?|_b6=t4D2@w$Ace_dXzh_#TjqxEe#9P~?blKN{cwno;ENiS zYTl;4YFadTrJs-pen}Vi5l+TTR%)srSg+9ty!4oYv1t9Z1bMabdmblV39%Q zbTsSaxJ5$P(A!e82cqA!U&p=yvXX`hr7*JxEgD2d$PINAOO0-~E%RYuryAq{-Ft`2lR=_Zw0#R2`INUFbNfq;6IWp0q86 z5j_fkLWc{o$Z!JNTnSYIZlS9Ysc#$IGO$PM%wk+{mOg9{*L%!aL=R!wz^!c6XKgPP zX$G8ME8OMF>@3J6%JI$9MX%DX1VM+(CvB4L?rvQis%~BEkomsN!b)K(WGgl9hI#y4 zCEfLn#h`vH>|DZ$8Yh`^)Uq|&ztizLEJX?E?z zOGK|rm~2%m$@+{P&~@U3L2TF<*I)WRx~QEuNb$>@UAIc4emOjL$7&enVgj{%XE&jA z#lQ>EBSXR$xHRvlA^UNv5FfiB4fkZY_j`s0aoYw(ng?a&K$lFcz?Q>-U#5jhz@aGIlX=nJ;K5@q>4&O6 z^hvx>#x-O6w2O{o*Cgv@46VCDqMN*qAN-YvR1}|d7n*_^Il5Ea;qu|O572+#7O2Wq z)-q$BiQTNy{OR65SyH{~QDidGwPQSFTh?FO@rjU1>xo(+$;<_}Jw+X~=wy$L$tiA( z1fw&VO}sgoTfx&*U(ShJUoQBXN>hca*??D{cHN>=Z`N#(Er(wJkfa$!{Dvg2iwO3@+y2sp7r8C zjiEm(;+Bj9Oos@bHez;81J$S8s24kFX7Zpee3k^b9b2CCGp<(>5~h{bHlmHf(W5MZ zt~^U`EsLA+mo4pMy?2{8Olwj&UiwtTD$eegM6J84vS=^zD)j3swVCk;2lCKf#VxZW zJLgz00$W0%zarqt@#Yk){Oq4i zHNDQ8)+6TIu;y5tjvQm@jxK~f81J48?t;xIEce5fru?SEINFCb?Ro1<$xcR&fZ@5U z+Rqh;_O03*c|W0F%K69-fzy`Xyui5=9s#KM`QxlP}u@nA)5T)}mLHv-B8+E&!bD z9h+Q(K&IYdkGwLmg?^dAYFw~M*73l%eVntu{m!{&R{hEFE~7)~!t(Yc-1~D|7z>kH zWA^J+1#8~sH4wl6R5>Txl}*Qyx@URY-h~Qdc6y{R0(rBNv2q?}n|iQWg8FBJEWr$C z?$f$*n}e+~UH3HgiXjrL=oj~nnr_>h^Ye3?&tJ|yO{?G;_{1QoZ&`{_XmS6ehj@zFDc=Y3)<% zL-7tn;8>*%$uML0>DJ2n)q~t3xg8LhGg){C7j5e7PfYs11zP-Y?rLS~ZW-4(JyGpk zbTFT%t&xZbia7aR2dnQL+KJ|oI#K0Fkx(?fB-ilHnFn&;;)%Jw#!h znKj(?{`U?nFS@P@|Jmm5Li5IxL}6hV@AJvtGkUoI7vT<%jBR9=x>#}jD;)^rn3cIs z%x@`p_8q|vdV@{WjNXP=BA|cNF?b9>Mt5}6?2PlG-#JCFH_N@P0UM4PzfihY0G8j> zWl53%L$IuX3fr5N-}H7pYeYxV9r_I7Bfni-*1pBKo@`AQb3i4(0MqysR~AOS6*lWh zIi<%dfqEY{5s<%w^}?jRj>uqs(D=~cpk!>l>gzVrC^;j^^=H!XRY>;pG~%_Xlb@(a z`81`wFtY&nbbpWjw3;$p1gHMLMDS-oIgzBby{jU}wTbu@NAY*iQPVtGMBkttgRWdM z`6v_7>Z0~Xb)9u%XS|K3Hca7(hAu%x?dCUO4=N;SXhyyAh*%CPNeA9|2BQ3Dnc3S) ziIT~z^&R2jDvPsC?w7z;jLAIe7D@|hma;dzOorar)Ybl!?n8@SyD4C7j(7n!kgQqL z>uc^lS^2zaygff=dnd~QLjyr-iBFP~3sa8WFXAEBIXEeMkBE?S$PhRRP3_vd;dQ8z zr-k*>DyJ`P8MIBd9P{)*;g2$>6Au~cnI9B;^8QtWkN?y1D*mNL+Nqsa_!7bWb<^jg zgzc`A*W1DywD?vYr$_%9Uzk|vdpXQPg<*_DJee&My8exQw6O%QfVxsnl% z6sew89JY7G?PVk4KlN_kZFxWq`%%bW*Xvt8v|t`I(zYDK_vw+{PZ2pSLKR6?Wj!ht zIuSF&Kt0EIbuf7fINzAj71$iN!VrHl;WG^j%qst9r9H*3Xn%w7eDsOP=lxWWZiO#; z1?y^#6={O}D6Jsp;pV27$wDgCXUG}yLvo7*we*idO(0Qp;&$|DZ`(3;Qc1XU_lxTg ze>w3<64|tNN}5Nt^rnxXEgzM5`CC=x{BPV*$h@UZpF5K|2JHoDE4ms(&RUM-glTeR z?BA(e=?x;Ol`hva7sI!sbZDB{hk6S`AiGLGcqls)^u}fgswlq{J+N0FneJrXuN=}B z*#ATTt?qsCQXjqAjp(0$kz8OGbZLojZYi^?2tL15;uYGB9D9ED>h|R!jzzg*MtBl) zxW1DQA-0x}oYCCQXg^-}c+zre^W;YK{xekg59Tr6iV|G>UDMw(p9A@p`AutLWXDs* zSfg5wP-7E;45S@wqENHGm(!+cbsH%Lzi2HpK@x_zlE-e$Dp?gwoP_6e6%!?7yr$L> zoo}$>isCllf*0~fM>zMg4mdwCN@lV%@}`hAAorF9*=7kL>2tL^W+lxJeSK%%TYAc! zh%ABpQENJ4Dl}>6$us*34xcnU7fD0#J%Ww$^Vf!GPsK<`oxRnd(Wx?>}a#}qf#ao*9J#XCwfqW6?^uwsX_}bI$oHZtX)If=KkD%?@AP^-n+1v-UA0+J z!VUom?V#EL%YA3tb@!a!HJv;Opx>s*1AKwSJACsf1GAK@y=5agbw&i$t)dddpyf~4 za=Xl4wT7~#kn9(ohPYtMOE~r+F6psd>^RS!sFKh8{dS=HDLCT9G38-jjp=Ijh zjULoWI!*@?aL?x7U+NQi(S3Oc35QZ_Ehj&5dJ?7Il4X3uQ_+>g}F7DF>g%pw$3!s$OdgJ=D{rv4p(NJ8{>ihkCxZGdds4 zirglZJ^I0)WcZfOuaHC`IX4Q1dE{*Q9qYwre|HpU!=<{lQ)bT%RRcH!OO{Bsj_K+Z z{w-`tCK02z`%wa)d%u1}KtGZ$2vr6dA&?#~oT=|66kP3Ce81@ROX_`a#Yg_XrfXwy zuSDXB^&R_n=Aw*U@2#hrWZ0p*Sg)M9tB$j390$;UBoyAd!bR#*&Om8)a8htbAOJ1o zyik1quwHE#t8O+p0i!oeZ+5ACJXE}mG=STHJhe@ZP{oH=O=#@}zHOmG6k&Q=Y{Sah znhv&h$S<^3NVA&PL_(n=PXc8z3D}FuyImnU-lHa`K_e8bcg>8Dwo$+$tSwgXFJ`cQ zGwBIPaDw)l-hVNo5MYEHSs-GQgf?gXs;wA6oj&E;=#?&3_zZxeA*~n*||ffWm0qgwn<| zv>h5B=1oC)e`WxmPa{eINuSo3FDjIi>bUxAEh)W=PoxqiuD+HeeQDJX`p3^Jqw`s! zPP9%eT=mcN(wU4?(Mhe~_x*<0x&a{o2=WHy!Fm1zxsmyH%cQgihgmD2O+c~c@5>RMv06TKivL;8XWtk+%ZeapmZc8lYu$Az8$ zsh;Af(7R(TWS;`gekO0Y^#_3pq!TP(%^MJL3FS2vjKbWgdfU1_96z;?w&?YK_$uwC zdn!Jh=xPfpWlpGp>LRK5xSDzIztCL$b0VZ0%!6)J^EQU z!eZA`_6Y$zKTWoCfXnx%^RMd|)NL34K4_Us71?I&S(JkBLNl>gzs%<+%sx{RO(ok4lCC$7GF}}q(9pgb;y$n*35tyc5&YoUm&SDp{fNfpf%rbtso4F4^W~V8f zebd!?!CJu6^52M(Tm5v){~{0Z(C5ROm@xc3q-Wbc|vJukmtr2PC&L0-iuRn`-OS9Qz0b=z0lzb1^q9)%{fWN?0x z%-%uL07^$pDkTfSE9vwHsX0Ct^ehh0ZJCscu_?6okyWXKwcPRIQ`@?-O@ehc`{0DX za&^b))rUQg{Mx?lKXZW!e;~ge;^ukMm(x~0Mp1OqT(HR7M( zdDzdkF-AN2OTwQz9}gmzmC@CP3B!>Aap(7oo_dY`S`F&qy>FWKo7LXL+i5Cgd0FOJ zA|yJ&&9kX5FPquNtd32lqI|&)`XGvGYT8&e{tla#PU0S;j}M0czsATHHhR-J3rCEX3p3aQ~~0+A=y;bh*>NL2L3hBoCw{353LGi|rHR zFHOBJXbu`h^bD2_u$p&JCOySOol{bngVvcPIgImr4mN!r_EY-U(#KrzC~CC_WKJmM z@62O^3ycS_m|MW#N(@D9vc^VQi1Zi{@HAeH7ehlK%Vx`6e~&UDb}|!kspPuke(ktV zV&wv0PEXPx9uxZlqgu@aJAvyH89K_p?7N~XdT9N|t}04Z=%w|yN;*07MS?7beW}DG zWg8GfJ!DrF6{m0+nAd&j9$*%~Bk}TQ>uYLyire46F-kW3oc+Dkt)F|r?>y6P!jD%H z;Z(V;lx&AFn>y!~p2XeXAh38Q zPqv$YMqz0o5am2;)sIb3V#ipFkr$ay9bB-`D|W)kFH_dR$U8%~I9>JV0lGSc|0#9J z@E&Z4;}Wf;otH_H-GqEnkE#Bx9T0)j&G^Nc5u<6z)l$(Ejp(`x)l_$i%`K7rz0wAb zXN-9uhrz}1NF7hWmx7(uQ{M;x>`+RXlv$`W&QAaLLL17D_3 zR-CL5s_hY}pSz{7#6gXiKA^gdGspJ?w_SP0i%OPW4COid_mp-6Zwlm+aagI2-Aoj{ z)>dTam$@0h;4i@3$I|6k$U$~GYoomI;@GNczpl=XJeBdYDAeD5k9O){N;k{BYWoH% z_$^U$N=IwG7u!LCE^=U}`TbBqI{lzu%d0Jaj6ppwvmisF|CB60{v5_) z)=vgSbq_Hg>>k;O5lt`#nVvnS9s4EkB zK@6DCqTsgKZMojU4$Gwj!uxfi4G$FH6(S=qM(JG|ofT#6rwCJXKf5Ht-Rp#Zx#3n$ z{*RUIzv_%8VEy!IqPthkx1~^DcTVpE%>8A*Cazp+zqJEBJ)i8t^{PA#cx#`OQh1@( zi?z1Sz0@~MA07@ph@G&{)ip}YUb|&~$J^XsUp4(o$mTVbi4UNA)1=K0VhTigzOmgo zjhjH!gr~FdHFyAEPL zxS7y^*bgm!fgoT)#6=00XmH2CVykO2>M=M^_({+K_$5e)l@re)=NoFTJHk`?2YzpK z_-jC|;r+2N$=u2O1*nXN@~TTuaQ+N=dAI~r-5(jEYcl?70X-$ zNS~I}7VKy;j4~hxp3O{$t|9VKXxOU=5GWW2)Pu-&?m&RIS=v z%y$_Vf@KfgK~{G^S9Id3K%XF9rSw0mxi%PWV^#E4w?l1d5bUL43>#y;s4ZhyL5Wl1@Vf*YCwr#l$UW>$XWQLyq3-%ho`($FuSZE+sDxA zPZ^mG5TuGYu7CNBq~vl`USwOg6RZ``!M^6D_PiE6ux}BUEfxxe5 zbZmUQ;@26n4)mK|YVn{5MsJl~O0_w)AnyI?sThRs=HJGjXD96IW(By3h^+L;kY9kL zIDS`qg=n*-=JWGpJFzBW);wOvVcm2b70C2_wkXyp*$%`j>HEY;()kvF$?*%vDJ+S- z4D-XbeYk3yNDYkM1k|(){MKez4$@okGY2l7kUCcMPT=em*gr(jpWIgp+E6fOqqZD4 zY0ESJCgAKIN!^R0MRgrg_oEW!6UBE}_En`KP7f|_0 zt~7IB2xJcC*yrRku@fpXs2V<~Y$+0YZ`X)+SZJ!_q%d`{+1-(8&S3aM1W_EQ^$X_y zF3v22T2sW*hn!LTqqoON9#SQ#?s^}W_W;vv#$^sHTJG;10fEsSnEjT&oj~wS6D#B2 z_6Bf6`TzXDKRy6{N_+LM$AMRZ*Z_fANjf^uj@%^c3&f2GK&HNlGiq>Uz>_rfPtS$m3v-_v7z>ihoy>Pqb+_FD zOjjl!MtZmV9-e3S9}wsT25bL5!He~71q|drvjAWT$Bb&g$Kk%(I7{!nzy-yh^+|07 zOUuj4`Szp15;8KaGt<;nTFJxNrR}=(t@o7k+eKb)Mw4%4$WPRzSo~v(>AIKGa})95 z+tN$SWawwwqxsT@1GI5S=ZFPKo2P*e+I1s~DM=&$TWv7xd1hv&D}C$!=J&Ipm;-AF zXoa>&VOGs|EM%pnGuB-4mO_JzDrlKzSxzH1$wU`UYyu}w&l@CVWP+Yqr2|cbWS(%q zJx7KD&YHUpatT(LU3#3)9q%$IW}Su}zPvii!v5^j)OGtxde;g>wrASLlGr^Na`7J=M$pTW&dV|w*3()tt%Q7Q(Jiqz5 z|6Fgk=hqfgZJEs3dF(_&z4 z84aDkf@$kR@yS?&a?4GFq;_j!#g!)Ul)T6~=*Va3ehu07GOfR)n5CmUYi`eOhW|G% zq*aw`S~PGy_?_ zpg9bI!GG9WWeqzH2<)Ii!Ob-_8bC#I7}yJ7|I-U$!1`50$nKt(E(Zt^gKzF~!QYeB z1G+IQE-sG6r(SPuYIyj`bmw|-aRn{S|HBcoO991=`x3l9yB2^4UWBv+7}-o@+*0S% z-X(jYc9H#CB)#UI{}&JXr$r^yza(u;SIXHEHlw%xWXOk=@uP0z0%XWL+~tF=zFD#H zHJ3=7{j7B8tXoO(7Rnoi!tVz?@m)lx z!jgMkSe!WTYnx5D+Zv20tA_x*Z7psta`R8#0QI_y#0O~=lLoQGN|Rg*&bqKVzT+^S zvwud0YShJz+0<-i?tdg7PX-+ZrPF+{k1SoJXs8>?+Wx2B#}qI7t3ls&PEETWcT9o3 zOq0rHCB*I$`3zF;{_WI5Dlg4B%D-kYSQJhn;Tbqv$@^XpI?oIDtU-e?E2DK6Q9{`N-f|`<`8Z5glk4nj$7aw6-QzC&vkoP-Z>1&I?jHCI=4Ty;LCr;Dgz`PXT{TfyenRc66GVc}zxQ!oF$qB_g( zbB;N-D|%nmX;#mrd+6g?uHU-#-TyK0SX0!Wdbn9(;>7yu)agEZmR7CdlidP&&dEvt zY$5hT{-CsSXa59UiMR2)lWF?Urx#$ME=eiMqTrwDkLV`Syv%`uP93xt783 zJR$W}^{+MFt*^KKB16bxP~9fU_!~lUX8kLy0_4IUH#}fC z`#Ax--q?elbHP1ZY|n-$?-6AIEl8~D>$1hpDQXD%r|3!q!O}N9WBytj__Q$G)7Y@H zrthn#@CgMQJwv33nZX_rhG~h$)%iHA;`kpVGIaHvTLPc>sC(SJFebnSM+0GnunH{g zy-NmRYy{--qGosBCn!8><$a7)=Ysje(k(V#ByRu#TYJzTr}mkHXnF1Tvz0Ggk91B6Qid&L#Y3K$7~|cevMyb@w=6zkI7W?7}d^ zy)H&YofN~pYZnpm%}&WbZDAjbm6Ym~1q>ItOu-)hm-uWekm-4_5sZ#ltYR(az4!0TK)p@1#%Q4YS7 z15nxkQNh!l1$%0{L|W?hqz7ddMY$G#cN>z>{w5Fn*iC8qJJwKr+m9^0t}eC38+n^m zI_k_ej8HikHE*y9nrZxRtdxWT{sb!djmT2R^Bd3#x*Kk-_Au`g_3#{r2?UJu{#|e> zU;a&=kj)A@Vd#@jRk)>I_BSK)@xVyS`n5-@Ba7e`vv{v52Qyh2jy&$yj1oLR8E>!3 zW@_4BbjFH(+R3#r)Ym=Y2Yalt5H}wZ__*{Bxc&?XB~@XEd`c7YXIkV8;Chuld}o!! z9E6P$X&3!Mjk}1bq@h`h?NwB=?(wWovuauD*}&#G;8JDFe0zg$B5ohGr`$Q~u)~~$ zr7Y1DE1X9i{!OVsf()(1*+kRZgao@35Gn28j{?~Hip}}2J$hb{Xf$$MoJTqLQ{yW- zNLFoQzkAIm=m3zj7dUywTh$;0Ijm|10b?k>5aZ92f>u!V@fob69sZ41Hez4g?%O}o z`6R42k=L;x3hd`%j_$e!eE-@-f#!{HZ0D^J7n8$vC;vhRvw#hWb^`pojgGrQBtK=f|QK zSj6nnG^+l#?V5|6F-s2R?dz!W`OR6CKO#jdN>9@@?Hva1K9x@Amf)=|ELpZq)>WG%g1Gc%_pJa| zEQyR7J|OJGmzLNBA*K2DS_7>O-PZ=IPigXjO|&qvizwL(yJWjy0r+kOkNH?5MUzJ8 z;4Unn!Ccq1(P@7nyGtTZ@2umegO)N%n+g(F4O}Or4wzgg@VW=^T`wUM&)KQZ-k)0* z$9zXCm(RW%Jv}-e!I_i;4RJqE0XPzwkaiP4t(o1Rd22UlA7_Gxu#wg8Ju6;m*Y0+(<-(_%^vt!X<)5pJo zJ)xRgziwL0{!Zb7|1!ic>QlY4GUXHp5u^_#R7rVlwW)5e9@9SV@K*1tb$yBb4IdHK z>HD;15k{elQ7byH7sEt?Ba^Ro+X(LCFi(_&k|~Q&S$4DN>9$^tfIs)ZY;~(uRgKpA zWR6hkiKh}6&v=Opp@hg--k5VNJnsa{hR5xZOiIk(V4%_w}ll-pWCc|dHxGkk}Tyk?h?)3x`IeXH>dsLj(#*M%tb#0*BcFF`_Bn&Pr{fG+AmNF63@E(b<1 zJKDdLTX@TnTh_rpZX!4fBsBG_QZ^CUyPmxsVra%x@^J<*YodqIlliXo(4-{YtGEo( zhVRc5+dcnlF)IE<8mfu?oK~_JRm6t5!9!rp!kvd)4e>N-ca$HgqBRXL91`$#BfrJT z@&pIQ&QUkkPWPbZN6~Y@CZFLiv%W96A>HHuj)nd;3xXhSjI-8R*;4lVw0eb^X1&66 z|1U-8RmkYrs+(u8QiR)XPhZb3){ZFR@s6N<#%3t2A=K5c>CS z(Z-F~{CkmB3%6*YUfyoa7Q;q84tVC*9SFTe+8TOyh(j|jS!%S9sQkKIb-MW6+psr6 zZB;)e!MRM@1zeW9U|KkHq!KFoyLSxjL*3SP;UQ4|(<0_uw(O?bmlS@)iI`pf0yFkm z=Z<#~zV=Y^Qi#Z&D5Dw0QmTk?8Dj)V)oc4Bg+Gn!3?J(7hNQ3LJyR8z$gj)JcYU$= z%ft35pczWL((>@gokjVJxu;#5xQ$BXi%p$SBQomGelZ7_ut`Ir&9XWNx0Swt1kaRE zw``?>`1fVWS2G#?)VLi!5kxi=@w242_cQ7_1@MJtDglyv>bJR#HuXWmrgQ(wX%UC> zD1Fjo{;K@fzvKBUOJ2>Cm7Q%5hIm>4T^`||Tgu&g zi-xw-%c+B4#KNPDB!iwAfe_9?eel^w^5dtmg$JYB8zVg{oi}e<>%U;a6M$@}9p(t%r%*FHq}84?gc4G=u#F{-jhS z;tPH1;I!SWEO{AdPZ;5v-sc1L5(J|ccIZsTshvD_M&vwj@cA78j=VBlX8ujvU*H~pU}GmX&QxB=#}0P`J}GaHDZg+=FJRx~nl zZ^v-sZcky@p!0pAM;DvrSMu-<5*hHt2y25p0t*s9fCvh)6kEA0iWk&Wvb+->rICdK za2JZdn!Hg;c^^Wo3KkNkiP{SfjPA=BAHb_h3)z)jvJ^BXMZXbDY-lDMWhgTk5%^0Z zdgpB%g`G@pAT55?X0b1s8cdfCW5?$pavWSj{UV$c5w8Hk`zKYmJI3b_=Pr(IL8U1}va z8D{y^?NK6wJ8=Dd74b=Ey>s(EC?!QmAAveZ`<5@a^$@Di<+i+!$oQL5Vs>z3NIH-C z{@;vi!wpePoD+?sBX%c>19rSdYixu0pQx=qwKJxR@1D#rx%?ehhVJ>N6GDo$-G|10?H4pMFHs^I70jT+zX}5*AE2=7Yv`aZ?&w`Un<&l_>n!p_C*SgY+ zAqL{c6~z{`3fiOqx(Uz*t zAfXbbBjaTxlI0aDy3cFxSqaTxeM036S2eD$7xS3liT5G*UEBJ?-QJmv6oloi(sdA913k!yn`~p_@R<8$908i3 zNir(}V=0=zBMre?TxO`&uNVL>ivHmQkc@9=T=34V-O=a4++a|kt?SpcCl@18rjekG z*JsLNGj%kmeRi+UfWg|&4!5x3J|j9U;mjM^ey_e{NbaI1mCp%Z2;!nD&^f40`l6rF ztV}A@mC$NjcZ$Cd=_-i)_AbOT9>J9oo&Q~+RNs{d#{z}$MLAIG-BWq~UTeMHSJa(; z(M`<#2;ePHSd%P42;a&JapP7)Q~g-mUF+#rY;RcgXg-=alcnS55i#NB7@y1%JgqKl zW;qVKqx?+(Sd!<6Qdd@siL)myz{pK-<{gqzRt>-wH&_7tDEb@m9=L5)KGpk_tf5lr zr5Rn!i=qFa_F*v&tQkQ12>o(S`_j;Z{>nX6b3ga%tI9xa2t4_R(BX zP)=iUP4)Y z)iB9nfMn}c6ZcPNXp$Mgn-1p7Lqh$x_z#GbO_Cp?wD~_trsAG(`=+FBAJh8$M&{s6 z_W`jo$30wOP}8gmI?&)wf>MKN$`ojPjnP&n?xx8VqA1$X~SCmTIe(UJROh zg!*$+Z3V=RwHb}BQk_Ll0`BA^KhCT3tC_2yIM|1;uQ);ky%mX65-0Lxb(k~uWj^cj zxwGq#K9c=GUB?v7zjli?CTd%#pStvV-yV^yz6|pw9&arO_21~K!fpLVm)37&4aVyx zeTH#Lr!)bcVA67^!zgKjD*m#ZI!IUexpF=+2S-eJEZ(sFLNmRnz4nuR_1+pct=YJ| zoUs>gc+HHX+Ux;Q92Gmrs;wTv>%iq8^G%uejzFf;QV6w*%B9@E4)M-`NO@$P{O8#f z_^Nnhi03dx3q(%70%w~(qa)I)t`{UAH zoG^79wy9TUAlwiSo}z1b-jt*@bG*kIBe#~n+amz1-AkY?fE}bRUzwQ#P};Q@iln_jLHit;N`d@mTZi*W;_T9$t}hh zxzfO@L9k>IK!cQk@mN@6>>gvJe)yspkur@8eLVlQJ{9r^*lkXhqN?}$AZm}N7nwda z9bEubow-Ov3Ud_vW3k$Df$#bD_yX(}tC>%4I)avv1XB786e$g5grCd1^+RR(ThS*! z4y|1Xb3NVkIQ1L%9SuSU!!+=Q2t#|ABlw8=dd3A9lh1vh;y|&j{IlHR{Mn(|YYXBp z3469tqa>1m{#i{nEe{;M3Mmy$b@NnZ1y!&`x32O%{=RPJaK5Kui#Q%gCGQBy9L?`S z8$UzXmx8~TXOx~dUl7F(va6RN zD0dLz_LwkP|?ZCa*s!rLij_tJFL7^I`3utr!ajF3Kg?B%vJ ze5%;)s}Dz%u&TVVGLPK1&fsoaEtPA&(F&!DI$lD@b`l@X&SZc^YFr-DyBDFtm?W@D zgKrX401h(;ydIp!yGwdpG3V7V!qW5G%INVAUpWtAd-K9|#u(@kE99_9J}htfgeaot z4$Tc&u1^xIdW%nH!P9%>#4$!Aj!m6OLs`3gv}Q#_kDjP$Q3apVT5WNH3}G|n$}0oB z4H_ANWtst8Nen6DkxBQ9-7X^5zVfcbKBge8wDEbZ#ki((?@OChh?|B=%~bw_WN z-`xL>eXCxWyzlLOd3vHr7fNJ~#fCj!HIA;AZeI>X$S z0M86NEjtI@jDQ|3 z;QSO6__F3(2^WKp+gU!xbUo`L9AMic(qr4q_zOH{LSMYMT$BYRm?cYOs6s!c*L;%K z`+PYB%HSz(%=yXKi`*Ts7_GRtA?-5RoqLMyOsFLpHLHX@1qc`nMmdHfEzWAe69wRj zx9*tni|U23i$g#+%x?N7N<3GROUt!5{4pFf_e=@JihI(~QRhcoaO?$TbS%0&i>~PM zRR|vtl@`QlQ^ia8Aj%`Hg;Sb*6)wX|NAg>t`6XwDiAdiH3}uh%2%^F@_^b zN3wuqz5rfdV;&J?HPQAqYKg|5JRWdL2*4p83;j({(O2@;3=_Rn(#tijL!#LjHDs@_ z^{9{1OGRpzB34P1Z+PFl^hnS01y-TCF;yPvqA{ADs`F!_t@%cRfHT79)u4YB^?Gs8 zoU<07QPIa?AmUd2)tU4w9s1-(=##|71d!ENhU);kI$>S7V;`>{W*xPMpLNT3CAwkh z9BB)~C39e_7RVdmIhd(a52a6o2*Up7$om0q5+9hlT?XwCZ)SfLJbtzqe*H{A?`;~0 zf8W}J=89EOmA}4%h(cQfqI*<(m5^NWSqTY`Nl@&!+ldUbOT z%EkK!%;Dgcq5zJ1DNTF_^B_mSuP0iy*<+`-5(d2p$faP@K97tEl~CoWe8;J}$CL;* zBgI`JEMPbj6;PF9UeAaCe-1hhPjN;1w{mF=ZOdB+$_%~EypHYz^v8oS}`B{$3 zO0o8Rc#fLBqk-=mGem`mMsLyeK(q-HYQ zH$7IJf`o|R2G7S+z%5LB3I|00bT;|}xKSiX$^~@1cdFKZvf1Cx{ERXB`lb4SlPHjf z&{>NSJxq8@Ao*ixsA1eLH@`Sbo^%beK=HNf-LNb@^W4DV_~tudL314E!KJzzG#l); z&tv{xFoVR6XnvWKuCrh<98LD?LtF8;ZI><&&DCNel}#x!NXTBaD6pqeG2aIn{TuAJ z&8U@`x;HC85*NV6IlYv7|5Y49;m_dbTc?$6$H`!g3V>};HbD#OgiOAHDX{<8Y|mdI zpa{z7is(S#N6Gwd`j1%vnr1GAK~R|ZjT|=yYM;t_GsA0rCIH4<9^}}##p-htu$14b zX&<%}-Ks^4D3@%gT+N&IuyAD1BLzZK)f0JF@G;H|kjes`H+e+vIoq@}StWh?1%y4+ z_j;&<_h}LJ=JQ!y@hF7U zGIzy|JmQv&|-qIcp(ikH^W4?8ioG;rl`RH2Oj11+Vv{!+!3Yc1e9t5 zv&o~E%|IWTL;zl^5tbWJ*8@&upNC^}vQ5&6KmOQnVdsmqiiV#thcx&i!}q+>$#QRC6m>Nuhdj3YhVvttLY7fHDW2ChP+x(ox%`e2+zOwX zl<3k)Mz2BVJ6BVxVd}?W2oa%NJ69Yon+x2B13LaPH$ycuWj)fyvL7qv2!A}lL zB>)HxUZPIPWV+P$`*j|P66o;hB4dbt=SA*aP6;cB@_IzGO4gd6WS-Q$ zR-yE1gtH|8v@>#<7^(TqqoBDB%usjfp&myv`>XqpLxZeF{Bv;tw)Dx+JFj<}hepV` z-z(^iQo|lRwB1qIvF-$v*)_LVr6WUjn|l#iOBSxlSMoN=qp$a0`ib{Ck0ByzNM$uV zK1eF@Ti9~MGLGDAxFuym9BEdV``vnU+&W}EYQlVa$TEDU)o=709(lZs%G>GUI?p8C zV+r6cVEpif5NC#_eH1)^U;im*fT#^6WRmUgTvQRjv+d*=rz%F3NIH8i7)y~YUyE$| zs)t}PQacXkjKIci`j1UT5vTQtfhd@K42X0fgE>Xk##{|~cQVl`27(vvF(5F35185D zf9o_f_=^%rLuOX0a1Gw%?W5bYS3|r%JitxSX}9+e%)N-622;U%rB+^akd}pzM62u}`7Db|$3YKg?a|p5m;*VT=@12Z z2t@ioLv1++esJ7LJsZVb6i~@jaik(SLF>9?+Ma{wBm=-xa)B}Xg3QUomUylAy5Sr2 z9X{O=hXa;o>HH{9u$fd{wF!Zst*8-$GO#M(1R5?~F767ws?G$y4fP5P{CLOC3%6Z> zA65UW$_;z2s}`qWp89_q{V8*0GCTO6Cw{(>+!aAx*r_dJyleP8L%=b?92c1V3d&#H zj62w`!_E6HA)d#^#c8EC{{iD#RwIffQkaYU|C*qAla^uzhRMXP|D+2C*V6p^&c(vv z;i11ux+1~vxiJ0u z|M(Jjy?n;qu=mH;}_xf)5h7>CnJ4~V`v zzBOsc=rxlAd60-6>0|26BQPQ#XSvoo&1atu1m-e*D!|!BK8zBK>nup;Zf2wiO+@TSq$o2>`Ag+mj!MrEs6|`tJo(1yaVO2|LZ*2q+Zi2RvNsc;XjHN zp5;h6C+=|z=&Ct{#XqI3wgZLmtx3hiKihEa{oO|2AfV4+15y=`GLFG961+8`U>b*p zZQ1BQCvghkFD|xcPwkHPEY2j(e$Srk9e;fuw39b`wujmm>ziAe0@`o>V?;2B{~tp} zN_nIzL_)kLqpADy z%-{W6duOQ$4E`V4H`hrxaAQAQr&g|}j!Xn!4LGKYfWu8zp^}FUBxfv2wu1LjtnPiI zoP*v@bR26N@Qn^wXP z+1<6F224QXR^^D;)S)h^6N|;=fI>p>CpPi<``NRrzr&1NIhm%{Tqit~`C~U-Cv;Qo zPu-u?qYGKPovOzyr^Je#V6gE)K@Wq`iHV39cgaZbPO;{BWNBoeO~cDw?*P=;;1%eL zB9zw%i>JJ2mnW3tK;DpUio6P?2zi(j=sM}AP96I=8x#6xt7e)qoB|#-vRYLQ?&lz^ zM)Kah2DELZkj7{iZ;sb*`(c;U)4ut*j`Z(+k{5-ZhizX5>GjepE$kc&epHZ=lJxvT z2C>CLzb(J?BdmoGUgSYyY_{4TbNo)#+eTBvv{xrSIR9!e&q|lIDL`>U=wIXKZuO1V1U$!N8Nl!a$qvKi927 zyvbsF=$&+E2_3S@)U|Y*?TkP^5rcOps6gqKw)sxqTY?#0gyBQc;?7xgrhPA+w5{=Pfw z(%0xoA$59WQC?lG)jbNUm^OlmwN0e zoWT8>{tF+L_l&549s)TJ^WvP{8aYkk=|K}rz5w@2IB`JLset78F%K~@aX9tljT|rI z`xXW@=Kt+BH|!Dg1&#t-#FZsjDJLFxsvg;_+at$O%2!dPu8>52D*@>((ghaU0I#45 z89LRMOeyJgrs%tS6lWqQfMXRfxlX#nFu$s6h3#8`*AT+2wSP!bu&{V#1KcP4TkHdN z6s#fjPVBUu5JKm)a9G{YW>H4hs%dNkH$+g)DrTrCP+-faG)I6NdR)Epb6#cc%T!() zk#*2Oy(7PcfvO@FIY6X0V0=*80ZF&w$YPC9MVwW+b`1<3qpU8+R<=>4rd>j)BDz$V zKW)I?>qq_e(a(9Mt6Ap*^|7#<=RBPMoxp}b>WL)dHrG+B6noTeh~W{f3QR_ZHLrE# zXtKdEiAbH|1P;o`!)$_qqQribePQwl{21KJ&cZ78eoji3y9p7Te-(wQx(S5xF379Z zOrMd>mJTEXpQt%udqHyu1!E()&7zpuw5e;nCT;2n9905)jk!jTb-E4}I!gXz4Ws{- zK8u|^I+w?m=WOh4*e6)g?LT0YtUkPX9M7bHuY>{|h(csln0&xrP+8fxk{NL2+O=zc z?XJ05#_5U}o|VB51eJQ1*FQklOD++ps0hRof|Ai0Eeb+I2UvcPrqKBWm)>k>sdhRBcM&GM)*{zBnak&~8hBrWh63Afz>T z!-E$3ky3KP@2-!JPg_(z1OzYTFa5P@%Nc<7SC*Y=`A$QPVa<7dt3tM!T+}|>Ybl;KdCSPXAqD?DW@(paYBp7l3yFiht*R9@9#wu)L@tp1+01Xo z%{nicJbq;NGc)QIogRZ@@tCE4*D*)JI~b>zqoeAopPW3Hr3D73F=$(v_$g|*AVV)AGDFa2_nU`)Jh&3^| zUh9=_SK}q28#XWcOE-I30{<&em`C@a~}hO})<|B;iCO|DHj?3@VN z4NFOvN#LH1(PI_&->s>FWl6ZDV5{NxZ$6{krI>Qbjkt1tJ)0PM|7@>yDl%_agC`kA z?X8eE;eU(NVNvs)+>^-{plv9U$^ItmweSJRH|}tn{zsk2+l9`jQAL3k#hWr-zO>=R zB;vk;Fl~5$=DcsfrI&BjX=P1+cVbb-B!L+S&cG`4ZG;e*;Liu~LHSlQDmtAv(5!D= zQx?dbCMA2%3qZ7rLn`dy4(NhblE_!}(5!;RA4WBM85zI3Q!1C&hQGDKN^XfcXTCqw zS$L*%Da7s}{<4Qq#Y2k2UYh>_-lAJFc&yp)>Z~U`s2{G=da?zB$_5OL2^1LyF=5qU z4L`m*XO8iSSxc0--^FI9AMcj3e@H1&@Tq_Uk*Eus=DPvOEdZM6-r%>6`(@4P9qPfD zaw>5;D50M^t5A(nc{ruM{8I2#pGO4s&p(~k#ky#$`x~@RHLmBTx^gunN?On4AoLKb%*x-EkJO+=uIg+e8FIyLU)ULFN#xB^pKC37 zD(tmnMkv6t@aE*?fKs+(j;LqA~Sxw-vSfRtvA-shr(2{$f*$p>n{11@gx8L5r;u zCfEvh`+gy(<_lYP92a~K#^f_+2Ddi(U3zjhp(rNpZdDa0<8&QtHgYaW&#ZMZ*?-P2U(Bd0$dT;VrzvK z-wQV|Y7}J3C|2UV3M@#cwg2+|e0hMzH<8ZGQc8)y4+nE?5k+XnOP`aZr(e8NE=>Bb zBn8S+vh-mv5j2WkHM5|Lo5Z>PlqB$-xdgNUfZN2}dJ8T{JVIw;4R`y!9xGU7GG+_I z9{{0oLd|Z66FcG<<=*%^H970bX(8@j7wF{Z%RDhG1o9i3|G87Gv{f-7ME2ic<*e&X z_+hbI?M75kd&|O$FU~G0T&jaa7C0IEowds=0nuSiV%wul4=o>>?Fci?r4xxC8WP^^ z`ko@{veXQZXCH;vQ&-u-^t z%A$J!TC=uCHD3NLZ3v)*i{M~Qn+o+({Zjv{f+mWvBxU(8acm0`n5>Zh>) z?mSlmh@+pOl@u*>5Zg;i9cC7hk#eR(HgD7L#-pjwyM z+(1zjO=Gw1NoCO#uH&)bb1ODFUXWY!w>AI}NoIwseW`yFD5rXH+UHh9j3V|bAz3=% zlB-E>u#00NgD0BDKB@8S7m?9=&#jAnZ09SNz~;=p0bt3Rpv1d59}l_6uUy_d)bi6j z3)bfI2;zuTqK;nD6dxCJsA-O?Kx#(G$+cye8i%F$Jt%Y(hkngSNe{?LdKcd@TmN_H zHR=7(fh~*&K{!}1oWo8AF;Hh~?SF!r-65(`bKWS)8UrGEe+Y&hJPyWb%Tr}_Mq z-y)p)xfc=LKY0+F0-)AiFRfqjAgoQ#HwUIGAm51CT|^#vGR?hVmVbm(>U{mZn>x~4 zVCG%=i(sx!V%RsX`dZaV1#`Wd*xD%YW9%ali9fod0iY6<=wAKv{SOU>uCvm=Y2J4l zIk}7zSilM{H;V7dHuYyYzbG#E8eXI+yZWcaW~2-y)iHj6P=Y80zRE>kngr$E-d>3+6SKNg0cht+&47ZL4Ke=_MH zgs9FHfAYX$(uXapWyh-gh#C^Y3UUR4=r3+%@kYtCR@a+Lags3X)m+*G6y#2~Z#bwiv*QSh{T30aWs z_ehH95bI`U{MNEmu zrxxGQt(e=5ElPj77##}07H`et-MNiwM{rWn;!H_4_DXxWDHSQ=oYLgJqt46xkrC`LLQe45om|%r1bMOo@U@`) zL@8HK5h}*ez^9O>4aIy4j0*>*SF=B@j{9b7vekm`oL0fOwM)=F48#v4Ricm19j<-N z*jGXB{?T|e^==WgE_Fw{xOJED)o6{6X}mHB9@*ae18pPWG()yOk|{ZSum?|f2f3b~ z5A#wMagRMhLI)atI$ar9)RZ|X1B}qxOoTviCble_8j{4!y9PLjHoWxdE>&ABaf0xW z0yrO6^yw6(Z~UR9tQlrGVa5GM=@~lf^^C>TME{WBtvddrZ#je?Ozcb82d8&HEk{GW z*8->g>+qk!x&nLh+e?CFxAebPz*+6&4f<(A>ckCu-)Jd>;8v$P;hE}KZStQ{#5k42 z=tv;N$RLrbZ{r#8OkNwSq+TiSM)u>jtHq-t38N6;7);{t6+ydSzkaQ8&8*sRTbIPL zI%Hh1chUN0qlP*^Y@Zd!Ydxr8Xz*y-D+XU|(S0sgZT(A-7-E39-_n1NHNX8AX%jR7AOfHXY82&Vb3P%E10oLBsuu3t!obgn)Bg= zexWHtqA+(#z@o2Sqbm@Sqy`CaPxd8IR@vNr54@r~-Ze^+U1SjcK_Q}1z8?g!cFh7u zfkyH2$saeaI^_Jy58qs>)cI=Prak%GShXo&Ht=A3g%5&THP#Qm`1wbIjJD;N%C>)ef7P+ zw7eWaJroa!8e1%>lf<}sw+9yAJfTySG*r>xs2 z7N9tq0uxu!`PFq0|F>tP?Q&v|PG*slz@M}cz70b@v4=f`IuTOCe_2_6n^CvAw%!Aw z-t%W8rx3^IfO-tA)Qk$#2Am6~3oe-SxIUC^h$h=_?Ut7vOe10{l{i zoSI)2@v`3?GBh|JD|PoI*jPhN4IDFp9D}DWM8B@Cq_m(?a5UcsX0v$?`L>h5fg|$D zH7J()P4=D0(1NFJ-OpH_lDG&N&nUry;AF(_#yKtUvY!QAwyA@$UUK<+Bcv%RJ zWre-ROD%6fSf7vLm|=Y(J+3ACcNbXmj7Mxsme*{CB#@l?pCPj^JDo6>_j4Ft-nT$a zp~FT~7X1`7Zn+ENG3by7t>eb-eoZgjj8<;Eu0 zzJ{GWC*$8Hysv3wXX<0MMC-A%NH>gZa+dr@* zPZR_Rlv-~aeChX=!c*aAhJg<7OVlVfGU`H{#S7Xt%fja{fMlA|v96o^$2XnxBCqO; zL=^z~n&)T|8!3RHO>3XXVSR`9N;mIjy<=x9z^g_T?v>Ar(_Oi(HPI`y9r%zGJ#enG zg3ry|Z$_NU_qz&uHd{Z-4gha21SvaR$e@p4#hq!+oZwjG6&aHOeMLaW+QT>5Y-9}} zz}GiCpvyKIdb@)q@6D&{9WN60cG?Go=b~~PzMq|)t&)`WCGEAi?SU_r_99v}aC^zf zjVeptR9n*Vm?D92=fT0xH>`C8`21+RIijxMT{>oI23vXY;vWd1ll$Jl$ntoC4dV3v z?ET4Mc28l|pW@+(=gmU}!Pr}%A^jxE>FP5K(EJN@D|a5~61ZY-p=Tt3t33@`oQ$~`+B`(dBrY2D$qzz>=eAPh`9fNnz_Jk-{;F(u6qOJ6o*2n3sUNHQvJ zsGq!!aoTT*c^l&7%ZX=7a)P^8FTEzWg#i(wYG7x3HoNH)S=gcEa#o!LB`XFk`Fo6DBi*c=G@#JYqPQGpytENxHxpKdnff)w(H$3+Z5hZp5|3R;&A<)jp+T>V&qJ@7ZQEJntI9-4D)QXGU<7 zAe8C7I$H9}1S6^n@RaN#>7X74BA1=V#wwRBH)8Pqnl0+3KFu_j5tJ-@%U?EeBdmrE z$XkBijy6{)+#5a}Pc;yeF&j}7S0E2%vG4DEd4Mu#9Q1goCuZKZtZ0it*;jS^Ao`I+ z*>3mxtm3chlgB?IzX>Nq+|p%UrIVeO%sK=YWFw6xeg`+RYa2PmnO0z2tvSVst{5cz zroR3ArG;G1Ob;k|^}p>04JhKh)b;BKB7en?bTFtH6y}~xn#Co+-f6tT_-BhDo5Ylf zL2eeZiHZ?RziiW4z^pS_rB$yKk?aXQJHB|C1 zx;DAwob^>DgEwl+yQDhF%kTEcreCIZfEGI}Nbr4hxsPb-Ppo&Q;)dWsP>fKCSFWFH zf478)=qcu(+eLi@RYyiu`|1OMjbfdoSTe*Ey=P6ev_A?IdRqawPt&#h^QbGJ6bmK~maKL=cjKL{QPwb!FSb@!Kv zj|}g>*=#8Rpk@w;YEmpz`Y~bT&8(0VVo=I}7fUSrH~Xla0Z>!f$Zo3<%xmIe3_Q4* zHgl~9qQXX3NJKSGC#U6yoj&_nzsUYNXD>(o)mjb08u<4h93vv|G0|WXE#^7I2~~&s z6Y)Q&mr-FGl}`!3EXyE?X-CuvBpup5bo>wruwZ7|6v0Pw2Y)W4&OCJ|=olfqdlxd4 z{O>|x;NDFhr?1Izd#;o8gGKvz!_VJfI+e^nMpwogO2c+b3J@ z-)#RWd5AJAz8DNNW_kcPk1#x#LGQa7a4c@@m&aX9-kp3MzxJcx!OZ6Alo&q5DC8g0 zV*biq= z{GU^}q&fe03K!!4_JYAKpFVve>G1mfk>BFt5HLj{l2V(?pZN^v=LK!RAJM0`XtTxy zRDS*Hg<6qu^sWzxZ9cum^7}l?{K46UlZBa?nT^S7fF>bI>v~W0XqJOp?Vo2a&ro4p z)Hnk2R&aP+{2BdJ?@UXHFmBNk?Ov$y-|@fHPX3VrJ`y-E_LW9 z+3u^pp^nZNtM@IQF7@@@x{h<70=z>S*|f`*wg}3Aoq;Ra5B;4Yl}VQ7Pdxo1qj>`8 zNUH|(V>0!n>Me%4@?FBd`^fou_8I-kS5^z4CcWIKNq03fTlCJZ!k%O$-N2dpvha#7 zt0QjowUaUB*QK6)`5)AlZ@ry=EE~w)#IZy8rvdPl*UyH*Iw1WV+~$UA`1& zg0l#4DH(xi>_88No_!)MJ^lb1iUCgEStj`6(8C4niesPWa(VS;DD~|5Ygw?kt*6&s zKRCW97tG|vI_Z56d$7W$DU|m1_Wt5?_vvF!zvhSZ^z^(d6%`RVBJ6nD5c!D9px$%m zmFZ`L`T(9Er`L4;{R@1W=jJ$es0sT*IsWJM!4r?#tHjQ2Grwu|rikvv7=q7oFdMkP zpO3s~WR^Jwc9|?X(?=PeFzj`p4F#CzCJ{39zf$OrI2M$Rz<$2yP0U*suG#)yK zzX;^Le$3WM_TOv@fjAO?Zp#TfZs`k#BP#O(ZlkXNkDXj0g}vd=C}l6B4C8vS)207{ zz7WL6I?C9Bdnq^ZdI?F?VFmxS!3}~OU8Rxub;7R*`s3E{96bQEMm#AhxXS^$$%v>h zG~2b$UWv?e@F$Z>GOJj9g4#C`YY@df2vaD%W&U565m3xru_nlB-~AI|Q4Ts@Ks{ly zb!-5ILcgzTO^5MY^!gqoZ~0x;?QCrMib)>Lvh2i>NGb$e-19%1r;pjY57*$S;vy2& z2jma@0Ml0vKql& zia$?ivw1u)=rrk-DNIS!OV}Xg(+kK?z>L!#?dgG%BxcX7F_{vZkUt##m61X++^qpi zlvvQPZ3@IKKqlaOVJn>GxAV|lYNp4x2ZU4N;H-#UhC5qC^_%@!@;j|k{=(Cz z{XRRJBr~*!W--U&txY)i$ zA7D%#rWl3=Yf;Yq8OywUSo-uks_~m%1R9G0;qndcDPG3EE-v<5yP=ZAf1kimlzth! zL4iwjO7^VaU^mJYn)J_%jEGRg@*P$mYr%rMOI`9EQ4v)dw9n0%_->ufMEzaumHsTx zE%~iBA1QBc|D%qr_AL3Bii=L?aUVyR5Beufb*eM4G<$|e(V%1SI&>`(|8sKeFhGK6~D zR73r8Ib{+g z1fdT=x$`2(!t5+Vn)tcE#adQ61x4iHdT@5Y?&XCd)~b4Z#)?4<{HW|I0%D;XH#e0Z zs8fzCms_##mZ1LTowNPcbYep559-p%x(5nX0^GLI$&}#)a@RWcuwtb=dSco>aE*gs z*eWLOUDdRwlv^@2MI^BcGpG4k4PaW`8V&rgBru=>k|^e)3FJaoX~nkcmbm3WR8q#o zPSc|#Mv7Kw!05#ENb##vI;}xd4{+}8rn~f}HP&Ep zESKSP?SR{H>nrv-{|VA^3epkgxnj*%N5DTK!|Dn87hQWUdEBH-I`JR(>)yZd^uq~- zE0pE=)|9-XFODi9>bg9*mNnwO+rqu2pMxs%S*auzTxV4K*obtwBX!>4dAd>kC2ey< zcCE$wjcd>cuU8cy7D%2fw0_LL1zp^U>%a=ccB3Z%Hv zFU2i>I;V5?XFk1cdfH%kPzx^`q!m6t!lFDk$*H>N>Kc@ZdQw&L-oEmaNzlc|^8aFh z6U!Rut^7yJU$goYpD?s5&?OkLkjKGH#u?n$tF6;9<4rJg+Ilo$Ww_u!;`-o91~EGb zw!(Ye5m7310k>n)#U;;WQh$M1b=GmYAFzlT(+>|jRgT`vse1bHRB3Z#<0j!p^8pYl z=Cf3sU!T*gK1m}2$}8vcUxiSv!M_4Vp#Ce}eSCf&%o;D~&|99O^mjq6gi zm!ah~H6llvOBj8RRi?%E+4+XUN;4s*p*nySqJ3xF5-6TStwhC{UUEe-ZUpi-Ie_Mw zjo(yA$e5DP(`jYnmi2wPsCm#6WMiO(TP5p|ee(ph z5vr0?jZLNRd*|=Wo%<9)Q(1 znV**OMxE3NtGDr+DrK&;YU-(uKfSSPpfX%u9e2l-IDuW^%MvHy7EXSO5@;uHX!tZ+ z!f<2LJp5Nv>nZB0ZPU_b5LoGA&bXGdo>+`zFVcU={`lF)y_Sx#gv6Yx(_{PT9XqUt z6S3*ttv_yxW52+5*lijZT%l~N4QY?jPMig}4|C9wesrf%lcugou9H#7M{bkQzNxSk zyiPH;TYAsvO)tf`)u(Z{U;1fsOfl|0M*9>zqlaid286othDO&kA3ykEh0Cd9$7P--JBr;#jU2KXd3mN-?R=8uA!EX}CeoDcwRc>X+j2H_2 zlDuE|n($j(Dlan1B}er@rd@-=0iw#5Kp;OzklTG&Aj&9XhIN7Z^TP=p74-4VaL?> zz1uqy+;GFm4_6A#(qX}u9e)PzrI$Er_1x=mxG7n$H!uC?2S)$EsB#(P)ZOUQ`nY9w zvD;VSsAA{D6w}5;8So>o3oBB>n|N7wZpsZT(+E3$&Ga*~N`Y&oIkl+K20gGzER+wO zn($h6%9EDbO8PQ-H&7}6xkxhHo33`O3B*)7zY|5L*N@BqAoMzoDH(Qb`pL{kU z&*O9cI%bJ}z|b=;Yza`ST~NOQdO$ScJ4zJDeT?2**n~{P5zG0g%Xt@BNmj`1jfOGBTP%4Wox_LYY( zvOd~9EoA4u@cu^kwVnQz=a4q;LR{dF#z1}f>&_AAySY1-|iM4)* zpk;3DSJcE&;V1(K_33x0@crts6%}u%f@F}flSZ2tCyxVN&9R@{-r1Lci*+568Ke&b zTwe5&IC0I9>C<7ehyXW$-e`A&Tk+s`A#$e$TYGttj|_)J?6^^A&QPb4Jmqx{(SmT9 zMgf;s`(em_DpE_jXGrNDv0Tj+(R{DdnY?4z!!tguAnTd6s@t^5Lz*0sz-6 zU`o@-W;!A2;sknf{`b_Xe?_bzE3k{=bQ8B}5uA{7=EC`$!axmHp-#=hv~}}w*aC~d z!`R7>M$e|wjHRxC)6H|>CyoNenvY!jw?PH61Kw>nt?|!JnvM>%_4J^=o-hWu6u@mL z!Zsk2UeOljUZ^4~(SQNNAUa`(N)&S^;YR8Z86-v1nAV%eJC%$$1IEd1pxSyhne~8>X@^_$4nV*KsHdI zYb}&7^aXl!=@RAYb8jsE3Jxz+lf$Ft9~p>yK7aLnmnqzzm|yw+jz9~qc}Kodk5#vE zRx}$r3la|pJ#$&%gJ5IBt^zv0Rd+7_KMLQNSufQM4py#m{W4b`w(jh6Pj3fl+v#;O zqo|D6Vl-fKlAN-Fh(K%vkp7^8P+dMaOP7BfKk+BE7t!{rWflh5L$7uZtb~Zz7&NPQiaogq-?wu>AwhY;rdKl z6hR;8ErPm7llBp{n23m~XtJ@e7+K--150a^odFeNDaxKV3s6CP*X6rRH>gS*K(W|P ztN=jW6d7{qt6gb7g4Lv{KTQqx2Ug#4UHWFc5VXg8k37VzE1yokbG#H)RB>qVOHObC zZ1bNjDSF=;C6)^LM#|roRgvjk1B#e$;Ofy9Pp+kJ@E)F+$^ldnWvUq?Hy+^nQaU;M zoiQxSW_)$HJ>TYli9`N_$|%=e2mx%#asOBF6Xg}zC%(%cVl<=W+Qto_#|BqTj`=7^ zH4n9UxEH!~o-+YWPv9VL(cNv4Q*AyPdwTwiyLV%A%dE5mYll~>wxAAS@%>4oNp;@8!a~6Mc6e?>;p<(1V-2eW#2F(!EBKym744*8CPFn4a8 zzyUThkz_ZMvTDpzUS_tp!Z9IPFa+AICc#?H?4Ds)K1F0WltjKojngf(D@nM}5!mdv z=g$Zm_i8avVLj83?C{?CkPjGd94qV0WVA9yhCmK)`6XLzv3)OS3cj6Id+2m1}dLG9S`Q^ohZP*ICYvDw*azXgID+6 zVciGBA4#aEFALKq*!E8#m5TTztJ9ikfB$2E3(3*==%~pcq$js%8NYDV1{&H(!s#a8 zrTttr7#4N@@!tQ@0{F{M*ojeubdGo`})j7djY?tCF12C%w_BAqd}?T!It*Hy+339x9&0B z&69e+s_|5Oe0rMF2{b3c77Y;N)$26VhnoGt4oPDbgoxe(sQw@y>JfM>>kFro zOKXDfLvm%vo$RdUou7+hp9eDN!ZBY?>k|c6v3~9I1yq+^#|472*|hhdp8#(8 zbx?aN88=e%>VEnXD7XTJ~G=_DqZ!(qU(eLIK$$54Z&KMpl^}4m6|T&HoH7QUzig+d~=76 z0wQgfaL{`c@HXtS7$X%!ClQV<1=^xnkJB$YnyAzN_@+*6`Zv;m;)=`l`MaS0NXPvq z3jXBPWWbcwmFw>Fn2(=DZuXzPuR6iZ0s74FkEPP= zHoS6lT~0a`C;?&!vkoFKrV)?&zWL+rEFh4=sTCC$G&SIhZ@lYPaN>I=KQed7!lHvq z^BL$k^PFAR_vOLDgT;%27Twh!RIWZze>NF|hNwO= zdqEOizj|@hbXEgYm`~v3`gs#2oj-rW(D3Rv*WfBdbU_efd}CE!=VE?@i@ zz}>fXV%wLxh-|nau$oWoa;L4`K*%&zcu}7gG%#P2j5`olwGwPfy{GL$kM(665$}6H z)I_~)Vb@!|iT$_{!NealIJJZ4n|~hhnn4L+Iy_(%_!;qDzTVI*r4+fRECVh~Nm_V? zg#Kg-Y{6d@f^=u3MgfuZ{RBSfhUzy3}!onX8ww^44A5nTPJx;kh{jIf{ zqq5=V*rxn!_b6hWt=l8k$wmeM^6_Q`Z*xvVHMt@K+9st_OHNp|}IV zTKuom$HjaO0S`-Gz*N#*9xKRwz4J?h^S($}I4uAZs-Oc1l%v8YbOxbS$35>NxliLA3(M{Ra95t!^Nml?xt) z<6%TZUBwsP1fo9NxvRB$@9@XdY)1jz_WJFNAZ*f4L25R;Rc~b;ka?=9iTZRMirJ5P zon9ST_2}@txu_YkzRwp)3L(ahIoQw9KWZk6rW2HsN=L&h!_mgnnWKf^@j}f0NYwG$ z&cszHf*Fep@jO4<;>AiDI-C3a5J)U%@Jq}o-R;QX)-ogK<(iH7LAQ+qQ#cWqFj%Wj zSf^b|M*S~8i;m(*{6HP`aj@U}OQ%?#788nCKkOPc+L1NB;QGht}TRzua&(h{(F z4v&X`)&_hxQvTY)nYdmzNH#bAj#FVwxwCIY4oef<3q{dN^t-_c6ddTjFsXEk*T$Q>~ze>!Z*OGl6Q zX^Id?v%wgl`t;&sXXi7gugA4wmfi%>6mULEC?=82@D{*WI4)Sg80DAnWR5=&KH&4m zUl;sK{>*`Vq7T4KRMGIK?Bp{bYQlZKh&0SkJ(^#V;&ONm-%GL7(>& zl-k>7mW~`u-okWOvkqLXPkqeYIy;T~`U#X6Pk*ibX4?=JW4E2*+rm+W1F7nKSKJ$B zPPxCWjOmzZ4trI+TG!v|!x!OhNMfkE8s4yH49g(;~pdj_7>Rl?ZdNK5C7_|qZ-J7d?{3I@)hdNrCU zl(}`j(lCyf9Z{tkswbMLH@aH#cP>yg-u%Xc>vCG&@`WEG_oNl8n zhy-dxrDFBpmT(3N(jD&081&IjOn_c@hVswg;0Ujc$Lmxf2hbY&UW>Y^PK1Lz@kzWtjDStTA*H5#i0QBUl&J% zr1bQdwh^S3qAxHfpv#n4F8~O@?`1PED#szBt62@D4O6RCsInB^RS!PvzBWbpZWMw1 z`lM))@^A>$L-@fcLU?NwCP*NCUy}s;on zv&WFrFJcNr=A)(cnQlVN)hv0lDwyAVr||9}(2aI|;9AD>C0Uz9c&Q@Bd={3m61)2H z_^-b*G=aU$FHavG*H?84W3{O~podcX;qW+e=&dFD$S5dxtO;ZtfFKB~ZRTg9Gk;@* zGW_$XaNyuDf307!c%Vse;p??kNh-luif_y3jVx-#DIP_ly^#D!m!~8~$siOV#^Zkf z`gDR_rHkKEN1;1g7w1h8)ieHeH8^R%S_%ovkH!<~8J68sC-RoSw4oUIQuEqhXUMi6 z|A{kuQzialRQFgG9uUEvMU*b1uJ;zj7uD5!{l+%LfIc-$MRMvbXi3iDdp^c0%@?12 z0Hi0P8U~P`Q;t#PB?8-I1l^XWH?6V;?P3UKScf-UOBwsX(!0%@A(%n3MPK+(Uvbwv zRhbqcZvxL-@a^dj&|~MH-?0Jrw5GU)lf5OzVdR#Aqsq3(6?a$Gy1;wk&FpIu|JzN@ z^IG#$^o_dR-;`H&bGpjor}+3QRcN_Ni>;r0&xt6#dHS_C8*Gs$c2|Pb5`tQ%ekN1O zojtdJmpXeY@ceDiF=8tTumMt^AC+E5SZ^vR=%GibbVqwvgQLD9>GzjdsX*(#0Td1-F*LEcM*F{Gk!s+FWLO;fdi+@k z>RImVZ&`4LGy;)jbi*%ZSg!?MqlZE6E;YnljT30XYl8&%#m|ZZ&UJS=FT^izE?KhdMoZgA?R7KxJHoK zFXCl2*sGNs=USwQ8dEGqb@ytMItd4_n*uSWhYS1i2xl#RMz}fm`Nz-u=Kd0e*4qF( zQZ_m7Y0YcdUs(Tz-PE&zd!;Ec>6h(&9U!IbBB`s@AtwC$rb@7cuL-X|a5Yj$VcPph z3V(>f03LG!!RhGO;q0s|ydAX0bU(#m&)DcG`p=Ys9C%=EonJ$(etWH@W-kcc3cT_O z0kM$9z1bh1)eB#OHmR52N4=;ptgacH-r;7-!wTBA*f|qxtPYw9d&GvWm0IPzsggR6 zg!1BIvD4v$)LhVJdyRp3qdR3e=D;Vx^pbbdV67J?C;PR)&W}-b=Xv>{LkC=aef8en z54>N09d`N>RDM1)m-c_D-kqgSGmj2<)38GZwMXOm;izjT=S?a5U+Si8E8Q!UaeZDb zv;`CHtPZkZ4++trpT~M|ts@&iRZ#fP+PG*uN;#IH)hc@MH9#Mlj(%hT03UY0unl`q zhem?|dhjuzg!U%*`8Obqm$~OWov6B1n=){)C^ufm%WZVZ?hDif^Zsk8nr#-(a&A0h zJaDd6@#OT2%%YT*RKToOMQGz-Y6u#ItDe5MNNE~4lhiG}IhU#2(r*NI z7V3-KEhx6`RczJrzrwl>26>NiLE`c}ID3rMh8uk9_P0?EVF7KW z8O7$&sUpI81C9xRO-(Ul_MpK_10>7f6Tr!}MJ~-4gg=`&iJ=D6?jOAeJzzdt1FWLJ zK;qI&p%&xI4TK)4Yf0XFEg%5SS7HRQmb%kz&2twi*H!Gmdx$_p`pw^uuHJAJFVeX# zrUL>{+Z#!A)O1?gxv>Y98qVyKQZEe>Ky}bQgdDMN(=tbDMdC=+c}iEcoESVCXl*JwoX}WFy+RiyCihdu z%bm#-U9?;Ko{G&x3(uUv>(8sFqX9i#deQ9ylDvq=;UBF4V6SM9`{_NBlvR~w2-@2p zz5RoigY`?WV_1cLK8g(D~YeOH%)#kk@DGEk9-G@S`z_SZw3$Nht7AaL#}WZnFrg>wPg8KD;e zo86O6kAKPNvXNoU3j+B#aGrIrj|15U3To`3A>eo(I6~hoyLgGGFP(lmV)u=VwnLmN z3pbt;qg83!TGad|sWVtbr~jwD_l|1v`T9kJiqb))cPt5B9&A`rTC2wgy0Xwplhqm~APCv~u>pIIG)Vo0IyeWa7mivN z*8HG)tLNG2V8e-Pr|TcJy+Wvjt!0vbxuaEaeK!*KSmLAHABIBtN6#z`#`=Z+0ynX~ z0O1Mm`ag{yIKF!%C-KUElMa1*Mttx5K@h`aeJ=_ctRaW^EulW1$niA(Ph?1QM!~@! zfLd0i(SeKZv<&;r8|N>#P|ccAlHA*x*x8Tu@;1!sr?3zD#Fo*xO-z?;^v5pUa=F zC`^wj7azr{(GZz!yA562bwAIg!7!ynACO*USUxqqed_{;?0W&R#ojHMa#SXLTsE+X zEfl!u;thbTSt}2lxF&%Hh_%FazRSeDfi$^Euj?>SaoSbg5STOew04!H{;^{5T zyZB$tQRH!tzCpy!fB}o4d0q;`xkqBe_-JLDyS{e z$Ui3tP{&N^gYF5$qY4t|@z=M|%9dO`5De3+&9jG;7$*7$vjFQ+VuwXF59dJ`J2~y>Jd&W_`Z^pN8(VtG{o+d7+xU|Z@L4=Zfc~{FpfK#5 zIqc#Ow)0;e>i%60F@q9B)3KK_9K`4C>W2C#35X8c-W~>BFCJj{EM=3^oRO((`47DL z0?^x>@qY*OhW-~ouiI@CmoaO4(mi8w;;Ru8O0Av>0ACz}hoPyMlMHw`Gy_-sPs{%+ zu=D>mAOCl8>=zF__whW4pL+ClA76x4x~@+&G@6HXWxR=q3nxydZ|cRBH6tT@ zcUQ=DCuy9g7MP!{JGMp>_iwVu=e}?=ogk(*}B z-gZ1D8B6HsEB~zN|D|_<-DZX|4CP7dg=8($WG$(O=0CuhA&rx(Yabn8T-y2L4Ro?& z1T0g1?gwysQqO4v^Wwm6Q$?NL{nSj+W@L1A<`!Q9lfrr+ng15HTaXg1mI0c!pR4c_ zemwb{)4G83nwxT8Q^{H)qYIKu?nzM-%4a^FL6fos)*qS+N8&a=!EJ-h&h}GRuOK%O zpk3ZIf9>4&ThUaJQ64uMB0`#33Fd+B0_X!&V3r!S&I(*)@z%kefY6qZtsP7-nL!(_ z!^2+$VSX#EhxUgKB^H@=6Hvsz1#i zhl_mGp1W1~puIJpI)RoH+X94&nZEaadH<m_R~68b0cx$Eae}ecXa1jWX}X zVViS$S$7&aggmc%RPD`4POxi!vd@(7F+R9Y1N4Sc)}2Aty)w{9pu3f9>`hs<6D8)pN z=%pB#LSkb2W_LO}6VD@c3=K&mDipK~8*k8BTN`_yDrq$$8EGTljqhgoZ_4A%=5;kM zzRO(s!7?_GljY!b@@w^+@eEUx-C{$$g?c8T^^*rroXTNug`$b<9 zn2#I;G>jzQ`N?I@&(AyT{xTJj5F_g7=uq>CXzqv%HcQqNZf%c^D0yRcTd!9A@Ws}> zz|H=-gSocfJ!~;3qP2tL*_9zxkw4DlQj83*gBj#~cmv5uT4zVA3~QZ_dfe|humx^@ z9k#)ID=pz)tZFCHFgB>JEfrl31 zGVI~6&A9{9*nl01)~IFqLZ{FhJ>L$!%WVc(RLip`JCMYzbNtsB{A*3NJ zftx}$2Gc~V@uO@JArFI&Oq1ClkR@IzajEdWNZs3g(c&zDr(`V#c*Y&8_g@cKRT{RJ zx+qkQYI81!`f9hi1UA1uF61~lYqw^c8r>k79(A2huP1BCw<|8w+|It#1N|^-k+N$@ z3B_UX79+sSoX3vSx?IOnY-{U{h`SqEk*xu#a54Ix&Ix?ns$Px1`4&lf4Z{7m84t3U`51 zf)}8ydo&9e;o1U>B+`t*0dCivNWX_Q%orbT3))XtQ-C)6@>%O|${vc^eO8qxfH*vq z3!5fobEcs+Fm)fasyvXxD>liY3hP?15}YJ0^H)W%*=4xv@4FD?%gSiF$T56pRQ7zN zIsRoONr?WV)KD8qZt}a3$>HZ-@3)-{_&gl9Mc2-cSWimNj|VSBt72(w=yNWJ^gP#8 zr9SvPY||J{YMG)|aKkI@pk=Xj_IR^c8cuu-LlSo*JQBO&W*CvgjrB*G1Mc)!SQtN} zELUdl^5cnvZ_dWf>vv(ZRl<$W`LW_h@0o*DFHm8^)BYB$sCd}!3e z;=G^7VC8r<ePoQxPU1i>9?G{!Jj@#sgftho@1$+HeEP?5=c+%}w^GtGh zQKsF8#s~AK1rN}|GI6h32HdaeF3I&~h?N++m_9i=T#Z=@LeHFN?7^B_x{2ljo-akNyJ60C z@_PAOgE@PgmI|9Ul!4YUB9MD**W)>R5IJf_6gKOx)SC46JT1SHF)RCk9x6yqjBa0E z4WIK(rG6{5u`g!WN4dT)29)HoeAOt7Om65jv$2e#4$3|V;ot<>gqiXOY^$IToN5eO z3kwTQhx6+g*u!28uO`Hg=QTv~>uTTx`Br;-xbC~q62+PNb56jGo-@@9>Lf)Igxj$% z1$dE??z(v}7PgYfuQB!>g|Ee(wygW18Afs~UDQ3sxPCQ3A|SUNu(q;bLd6`jr-Ko5q^~Xp_wavuntG4{1jJug-~_+O=rqbiqBc*(ZSXC8^21k zF+Z;W2U#viS8~24^Hq0g_WX&J79Ep4l?ciP{%_Nbro&E9%XsZqDxwK$?E8;&hwZlw z$dz+j3d8Yt{C%At&0J05UG6>0|3JqYOks1PqD=zfyVan<=p9kcba{XY$lH3m;+=9J zLy4-G*7n8fww_gDmkf&f1EO3}WvYJz-2m5kvo1fpawJJN(Nw~8xq1E2yu4yfiMF=b ztPr<8;fGR{j6v=ny=D@XyYbx(zVhF#)5Y)<@*93i@*Tn(8c*dc1W*f80YRAbuDaUc z_nI{O`#){7C(M+$!CyQUtT=y+-a`)Cur=!P0V!_YO9(9lNbv-!v$JVw1+*Yuu=xy) z)?*l*#f1{oM+5*(W^7w{-40zV`KKtP@b$4aSS30rP-vte#r$?30Z>~VWn7FMQK#%0 zfv*{VJ(vT!vBAj?*9u`NQs53NGQd-^45(`e)vIsg1Q{m@`9%e9-P*#hWadn@vA5_%>kkh6&D zA#9_w0Fqm6CW!lcNb3R2d=SBP4RW|YC2~@`VhKj(FthmyOwl)vK}(@r56QUb_pY_e zR@R+LT?EyJ-w-?3N`HHaYouX%U~-QPs--Z+F&_^xdurgMA@}onXc2S7K?Ravj|xbW zPornIme&s|MsX3JTxTux4w)t?%FdUQyQ+G6_>E`9$LU@YT{ettsJDwXZvAr#H?CWB zEJAp}z*@BTA!TDr@2<(k<>w{$NbI_r!(l4cHO^cE$W^i$}N%lIVO|eI(M?fr7kZqhZsfdK&Kr-^y=-8SRLp*0qxDe6L%5Bj6gll^ zPdYQ=+WO5GW~@gPjEQQfN6}eab>!%-ddUcE@hzC~=AX?sRilZMCT>3(^kor zm}&L>Fe1jvL!^AaoKCiyDj+K;xgRxpsDsQt5%05FXb`eWqiBO2REe_}NnaXP%Xbi1 zJ*lp(e9^nTY?x@=mr6cq@w!|d;cYfn;_uqsH&@jDF&sB*Dd@9dZe(u=(B@W+eyrqG z?;DtwpFw=diPNYIdibdszSegYQfWuNw^INR#O@47Cti(kp0Ze5yWw2K!e;s*h z=6+hr%yCNDKTA^3eji)5OPkoT0f!$Rjv+VAdd$@$&a_duu`9r=*2uQ`U|WR)sjWvT zOVByI51W#I*tT)0ktg4{x_cZYfhspMBTCDxG^Jb5e)qWjRpI&?Z!SUF_3ks_^p^up zr)QTqlEdIvN2q;cHNqSOcvjNzTY(I?OA{QV#gaH6w52*V5O>K>%Ndwl`5Zj&_^-|* z9o!CWMKe`yTv?nqC-V{UN>>*=DoL>#f7Rj$@x9Bb6K>Gf=Cbrie0rjeNp_5zeG_Gy zSGTZgB>!zM75U~mrMjw~vrk=ezm9;)L$KGVjM7=nDn#<1z$WhJ9K*l4X&Z*|;h ziT2Ns2!a`NT6dU)PUBt$-3kI?Da&4YJKPH5E1#^Q|M}GH`C|FdutJRjAONt7Uv!_< z-jwBYQmSQzTJe~iB?`;{EI4QW_N>JWjU#)A@k3@;g)Tp>@1Wj7s$3~B(Ox+l96B7% zAfth^srbr3L{0F|#;M^f(D*RT@=GK$46o|P0|+5a+iOJ@x(Yn^f;D+VW2E^kGVIFr%G;h@6Q6|q%AyM+q4Dl9X982q2R8*5i8_#SY;)o<@+P() z8&-~o(diOKT*-Qu^eNj;>(Sy0dGh6rJ0&bkxw&BDkoL`UAl5oHFB31QD-6R1Jl8zC z^TL&0Du`%xlifiPn!9!bEOn=7jQik5OIO!~z}D=h_edYJqNvlWnEpKCjARYRfXqXk%mS&{)*|Nw3kaha4Se7UYH_I_vIB zAhP)90lms(Y@R6N+ALiMp9J#82%!zA0t$c;oStMlucDkN0X}S4z~`Jb_$vfNLvDy zTc>Q8N7azq|7QM3;`jqNkBdzv>@|yY4)oRbZ$|%DshL&=)T5Y# zvUb^Z0OgI#i5c=o3W02fOyV4Wm&4uJfDgRyaNk;U0d6bLPvs%pytlkw=YQz@Tzgk# z|1ekA_^Qg~SYohvwz)mb^F3p=yzoSwVulSwbuY1eAFCFk{A$)isH;8e*p^B|dJ0E- z-+HW|1!vn(x(fMeay2&am?iKUh!DLW7XP?>^!+LvjN=NoOXth|vSR^m&!DWPt~^H( zkJm3HrKC^<-rq?>emkq(_}ItHb|vrW-L1q@pf{f6uhWY*-UjuQTUOl3J}!4#${AFZ z&_j&!fMpnSNb3pC9oy~^R=OdaNYdy!vv1#vZ2Jukq#Iy+g(P zbv;qr?~(w!m7+%UAPrApmD^4LF15>Q-u3lf4K#~mFj02g*2TJP=E{%9cR`&)j;cO3 zwne>X@~_nUhFu4AvyDnq(6LP(t7&*WZrc^`#6!2?X2P_Z3vye_uxLz-chE7ya2bA3E?_V02HjwMjW zQRTz5NFXIDg|9vHhi#6%ve?OjikJ|e%xKm_*zh0?K4Z``OlN9 z&u&JJWZt`+n>Z1eb*a=uR61QJIO|e$=Yy7Qv2a}DvGA{`SA_$S`|o$maTjA=1uary zKm}W?wQ?3x`d+6b_IAL>(yr&7TO?L>?D|B@s8p_bt0e&uz z7jZ9f^_9@8FtZ>NL5J5=I_*dA!oQ0+l`Er=n?*+lVpO6#{?n-t2#rbh%0^M^q| z=rK#6I6u6$aO?}zdW0fwK-rBqc)sKb%=~7)P>xmL5mS+vN(yq$Pvt||L^{786mm$^ zjhYqW=e|c2kw*o20Pdy9lu30>Og%ENn&H@OvC@<0B1%^6Z7nGkMX@HCzzspbjZ@>j zRDz@GA?jlxzIM^_?Q=tMeM%k_E^L{K@NwQJF{~I;>x}I$2z~6QRT@tAxsrDYcd^Y8 zuE}wJ>u!EwIA6ioA~4S^8l|$rl~%=UKjA>`$$KfoBo;%^hMS9gm)D)yRSlj_r_#Fk znZj9%wknfs52$>vLk-xa$}H;MELy=dzs}J{C`?n;cQnLI$ydPNj=}x zN#0}4kV2%;uaQWL6ciVSC}dp+9z~62g@2LVtE)j~Xpv?1oq|qaG%f1&hIWrVDbdWT zlo4-TcErXcea_4bzJK87{juKmJMs8iJm$!@Mzj2rP+3QSKFAO3><9#IdbD)daR1Ut zuqjX7NF{4Y)+nprB?N0GCr0WESQXsMR$Ap+vpLy3Ww|X$1{6itpFE2Nt{SDS6XkJ? z+ugcbxk9|TCP8CQ?v@gia>euLqYz~IyvftQP8265BWq7Gd&Ko|yeaS`jC#>9l&jC<1-{hqTLA2-03J~GH z6Iki>7lNY{)9dW4`(>W1gNLG!d}GL38rlei&(ob)U=5fG_K$@qgJefToG=7JsJ1KCed)nx?&1np{q)hqbj5?H2b_%7j;`ea8guKWx_;%QMbj$0>9n6rhbLK$DWrKp531Qy1$i(Z{^Ll+S zF{POGr9HYuy-tFzKic2QJgx8M$0A^}$#kRkuPY{s)&n#&phw^aos!4GMP{h?6%4>j zna>O;!#^LiyxIkuFD zMFo*zZW_A%B!QCvH?gZ8@j<`VS#+uz?zG!`%!tH+P$F{0)WMPhku~e?KRO`3;S4Pb zdTE*0U1L`GE)tl)Zu3{SVO^&l19lPOu{ac(M}vtVw3P~__~=2`OE+rwD#)1EYU-t% zk>@UPmEB2U3a9pUT)(lQMZ?}fBq;3rQrddEy@F1gA||hg(A*$ApA}VD;8S|Si354k z5m+`29hLD1mO!e1m+I`jp(FtfR@}tp`+1H(4skU!7F!1_xX8M5gB2CB2Jl9Oip}e- z*Ot|Vfx*T_K3V2pM!KDuPv8q^)E717z781$7{?-xk+3E$ekQDD{Ehzlg;NmN}ZNC$% zIo(A)m3NLp_-k#%U=Kc*#*FOOxZ3E9YU5kQpZfS|%Q$HH-E@2RN01FFCwlV3xh>vC z*~6^#5zC-rO+vNvFGoj0H2QSCu0Xi)NeS&e{ws=kN;W&czsPXizJ~^c`hmo<&{EI> z;z}7R7j-0=SE!KnzUoab%AD*nq~JqAS8Dy?+NICSs;Eh~n-gQG6C~|=k{~!U;YcFX zUzM)JFIF?zL7+>*Qy?}7Gg)O90?0cc%mxeABkJyx&?Axk6t2`Yp$&3&K6QySzIa_X zK~cr&>R3_UZy9{@rf4_Rl=f3J`i#Xo=6hrt2>51q5!xoLY;5^GAPTSf zBvij)-PDy2RU*G!dWWikuq7=AKiK&4i^pzHDFTBlRLL%3Ts>145UrrRax@d|NEetK z$K@cvZC2mKvcwibf-7D+@b30sf|P*Od~Da=QvRFviF(S-Z5`WWDv{#SB5gTFRB45s zK?$bes=91p%YcP=MGyv?+Om~3{H#YzJ{c7rB!eqmtn-~_6?g|7i+Te}?h;th6tQ`_ z&Sa&l=w-X(F>{2NWbY#!2pEXC=~B$aJ0`OuV#f?@udTFKNbbdDfhYPvXtxBB=&8_) z!;&+qmIidHhHcQic>B|%U~~I4hXWGlPT8iu3VzMdy32<~nzmG>&T=SsF3~Z_G=wsZxe<&TxOrVl+P6$GE? z%)qjiaCz=Xtu9Ao!BWJNgk245igBQ-#h>|-$~X1neVx6%C8ru{V#lQfuRipmA$?Ic z@qViI4JXirT+5#*Gctk@T_yIh-$)hE4M2$6p%J6V=uwUFje7gJ*M2=++mltlZ$?kZ zT!yGLjEU_WeNT2{u!sB#rDNTmNd7Xjo_E7!Z{c%Pq9)~LTnwbEUm5(Y4rFQ`%SKO=wOlUlW3hxaPY^oE6{d&>B6E=yTPx$Z zSPXClBeA+tg(jTsyY8U@mZi@(FtGKz|l0rF5zdnUx4r(o9?m4Nhy>gsS9W**M@?-jw66Jqu5B`(4lG}dRex1QK~v>`bVb{rMr$fI*k2}wG||Lxm@@*H0-&KG zXg14o@-}!Y{sspoKs|ju{L=l8T}dzY3s<3C0?^ECXe0^E>K7`>f|r*oD5+<(|V9iP5q+7 ziGHw$W7RKN18a`o7;I(`LpFQM3!O0E_~m@Cr~y;X8UAJkEV|?Jk7brX!v~bnAdMTj zEWk`&{$mfRasB?!v@hG+0|MqTJcIp79V0gAJAX}EWD^w@0{q&>EI9$*O~Tr`bUIm@`lp7$ZD_yXBL?@a>6 z`nIoVZjhk|6zR!nolhXh7`7T>&_N(wzNl{z9KlI9+rF19A8RvN%JG4nsF$$sF5eG31wccg?-AsaW%FV>#kI37q^Q-IiBtkZkZtMi%6O~bXs5a!8z z|B*QD$~lw6tWC3K_oT+Mo0G&Iwj_borMK*7N^4;9ZBDtEZ%X~gxHt#74a%1sr7|AX{Ha~@4$uc3W6umLEu~2RiIDo6s(gSfEIE9yi)>m zaWbQ9#I7iiD94NESv=#{-jvE|F3PLUcUxoi0a6d(5oo078%QxyNWxQiV+vseu(9av z3>xp}O|P0l{mYU;y(yrE9Bg+ivX_E%h3zORd}rR#vGx!yW4Srx(hmQ;@qCv8V7%+t zKqjzno}wl0%Rgb9Yu!@==}YFJq3V7~TijnbUk%wj z9U+y^T#yA`Bi{MRZ2Ihe;iUX1Hrp}fbh|cT><>%X>0h&sCOSVnOV0*}bO_MSjzs9r zrc0lc-J_l+WtqAeo@Ph^&Q;7$He3$+#QZG3C@f5ziRoL{$$d4{0O4MC#XhyTaXydN$&e4!w+Cf;qTU39IzZ_m_ApNAS(VX?vl>eun|Tcne8TGmULgIy)@kqq}8 zsHhm<52c@q%8k3lVQ6e#8=C{(T5-r62C#R15-h-EseS1)Kkvc3e-*QhvdQAljF&bx z9h=M<$OafUJzRn)2Hl+p+l+bI*69n86JIuJwn9(u)jOaQ=yDYP%c*5sZ=ay8ZY zTF3cDyGCGIcf$As&W?MUHoYFg%hPFS4HbPK>G`FM4j^|ekq6UEHRnN@$CGw=v%?wF zJTm%sXd;8lNM(3U?!_JTm%;YBWu5V=E&v(;Ndv~aJgEF8IB3IfeJrly@@PsML%lRu zq!0Nj1^?W~53ca@M5YbwzlrwLhg9Y%;X}N!-_z-+(FNXK%%(d)X5bwy#(Q&C!dt`J} z&b09Q$Y^n3`-Bb_<0Mpspx)PpK#1QdK6I9_xyj+S?fh9vg6t!(dd$JB!{geog2syKquKMl8M}l3DJOELYw#9r0Y{dgLNo2QY zSDFA4sa5NKT-_lT-g%dNhwsdt6f8NN3>x@{YqMv-#ux-604mx6ud+f(U+RE&i3@w|3Q?$d4km=?rBakcyuatBLX9CbMsNP5T zlvO2$1lQE9uLiGy>YV5jA5@y+`gvic9Ny$@zwfDpf!X2#TVx%y6nqcX=--GirrDKnlQvW$B9%J9uB~m8>o_3Dj&N%4KJD8;dx}?XruRO$Ni70s zqI8x3x^eh0zA}QB=r1?g|hQj6(b>KW;KJaR~g#UQiD7P)Mg!9dPK0y*Z zx!J0%rcHMh22jKZY|;LtF#Z=X8^9HbE`oO8Dl`87j_=|P8{wxr0!-DFmHX++FJ2I$ zrPr4~2{k0onO0)IdW}CS>v9r5T5O2sKE9>JctHA~+Q)%$gXvx3&guNGdd~ybW!E=g z_%detBrH?uOP9kaS?7)$)Aya;e>~UQ$1QO@lRJ^SH6<;=c0j67<8$XV>L9AHMh_ff z1W|$gTh~`n>$uA2xN44j99$pX|2b3dDQ;$XrQqvW?a6q74{v4DS;Nm$pOmz;v*RG% zXvATZnzIKVQ_g%HyI9K!hRlCnU{G~zuT}CkcZ`ADqYav04t+*HJ}ax93kPocygvPW zurcjPW^#f0WwzB_7X96HoggJ(tBJ3}ax>ObL5HIx$cM~mu9FU?M&Y44wuVpR4&c{-R1hD{63b!LH9frJqu8tO?pB+XE55akU z&+e^`-jx1=#+=u^c^{`ZKexNPdrouH^3A!?M^H6+N}77{40|`1|GyCS8$2K&3ogn0 zay1umHBY!rerRQDyWW@z$T^D7+wrW%{Rp-{1@RR6*Q={8X15`Jdz^|KbS^QH* z`;KwkUsP=^Z+bmO$RJBSCqk_7oR)Nh#xil2*1x>koX;w=gSQZn)?fj1Ax5Mo3FX#BC|GKlqo zS2rv6Xf_ZQD6$E?Y6ShOls!ZXUlt0XeEwGjeF)^AW!|JDA}OiQc}xq*75*D3?{s(J z%EfKfB|8$E?oY5SLyX4%p2nW(0=g{Q&sQ(n>}lg7JDtdGw_VFEd3;=pR-xQxkyXEB(cEO-f{lOC=WngZZG* z^6S?xo$*mn=>KnCP2glIw`mvj~!joL;oBmvabc3@OjCfwd ze78!l@`tEQ1Jd-h00^WwZ(_)g?-LU`8xd^*SX1V6rw0xs|KH6-cmCav^S`Gt^FL7V z|F=dNfJ}liQ|3Si3#_i^ zCmb#4-=xy;SE2BgzZrQU${-NMzqcus+`x+BF0gy?_uqoCNe^H2uiKPnSuS=6`kH~; z9E|+|WEx`hqhJ+xgbJdC-6-gEzx#4{2m|(V0(P;Ot?%xZuUSJf6h=ly9PPOnoxxYk t&qoNsNdGfq|FH!B-757@Z$Q5e&q?q86r|5~a{>246y=p470DX;|6g%Z0sjC1 literal 0 HcmV?d00001 diff --git a/docs/src/figures/hdia.png b/docs/src/figures/hdia.png new file mode 100644 index 0000000000000000000000000000000000000000..08bfb5ffe8515da22a16750d333eaacd6a716570 GIT binary patch literal 59145 zcmdSBbyO7E7X~^Y5=u%+h~gEbLplY-pt}(ylp2s2kQhP)l+;T%gACm@=R0p?lW6wH+MHHSG%|FcLfFb1cdkmJIqHufsaC7 zJdxLZm%1?{=ls`HJ?paGGp4t1?>lic-sN|1`%=kdxB$_&mbfd2Pekax^aRoqK;1!- zB;oS-l!=s@>2KzDiT!N?Umjgk^w)qk^3l;KKV{8Dj1qA-ZGeyX19Q)ze|~_@_SK;(GSd80*4%bAF_? zkw=pD>7Ubo=Y*hJc#u_vBWSO53`H6_TR)*(h^)2IAuoI1oVN1zB>p3-IGYKj4+?yY z_zLUqR5@qHu3uxb*m>~&VKpKEG1<*`>6DH!nv1UEc5&+4v|x&aT9`;VvcgXhP>tQh zhHn)c?ClCErjNE%@Um(~c2Ci7nPcGaMUh6v<fh-c@a0hh8W9x!6RgE zac**-yi)D5drYR9$%Axa4%3ne(R8*aJ7wRkO{@C(SuUM^(y?0NzZDwIk<<^X<5zQ% zjlJBIxBGiPTQRPnA$AC6UaFBVq&Rd$HpzB6pgI zIhlckj=X5>&M;=LOSfFOa+(Qv_TdX(gZ}vn__ndP8r#0DEO?>ECKlmE7Vg zdK6-1*kRLg#BVwD#@+J$nOpyp(@jbFF9pOw)1_LydFzYKU7`k1=+~pTQ|<`KyTAUz zXAs!CcH*T`UG&nDMa&Zy*BhvFkgM4gQN4p62SuY+22nwO_@Sqs&Q zyk*|F4E%P}v3NL$rEp*#Gg~!oS(AZR^v1_f`@#FeHeX8`sPnrUZ9fd+yyF(bqPHwH#lG_>cZ?n1dBeMV*fS^G7W z{+2mAO-s@qbdKU^ihSYS(bcliUna_{9ir4LBL0<)FohWNu{`U_Bh(k0iJIiz*je*J z_@`EME^Uiz8s=m({qmPtI)>Hf)WmGE`=sQD(f2w3e(d>}$dy{!PiAZw_Jn4vF_OK; zq(qHXv{+drZmb3|gnqi+K|b}P=)(8@M_n@WV%l3loH|JwHqEeTiRwA-yn&kE>xGi6 zxTQXpv*#uY=95n_GjLhmJ3cG;YdG63_}ySMGTxM%~I$O95{%+?rTY2 zvW=PvhpCUfd-biF+ohwj?!>$MjJ09*LUvtqjRO@MnYlY{nV9>7*w8#uttu=Z?du$m z0ks$hCIj~H#dqKXjlUUT#=1Kbts^hz+M(ucO|pNy{7i0$(|DBgQb$Df`7&Lx#GU9a z7ha~B=`X7k0Dut?L2`Xvy3R#WzEU)tsBhP}Tt0Z60|Mdc*iQB<+Y}{+k1zhxp0Z`K zZjOk$nh&`_s%Ls67}zq1qAE2xkRFr}aBm1-Guurp#CNBs;+i_#CiUA4qdiVjTTNWN zK)b6=d26!G>NM$>Bn_>b>{Ye3OoUF+j?!7gD`%vrUF5Dy56U?Xi}hm9Ik^goPE#A# z7TqeWKg+77W1N3-bjFtAMKa$q_TFXHIF?`R{UsA!-@6#N*rWS8QIGyuTTu8(!?lJ+a0ujUUt>_NE8nurk2))4U{zQwEs>HnHk%DxQsFSSUXVLIr zH%k$J5rN9B&fj^CX!fnAw>(benTIV*-nTcyiU!6^T1ypM4~6>DEwBR}=sD-OBz$+mfnFg{zu`Kiu_&Sntcs4?ET;S|fqsfV^0sL;#GwA2(tbpD*d zjQ}VK88p{!uhE_~?NE>V&oIh_tmr=bi#7JnZjWo8l=6kH93DVYHb`}!ENZC1X+m`I zHMhB%rgQ1!BT{0A#)jURnIVUJnSM$%&For+2Z@Ki`Kd0e zWVqo#AmUM;=NdI=hdCT)5APCzN5;r?Svk2;n4O=6l5KmjM)F^ah(@DB*hVzBh>Ma* zWf>~Gm8YeP>R-K^w1Fi_me;r*h%l)UmD4bAu?|~bQ$#=y`d*@3Ka%9@r40qeznIKV zbg;GM4=hRc5K3O}pqqR-w;&0Z`}@E6G_-cX{g6P z2%3}Xl35-Lx%3t!^hn2e>H8vMXS);v8$v#vuze5=?Mi??bx{Yl`2%|nS6&^H;lCvw z>2@K~U*T*NFQQj%HpQGVoW6lnTlTvA3^JQ>!n2DIH}WpFOLLqJPkB2_=g{`(I#OIZ zm=;cMErzd8H%Jf8VZ557yVfB^^0u~aOX{sA?(+t)Ji^ueC$;&T&f6jaxO_NRK-`dS{>7pP^Vs9^B= z0F9yx`ct2;k670__|Z`9l)~U9@MfTg5F#Mu60OUkBYpdwytCjhQ!x~bHD+aO)C^+D zyef--t)&}o>oDife)MTBayJzSM286XWZCYSUAS{4jofd(MJg3thn%vdfF4C~2;SM@ z?LPMr?7>uVD^~YP53*vV<>uyPv66$>G+xxFF1f;GhW@d-x=3@#KnNPW+{Y5oK+gR_ zgK%x9>Uv0e*4p5#645K&YpZUgq`@eUw{nj|i;HJwHde(~dfKAiznm+jvYHC=3(XM- ze963+&29YnTHMK@!;P^4dY$&egtH=7KG}}8>;-# zI4Rp*mVDVgBRM7k3Ary^75JPqLF&{q?h0Blw(s+u&K`ECZrww-S5;L-0L`u zxT~lFQ?PmC!-vNPM)3ztnYK}b`SkbgW5yh&DYhFEXaujWCA1zU|Zqphs z_#0fm$9HIYr>eMqz(iGul$||2oP61$@NU*4172#WlU7aJlyl~6md^D_4>!T;ricWK zQ3VH(BxrdP?#K#!5K}Ty-(mAmh>D@vQ@t;nXQxg3dMh zjZ}*T7_v5m#DP@LJM}^jQK#E(9A*ll31FQPQL;Ulkp!)(-dev@Pf};Ityig;l0SB9 zUkKa^_)Sb!%-Q~6DaupC@h4sLxXQEK2fL;W)+QBm1Pz{r!QPHZhBj(R<3SCW%yX( zzbU3PExdN{y>eQPN9cb_2P9%VzME@kWH3f#-gy=VnsAu zEhhng&8E`uiNKJXvGtg-5@L}a;|sOhye(~Yb(mjRf5j~IJ-D^UO|30W~B z$6I4Kbhm(Fm5(;y6m63+q@}z04aDDqkEz?(?QUzoYQ;ynt(uHJwh!5BC)pJ|n=xA` z%O$j4&zKJlN_@HGUWP$=$X953z!0 zqxPL)aMfZ@x@Frnqb|2&wTmEQ_~Dc_kLIdS#(^Etb+Qt!n|rK#(%ZKz@hz-Bg#6NZ z*>!=k+&gbZVq2os6-|hiIq{joX$l+OurP#?TTBrwOc77YPt9etKUrA`QWFuCTm~|8 z+%K)*ITLCZsyGwq62(&yUHe?z=i8y3m%lZ)udQg|lvyZ`Td-hV{kl zTd_WaiY&NER`E?GXl$NqEJ&zjs`+hDKHq?)ROB`4rajuMFk^(=|4X z$Cm}Pzd0HAx zM;@e-7x&k8j%*d@?4>@PVYhneFgF#e*CXWgtn4vM02v7xjD~u4Davn3lnN?0XY(w# zKl>*`JDU%vOyxm7Nq>t$ncAMntewNhD5>(3p^dFQ=MApzN19@6XYBoIet3m zh7|&MK8eG35rJ4qR8aE~diF`em>;Pap_(`MYTYvvR<{j_rme`wTCkPZghBL$JT@@u zf#5#hvqvGLf$RU5GC!@zCG;e2aa~PWulODD6-35oV8`iWg_CzAX6mBClh{R;C4(`- z<)SC$ij)q8&^}A8GH5U4^;6rtpkqoNjrJ-S;SFg1tQm|kN9d%Q9@(mS084VScnBk% zR9&izcDg0pUs1`^-yE?!p1cnq@Jcr?b&=s|2>bNYDA)nixS#A_$JkLnR_4NPhW7cU zW@06q%QI{}X|4PC6#d(v zW+JD^d$XQN9_9o=?f0XJ1gMriZRX~c{RF6V#?zm!uR3q~+6Wa6m(A6F^%|3vJe&|R zmOUP2O)jPb%W;hn?p*gTSMowz;=9b1Za<8c7{o>!#UsfSJ_8OVrBK-}elb`pPo#gs@PnpQ8=Nk_Rxa7FA{(<4~+eNIV%u z?lGKN@$e8@ETG89ppSYbNwknT=ZF?3j>{6{^oosg{x*dk^;nBUWYo`s-d2DhNLl9} zPpt>2L+1c1Lx+fo1ADtFXd%|+TK!dKnV4J#YG`~SjNG>&p#J(54i8MzNpcQdc=_8t zqUTrkwiQUKB)2$!T@=-E@d_Dws-S;{#l?&1*i%Cw5!!YB7rJJj<1r_S{qJYPK@H$3 zPIrJt3`$|jSJfgeQpvgSd7wE$eqV-!(}(|>vW`%!(mUcsaRNKbOXMz&y1KcFH_(Yf zAReez#2<0Prv!wnxN0lH(UdK#j25+@^jKonH2KjlLNDIkehyLuqj;Il$ZqzwuEN?+ zr}It38xwyipt!Iw(LO8vB>o3Dd|Xjcd)UHh?cb2~-gcLv;l|3GCv(OnP51+PnP&oD za6ojVt-~b8y6KiIpK_k4eLA@(MGvxO_i3{w9h-FQFLBvc-|_S!Gg!+|^{qB7r6s>E zf;Qmn3HsEeYj?TQQrG{qE-zeQW&0}%$@QAoQnO>5pDG0_QN%)gL_VQE16z$eOw?Px z1__vrt(ox(4^U5ItF5$`-&SYGf`l{dx`&yx`+`t_8$i6c6gbwZx=3GC0JFj@!GR`b7$%&!i*)ly%ZEx4+Yfin(IX>-G<< z^x&@xWzHrI&a@sM?>W56US>B(#GX@Eq;5(fs8)LHqu>+!2XHZq6iD8qPn09tU2p!J z@yb3bVY4V4ya8$bJC37tJDI&<@vN|WU!#I&%0}SQ>H}H5xC$qHYIiqcLifk&UCY${ ztfjF#-lk8g+^FBF$2iAOZP?v3V=2mu)lJP^8-%J53Ot$$2@y@UHgi+(c)q0G){&!U zo265^7Ib1AE_5aA6aFHpg!+-v+i%uW;$+Qz?EjRJAS{8ry^(m4)X%h#6b4|R1(@l* z%UO0Y5Bgod={Ij}pw;;(H2tIaNL~8V64N3&IrCAB?3B%eLFvJ5 zF=AO700ue2e5l|T0N}$hLnxCjc*L1{J3;!e7qQP6nP=>y;D?_2#%?R~FSBDJmKleB1A>CyP2KT< z!IdOVZ9=#dLgZ5$Ij9&^K)BXUqJRXDU1;vI(XFA@$}*2!$EE^-DY$gZqvZiB-Ct2k zg-T56O&>PvtJwIfze0nm#0XY;#6bJ%nH)+Gfe{`Pt@|ZvL#59%$U38*pG*r`r(!Jd z)LN}FwK0l!K8w?~PuXu?u`BcEqJ4en6g00X@h3#owZ(G{h6?9;?R~BEZ1Es z=1E~`a~`_H@9{Qn;<@lJ&F^~gHh@!}lVrZS7{8{d586yxBe^V&oxAQ&u7Ot)^( zKKQE|Y*kaud%=#$fiRSh?Lt<`EPak)vW=OGOil2W+=}3{QAD)Ev?DX=JLqgDF2{+( z1?w&@M-Ui)f!`sJj#{1>`X;D&Qp?(e-YY29VyA5ihz7_D#H7hUlpn$mu*OCL^`=^j&5UVUYcsco~(7}lP&9tekT)OMYufCMUD-Zt+q$?+u$Y(w7r z^6-|*HTL}wsq+(8BAKIj@J67^E%#6PA$mrfX#fJ!ZFYRW9 zs?k60X6p~7J$lD+_R^LKzCalERv5UN>DX`%;|e^pKN5N?;pv@N#nKJ zncfn2yC9tgM9SA;;$JW$IQSq^3sYt;CBQ~iWJ_jXZ6vyX0Mhs1@Of_xuZ*X!IM`fz zk(+NDB0NimM3?2Bn*;U&X9mzRm?l^EmgVoc72c)Pc+;Nq4nW&f#txOTbl>B9CQBh$ zh^cB7O_Ge_kMI+`1-YPPJk>f0I(-#NE3zf!JMN?%RaK;8_?M(&xY*ddX7;Gb)MS8I zER;GE>I11{?K)duZ-0E&6~aDy|hKm{h=lJT@GAisAy zeRycnS}26zaS~edBjs}m@$&Ud1&5(`De8*8^Pa2Yj!f#wk=kIAH>)~5B#nX$Hd1*L z_iKNo{aD}O!dQ$2*6~pvRT7hRb?$xFRro02#)bV!giC=kpOvG_rJaEH!>8V@>S_4D zx&KdC^}{i`uazc1*6rl+&ep_aO`k__Plv>M~b~rMU758(T62y`W0Pm_ToXIp(d?u3=`H*4$D-QJ>|2hso!URV;%%17(30HU0W(@j(Z#@t znQ) zE=Z}mNDI2do?E{G_kHA^CDC6IBnUmCr6rQ}I3CR>2z8Popgx?Ca5^*5aF`~pWL**% zwE4grDsU2@X6oF(9ayKU{yrbaGvp170e%Srp?lb!9p|YNXFVs zz(`SI5+5@>^Gk*s#ECMjxI?40+FjoQ4C*SczHS=knvA(`zI1gDxnHeqU%A27U$Jf` z7aFA(A{%vHZJM>FHmkN#I9o`5jnOR;a-r+ZDr>(kKo0B%VCR&!>?u!wMGRa|2^0r^ z7sQ>^ldqLNI6ZE7x1yy50=LD_zHrf5k19jU(c0}7AXm1!U8wGVr&OKS>LR#5cIaR$ z+Mf4^T_i*F@n`MR3d;>yC58|rHPXlISA&UU)cvh>uHs}X|J7M6PHcV8?6iLF|il} zUa=cBkBo22ag36z4g*WKjMK@OEb!PoW1J}*H%9u({QULm2|)MAzR}{IQ0#8N0uIV@{0MiXv3fmpuGd!Fj@6OZ?b(sr?DG>Y&l>wf-Y47qZ;vfUY zPeu11v&u^!(0|nNoy1(3X zO{pbTElP&W)LY3ekG1qZiK^Rb@}k95X%rF)-#OGHaiX_;oa-IAdGY!k+a^i(hGIYF zvP*XM4Hs5T-^`|YHXjtwtpl451I(eqP&wEDha!5s9>)%(NVX0F)e6L ze|zR&IfKvs(@UJ~5l12rU);Go;iRKG?H$cTUl+Hl$=1vr8;D%a)3AaBp&+1wqi_|Q zTT6?{uL7E?2j6Yh>(eAEyj>|-I`N%UB9gh|HrhF1rV&@t{dUuMY$e*OtGc}AaES5` z&s1J=H-l5wV)u3~-dC35%-iQD?Xg|Ta!e8v1lFIJO0ORk=2_s!!Wo!|n!!ZK76Gpl>R zUj+rD)mLU}D;{3WUz=px81e?h)Tt$2$DN?6O8b26B%wDT7Ft=`rdzq2l9Y3zeDDbo zUmYX&u_KwsdI_?74;2)3WOJ{bpkI{~ikX5QYprFELwB?h_OC?JW0#ivL@r!gAJnd^*M%NfX9A?f` zfh(F8^R(wa58tz;xOXbc$MQ96EFbJ9L&Z4_Q!9UZOas;dmtzyut^MeIq5I{84s*<= zaBI$dIIqB!ZtVRa5<9n_-l)dewC+}|!rW2t=~uz*WOokxy8*M=$~P{lONV_s$H zb)fQcP$d^t+nEJH$8R_VrF}PleI0hYnPIEATlzT;gMw@i8Ky-n5!J(hfU3p&N_(F;=3cx|?_t zNvR&z;=^=hVrqVttm~u9Y4+?v5iGhqZ_7>6z3T&=iNVd)lS;SkO_CgT{Lh8>Ob+G^ z%A;*1`lgk$+RCh_eB2o}6B&{(#b%FKT&G8Lqa;cOp>3I=umb$9H#^_M4hVo~7K72kdh5-Q%Unt)xJ~}v{=?hi+WcJx9*X&*tass4=b60d zrhL;r#({ZIeee-v_zN<2QkF(^^rR!ZgJ@X42VH(`2(*|Km0p`{bK9)OvZm#Pr{hC3 z1n;@l*!4J#L9M#xHiafC<7B7w=9FU+6J)A_dJ+xm*L^B%o^4v?vit_^B?woYgJFV~ z6l2l3w%j!)s@_{`$vBp*S)zf3!{h-|6oNDUW zMVYT&kXm?dGPNk3?4%{EI|)@=MJX$o6>7vy_3P|T7~hz5Cs}r)eX>dUoN>q`9n*4n zky&neU>}KTbjiF832@3Pc4h|U2xc=ThI4XRVAz5t29;f6PJbO+0XRcW_YG{rP;SW93M^>gOOFd-QUt{0w!(8R-lJpmB-2TM$=@%^p67q8*Z2f87sXo8nFyv6) zMmFw5AJ-{mV(@QwNW@FU(Nyyzi4q2@On>{6YEY=kIUb9f@ZT*wPqRNpY}^~fVw*32 zu`j!;`y4aD31ZY2Wh*ydzFzN>aPw8SlZRGdtf(0A4T%x=Act&D1Q`Uq0d<_I`oC)!bvadsE8LmoX}K`usJ-=2KG$bFj8qyHVlCI}5FqO~Ihg4jO6P~P#&)t*+IOWQ3M6Db5(iq8Ije)|?=aC3`+~&5iehTE zWHNrkW97wVQu;8v8CT7rHbQ(5+fW+tm&u%a6VGr0Yl*aIO}woa~#$bj@p3KF)(~kF$6S_2FIC>{wjW253iu zc5DlWb#aMtbH&R#QqimqGY{Iu@Qqq&oH23(5hS;6nj4rK9%f>G^0 zf2`tI&s5y)jA3?q7C($o!Rr2!V$XapX8h8@tkPTkrejR;f=feV6unHw8^}}Te|yRx zUjAASXt~-BHT5kEa5IK)#DKTmNduTWt;`R1&ljcrSI|Gm+1~Bdsq9!e z4cgg3T_de(RXO3C$LmV4Dg?ehWa9;(bkIGx{wfG$z))2`CTiMD&D+3#U59HkIYMV1*j-})Qmd!0;q`bk{uVFP?5+*_RJlY>qvX- zN=|p7MjgV(E{7Sf{@j}!05RUvu6fW-5bFB9#lOM^hRi+o=GF_IJT|>1z^9=2&-AV8 zO0+SN9L0d=_Lh!s=6LE9UM8>m_=IlGKLc|@F%{P}$d4kXltIRBEjp9eSMO^7SdF?P z2O3w$NCI`09%ef%CATZ4V=({C6wqQ`Jz7U-A^jobpYb8yw_*-7K%wJs(!}=IoenRn zzVG3Z@=zpt#-2ZGdVMh=}Sj5WwBb zvHqVUWJ>n>;F@Y5hj!c%$F%Z|r<4A>2Cxy`ht?2?k3Q$4l1_2C{C|JA@ZJL@W$e(4 z-8p^Q!3;NY@ae}HIHeqJ=jgsZzlY536xYj*6I#Ul)XyBK{s8Joz15aVyy)PIw?A;( zzY@6#Dyn}U7O(!NPH^b^|L`>ml+8x*HeO5Xjw_3nh9b~~*DF@|{|)%?|E}Wyw_l$N z?E`^S!6}oNjO?`B!>g-w))cSfv$(5E3~0h8bLWXja3HzLK4+b5W!uMxtva1uhqO+Hv2Dn4}2eBP|v3{S_LbfBWYy8{;Apc(eBiT%&v zc7l{bfyMknzqxzNniG3De0+7Kp}abGa6CTHQOb$**FD|#PtiUd4(RC+uZ!OS0{?$}-9ifgM-tHWI{fqZE+|^>BHe%OaD(=Yp4g$`{&nndIz{vsaY<>} z=bUNX{^^|tzwccdFeN2@?O0qe%>e4f#txYq_$|$Kp7{HC!Tp$51n}6^{?kH0T6h3j zi2xD?2)`>?WNiAv`aPsO|6f9K&q8~aM2C=Rrg4|hU4K^Y%|FmwqVK=h?z&UqwYxrd zjPo~5y%IlC`nDs8+~3(dJx8AsLvC^i|I@$@X;ps$`NPu=aR~mvNE7!AT!#s2n=@o!ucf@$}~pzC~yHw(!lS3pec z3ud^yrKie}L>vcKr>ZlQ|U`+YBb1%YXQYoNCCB zgGbwmBbpOgqdTn6#;i}^A13;4NU#>%{HT9eO+96Bz%$z+rc|9l9v1crXdVQ7RnCj` z*daN-tA+k`H@dM5UCARel%&+B9kh+|solxB(-%DHD$(U-#vwzZYexaMiqDqp{dr z4h{bb;>Z#ZzgY8|0NcoJzLnvM?2jYGF^@fla(aS zw7HzoH%qdZNcl9c-OKcr#PJi!eV14gBp#%wl!mYtB)4o3*Evt(`|sJneywXip0@-S zPuOT&FZND-$1vrZ{FA*HazsS~arku2`RwF^;I>Xprjdm7ew$&(GF#C>{?hDaKFwdg zrn^24;a3`@QtxrO1+Nu9s4e?n67)CCC=^*Qoi=ztdXs~R} zho^`2FjFew@B*Deu6gDs2T4;v9qVxBsZ?fQ0#y;)(p_G2D)0F5AA901r})X?*U72Ajn*q0NU0g=udt*cUC*dYP~xMpvL4w;H=VkQGUUI-hhK02TLdO z0){paQA2#QiK$?63evc*7 zW>O&WI=820y61n)fNyy1%k^LL=?eRqav^*3*V09k>aHK_ltn|_R|tef$-jh?c#kG7 z_(xC0;^PmuW=n~KY1#txXk!c&IfaMZ&0l`#d#criWS7>(s7l$jKFol^sD@P0>ZXSpY^y9pL>$cB|cdMK;3KH&lCI)#yF93MdOEd zx`>JjZ7Hvoth94;psXwRfDWc6?Vo}57Nb_ms~12_m?O0)7u6kOTE;W$2$Dn)+KiJ1 z@F0gc=i|};N$#Sb^8C zEw3A!jrP`r$Sx{VX_uPhzq6`dDQj%uyuEnhAK{}1fiOM^M$`*@pVEDQ*G91Pr~BN= z>dMZ$h*803#SaABSvRePF1Cmh;mMfJ8su}M7mRxsr8 z_LkN4LHLJ={t_$NK?aVLi2=QiJ4epkhtbXlKwIF0zUDgLvscE_olnOdyUtQ*3aeAv zeJBJO&h!~$;cM%&0)%@lqUVMn_-3|+DRmuD5hvPH{)L>|y2lFKcu+?Rc#yTFG z=op_SLXlax+e`ezKu~e2mM!4zd>{1E>sVrxs+z}6L)x7tf zjMaYYh5CKf&KoWnGsv~RIS|XP`em54`6UTLZWN-HlN(Y(BszPELi#KRviO*@*=`GP zD=40K0;LKls9g}{*o;~%{KAOqdx-=?;w3+eU7wwML`SW@bk=COwAv#R?2)`Bh|_7_ zPsICgJot*i#Ns1RPdJ_4(>$uFfUzdoD+iFVj`{lEYD0i)E>}j_rvCA6XF*G!l6uU! zi+Uj9Zwe7BH6^8r`Wa&g`!m~*k~lyw*V^-3U`zD-wX-P8?I(O-pb;+jfkK&;}anW;4WK%()0m#es_2t<4%8&x)X$BLQ)&+%8{Ka>gAVskK{y4>J zFnTQ18Yzz^)%Fym^fbV|;OR^6Gi$f$hPkFMnbZ~-tHM{UsLm`{0_1d6cbiMp8quI{^EsJk%H}H3`chO++?C3X3nZ}snJgY= z0B32bZF_(6DG-@s&uaIKR!;xGMB_T`D`${tMhlkJJ4c$tY@UENh@33GowAQN*lYwT zv^QHsud~xV*xD|%@^ls{9rLszCx_l*;A#XhVkF{C>W_Dh*n*rsv{@s$iq6<9y+0h1 zqh|XtR&^O7&=@UPehg6U7#H8gWQi<9CkY2m$#Zha;bV2=3vGE|kQQgdzy=-OqZ$?F z5kiO|=X>(pIA=K?Vam^JG@8Zft~NylK-Ya|r-Fo`C=${*9yaMAN;;Loc1w`Kb=e!a z>~o&)GzHRb`yldgQi5zbXF0tZ?GtW9j$E_&}*h5EoJSD!39F9YK2vvs*ihDg^<~Jb%F9dV6 zS%O+jj!}wpUE+EN)rO`IGw(s%8tmR$f1F42;VV9X{m!o&fa+!REEJN`GNJ@^N%0c@ z`gfYAIqQ*_7s{(Sy6|sr2qOk5P3Gw)3F|OI0YvA|@bDTFjPqgKDQvGb4G@WrNn3?l^cNY8VSU3mX%{g{;4uY#bv40ZzQr|M=?$dhn}Pzd zuZ>-DO417FPb=3PT{Co&=s!H>Ws3|*bGqO^GAmJ2($GAaRxlJnw;g=Yx)CM-Wb zKZ68o=^z9Nhytd?Z+V`rO8doo^zu*Fdj&kHQqjurd35?U#D2RO1oH3S1yJlVd=2A+ zuKgA=iGk@H`6YT=km5)YoU1B><&DW)Hy8w$fx&RN0Pg$slyc)c`u9HHH6xYVe}X3* z4Drm$L~cV`2X(1dOPualaUufNu)bluN#3KNdv*?I#$k_Qm_tEYBumY;@p^UiTYwiO zBhG}aJZ2WHSh8bFfK_1(wTMU^R>IaLzi|F63V5Ea&>cgDowOaDE%o6Q6k77nW zDk|rn10~>HQ_R0-DmuFJQS-NV!S>C?AFbIq5=(%D-R&v+(CSNi3lebHbtQ`2kM{PP z4uAOIf<~;-Q|l?i${Uty?{y$BD0?51G(99*2(1Wtswm7Ulwv#p;ssXD4Y z)r)1l+BsWnIWKGhH&?d-ccXQ(iwn@XIrD?9frx#O>H53BGra(Jid+g7A82%|8+g;u z;OD5?e&lp_R9imZ5Gt^Nl%>VgroRNmG5zm*LDm8kgq0Tevb-&Z;a^9~AP`zI6ZX1L ztLlsbRq*wU+iwVS+7GQs)t+4oD6a(o zgR8CqqWQwy-!+c3v5x1ak%@Yk;)#U@N@v#nxk}>|1y}|y1C>senzmp3(b%*$zxNS_ z;$qUWc=$ap?Ys*9`*DvTE$k=_w8i#PGG^TbF^H!Zcpy7+KNlF51{pFj(4%-e4Im1F zD2;T!wNvR^O+IO(UK#{)u%@7>IF}JQqv3X&j5cN28jTryATC_dicV$VH&X04j;~Y0 z#ZX+({?7OH6piybmC;aeD<&Dm?gJPU6tEi*Gj9lS376>CK)kmQh=Qh?FQ|D<F9o*!2YlUaG7ac7Fx(td625k58lyHX4O&N ze)%#(F8S?;3GAIvJBLZpkzatHMz^ea*!Tcdo@?kRUtz<{Pu9VH`PcO*7GQ@RfUTVi z3?Lwn#c(_pIj5H=Xg7SOk*NFdZ4!kw9_%Z}`*-f}&1 z0UrR6;G&7Ml=pG2w&AxoWCPLi&WSZS#l3fe1Mncfe%Yk@t;RDD<~vo3JeRT~A-(YD zM79{5%%a;674kLiQ<^j4`GWBtqYfX=v@L`jHM-I6!VtqHd!h_lP@ZGK=6eEwslit) zd!&;WwsJu+Y=c9t?-w-IhPDsVU?WYkDmSlhfa@_21om;4zrbqz&9Rj&EVhLe_GR|2 z2tHmP}^uE`ucBS3-%c)*Sn z*c%PbRu7C?6{_mrPRzbnga#tOdi)R1d*sf+)ryweMsID&BNB=buMFal!8Za)DrIi# zWV+#(sb+Y;ivYYTt_;oMpo}Ju#+qG4g6}$x`3Lpdu}Fg&Lza5snkc$TWCB#6m1oaz zGsa9PC>HX%tRw214_0OAzIu|7(q8T2X1hG*zghz)dsl#EFxU0-oHL2jQ`FOea%jYf zmJXrqVXx}zLT?1#*0TGk9wTukK?54{0PZU(+YJw9)M)D=C{!p&+X6z;Yqwp=lGf%~ zc+7NczrB%#JSHNeb^Q>cUl@WP&qzG(g+#zW_D`k9oUBrtBbl>!S~jU+rAgO|r^6=v z0T<*~UI+POLH&CSY;7HgSFL7C9#ldaMLGl<zXwVE~a91qp#cq+97$QV>u{ zX{4kZkyb!NV(1QGq;qJg`OX35d4B8t-oL)@kKe3yuXQ_dUT5rm>|-B$Ul(iDgsE$u z0W<7cUSDqj@VC_FBLUE`Br%qwemA{Za$yAqGqRg5wE-)5=?8Pt49pT;Q~!KDebK>-((myuf2M()QcE58Ck1rrQr6pu%!9qJcS1- zPOf|W?n80)kjd?1gr)0m49@I%{E-Bg!199)ctp46gzMA;MmzW>0gq@vZ*vDL?($h+ zWPQwKGyu-)5S;rSuwfT={6^Q7!xtTX#!5QZ@XN?%PnHiZ+vO5nfaK2uBdj+)(l`HP zu;Zko5#3(B0V|q*CwUPT;DsB+te{!YFR=4VL4enpM$>8k!Gu%a;0R{RxDro^7l=yv zX+OJxL5pT?TRpnAIRfeH+4ZKslengPp|216+T<<;1kOL;-5;Rd7(w%D`5}p)BL&ia zKqO=Gn@J5G;k8{lx_D#zXY(&hKI~gqm0|;H{y<8@jN$(`3fa+Ku^OU4v+~{pxVCIc zz-sZM{>j+g&)%%c3d#loHb&KP_h})%$paIVm$8q`!;RiL4QM9jXbtF$af2mD-~<6S zWU)lRsL5hQ849PQWxq6s1O-6f2d;D*@{J(MW7z2(ThWxK*JTtwVHnRycWdL*3Zkf$ zXT`UFzWU4;e6g?B5ph87Hv)m=uS;fb=WN#Lb2BG@w3OO*cxkNVv;~cQK<&wL2o{r! z-Vd5$M9&%?t0d3+00kuUX9Rk8VMEwvYmfP^CjC@gdt4K2R>Ix^U|(_L-rcKdGH{AV zXUn~$3gGu+Zg&N(T3k@zojIw6VYVla$3j|f5^sL%ABiV~q?$PxK3Y}N-+drwN(^kc z$dW>mrw~1&?lAYfuV7`Dnu>j;YDT@n-95f%Tx(+MlSh5%)uA>^f$>&g3eiY7#Q4Uk zGvR>kJbDlfi7Tkw+vUbd9|L)Y5HAxS)z~A8ABWTsxR_I#ujNi!%j>(!`od3hjuACK zeT3>HIf%%>C76ZDUziR2`jMEMCW!t05`m8uI+Wzvg{|vQ*{@(nAvN8D*|f}wg(;W& zN-eGK=;>aBSWg2xD>Qw8!pM!P&e!AKR^M%$c$F7CZmVZw{-aUd2={ARLj3xKt*dML zRf0p0%WpdG!=lrwhw$N7`~z>r0ii0@g)gnaQp1PxVo%<|2ND8?-vK!qCU_9|=Or29 zP$KMnyh|Odgr$Yorr%X}cc^_QrDET{Mi&CtcTz^)e&zb2mz+xPc|}^arS*`r8djF# zS;hw`*2}}`P#x0wFwDwerTI($2ZZ$o&c}qwz8AD_a3NH({&cxxQAvQ|Joe*PO8co^ z7xsZZUUSps(hJPwnvH!#*ya@^{;_Y7wz-f)e z+Ys_lREHT&I60t(aN(n17XZNq>Xa5CC&j@yzRQ4bv@~5DgI**SWaTRWM3_oN8YZ)> zwsAGRO(Et}_@}49CaB&DKjMgdm+=L-#(n!+uS_adp9$R)p>aa2KTN^K{k{h5KZ}Xe zRP|~cG>xeqbBRXc0U}NcZg;w8miSb7SxwDAzD_9|9vlUhk?O-}Rqyg8)#j(55=nhWI-3aSK&zg8q+J6vVr2TX+YK5?*^4WcA$S`f){r)jVTxKc1U;4 zwTq#*amFeOEvZdP5-#94`As!eVK{X!Ef~M#*y^FcI~_Y1`8C<4gOs6*bX}hW{O=Km znNl$snOE^eMifWv-G$eBY%x8S~&2jA4GyY7hB-b2UxU zK_w6LG*bJ93u+8K_Tmrbx1F5o$HbIR)KOe&_N6X3bj#9$iBG1qxb}RPLZ^rH(}zN07!8kGW)) zX$%L$Jge4}z>dH&j%AJI!DmLInT}U`dz(-q!6`R2P;Sw>O{($u{#^T$S1`Vzo=4KM z@K17_7wksAe6+suGEGLw{?m#>&eu|4YY{{(xRQvQ!sL3ZUY1l5!V#-vY(@gbIw_0&b8g=S$cl6aSwGh{*DF{g z@osBfHa@dnNOwnr0g59MoSWj;G<5d{2f9A3xA(oL`6q>4lgN@JKCnBIua{vHTl-Z} zs|&uffabpgkp(VWO(?JG-uV1wq4AFCK&Pl50;9(e`3bx9q?jDlHvEwuw;RN6nH(1b z7gpP@TcV((1dY6@d!JgoerE?tf~#ABYITF&gXnqt+~^#@VC=x*-tUY!F}`$@MVk;{?)}0C?x*1!Gu^x;C$vbeV8hITC(D4{>$0L(=aMt5<<}M_&u+g(kOZ1lkjyx44Fi`5 zLhfv;jIG?^UAG%;_I=*xm30kdMHhn|jJ|7Hq64kpESQ(SzJ znf^$5PP{n;5%iJH*#C8xmSdw7I|m6Q3c%*Pgham_-?S%49gY)#_vxrbs6~YQQYY+s zgiDGDIWhb5M6fDC(sW`;5NGbVlP3NGOijz{B7_h_O+f72&oR;~0;)(0xYo|_D~Yqx<0U-h-RDrr^2} zV29UP;vN&>f!jV$i?S_W7zaGKatZV^qf{-uk76`vCY*>;?bwNW+AwPa|G6pV*m9JV zRXx}TiELu4f8%^$*8q5oP7+A!ASMs!7bb9`WkJ1g9*oa;DG*K(kFz)dy8t1~fAk=O zTi5~6RZLj{!0;=tV6H6NkCw_0;R}*$Z1=7`jVp~)I-t_i8o14l@T6{vF^P8Wb>w+Y zxhe}c4-np4fBa_oiM;EADPeTmNBqs9xG8IV!XyGhETz<5QY$x5$_s~X%fZ~IeyI(7xTf_^~0>*=qoTE*4dIG;d1w}<1B{q-<``>B)N!x48(ia{0}!t5k= zm0VR(jGXWeTepTthP{>5@&sm7F;Q{vH_jR^3_xMUJpE|xaI*f<%cat+=UO8>pawnh z)SU)spgwPMp9u*_ot#uNg4!lAYV^C7g%HtD(KparKm0-#3);BXLl>{InQTfbab zqkoH6x-U>-epFcV>)(=LiXekbEs9HD!U|b^^Ee0?*Yu>t+>JJUw7SWTeAo@^`5Cc4{}<49B>RT3LF3!0meT`(V8`xJ*D#Tia_Es&Dcp0|{9 ziv4)(8L|W;A-ge>n_+ z`Jj*8xfWXaF^Q1F0KL4e*YYV+c;1YjJ<8@I-W5rFe60Sff2RRw`#6IzwpMf0#mA+y z);9Ug#2zRv3z5x?dmZ{hn-VHEQJf?mUH>SWx&)7OA}h+4Mcnzp7s-6u>mF~Go6P){0+%?qeHN9Ugw7{I!qOin5s2jNyYf*taSo8D%x&KZcQOT7a|Gcbm{fP9FGSxqLKM zio|cF>t=>n1T`*psFka38&8ft3x8W{Hm%fZ5pH>gLEgLT=TIn2H*4g z+l&f+?W+Bs&Nbfu>SE*NjW}euiy#&o{;tM6lOn)yn8G}>E;cNfXH|qcK{Jr~Onh}l zhz;ZXodK`CA2Cr_EArUwGuVjQjsAKV9G0Lz1MfvZ)epI;G&LbzJ0kSdcB;&@TA}oD5TKwM}ae zM`&4T$taz8c(OBa24AJ&ZzB^3?3*9fmCmQTK{)M5D3YC^$!1E zXN3RlAZG?i6_0YQ?c1Z$wgu(`WtR7f@SB~}{^lw^84-M3-}8HtblHC=#&x(4GwMd$*UIkXq>(v(O&lSdH%2}iH)2r z0~z`gDo0AKHDqh?b1pZ+gG$ha*U2Exdn`COMNo1F9U9D@M8eFhnnVeyW`Q=tgsZ0` zq)x9gs8IdEQAlNbeG*B7+T9j&(~^~?%Wj)N=5q6h((`ubkzfBHX*xdWJm%-7^+Y+G zT2r6FMMz`rofhjzJ<@IZ9vLb{Sz^cob>cS7nM8T11+oVCNWjmsnJ*@1P0@pf@!M0$ z@uX{`w(er!eNGjMgj<2vqDTE%ncf$K3pJ3DAy{Uy#vQ5s>I`@`~`R%<4EKzj_)Z^As|4mFDR?PNh>PF3#%B*!U{Z zBooFJxhSc6^~8+K!8vv?2#Qtb)}Co2_%QTG;dKaS1UbcM zjY(N|OTWfb%D#qHH?1Q(u<0s3&oyJk9x_hFStg9sxA0kBVQ~KtP8}m!n_YhJ!@!cV zx@UIQq}J__m7AyGb-Nu8a-zn?XMfmd+stjN554|ST}h2a@{72DKQc3;E4Ny_ zetk%(HNJ1#IZj5l<})6>WD2xbyKBidD<+}F!C<3v$Zlp;$PT&u*=~~H^29*D+q5vV z0XXJCBBja8(HA;^lEAFYqvU+lxEy`jsoh=D-)ryKlue)gw!4n)*zSF$#D`mx@yff1 z{q0q-t1($4hW&BAd7Jsd)l6{Lci((lL-BVrHxTXe3mbm~n6lcQYw=LH(dkhx&bvA4 z_aVmYCcm#w9tlo*9-A5=<3xE@vVoPk+0qU%j%MaodzKVSW97dE5yE$;S372fnK{w^ z)Lpf-ScWmEC>lOr?4M;%fv*k;T?#aiZ|=+G zW-Py)pWBWs?-)?E6tXNMBP;6lq~y;?LFDPYOk1x%6!dmOh3?O`@KDfDSt`y490$9K zUJmWt5C?;F5kNpUow+6b@@qneC0o>|@xVrvWo9dv+whT-2uFFDJ@}fI(;v5Ll-Hmzu?GBo;Pwj<^TT16 zDKA>XiNtCwu(YBd_)jEe9jo0yrdNLO&)d&hwu`#X(cj&#b z)b_^2le;n%@t{I4La!A8=(R|46{dMG*)%HOSSQ&U*xE^V6Nlh9`& zbt47iZx2-{)OqZ2dvA^T1fw@*eS)Xz{@6%ev~~zpUo^#!G8lS;T|DR?xBws#_hFku z`}@;X=YJCCsO1F^NLcD#@j^;*>dc7K@IyKm-rlSbdu3`YUeJQOxZjW zy5Fv|nt{~o(~|Y%+b*7V;?jtXjYYK=d6PAqbQWQ3|Cn$C=V4lJ<%=PCS-Q+)D!9FW zV4R+N8gE_Ya@t?4(ozGke47_)wTWJ2e}XPp+WeT?Zk8jR(fRHdX$KEB*RHAOzML+z z<)WsOo3jdSr@Uo4^)q|(T(>pF(0+`O@iZLBJuxuY<*yP`JVfSuJy)~Gqhd~Ds^+cZ z9I?=hEjz(GhU+Q2yJydMMv`Wo@^d zthnB6f}CC4b#HXhG6cE8qjbZ%vzA6NA)(fyS zWVplskvQXcYw^v^#qdN*UnVk|nMqSlCFe=x+fy*EN_qTHC|Ff_R3_Ym=Al_{FD&Jr z>=w?`z$0{~Ka@FkX#e^0F{jw5Pb>La8O_L`;(=t5zZtjpj>*wlr`;j- zqo7{zYJL`B$M?H0d@DfVu}*n_OjSGP6o=s0V3#UQstq!VJ9R%TZ89?_S3rsNKXcOZ z&c@H?vNAPQ$`?gPjhc_r1c7Jp_f&F@jVeq{MVnODAM89v%`FQZ6Ix>Ro{SF43jLT} z@HM(ZGgH5twS>p@7yMss=DSV`^3jnUE_d_xN6Dz$43)V6s9rEqD7G9OH5=sSg}$Q+ zGN|-ixQ+;^ES~+TG&K~BoCb&B%taFzUwE1La?}Ms0ANX(lmsz1PtR7{$YL?N@^Vk? z@;GP8H+7q-smbawBuKu_5H>9!qR#~O_3CUtplz^RNTv(UMN*?8Z9{=xhgF|0p^bTwlS%SK`DRK%Qh zP^tdiLx;Fg*TIRhVDrhl4LwzD-t;R1vB`&PHFn6E$A-H?A93IBA}d7@eoh2ISjwQ$ ztngn5oY6>EE#vzw&ed~={E)22`rb${J61uV|9$2eVeE}T1Kn>+I_+1qJaQ^*W;553 zb;zk2m-U%;Loe#}$+~4bILWP5L}#YR5FZ`R2CfWjJpNzkmmj;dU=&Zq=)YZ8iOrx) z+!WPc+Dq8id2pwDEnk=(_ zn=0X^wIbi_jGQie$;r2!oXf4Ir+ZUyE5BHo`lv@*slB`X<)pUvaIfID=fGH?X$9f& zoJ+%^ow~&RL#G$Sj)R+?5Dw4;HYRFWd%c{K8nc&;mX-VyyDoYPEksm$xTZ)l{G0rF{A{ro!m8j5t-w6ip|Q-(tWS!Ana=kMz61)D)Vquv~E)N}-LP zqiOOPweiL6V=%RfQl5K9)txn;{nd?_n^ua4Zv1qWd0_K|z~(iVf?fjfvbwI~D99#e zXc?0-!d~(&e&6WR=q_5CTWK(AG%%jXriPf6jS|YW|E^Z)X&q+--F9ak%V0#)UfQZ2xJ+RxdW@*OiQaoc^_+;e@go! zZdYWUPJWjojmflxa|FMw>?uDYL{p#R^e+Nv7el{(8E~!oQcnDsEl6lH-~418jyHPY zML6|CjqZ(~S4<>`=IaYNRd&}VYEI>jQ%Zw!%t(#Jo!ad8An?k{xwi!j-?6+b#-;>_ItRs*3l)EC)H&22&n=(GmmUF} zueb@7mGv<0bd#mq6~1BHeP#2MzyO%cbULN1IC-n1skHKDaSr?iZAO6^ZXLpEO3Ag9gG`utQRQ>I zq~zKXx+Bu@Rv_W|k>nFQuDQO+u&Eshf0Lmj2~t+J7*rHL|-Us}q<{LebO-@cs|F!kq+t+x1N;|U`F@tbE(Py|6(_Gfe2`6N>M0PzcnPYi>tscn&_17s1MBQ1Qoz=&!&W#6%~|- z=~)K$lX}=mt8ptheq?2{9Z9Nf6Cec4bew9eu?;4twTgEyQS#lqe?VRKu~#&210O z<=$g?n*C>HJ$#pW#wq#QV*|p|9pFfF^RiFRcF2EU@cqzCBJAS9^&2iOozaXj$7cJJ z`;z;25zP{uhXBU_5EjpA{QP4fOV?HM;r?>Bbn-RsTbOA#T3V=D3tr@VGEhq_iU|8q zY#Gla()z~gw@wcsVGI5xqM|oy_6zdgG8%QS(7eJjFDLGd^qf?hX7agy`tzl#E3O~- z+Bz8pie6eq!4=9QCsf8zjw2D&kH4F}E~ORlA&EH_!Ku3TuIFX$((`R~Lk>g;s`QP? z0!X_$%VxHMg+qcTJZw!lxE?#ly;3+G^tqK)+emM6pRIiAPecgQmtIw1O7fbqGFMhd zZOaXIDdIVh9-F^CFx3C}ji~@M#cUoM&Sg&X>gD|c7=n3bq_zO&rd2yAdV1%fQjyK3 zdrgo-en=%PcdP^iqHjq66E^K|&ir6Ka#72|b3F{zhGaV?GH(Nh>udl-Y>d|uKGKYB zB|`xqL7eSb5bTs4iAJ#oC8SYe`HsX#Hc@Ko;}h}Su-U>1r{>zsA2t^;?ciVEd^<@y ze{TwXi$UU!lQ?Ybq=6m5-&jHS$EdNK7;|=wa44IQkUpY@9UBraMR~P}P!XRdS&+fj zN;mCzzV~Q^D~|(tbP~k4!~cGE$TL|jwoV2ASEYX- z-#TeF3%QjkZMk&(g>SZ+*1d)i3NhNF>q0anML&TFu%t0vmsvh7IOJE9*ABWNjj(+4 z3Xha~RbqLeF}G@x<`BPKHEGOyg{M4{0|Iq*;t92_WMGXRqPFy-%l77{C*v7EbW{nJ zxnjZDo;w14_B2lVEK1OjSg|F%H)~m=fp*slP2P)OXt_7_Syl)3qCrZjfe~T_x(2JB z<~o`CbPcBYiF2qvIsHRSzM0(=zHXCE!Z8}j*KDLhH3=YhW8)9eSvP-9mi<(URCyx8 zV@`XJUfv)yU!}DQ3<>Z_-iMm8L1ZY$F3P)OXZVroWAVClPURI4Da-{;2sOt9lvQAa zI%6m6y6E+$8+u%6?a@}L_m!gRhHmY!F?w+dZ{O=8XdDG~fMy6wt+ zE?B2vPT95xR9hq8xgv-}&4pQ%p95f4 z3=Hh8nzB+j*zjPqqzw_abjx4e!-8jCCc^rhbp~BpdX`-l3BwGVVcs5#)wAWXd6gZc zoHClR*<3J-WE+dhO8Yk?Vw^_|Tq zFE6+K*fdYplGaktEWggt*qHywz61vgvY=r#rf#`N6GhHCVkz#aJ22~@23wNV6JBYH zWJ7^n8=p#bf4{q2`Go`HZD4+TrM`n{UymJ5(MZBl&)q6mXrI2roVt2lKusG64j|cy z>crQ|FR=P{W4SbHI(i+6%jk0@f`Q0Z)U7DfWWnS>0-`PEC-(E=gkk|qjEoNvnzkiLw#QhxkGW~l?LaedV|BX|hHmf-0`$_iB%&c+sXlq26!+UzKPF#s zs?{UxKBMP`@X0oA%8t+{+?d^YC*nY?Crr>w9R<1VWx3obU4K4WurPxNg;G582Jzs^ zOs(YXS(iaw-H_GPS=ga&;ICYul9+X1(E&VoQfRgMTgRdM%AGd?PfC}5?rrt#Nrk9M z{V7lB5l<~={ETfz(sx2q90T^aE2E-2rE5gAJn!t_D0nIqp|Nx!P9#_v>#LgWL8=cg zUhQ5ho;^VS4*XS7M=gwfTW`#45HR=p6ppWHYLX|?M{V~Q_xL~IzTKUA%+;=$Y(aft zFsYlg`}SYY7X@k$ACk2(Qk)~ceIvitu@3dgg6pxcbw3hLK^e^ z#i*ad7MHQ|tvr6@1)Epi(wA-KzuIIE_7GuuX$63QIl)kVzT=S<0W7Ity7ZaKq0N?< z)WJ$C>ML%`0i4{F2qsAI&L<~gNIES(gEm70_C&Rpm5Wn&_ zMq_l%zjWJ7u@xC5+{tNu4w`_QsvS}f(`D&qjP4@J$_+0Pf@<_es>X7+s%#<%Id$8( z_w=H9F}Q25wsy;Q zAM-6{Yz|yDuc-p*c#J`L?k<|47J4j|MW(u$onm&)e5G~5ScHqUdPR%3Dv4E2Kn?aZ zPSS&|(NQ7r0Fm=2Sk*VD+Zk=>$7Bvr8Rh6ghw#Nje!o z0u{{h7UT|Ke*+R{8dUnI+~$=qvop&~s0qOZWfgp%ci)E{fwJD~bUzhlaX%rQ~{x>m~F9xh-wG8_+4&j9hu~hAMh8 zrrJT6s$cMEE0u4wAJh)-<1-#a*}Vm{MGfJwT(Yeuc_4hU(&|Hz;=O6Ht`q|h1?ZnV z%ro%sZqlLTzQ8UC|2oohMWu=kUH4nPvMV>lw|`0d=G+I;S>(5OGHPlnCrvZ@L-LW< zSvT8N84h&4SvVc_mwSvfV-cL11Kgd9;x%ig8>hgK&bV`x0cf>ih>%D}gSd*QEQ;H{ z&%PW?`%34X@zTYX11~E@wgOntVn7|`4#sqNgB-(F<&v+NEmrG~-B^*wzd)CX69P&M zYX=8UETN++N|#p+u9AoJGoyN+Kien<>qC@1WuxawM(j&Wga@yRb8xe^zLA5z1SUQr zZ2DwgoJ;ej!gB&m{k3@=2T_j1t%aWS{ohE&W;RCjb)ZkPayQhy5i)Fe4V2=_trC!f zChX?uB~f-TVjY3UO%*@t4J~gY=zJ_xTUFC#aNvgw5DUi46z*+tIW{&Y$5@N>dT++6 zEk0RN=G{gI-NCu8-VF3^zyF3!RSu~;)TsbH`^fcEr~JY27WsA2Bq7H*vzEbr)YD}O z^lx!`9?pkO_in>?+$`f0uCa$j)T1AGk~6ksr+Cs}6xH2?B4p>7kON3Qi;BdqT~A7+ zwdIe6UM2u!6uWVh8q_K4TZ?<;(|ep{6QK-~)Df*3KQ0Q=gvB>#2O&|z6Pzwl2HW%# z(=v^mL2Im-G%nR{*-=77zotgbc>lMDgi;tuC+&$~_M~9%vkm?Hql^u6sHlbKR5lXj zn%&A&@=Ob*{r;N|u1oEtl!$y{HX2>TJcFQ3;n!id;-KfZy?P~n^5kb_7bo&bKbK7A zW1z-VT3cJ_l@B6Z{p8ItqW?ny2rncN9RS>A&z(q*R*YtW^h<#*vIBGuU`+j{T$VL>u zoN{7%pl#OXwo{U!nu1;V<^^hNM8kuRgZIIE^e>sTy-$VwFOh|R)<2hrasABDT+op{ zD$Z^9dYRUOkwptmHk!7#ygzuKkWA-07?4t3sCt>j1i$&0ki8WM>Ai zd2kgiGGhmHG0zP9OXU}mZ;HJMZYm$2-sVf^n2$7;uNL`$8x`Kuy0|JQ%43~5?^@Qx zO3&ZbH}-)xD7L-d%}6SVva(mi=*5yq zJ@+&B+1j_sX3|W)BA|bS0Ya>8i7F6wSAVRo@dQK}MAY;wE z0Y(kzzq0MvB^Fu9f=Mks=!yRo`(r?RQn4U_gQ2tN0Inni`0`-*nMr|vR$*dq%o)ak ze>Pf!Y(e)g%LM;^3>czSQIDvjVI-Kio0hesusC=d^K!KZUxh1!#J;oS!^Ho=_rRwl z;Q4J-QGjOvrb9y(^upQ{5VkYu2A{$;!MJ_^!RyZ_5{UT$=NUf&-yn0*wTBS1e>xe7M_!%P9 z#DSZ4Il1k9;vC6Y8;D5>A$|*b(3{be%Po=pcUbW% z>7c;xNK{wt7*&}37@0$_tE-(jgA19$Q#r6m2V|O$1VhB02Msa3B2&sSNX}ShZUy+R z8S^kZZ{@qbi@AI1<6s52{0m_ByqqRPqhh2mZ^GNa0HD5KH7|v>7&8W2*Y%*p)O*-d zYNC}j_`t!(pk$Tfv`|kO z%FGO0T23?8?92*%!@=N@!MwK#Ao=AeS56DOW()uoF9D+r4+b@N*YHkjx#neY4r0;s zvn&Vq#Vvp*00Y~X3=IYqX};F1U7-FYnYCyIHT_%29&k%Xg)_GV%gDQHj+bhDZZX=< zGB60i$MbVAxe2{geD^;e%Y%G07G*+4s>Mq$V$8ro!0kGmjc=&{(iH=S+KR?*huy$A2mEB z+Z_8M{PoUeU+{iU|6v2@zS$FIh>0-1+;gV^6B0g4gargmi{Oe1(L}U*MPR{;do_$b za-rT@4nt5U&M>J+IT7EWi(1p=ho)orkBhR-DfJlTDy4Wvb8K6@qEU z0b*2lI=~7jw$GYXuW0$uboXEL(l!R$ApsZ&0DIsb5y7EA&S7CD2DNP|VBr+whaO}o z-G5haJOHq%eHDbjtl6J)7WAY2{{Xf41YCG9_}@4XAj;#xiO!Mkmnhg+tG{U~m;?9| z1z7oC3iX2iW9<^)VWm{QATazdhW_hi*mtDElriKN7NBor7)ukI&mw+E`uB-*zSlrx z;Z+BfsjQ9e?{o>ELkBocvXe0M@8ME(Y(9fr1R9BhtS19mD?lv{W&;pAyZ>|iyecrr zx$a6u^*{bqo81KF2S#-{A64|D1)lF7*sW|b2hmnj)&kgn0*`Mv=&*xlkspjClsPRx znGMdz)@A_v@dW6Ho#jtPcIgtn1Y=;1TNJLBh6je$?Xt41B+Ul>e=6(40e}ZV> z&(yj}KoE34zj&$?D6qAgLl2t50Sw$b4#&|pIiCpwi0hvP>B%iw#4nAC# zSQLzle);oEJ!L)S<w+nyB?0_^+W@BUA%5lLNEQJS7-{?#d2M3Xw~0J~tuG|SlV6O2DJ_Opf;7gpp>-OJT%5(Q z2lVg-MN7QV4`oi~r>H%ACYy6A;s>hP;HNg1+Xnn{z?FFr{aYS*I)c9?gbxHy#W=Jhs)g~bO|G2!8{-oubw+QhHLst z=PYf4hNo=$a^kvkkI82T9)olLpEvV914|@~`R5JrPndzl|ItcJ_X^hCy#m<1e}?62 z!U}G*e~1ZTz*@FI&zD(?^SNCuV4m6q1)5yUE7(>!v5kB)AQ9)qFkchQP5N9wYEb~9 zC{U(X&p|5&)bRK2(1d*lRBL=s7(xMLO|bYgbZV>yO)jeOMrmz_7i1enDmZ5os9 ztl-z{Uzy+&=R*x*?f@A7dOX|gEbAJco%^=jw&-1aUYleIWdPW?JE_(@)kZu zgIU+KmK4)@-#HHVdio#BwB3xvuEnXMifBDK0a_H5FxdMzAoGJ}!9;NRH3^}zXi#nI z+ce{howvCEMeZOE+b(5bt1MV}(Swwe{_s*4WfM|_kpUs1g66DqT7+`dfJCOzR|h;i zR}@IdfB}KJkdEw=j*^@@aSmXeB?ITYdotYu4<{<-AWf;^e1g2 zDGRri6uqptzy|w#s>HcJ-z1Aq#OGa;6duMFT}1%zZEq7xN4bhmbyN9+rsh0)6LE$J z3Mm@jfwy3H@JUXAF4*+}rs4@hhWCb|L6^!3SRCvGE{)^;-LgS4eXHetY1rEq+2s;Syd4*y;j8M%3(j*EpAUr({S6KxueSn&n zo-aOQaj*&IksAS4<8oX{tHFb!mk ze|16d~e12k`ikkKwXjras6JiD*o|GCQe#SJTT>Z&7tKg6iCmmw~rGT z<(m;%ljsCc(nS`1T>|yTqg%&lP->l*hzL44mXkgD(|@SX7^>}ketb5S*(d0A<{3M& z$rGK{F+kYi*nc^UFKCi^LodLMKQ$mHOP58#qJ^=t@ZiXtAm{JM65hr}Oump~{B;qu zPA|GCY+FNV=rimsWM$v9A|Euc%%q7J5=P8>x{A-2Q-{g-QQDAz)ANpKnx^4uXaWw< zzEbxEw>=W;F33E^iUJ}p~w%h)v5-C`57}y}bi|L)QF<35)W&IB9Me43#>?(Ei!w z<~@8u04ng;U~C35HGrgX$OzxCVAtpi3gC=u?L08)zI_ta~YGYJAMrIY~ za6U)TVjRqc?}JVO5dHcd!QB2w$_@_zY^Cx2IixsZo))X+`wtFNl>j;x%baTg`T(+h zuk|@Bcfb4h6)N!bB8c{b)5Pfi4H9EO8wn)8_$)pCpO^@z!c4IROtI&|a(D9is5og% zYtNuGVPCnRjRs#ie4xp^(7()Lj z7_tD0g#W=%j5LtH^t=Jdr0S7u)}mvnh;e@{^?1mrst~V@YMuU`9mv6ZpD#xy#Dr6N zUAzhjjr}HF^KjlT%rUuYZA$rM51K>^M6VKFto#_q7l(uWR!s#7wvKjgHm>iy(b%2q zm}fcTShwf~g@gQWCkS&_P<|kkTKqEpYL~-=QLiB12ZhP@vKeu%hBa0e-)k7O5@lQI zvb(70HZGvzbADMm^&qs2-jGbBI^s$$yu z93T&U_kGVAYYbB=6z!gV)~zDQ@|4G`GZYBJVR%AxSezfq31`x&}a$t-EK4nRe_&Rn0km6dTg92`7PhGJ)~$GjI13t4!Od<~Q-6J?v1$+w?8gMYH%@ah~| zbl<6HPsW-6!aZmMO9x^XGcg4nm1VqtyCJ9<0BIh(sf@~O z5bwmO2Dcd_0jEjKi3O^i?@6Y8mjV9jIQyq9Obg!UDKE;{*eO}r=Go7&sF7@1KAL0H zWIuYY;kXDXb=-;;KAD(8dmSx3ZU|{WANo+Dcc+fV_6Lf*o%-h3kRvKXHx0UWh%VTh z<>)N@;vR~f^4=0;X2$f}yC6}FU1Tb1$(}sm#wgV9wmoL%8PHYWn$T%?uh}{IS}R$} z`sf`_Rd*ZLw+6J6KQX=fv<-(%>&L%cySG6J5Q)NNY{q3256AQZM7FBU^rX{zJQwV% z?cLfw^l{I&Glu|=_T}hhQ~@__tQ@0PCRD8TEaG`~A4gf}#Bk~q=GtDkk#%8+{fCPF zc-55~l%EXRD6779OpM&;ooKfr$(mog^R6duHr2@FmGvF|;3N|lW7b{UYUe@LzOQw6 z)ql!a8eVX}dts%Js%sx)zS|)Ampv>E!dm++^~L@F~h9ql+x4h@@$HxygQG={g-_!=i`x&d-6u z?(U@}%YBsEN!7ZOp(jdizfoL=CreKtJ< zxbWWh((CGQJ_T90nk^9hK(?*{+b|15{-FY70ZX`V4{kc!j|7mXMwm{{csKKGVYeI&=@0P5)WU;H7b3Rr)8-&IW2A+S0G=uR?C#`E|a;O z{^qNZU<-U-dU};u#=v#;W9X#h>E-)7IgKJgoa*+hxTp`x1Z?fsxFH=viNUg z{(>Z`iVJzZr=YUNZA)2~kU|H99;Qr~O$El{5*WEHDgv8*yIoqLT~@94Tm4UCS~y5m zX*v&7IV&eZ^%=T8PB<;x7tiseTrRnI1zV$_zwj|f1um06&%LCGj+I<3ODmM>EYH0U zE>mehrZM_gd|(jTC1p#d8|f@tCNr;-!JHs1inc z(OyKl;H}v#-9NnNLn(ROlWoB>_DRb!B2nx@qCVZdZMWWvFIsumk|lj;y!Uq2vnZ&Rpx+!U zxeUxiJ1cJA%BP|_y2Hg=4vzWgBwEu;va+op6Q`#*Da%;hvL#B$GqWm`RrQzu-cUS`h>gjrCHysG^yn2C`fNk!SFY_+y z$+r`X-{C@Ho5mOmK2vUW^OzDD?CE*1zce-#l#NIsX8)$#k}Y)rJvvTrAn-vitbYS= z)vILNa-qZuACgTj)h?yrUPw|(j( zj9{~X;Wd6}E`+7jBJ~fgnEdbgsW#6jFB(m;pf0lixy~7-M$gFY!P9NmlG;5XUv^R@ zVavYug5kTZ*=LXHq%WT$rRllewASexMznwj9}>Q9slLC4Bp7=tYLy#3S`c2Wu0iI) zh7(XI5Wh3Vcm0|FUHN7tI$z&cVDt(Ol@|HI+~N=henbg`KC1a_TkMmd5e+1aK1U9{ zeM0_SSn`y1C-;vyr`EWkp++xNS7$vBjx0u%1ZOG~Qt#EC#M4lXd+n}vV7Ga!t~TId z?7jT>k=P?is}wyp%pVMLWeYPso}{WKo^Wq1gnf^ieOs*V-SQ3A8`K_DqSV|tqo1zh zL)Y51?flmM0h=B}W(6;UUj?&wBckd)L#xY-w;we{C`rUjJ=1N@a@wY)l;+g8+%hB3 zHZp%R6Ex)ZjEeZhP?@qPC)ift3sHp%(^zVqsWP_`sxKuh%*}8%G)w><+C_7Us zZf(MS)a`$TTPb&Q4Z^P!O_g8t({XTgB=-g#{k`ATg+iS5A7CbJ7G~;oC|*rWWTdR4 z`iP@qM;l5`@iPtUWg0*XZQ2H*r&mHLCt3A{4G5SPB%`tByG<7KyVQhn z`GikQ2;Z}fgczlHe3Kcu_IbyJUPb(}4D9W79CL;p$;lBpW-G17kDKNUZ2~djf>mN- z%|JJ9hP2V5g_y-_{sk;#oqx54FTpUAtHw&1v<0#1H8i4IpoOtPGj`r#!5b^C=?Yi5 zG10!{gZfa~1D;{(hcOO(oZC0TIpSMhU0J_H!SG&&uI4sY>dU9|Zwvgh*BW5+cUU#zcF5#12+FeymDLuX0+FZXUzSf{=N(Kop|fOmTyhzSo5BH@{2O}SUYA) z+{RqR7AXDrN!<53byk?){WkMKl@GzSn+ub`d0Jmo{8`d4-z-8uQn*SA86H@z5~yQhcs%3)hw&yO&fwgiSg!k zd*va0(miKms%QKsJ(|~%z4#~RMp=R+L2O)AW!6htf9-1R zHf%m<7ep#4YU29C!6n;-{xt+4fv_)3rBe31oe-`mG$?hVUaB10K3L65(u7X2`DMLwx2=nuf~2=0NsTie30?z-dvO;&&R7FuQb4mblqq zjq#3Sk1M%0Dt@f)_&2BR7Ibn9QBkH5%7@K@&JrPW9gBS)IXRkG?Xb>dMQla!9Y2wx ztc18u3hU&W)Y6{GhpS>$*M4T%jMp?*pD}mUmk)}9IjJD_ad-W>_x0Nd4Bf?^kpgT8 z$OicDJFH{lSUSU72{nidR4uomS4a$~tbn{dX8vSoAU(AQ9+y?svz=OA(Efm$VbN54 zhi4o0TGn2i4rf;7&8tg(tdq>0UD2B?V=jIvidxMR$xgzEO-whr=&i z=zH#ESs_|I%p&9YkCxj8+ZD0g9!mGfiAQrwuaa())ohhD9T*-Q%mX{6%Y;vS1t0%N zRy%NKcg5N7_@N|;2YFmvTbwQT7!@zAXx6Wd`%NNy%_MvKB$mWnpn$a?|*MdfMjh)y^-Qt)> zSIX5CvJk=im@*5Ax0{Q>eY+K-Ep0CN;gXLT%u{ZphjphDH3+3pmCgwB3e&`J_EaDX zGLby*c9@7>jz3V9Cj%*LR@22VIO`w#@<2m2XU+~#i5c65*u4KV3NHh%bQ73$9}V}u z{e5#zk2^IiQo;(3TvNC{z`=&bvJZ(zAAabfo1q)XSHk+HZXlVj<_iSUcwW7)nw%{R zBxQ|twUBU`e0F&vW-4g?)?8|5>qZuO*qin({iWHsZ#%ybT{JOpcrCwhlD0@>iEM|K zu3a|!6o0nyT>63%7*762IofT{0=GLGDEP^7!l?UaYm0KqjS3O&nJX{0MNv;P-Pnqc z`uwA4;EyJNFDw@2B4f7GcM)JW7MJ@t;+<23faw8;IvW|L5OnB*pT}(^~7=`zP zA5iJerCvH%V~y68OIPm*JVOwQ<4d27`Ld0UccOZCWS;?Vwo144QQfrLW`l(6$711n z;HCSJTvG}>vXQ&xW8ebkecdjos6>(&K4!{g$u#_0%YET){np17(PJp^IyiEMu2!Pg z;{{HZ=C>Q}Kt81TY-V?~>+0k*m&nf)*mD`NLuABz_95*Ns1~_@zoQN`qz_d>p_RBKd|elV5O$$lAU5TPd4k%U?$^X51m*%*&rtplH!(664N>W4y2cN ziCjb2Yb-fTbY>W&$PpJKp1fHiNz4y@NSyQSQ64j+sS~%p6zLI(ae{ZO* z=7u;8Gv6RLt#3bI&jj3Ua*gamW*5JxP^Y_4hJu zrthk0wC7f0HT=GOciUyigM0hUhG>%TAv*3EDiqAW(-Cge3{Y#hi;Uh)y}S1)Q(Hvf zYd0`6^_F3$Q$(i|ZH-lwhybp&i485w8>;aG4{YNZMclW&S8uwbN~QFaiELM{wsVp( z+R@16e1v$UqJ^9G7IG4sdZwR{ER~Xfhlj(SwE`|e7z>ZTto4GOI8)jgy zesU8^LTPG}lK9hi2KRpMP84|{?A=z0R-;`F_b5t1IzR7T_D4B@7^0Bx3du>|`0|F& z@wm|Y7vnk7z0v(SW3L0pB!?bEstrFNR2~+8{pH}PW8i^`e|mUuKefCWSM2u-@TKH| z7fx4%vazE0k-DproodS;?%ML1-f(kqgobI1skVVFtrQMi4?pqRYTJ;Q6q%{GE?bqP zkh7!s>pfpR6I#i%k39oyRR}_D5D8GzJ+K+!UDQ{`zbaD7Z#aJ1;!E+yi)l^rI?v>d zTdvNkDn!6S75yM2+tpZZTo{udAtH-oUL+RQw%T1v!gz;u^>;qi@6J&0tf{8 z9hk||Ar3f0oyDiWOKB)wvi@vwHUEU&F6ks@K$g(*atN8o6g|Ee} zyt=N-Fb=gcq{l>94aIPW47xL?>Jev!Ql2iMIOBNT*0C1Uk2Sw}D25B$)rPM7jqXq~ zWDV5zkx`g26O!di_#}Gur5#c|pv!LnjF!fZy6(ED17^=R=FfQU3?J!Km=%7#4jT6} z>D0d~$Ig?1flJ_xs9IdROQN_WwqhBL&VzE7yBl*pq~uc$Vk^oNx76z8Wj{|JfSqSe%YI-t zo37MGH{Q;0cL+4w9(pkyQ8wVsdelzdXF96xL9gx!jOd5LeVMewfh@>FSO;wb$jc#$#Ki$fn z$aL&pmeD&?CkTbwJ&{Pijjj01w=Sq-OK@dtMc(2k2@LjoT>t_+NvC$Xs=3j%cFHcr z^p2(Y@dsTP+#|xl%yZ*^a0{&{s}1uWrVE#r4G>M|>JCcM;t-cC_EZcXJJnuZTH;OW z`Oz0~HSk=wHBIXyQH-3m)3@1b2X9+2RqdyIOr<`U*12g|y>jn4CxMm4lA1{-8noG% zl(fntmMCh)wShz4Z^Xep$LL)la69WZD$CPac(b$DHvJ8bwT|6JNbE-y3DNVCsIS)& zcpYwL%z@oT8CI4Zw_EP%V;p(<{VWNyR7P!tpFC;N{dB!%uhA>~S{Zv;N{(4!!;%HqYAKOb(IS#Z_AZzvDn#}BR85zT6XkKtFPG2r zI(J?PJxZ&3c6JB|XC34bvc53y!HW!(aQHfaMgTfAaHvD;_$GwYKbyU!r6n%T4DEO} z|Bv3STXy51M}+@TE^8dN9bpAS!575dApFTBVfVX$sAg2TQ?HTyG&WaC@X6B@gWI0% zBK5u`^lOW%>K z5tP@hI|2AyGX=cowd!>h8MDc#i6)1p>LI})21|mIqe{B;aD0s=F&cM9Y}b7IOr-m+ zkB>$I72i}iTXe0g3M(^L94bd~W#RsKv@lW<;CZh`CeqpbP*3KRkDKdc=Q`>^uWNHx zmFo~HZhT02BBC4hnf?wKZU{m~jt@=7r6H354k@a;8QwuTGX-WsVU`?iYFr<1i7XYZ z1YJ&1a^I*-OnS>vaE42|OkDcY^$rg?b>U@YFc!H#R*T$^cxl3l8->w zY-%tyV5G%MZ7r;>5g7?^PA<_qyxg@s06uvg%64QUed7X7d~Ob9z02q56nhad*U^-J zR-+Z#w5^ig#Zon``uG)EA1$Q+oI9AOY;CyRS<}ARza}xV+CpgV%F(#K&-e1_ss`h& zN`7hLr>BY21cEPxv~1ME4}QHbZq+yUzqZz)y3-F?Czvof@EqjVJA(C`;@>i}{5me9 z?M#x%c%sD%3;S9cXvV5T$np*N479ir+u6Xc1b*WL5l7LGM838G6OQ=j+A$*bNtqQ> zPoOoH6}luG_z{kG`72_9K6$?kjMh{qDI$VOWSc3W=dgE3O?pa36P4;rtt;m?6FT>S zS%xz_SPsB{wYL^-0$9hS@GA&-wUx=pmnB7SbHs9k=!&&rMGkD}G^()u#ekeyGi4r?84gT`5IDZTagw`ZfS(elTqZkuN(~HuxXkrV$^}14lAZhna^S)r>TJ`Vg(yG<%}n11Y26Xqkt#4d_fkobPnE9oJY;zCp$>vny`~`C_^Sgi zdOcTqog1c#4h*9_UFjnLc6IkGd31Aw85Gv{cwp9a=d;~?&Nfvtc z%tKNGr#WW))jl?CG3So@YER;_e~=bnmdHB-Gkjb52QQnA&MJNteJ9hDRGUwI&V){5 zyTy0joX#NxUVYu|%6Fys*XAR?-|o~}zMf<)qWKy*NyiZE?vED>B^dI{IO%V{))2@! z(8O9$(--eQ^p@cX=SvpamrYu3hJ}sny=w5wmTutcj?6{fuD|#Ee18o246w1EJeXTa z+5Oxy$M}jzd6fZL(Raj##~0&j*9u~bF0l@cY*Y^37uGRXn9mHr zOe_niDRa_5cDI0yKR#>YI*z;AbYi>$`cvY;>|9MaiuQG}wfytE?{c+w@IT8f6%x3Oo$aLu%`O|+O$ zxx-wB4={3hp_%lH-5KK|J-2B(yx-v!CnnPvQ zm=Qs-;|}zi0;tak*8Im9!L$rD(;y}fDO#E@1?7YHjbk60O2ES>)+gnDiHRAs5;AmL zV{g%AwLHAEHLT*0LCbq!qb2tCqv7hu@En!%X6wdr;co zRW*mpNj!4glPKZc*#HRkYOia=Bivq@4O(BXvb{9=s;jnfW}>x21DJ98dzW7%^pUwy{e9H9DQmdDQ%CzT zBQ3)v)X=`dr%26DW8zZ-&$ivm zXX2Xj!2Z7&l6h1$_<-7)TDcNM+~>-b9cmmfMX*-E23VF+cP^ zfN{{zQ>nm~Bj63HZ|xv_bkR?=1h?thve*;^Wxph<7i8~T`@G>X7C^5IqRSBkp{H&J zEqRDu6f;+CTx!#4M1O0UO@Drsj_7@mX5~%pX`q-Gg?W$C1=V~kmrmz;rO=aH%qb%v zc2f`-md=LEEr&Cdu~}^b4a1k-nrts#m^>EvqQ*`LC+zAfj-F9+kmpq^@#smv6zeR7 zS>cvatB-5ipLi#RW7SSh8G#pDq06{`b;2)TQr+8+$7}{)Ys1B+pwFg1BP{dmM?Zw4 zYk9tV*u1T4WjS_;8*dNhkUFDxYoU}yRjhUO)ih?S8)5VF;=`uZd|-gZixoNWggdhf zv3O3u6_E>MH!(9I22pQ8-5oukJ+0e6+F^Gc`^Y@bb@g)PDy!GveZ>5uA`0*6w8J@g zc}4@l_Lai-u`^w`;5}{&#^0(Nj*mV#v=pO{LA6c0<(Qw&>CaX*ofIrz86-8RFHaE* zurijX;*E@GpfEA5kpwbg=d;x1Gv3a3!k8PHsr@R^2e@rVU}A~ea8}+r z7}IAuIy67^lOz%2xc5>&fu+&&R0G@j6iizWoi+nNm+7W_6xIzmT~jWb_9P7xh!zh< zHWU-Mf9DI;eMciGB)BJApIUVuM8JfJ+K9b|m&9`4b~)C=ptzdgF(&caNAA_1;xwPImX z8uitzVGqFuXBYN!Lx;HaRG3eRx;PV=&PZD^F#wD#!kwP@d^F&aUFLjNv2*j-3@rW} z71fbI%iwxjs1wSiQ6e@1eq;~cPrMk|G`u88cVWa+2y@C5sW4EEj(<)mj9)4-dGX*3 zFcr9ux9OT*gBNf4ysfTld5|6VP+np0VuzljnO8O$)4gwr!$Ig5SO6$WS85aJ=JpI_*-boCR{mCFR>Ew0E z%#6$?%IUhJ(_PwMOfS^)oE`Rz#tkl6>bhPDw>pwtYyy~nZjgtdscFq?nuKux;?_u? zV?QwNOKUg}BV3RfaIquqWY(W>-sO`S|9IJBoN4XE^?IT|fph3rm%m|cJBs2I{% z;2xaQ5)lz!Ik|YHKWq!|#A9?(CmfMrlDK9s!5rbLK7)9`fyy}z08-c>Cw>>&7?G^~9 z_+|{-**g08q*PFJ(Q&L}r`%D*=E5^pyC;LpPZInhF}vl@6Zcny1FC%cr!pbJ!UF14 z&1--4`|is}!9N#;LMs*qsV5|9v6bkge(g@hxPYX+fRO2{K16!RiytpayzE4l-}$!Y z(kRu_?;@S(4gvq-Uh#ssL^V(<2Z7l(X-{D~$X0N32Q+K)a~8*1-@mmgoN!O)t6i?w z^$RRJvR$PhNXg>it+Y?iY=RFlxMMk_DGUlP9F)>Ole=&JD4Bf){z<4%Vz-0NJoIci zP*bwwb0wB%&qgSvqn6D(*h7tatOB7A4Ik?-&lYE5E?#8$g1(BB;OnaVI0A;q5du{l`^~M##Ed%t8sLKJ3>vinz&4k68 zmtUyQ{(4bSE_}GQEb#3Rl-j`9M6Fa_Sr*gVE4(gMtE>LVqVgH_IV@yljp1}c%kll|yM0Wl_e4XdRP94H(n4086$bJLk#>alA=#A`L za<~1H8y7;d>=r$097rh2*nnhv%#SZWK0Z|bsqzA7{^s;IG(l{}ciFQ%>=bW*4uJ{G zurw|KFn(Y&suKW4$4k5+i{cFbc_bSGjfUtrMest$GYzb?lA40R`IMiocN0n2(x|h( zQF}&?l3cUjpq<4ry$2k7V<5~O;6PqD^JA&&%Z+^@wT1?fzSoeR$uv3pB#``T3}447 zh@L~n{FgLWJ6BF zNfn&}`$BwnQh3JxdraZ^s^CN+T9)X8-0e67H1c$RhTs6s%biFoyVeEPWt9UK99~Xggg{VhK*kxLZqW=K)1NTK1tEy~0PZof# zQs^SA&{l)5=_#d@q<-?*T?xqSkn+g1Cr;96BLN@to5&diKjaLa6b;N>al^SuqMh z5u1Sp7ohE@u3Lv%s;^)4@Unysn2pDORg~1bb1g4Vd6~A{A`)hkfFlSYx|Xj#iehJn zF2Bs#<|{cVIoSW{Cxaq#e-ca|Ms)1y03WiNWe;^e6XUSAa%SVR-4zZ`Okr-5RGW%Q zSAm!?o?29FH7p_44Oqcfo@Bog{=5-b$RWMYhRpp%9C44of^o}#E6aM)aPerGjNR{& zu$V8HLZ`4=viGXBR7R-a>XP>eV}EFpQu8oVAfw%9Y4fjSXisS}sBEo%FWW3W`xeLu zQH5^V%xc-G-nJEGse-{TWkgSD9(@t#OuM1?`NwwF?H6lc=VQs3=C@Xn>xsGcbwJDf zoYz9i9pgkFs9FfqY%#R0Zd72wW-xPuyi>O8?R^|yvf^45HzInVa@omi>&}Ka#l>~q@r4owPO#(V(obWaUk6&lz@%< z3{Ylj;-%`saU<5-wX79Gg14$qYrsZ0x#>Mbq%Ai012+~N^FLp10>4kKda~I8W@@pZJ7GYU70LdbK}m92AfN% zW$pphJeV1$cv!nK24<$smI?}dJbV&wYokgZu;!i)ymX>{&w$N@)$;2tV=2FF3EG!P zzk$pvz?rK8Sbq%GlfJ!LTB*Lr9UfKR9C4W2Zm#gPluo^=i0Mt~#8LjjX-A;C9=b7d z!fRf3v2*DAh)w51lZ`|;eGPS$H%HV}5Qw4*?>(yHv+Or6T^VSrW_GGl4BV2oXgGNt zT08~tkIJbT!>cauNB|yL$DR=L{u*eU~K`yhBi);}wON7Cdotpa>BY~nqu~!w=8;CKWa`uy=E%0=! z6B@5p@POh&=($XS&aTTAT-XF)F5CBeura1Yp)Qoi7}aactVUg2{?CCgjj?2kFx) z_SSpv_x79GKypLpHGdee4%sG+5|4C*2@`lg^&yDvVX!`cRyl#LjwqE#^78ba0AY2j zP~g^k%GTmqmNP&RX`7C2DC(pIXc5pC8)oZLMiRt4!*i16NGtdv5&$vJI>XD!+t_z; z3CG80pR|vf^JaRqf4{dXF!vKLB|a{0IDIkv9q@789WV}dvn?b$J2o=nlTRm_QommS z^>yd1uo=-~s~r|eKypGeGs@aVfY8BnQo(VOw3((ui^W((+TBKHG21LpcUdf5G2EL8 z;q|tRFf9|#Qa>^1y)=?#;ax1^csAjJ8TD}eyWz}CdI$Dk1DvgvisUB9EHF|Z+f+?h z>%_jsN|9ln&93k=sdgEMweVGRT01RY#vcAgPSF9oy|^#uT768)(n)z}&AvVOw}sXh zJDVOyP(r@xG!+|zME=8@Q3^16YV_!uBkMdl<*GYwm^)1{Aba{fAHqe_(^nb=g*%%^GkTYl-TcI3>)^X-(=~M zTVYy!y2}8n4c5WqkQgC19v+^J^7}BDH)y1%Txf~N8d%T~22U(ZOiWB2Hj9*w>`3Jq zHX{eMA5yeu5^9NSu*$MWF(X?;*I6yW_n{{MYm zBfJKRfq37pQMUGGG_^HQwi+FuP4Nkll+9WaTH243wA{I2M)Sl;fW@pG%sir2)ld&n}z;94 zR9dWB4W0qoKm&)cE5nM+8QC}WE1{GuotI{<*%Q9gjGp+IoY@$n*NW|mF|h~!6Dv8K zn{|J=00Drqwh`m>^cCJW^}>cG1J(-{R6mZ?6fYfQ)UEj|gS4P@V;!$;wJ*9IR(ys`>9huGJGajJsi7v9lI6bc;I` zKypSZC95Mrf`|>PE1}Ljq0TBxln)hEC9<1P=fYVmwkxAx)d)qhDWm+->mttO4F%ri8 z`12z1+S>HL-!K=1Bu|x^pG);XbZ4iyXAoy<0948hrsjbEre)0q%}MiWY5?@fo}`{v z;_PDi)v196VekyEr7jqjcQtyRbNdGJ_Ytzp^YGA9zEE zmE4OU#SbPv2|xd_JwIR8t*OQw8W1DxdnNyw6su`4ZvJ3V^sze z?0)aqBN4;LQ>={v*IZ{k zQ#MBZLA{iJ-8!A1VCdoD*Q?1^n=$ZfVgvL7fNHYSceW1R7b+uKU2B40bFUtuVyT4y zmc|`~yh8fE32<0j#wsz%1Wr(W)8?wu@>;1?s9TsZB827@lwaf68>i8cyh_$;dgqLGLsk?Z3A?+b=w5 ztZ^wKA)p7lm5D~n^-k%h+=R)4Oj;B z{K*46v}bbKmrq$)o4gs}H~jo1((yF60EO6qawcFkTt2FLJ+s}B-xRPS$Dq=dA`DVQ zq66>%d}*X+g&&r7opRDn)qT;m&C-*S-mNPRL?c&CDboM32vS%i2BC~7wcy${gssj% zQtMySQ`;_Lri~yR_07Z-z3PB6h2-1Jz>-1^#D^fCdN!b8uWTro3#)gcxZ#Rym;u39} zdnS(qnxL4xRb1Of$YlH-c=;^FS*YQ2CQs`B#v9X7+0Rwfl$w(XFd@MbqKiZ1a#*-ouUh;fSuCrNx-U*0KzerMx!rUZ#6t0a4d z(vKCcKFJdPSVh#PYpc*-3%3TW^>i-m&;e$K<(=Kb;Z5*w$Na6QJFpRNCt6}DgSR+P zg_w%+L0wQ~hwovPVhC7(py^HIAPtw8q2*sJS_@}dI#;~SZ&gj&n5q79WJFR@pQ>lf z<(ws;WT?`zZ;u`b@{WzH=O$keGo^!b_WyBXf{?bkPW3vVCKR=rk1ur9va8>j2dU0i z+t>{L$Z-9m-Oq~XG4SMsjC+Iu1a>V?*iAE_t2Wt?_d{BrE)x4QFB=t(WW_(^COxFb z#qYlIS8>~$2V(mO#_`IN1Z7@{vPpluyre=kv+6=;LGshp)nc~-Nj&%s&&+DE9aRB8`ML`m?>dC z_O}%WjmwOS1m4$pEnY`tSrlM(Hmd+V^BJO!zx($Y7NtF(l{ed`#0zOS}CNJ(Y?kf@{nAM3%Jc8C4;jgh{*<`N8s#wF7SWj9I z>HP75fA6YOy7U6hpho{Ai3A4$8%+#g^`;s1H=h9$ge7Eu+-ZNM`GHD@T#w8HOqMZI zZz7zjs|Vi3384VrqubZ!UTz^5OiAz1UbbKeC&0(Fj{K|1CX6BiTTmrl5ssn*q%Y?+ z6195qaiULGWPf-j@B?#jMWUQVQ6!hrpXz!77BZ@-(g36j%j79m8l7^y7JIB!=C zHXc~F>Ri}(G|{G}Va-^UQ6|6;rraa_;?enP?7`b6<1f)utsgK7VKY1CuOCM;d(?hP zxX2|jun!7av00)PX`7>`u?z$BYqN)a1U2}_glRpD3$m$nA639?U(>B)u3>FF5 zfMJaDv{c`&2mSa&AQhO~^Q&&^f!YN*yWEjl7rl2!`}BNayCxb$sZ&oar2*4(?X=_6 zY^(308TaWIPvDxwE))l?{;?@R!D=f^P&w{7b^n({;)vxxZ&%FFk%R48lBPLc0D$2P zD{|cc1o$D$!(L2yJHMTT4s>>|Dq3qj92= znjXN}NGV@0*5|*570>ICW~z?WK&e|B_qbh$1!Qm8@>S;veW91LvOy64jf9DBE(YCK zT9U|VyYn&W(m}Y3p3`1znFe15o$~u{>Jv6UA7v`IbPPNmT`Ek9e~?Y{PL#a^S~vna zI)+9?a=X%T#_`?LYObYoy^d_m%rQ|R5HGHqt&J=wJA8P(Nb z=Z2k9-~$XBXVJTXZ|sb_I%)$|{F`t#Qa2hTH75gqNgzh{()kHV{WpNd4|yqfl3e{8 z|3~E%zkQ**EBGPg6xF$x7Q;*hXS|fn+xW6<%iU&hZ-*EQgGPmhlF`VFoLH0@?4iX7k*2yWO2Rk#bAK zfxOsgi2cs2?Wue+rMxP7l>1o2dr3m%*cH1rVI0U^bisLy#FkuzeT!A zAaet!NCG)OZ+X?0Ev;z1p#0}@eG)1#hK=u?m&qfFsCi=`wD< z`&V`uox~rM@}srd%WoBZ;stwFPBpD&>-Yt@?x$#t#V{?8qw?IpwnScaz4^ygjLLo= zXzPb zZY)VHF^VtUeJOq*`|^7XY1I2Ola@+39Yohxr1sB00i^uto+pLyn@3r>TMla0(^s>K zHR7}sL%3q+)fgrc-KIF!+!^UFXAkSpqUmUiN0G(8Bd-X5CEWO9v*QCzJM6Qc&+10y zy`sP(j3(n@qeeJY#pJSGoVrk1_+>+gH?0W$Sl8>NquQdLnze%E%fD9Rv}PxrGA57l zsf#rGm<=rL)d|llXwjoT2OhNI*8lAMuxj`)c_?jL{`M6U5b0j}Qtw(Ly{43cC&f{Z zpEaPK*S_;^YRCM7gj6QT{ey$;td`gSLDdE_E~-9WnB(MybUw3;1RYbthU{$D!@1p; zoE#TgaSSKTbsn2;eBZ`fByQt8iGyk-Gc{dCe!~)nqodb2xo8yKEdv@iN|nnpI$&Dn zTc9weB_Ti#2l{u| zvj??Y2)_1;G>$V?O~mzKN$4eKsy5br`U1`v6&CtY2?GSo*f#P@>5KCg%lC1`ej9>_ zi&S?F$>ZEFiiFAGYm8mLN)Pu{@t50CVph@3i#g^WSCzUdXh)jk@^V={FQGJa;hahX zo=Kyqzw}^h2#DwYr@UjwkLN>$AG?4*xyPH~7FzHq;NeE9J6}_(z=kMqQ4x9#V$vBW z(0kgkZ<%CA0vo3FcihdoESf_B-p4-nr@Gj%XNfFdB8%3k@^sZ~1v;!;sj8ist*tyi z7NPyrpQJ5h%_4IQ=@>3W*H&VZ*tqx>iDMti^eht7B>lCuisqZ_Tx_?!h-M?#a;co? ztrosSUZ{Pv3NmPKka0^#HDSF8d7+ny>BKn1kNPh@MnAm}AcU=WAFMWdE>0SGQdzjv z)tdl!&(k$O-4~9swt;2v%ycn^P?FbX7EyzQ&$OqKQ1Dch`Yzv zF?qP}Lh=4(4#}-m!c~_WlOkY2TkeXvG*5O)*84g69_P=iHih~tKd;k!PGO2^0HLj# zK_N-L7*+ zFo*w2z~T1x?#qYaAa3NAF*&&8yjuSC;&niayZ^zG5Wo&xYf>O~^~u-=)@?Oqcu&@> zx|&Ub#?(+<+7Y_>-}&MtL|!vA67=hQ1;SNmg_Vsq;~R|H->TO2B)Mrm0}LV8qlx2q z>mCa8@PYRONsX(D5dW+)y%BZse$tw33n*I29ay-h^(jSHr$d(H+&TmWKV?rTvdW4R zeV2}Ny+2Tmn}_yVT|mC>yE_LSnM0PprVoX>A499KsO79(2)-Bgd??!i>?TGU#UCfS z_N(&VZeJ))T(W0>pq+)^A3L1obwoU&v@Ri<SO;<>Pi+#$5^Mc2_3a2@-bHuTOPVv>3@o-C4KJEG5O{ksLN`0PdhXllq6`^QJjmc zy>uB70ruDcgkRsfI4Ca*Xb1&WI(iA)ZX05+J*M7uy~yngRg%>oFpa7Ik4a8_RYFfN zxd8Zk@k7_xH_~yAKHbW5pX{yx+@eb!DGU*fb6$hBaS%xKvHtaL(hSV|#n`J7+ePB{ z`bD%`CQpKS`)>c<|JjSbaQ+M6JqT$M&VXbwyA(mY2MR4(RA|Pp$?gTRtK(jUCP;8g zc>XWqJ5XZU{{1K*YA!y0;LT;n_&qK?h0SwFlG?1{h!P zK0Fsiw4iBj)9PAd64^Wx6tw)1bO!PB2j;`?6^KYO1%f-Y*z z-2Dp7gpe+52C2&~!KIT(0;K?r8(z}HjMJXNiv~aNoM!0OB7b}W`M7fbxqKH8B1r2m zt+ZAG{?+IrF-}~>GsfOpLB)l?VLpukDc0^q93dgWyeZR6I~)W@9eZf%u;D+;s>Wy> z>@dUk4sQVOnWuA8RU@XbHTfio4mTt zY4IqAtLE?ghDWDzcW>%i)5rRRI;)vqFWN2IpydXr6giYG<5Cb-OvYhdRt9YGjZbsl z$tRIC_6;WKoexuaZ~poy%cRY8mKOVn(C> z!a!FR!kI;q9FUeu0JUn(-*43GooL;6sp%rq$Nh>yS?Bcz{%jJksdX+qe7WlVkmEG| z+>JI|&2Fwg56D_W>WXZFbhEPumy5JEcX{*~$mJ$vOvj|Zj1nxRtOo=ez)OAo)sBn$ zcdz7!cd!_tZRiw5pSEOwARq+9s5}n_qF@{v!*L|5E2S7bSqceuMdK3i)+%&H16|^E7(9J`KmLRD9YcVw z_%=yLFA}qwU=+qrZN(kY!ND|ty@~kC$OX04&%^XXuO3onnWivZuo{nh!rpO4)e&@S zZtR^QAAVJ`wQ8hVL5kZ|-p6nsgyb-jshCydvWp*TH}(;3BV84y&Q4+25EPkuO+F&P z!?Zq&*Z<>=Hp_=7Q-tkzeT3txE^l9@0!)EgYz`ddhYlTcO_yf)Vn41=bj*NBc=3AW z2QX5=OytywB0cbg7XL9sFY@CP>efjDf09tn?<@jhSd^2?QQsC!M&UC@gighjhUhWS zh>^);x7cOylWywh#9w-C7J9yoD6A_6cn1K(lqF_=+%D%22?+F|<(@h}w44)Eb@{$6 z1YrB0Dw#l^37FK=(9+SW5HYT{&oz;YZU7pNljjGF6nkjJ^k_K*O#|FKrK>g*=nWVH zMris$+Yxb4DgtV~LMU5(hU_7>A%y%M9Q023e*#cxsh|p`p#}i)N$bkSTmCs5^l1tp zX<`5h2qnSdfs*|f&A_r{qbe%YH}nlm3;-cRDAuRG+P zjtmELwO&a*_r-vTMKXUgl|+mB7c%4`VX6$aeis$DepesaHtq%Ow)sej+ zNG8eUGqf5Z(+w@Pskpj-0^Pw#Xf~>S?|AH55_qs*OC6lpz0hAhAHkEC#M;yj+d1?; zbU(Gr$*idozcSp=a7*hbdHphh%7$FMvG;qL>s9NOLV11_Yl}PHhx5a%4`k(pcA^iJ zdBU7Z*HFj_)Lkq55!2H7Z!M>@J&sL=^b~7rw|a(ohPQXS%^Rc07wHxs$Ywh2irSPJ zzFNQ4Gq3WjhvM02>$R=;dxIR$Xm1ISiV#J)b-SHaK#$a&j_a>3#U% zdb-25H@hI-;BRNKBmrzPzDGa9>Bd19`*g~Y7<`=bKbwf%td)BYf@dP+vsFsV!J?^OA_=N~gmWm_S_GuC|c}l$g zWE#`B8$3)WC40{x6d@}#ga20$i?xvp)tH?f(DTvj_h(zW( zJ7<6PwxW^ALSWKd?$DIV=5VJ~Ryd&YMjW+aWzT$j*~s`rN65HQ-PFMOZP;|GRL@oh z=D`PpVKw`;Z*xy3{*@qy{~5L5+fb@MOeFwS{;&V}6*(X}Lx)$EmIhyO>f&N?oA9Q=k6*qO*C;9)P z?Y;S^@LP5#xKDH2eJ!G9`1u%`!KV!Y9`vXFNs1t`^XbOQkPq zQU0&bE&sEd$QO)^KPq%sasf?tmIHDYepqoW6Z&+&&JnA+;$G8+E%*RWxN!-_cP`#` zBXnT25e-n&E{hC8`3qGLqb`WCng&c@%?GbASn=oHi9Ba?3;5yVp0o==B9#BdDe}3I zy~mOW_}QlLD?M!C-ws*zlMV~;ge_4t4qGfVDFqqOhJryfmQ=$Dv%YO?ulNjng5lq_ zZ*X;e4D}&f|J|LNZZ~0{e(MbQ5(cxJ{1=zZtR;RWax^DnQX=o9b?q1!8c4-kll74wDV^G(_d zc#+xjrJkpztBt3(`CDs{g1R!Zf}N{}r-$WRdpA#Jem)*P0UrLJrjyaYTS1C1WVL*< zb{3?gxDa2M{%WUE&PH!=Y$>2tjcvMm4({1E#xfB8c5<_LV$;ZY-XER!q!#B+j$)R0 zn!8!p_wPa+HCBmdAvoe$gt+(;v#jELV%2AEy}N%&X>zf`u*2NMo6AN@G}}tcTH9K0 zf|!I*Ig*kKr?x6nr42!j6>KUKfWZGQ<)5yti~svep5#01|GDR5d52B%I%$nvF-b@< zDYCNZ8>eED5xe5sRJQ(9HpQGIwhPWZ%bQc~n=7S&TkRw^TVZKwm87+jY)K1K#iXp> zW8ViOhnk8HUwvY`pbhTi#@R4-mEMiEUS~HU8yNU?c1pEmXds`&rr#R&ERtik!LKJyTBhaM6gLo~;cSYX@5zTD1p6%E zWcC@IO{WVgfUHl$-5$y{6R~J{o$2}zvOJFOZbMK{32us z3F-GDK0EKT^qCl9XV{TanLt#EVxRZwZBLoMfFeasm(v>Dc!Ls72gz?y!i$D<6}C2U z?zwa0^6Ag58Ma4ln+qH5UyUS!Ak&dxt@O_NCSncv2DBXdt7Z>gQu4t2_*$lcNsa(j zW8pR?Q*_|}cco+GeH$6VEqSJ;Q^g^tmuNoFaA^tHd&=RLg*A?YUEdC+q%~m=7rC8~ z%F9aUSut0$wlhvT#C~90_IhLx>a5Mgp}ooD(0uuC)SbjfvjWv07-Ceeon1yItRcDZ z2o@KQk~H<(d#EjS-NVyFwtkN*|9%suGjW{)a3|=FUF}|S1+0FKLolKhitzLNy|)^8 zp4R%E$2J{(HG)F3DVhk7EUUmfV9jbz;1=lt!}LGdbF4FcA`tjxq|O^Y4+jLzTIKgJ58_T^G4sc16`b-oGzwkI*b_9x;l79!QipjG7|#OBJlF>3g?BbnnAzciO6Z=Ed;-W z2B@JDPX6Pm%7i@KQeU%L(d5hY*wcwN9I(_PgrKRY^{f|pXx!`MjJP~@*kt_+iumdZ zEgn5SOUd7y@MUmhcw3H!5^eW(&GDRCY^Iyy;RW5mq)pbkrk&KUd_D|cNI5}_4_D4A z{wTohGb+I|Ukz6HgNmCux@>p`{1<2}K<4@=8?7b_+9tH&kSaPsWkK1&b49YxLn_Tp z=KS0EK{YF0YX|=le4$DCr91r4F9C);tmfvuUIfGAHf9KqjQ@Q5i*lMsm<@TMFTOOp zCDr3hIu#t*aC`qOt$*!jC|WXYl@5+y?$I$Xg4^ELRq#kVG;At~wo^i*?qs@KE$-fB9^h}$@wC_dc z@1Qb$PR;4!=ZA|bo19@Hradu-m6rYwPH4d`}D@V3M(e=bESM`=% zo#Eue3FlYGX(5aH!?cSVxis-V2K|M8zWeFR?b#ua2NPR*3qO_t0&PE5PHkEi5XzK0+TVq@TUExh$mKS&PraPw$nZ7yFUJ>czs7%20My|A0k25P>BR=kFvifPU zyP}#avjgJ(>O~IwD^GatUkZX?$g%5fTO23Xt!$|rc-DV`udt=ztHq1|dY6buD9Bw} zCVOPx&*5vSaPC?9^!nP<>1Lwz_QCSILw%6O9qeSivC}VR(8!x_m;^8gSKQljMFq-fOMn zxyE_H=Afv6gRAvKRVEPaL=0Mccy!M2vKTO@<9~U(#!mw`mNloPW5q;fxdE z{0fTfbo(s;L`u0vFwJBQ%^#`LTID`I&urV~NXl5bwth*Z6w2P0GbDO@_4>8{;Nhi@ zi3EQ)*oqqmQDu^%aB9Jp?VrdG?b9$X)3qTHUCtB7za^6z;e)Q2CVX9kX z(-UL~-4P)XVIyDA{z{|k%(X=x_NeVC=t=fs@k>h*qIz)#oyM$_zn5;|rK@cns?ulf zBB2#N*)-cw^34*KsS5ND{%6|4VI#g-){d|6Yt(Q!KEZ{-hO~kAqgnfFHHW9OkElZ^ zw+=BvQuqa!veCzF!by=@MwGe;RJnTwBUj?`bT#!7 z0O_jk8~-hHPG6g4NiYnt{@sE>*z9Fw{tDAwcBVQfs{b@90|}k|&Ay)bJ*06Aj(~79DnREOUPuyQ{yZ#Mr5!vCm6o

      ty&2WiY_*9Me>m`_w^L~}6eTEclcUODnE=}S8$fPTH1upJ8VA+}I9$16M;r}ft zG~iB~9hoczN%B>rwNuVg81Ms3)Eo$)zaS@pUHCeH(__ho;#l*}NhkEg-?o(V!Uirbp_cTu zrlITyFMKNhmSj4qaLYNP(3l&qt|{M{chnJ5Vu*^ilwTq)RPLiN&UI`*O)JqkhOT@r z(Z=%2AHjrTB=XqT+7Dk76NAhg)m=0BSt|N}CG^U`MuuDGu@m35pyb}QYcip#KZy?6 zMF#-w7-=o^v}OHm-=5Efb%|dVls;;5@?(z@QjspS&*(9u0I>=rw?cEpY)*#A2iF*p zwR@6N4&`@kZ9V-dDseX#L3_G$hNOHU#{=QSHP1^Q;Jm{|59<6fIi@a|C%AZ|%Uyxi zda^u~s(LVtefQAj)H)W}yQ=u+k$K*53saAfnPSpBiE~LfPE(7juP@D9jo5pZ6v7Zv z;%_bsPvU1ug&~Dur5j{sx%m3*maNuO?g+=ysb;2&t$Y=XcLa8s+PjJ?FO4|b;;TnK ze>H+~OkCIKpea;QHzUIO`ghSHKo zicDF%qU>?4YgH}Rr8Fy~*5o!(tbUESH07c5RrO6f_RJd%N*xst#*$RD@H>pdQa@nUL6Yq!;- zS&*MCQTnEUi10;VPK5oE^QM0hl3XECCe>$<{*l2*_6hj!lZlHzF zF*=!G<51UZ7t6K1v4{7z509wLMr##mHjpv&Q~*69mtqnxB_Uxr*5@!Vnaqap;zTr? z;;>_cfZytjbNuJsz^6X8{LYtDUH+364qock*m=%4WrnDCmO6tfE`EcLpz{4Eihz}@ z04u?MSC;6wzJ>k3L^1zYOxJoD;b|f@+L?r_KHG!ffOYDPgUg>zbt=JLs!w&5XRAnt~>A0B7MmT}0HQ7k(AN z+DZ-?LrUo0EVS~{*DR1N%?5B~9^eDmFw3&h zFl%U|WW|{ml5teKj~40^NH5&F4%Ms(5^Zpp)v5#4I~HTINgK&&dt?4SkmF8$yZSJN z-P|BaNM$fMjcr}}sz9ws)$Y`auGVp`tI+<^JYnftXUrU(za{Je&^Hc+bUrjTXa_8mgP=V%it9{aAzJX?@mNe7pNo?Y-)fPfM;cl)NXFvk&_OnW;HIE#D8j#>%L{_?- z6N`+dA^Z>R7FspqXRCVg%gV$YiiYbpEteE(iJf!%jMxGGBkpv()NdD3ew$%HombT7 z){EQez~lTy%MWE|Do+-Fs%!$IaDpQ1P_z&%KU*P;uXYJ!9Jl_&tUX`Fi3uN?mL<;) ztiTkuer=b_GYIc5?TPYvt*fln$@Iz~y8G2mKZOEbPYBJTsO8RS+s3(JhcT>o)9pV-B{39SJu zr?rmCi!*P{GVGH*pK7pLRasA*$wnx+WuRyYjWYeGFNIh_|s{fz*2f&`JpPA)l3omHX{~e6{B{R7Hvep4vqHY! zn~~3Jl0UJ-%D7UC>0&qM@@(Cr;1ROg?^SIL2j2@6I^9k262hA?<&+{ zgsAgIXi!pdkV{0>JwO*ux0ke<*S9PxGl8QFg}+zJWM_IaJ=KTCyYJgj{@MyKF2||w z9CG(-M@XjN%(f~gt?h+pdJ#dmC_zv7Kej>(=iV5l7+M=AN@_Q|W~&1hF19|834$&4;Re(4HyN*#8hE}zu27gF5~xtj@n z3Y8Z^<< z#IVNsHSAjBx2j>7T)r*zR20IHJ_HQb|=+g_E{uw(h7Y*Ul%JT zSGm0iF5dFE)BN9B7wD;1z*@n;T6&S?)@JB$&#?<0aU^~M3K=ZBNX2Le=~-$bav&Mj zV5?^)K3d(aJ|LUj)nww`+80?3P~vLFu1dy%vwzQ--HeZSA+K5@M+;W(jy8OptT0?p z`+R9+*;hUoI)%7X^KZdFL8p$E_O9+GcS*jG`i3m-rydc+j*%I=%8(gb-`t{eG=--c zooCv`+`Jn;X*39hZ)*MU%JX$ydHwC`OaIRcj)V|YxZu$rx(13 zUNr#j@XZxrY9)jIs*KHtyQeG8iV@EPUn_EcQ05FUyS#!rGb{U(x{`8jWz(D$|4Og+ z_0rJH-toA;$+6nSieW&{y_UE_S&U2+N!RkN~@T7&H!3l{BP)CRRxZwTO$DJBuyf5uQeWvw7-+c5M!8-%TyzXkJz%*u&I~7j2zMiou0o=V(Vw#CXrlvVtp@! zg=QNAR9zsFRTZ1Q3`b^?i@fU;G>>INO$SYOa@Eq-0?KO^RkJ@%w@q#{?;$TWo#`C) zm0VtjU%&8Lz@~}ipQ=Etq)m}4>}kDM9I@!= zDD&Gh@oWFj|0Xf`Zz00Z#BgF2yr$vTYC6Em_xc^S#?JZBqL%7{xc+A`Lq<*U80v0t ziHPWD4$Aw*fg~p#NtSf5;H#6}@2AZFow|r7|DoU` zjN^VO)YuB8qp#}-tQ-qzWcVgZ)iL+qmX9!)uZ1NSPI^*GI-9q+gK@|7yl&pw&ov_Q z{8bK~ub2#F%LQL1ke0^c{J@5!HI}IABOjfp+kRj}a-YJ^LXRe?#|BhO^UKS0enJSA z3)RS0y6c7Mhv_UAE+^^hBzul7TZV;N6_d0o+~@8}&i}XYC1&B8{9O3-Nj2$gRL(AD z;gmcX`l)oo$gRW4f;30fi~8p_+gW1QD%uLjma+0K0OKf?hii%7WqE!o$wfYBpl!ES zJ49u{1=n8RyV24Vb5Gn6KK!d?D)8AOhfPt0d31)rBt(kFap5wVzV2s_AKE#tiD$K= z33XZfpZ1e5+Ryr!1=l(KMmk$B^>6#xdEHFwu8*AKEGjQ^>hR_Lmf){)7odTfib-qb zYMK`?++Ao~6X|0vH`4;a+M(ALQ{ElZSrlPsR!khJHA(aDzk8R$kj`hWd1}#tW~~|R zo$)O$fluRSL&t@Tr^-baPq%z6Q*(Ac2@rOZ$1Vqb>sa@<6sxL0Dl*i%OU_`NuAv`j z1Ua^y@OB=vPpR?*scGLC)5vb(=QjAY%|0Nog#S(P?=g5m^i%~M z|7XkTz7EdeQBC169Re07TS18d3_dJYp8jTnH>$?k25@?Iu)9->QWZK~q!PKJU;iloBv05lKedQQ>L)2BJq zHejL=xSeaa*Dva9Jv+O)w|bES@OiGsx_9Dpd}0O37#szlNPGa{HoSHHFhU8HD|$fk z{5aM<_ec1EmGQPV=vpWPJ(Y+sR5cUYfJR~n;bv1!js!FSLq5(?Qx$u-i=X0cS|P}v z8C(#(p(OblSv(;dbdr4FAA#4IV^%R(CkCB{D}OkZ9A>L_rL7un6xNYieRb8!;0GbQ zo7KDP$`war%jq*$vnHd-5~_aOj;gQ_Om^mJ^Lj2UMA_@ju=Nner8s79tDYtKt@tX} zp|uu_Mj()s>W2E~dtZtd^~T)XPKu*Arz;YKYb>YdIyk32kV0O(30c$Lo@k*O0exMj zxtE|{G+WmW#k<^b54U`|1zN#YLZ#Ok>i-#IvPO_7t95ld8KLg^$xfIeMPq}#&dH)9 zU0E-$RQCK)_<^uuQo%$SKeHR9+LSJy`pP^^+KRu! zA)PzL#EYUd_Xs3gymlQq5Jp~c^Od(y^>0EcpCTF1=K*U;RM2?i0;F$mx~2=zr#2P{ z?rTk4l6-rP1)DwfHu6+?=6`DmAqo3bZpdW+8dg5#uURfS$qAqCkF@^ZSpdm- za$>IMMpGAySK<==ORvJvo)jbdm4VIRKlI$I`+H#N)F(5!s#V68i+|6(4IARr&{^=A z^oFvlK*_(YO&c9+EG}vltMt-nfzHa6L*HT!)Dy{E|DAm`l|{Ao_V&5psdK}w-yOGG zT5xQrBdk31C_}{9I|0=LMy=%n=LaSG6O#jLj4(hJp_}=wb>PyzqD5XiMD219Bh60} zYo`7==FUQTW6zx}| z@I+_zbF&!3?^MZ7={V{M-b1~0Q#I-hm#Wv!-_LZQQV$lG9q-UP1fZl~Te`ul&Yw$I z&-N?7aJeYRoAyS=)&1Vjfd#jf=;zShO2i=4Ckbnj?Lu!v3Tub_wUm$7*B>1}v`w$A zMTQEKuso-4>Ch(YHSP=of4^v%^9V+fV)^>|o^CX-#bfF*;t{C%9?u$ghm5j!4c-|?^hJ#ejwJI&vtnMCS*gEU7WAc74`+gw zUY4KGxAhJ84`^$yT=ot+zu(0*t@)rtB2-(b$;AY_;eM|twY1t$aMpk%hs^)2IQKrJ zx;+-=fgNVFbczUfOga)Qi(m+=#zUT)3X>C{Q<%l$RNlL|-cXS8c0sT%3QqKt z8e-AgJw=tV)Hey)%WXhCp}+WkTxY!Yef|dxFi4M*(vHO6&pD1fQG7e@4CM8#v>1bo zLPbqF&v$_xuIDzMjIJM4wB$@zRqTPzSJu%zx7BIqe6cO9TQZ$}P&Sc$#EyXN=V&1H&|Zjg6T?Z=&D} zk&@djRfn|xJEsZ@erlRvo4|4;y2G1<}~qgi_90l*0^q!&FbIH;9uELuxA|0(tlZrZ!wztt}&?#v>0l;Et9 zAW(?KoFOQL2bgsl{lKQkvjV9)AOL24WN_-6K%mHbRR1&g6GC{Q0;l`mfuMQ?_5ZzP zfARm_Vs!EUtRB`PD@Fg)aD+>Pfw4~edSHuDXMMf+NsmJV_%4f)R{Jg#c%`vk@!u>DNpPg477zGN%Eu) zYAaLn3 z#DUoWhGyjs7+JF0^56Ve8vo}!rwbS@v3^GP!gZ39uJYl;U3k3iw9w7-J1%$*o%KTs3gFto( z|JU~ZZ^IM7f&igh79oA+TBTDK6CwS{{NlJ&v|OjGNK*RPy~U`Kbcqe47r(v7fXJaE zB@W(~MJ(Z&_??khKrd8PWPfR@-M+yDu+H%b+%nZ^uZVV}yskSA8Uwa_ z{cFh<8y9bL0AN&Wmq)Ig1HSX1Q`%A~o$r8tPWA*Yk2p^1-`Eo@7vXotU`3Ne1y`&% zk8nGaUcb?fyf!9S&TXpat}hzC{u1r@4*!MOQOe-DKP7x}s`PDnzuMsXn?M%}hvb;V z!mTcK5sXi2H)=n7u&M<$c2mjY#*8MaPrPkumpl^$G-@2D@j{f5pySrM2v}R7iPGQJ zaI+$gnFMa>O(eT?;vFPEFq^2y=xU+3$&`RVZp<@9D?)?s)s<-JpSb964MlbehsTtPM| zD0Ae#?qU1Bzls>I-)%r60r2Vu6Cm|A1tyuPKJvmf>5K2KqYUU3@ge_P*E`tw$?do? zsN_a_P6XK3%0;V~M!MKwa`YYRFQzLOT~1pW2f*UB`4qAk3;g=`7+T7r#4d4j1+(;+ z-QtrAr$bt~=B4%uUG5<1$H1Ovk90Q%dm6U35b))TrHFfb7Is$a8NgqIy8flq`1=ZX zT_tc(Bm~c44Sj?1*Cxp-q9!ZP<2gFV#OJeF2YbABFA^`7$VYZ(<4=);S&`&9D;&rE z)+;A~L=w!*)siSc&0P=W&Ykx*P0yWv=L8-tQ={Es)@SUvNamOD4jpwGvvY~+WL{K@q zzbVi;w~k8p0I~V3|Jg(q(3gN0fll1^LM3(ZiNW$As%aw96h`p_p}JDLJL5$v6+!L- zCCyRgqVyI9M=>?6fB7y@^8rz{MZ;oYvTO%4`tygr?#f+Zcs!n?i{v_Dg7oCwep72dPACy zyk+0)I4DK@7A`S-yAaUmU+3YDMt%SU$W_XuSBY4m{IQ*x5M*N)Li}b!TTpJif zj*qe-i;=#VSOT7Q`#4-(gy^=03@fx9*qQrr<4tvevQap>31GwE$?tzjUT1|a=dWc{ z0~^~}zA)9!y0va+C%}|iKEHp~Rz03wVeulk-?GqNA?><2ZC&6g@c^E4t1i&i{c?wf zD^nylt2g&*3o|GP4A|bP_8V5-dBZN1k{lh(8h)^J_bwF>jJCj2lnC;jN^g)fwA<(l z-c0v3pix!{0We}n4J9ThU-u?<*ZxreJbJ`fjnhPze(H)d-g8(&uueuYJ+ZsX-s3h5mYdKb9Uy16NMbX0DgVU%TmPs0z$5jFc32O(I1Yo!9VvWF#E zL!AfvnrIC0Rb|vc@@%^NT^js%Kxfo+Hl0+HtR`{S=({Ki-@%|o8)tE{`rMOR^14le8EOtPj zM!#)isz{ooT~o`?X(6S=>H#)>PxULgde-swEM-s-ql`6(lOso__@C$a_&FE%6QJ0O!IxYhsO zwbYYN&2FjSwh1ktI0PaO^*An>jJ5(xM#gM=MQ~ixrsAS78>k6|wBro5KE@oG?_Cgu z2j<6(oj|DOin=1<2e3;N5(pL=$S zp$h7K4VU^($&T5jPp!EqFCx;MSx0VxvuIOdZZ35|>+Kb{s>F%O(EDm7%E{TcBbLOZ z&b{&jUmO=FhowPR$`Wk+YzL#`%nfvd(j*Gcb*55hv+rGqquba^TE4mxNzEH5h5r2MNl6BoKB>upL%3-J9 zc5-u4S{GQ!KhC+Wb^Ku$26>pf@|#fKI+zl~XW-s($UDbU#64U-=>@wU|BZe9U_?Xv zRo>Y#gqSV(Lzm&2a>#=WRt^x(BW4gNOMUDzsZ5Lnwb|PjHG&sMF-TYyrZ>sr*X3(? z>u~lyKOFCjn9ymk2P&K1oc=1EL*n)`YiJ*{hsrNQhh$tcA`6*6oT`G-CF`l@4Zq)N zwYB#=UZ}KuvY+67eZjOEy!0TP51BT9WD2=AGE~XDd(FbKs`p^&bl!bT)(uZyQ5}RY z^Y#Hq>|4^u)qVFDr&c|}_hM*S*+t(qTM1n_I{iVbF8#ztN8mPE6ilxTyrAblTewIV z^i-CAetRJxov!~Fl-9meYXzjq4Ik6?CI0bL_os#~e}tt3Z<>Ju73(-swQ_m9&*F97 zcZ+cZ>7LQK*o|Yf>AiTXD!XCg$D8pZk=1RK$ukK;nCpoLlchI)CSiQSE% zrNt~B$<~i@ZUgxKT0d1HUE57N^GpV?U8BXFgN>_N&Gg&{++#iGYslxk?n4&^HW?pS zhotvv=tIX=uQi3M-S;2gA<`O>a<4S%cE`obTVnIZ3ekKucVh3o!G6zMX8x?|HV#g_-hJ1hkAYCp5BNwXa|B=i|SSPZS1TgAruh^b&iS8 z)oqGH4;-m=xQ~saCJBk^8@Ms2fqVh9M_4pMWS7}bx7RW*e=U;-J-kkIJ#Po_k}ZSt z8^aIsmPSlRvc~W6RH^Q9WNMo^S$`@1=sYl(cvZf0J=?zW0m6p_^Xwr&hs6aQG3+Zk^a;UXb$VbXA3?!$OULQeCbNEujLLP0+D7 zYA^e=P>i($U^mJzxx#j~NA)iOEES^KtC3;rZhVpptQHoZIuVl}d*y!r-g(29RsOl! zhjec{GrewRMx38W3yrwR%EG>>P-_CcN$Kw*r)j?~=sv03kAd=CE8xFb^&P;@XG z&`9G5Gt)@7E3fn|t(Gxl361?Cz#oA>cy$qcTCTwnd^4i(7U+>`gGh<$Pv1YzSB@x) z-uMd^e83^-hARtzQ1Amf+yPqV#=(05K!G%MU>}%e;gLuTwiC^isrj-h&?-PP_yQOy zBa0VexWCEFM&2iO9YAcM|KV%S7>_g-sJa12hCnuXa;V>-qCN7>2EH2!&OY8Csb88T zTyq%2Bn}X0^Z^w7trFldfC7vNXw>T)Ryor>soq;|mFX`|;rAkHY@~XrX&xFchm8>U z=*q|7mtIY!sN$aTz9u^u6w+Bt#7-l*NCw4WgW;E!y~F0Ukquk@_u#t#7&vAmy9S!e zNU^~XYeLL(olo3a_6uqE?Icu7>~dA7H3FRU^2!orpZPPL6=xk3+^9-Y(ORJ?wAc^9tv6@J9+Fz)ChYukP!jxL} z!O9BFxbjjQ2Lamx?sj^4cYrgdNj{N-?l)4KW?I88OTuQE%lM!?3hmR;##Fhy87UkR zM$+W4BW12kOm9SeWd%7wu(gZGSbHvKQliv^X6{)B(VwLN?I za6IipwTuazC6xb;Q(wEQ3UGqXPA7u{vKy_GY0Q%u=?P^-d(HE%j$^;v^GXhNWWlmq z#PELIG|Ke_gnuS)fKRb}q71qbkjm@qG5Z13zKWk&oZH%WHvWN@6}6iaUN0_TazFBW z1bA4PW4{HK4G2Io3Ip4dBhz2^i-r(>)IT@cQttl3G=x>Nk8xMtetfAU-x)zJC2{LU z3m9R@XB+5V#AaS*&F#5J8J5ctJVP?d@v{A6A4lWGb^~V3%F!vJk&=MdH6312Lr+qD z0ZStCzQ62G!bxyv$C9-XqfjCPT3#mW)W_!l5m4E>$ZV%r^^X5vbJ#bFd(*(+iY^22 zJIZjr{LzK=b9uoZk428KKr)#FIo~k=QZ+@spcAm=Wr*#U$Yx^MAo4|k-~oXB4Hp}z z=rISN>ZLZIa|E`5DFs8nz(dcij-NdiwDYVjf5XnE-nb^C1C+HfE@5@E-{|S&tH6BS zP&!Zu)>D7{8lG#n05CI`+ov)^%*hRa9c(^^+{gI2Ov^F(592KBKP;+|qIQpwm(X#} z=X@J2w~RxZhn>X)gjZ@T;lcU``^J%YeR;)U{>sb6n*B>;;ng4zDHXh#H$&sk;@rtk z3WeL?4@&s-D}I2;R^qSzV?nhfVi;il)7x+jiPEeqD1QsnH z8zy?H5l>b9QTE4;QbN>Aq8F;>WGfQMLRzuTT1ryS#|_epDej$10PNSz&;4L{L3Q_x z{QQPQ3#>6%1=9rEy)xRa;^1&|4!m5RYp&;f5IT}i-n zO9VyW-`;j{f%R84AHXWGX|t*Rn@JKvr&BJjMA5Z)3wZdoGRt#Z&}hmP?I}z6f!LS4 z8!Jo$oYUoX|0nj#Wr{rWe~*mWApfxenEB^l=RoO|vAY-7;DiD(jx*z;KYyJif7$=_ z{^d=T8-NSzWC^Xv3CHV{kis!lnBZbXU+npbub}Y&o-X+}`$3S*hJYm1$bSCFeKMdX zR4%Jvzx~&S%m47x2MwVSEa6Ly2a@~l;P$C*iOVw$p`+97ZeiS_KTlChmOJEEl{+K0 zF=d)@E8;?I)eRUGHJyKwzK%&6IS2fdq7)bfxDen!8C$y$2n1+49(l3M*P2Hg!A{>< zKp~Cu)Zu*G;(XZdgk6zF>VE=9tcP6yuWS*(Dgw~|Xb7;wMx--{=F_>1ug6N2Z=7ru zj8Mw$KGfl%tWlm7ZVydScR*~Mn0{)}4BQfeZ3ti_3y5}*!%e<7m2{u{TzMGkqhUpo2&T#vuBcr0884hCwp*BTvzhET zRsnRND@WL6oKkjufy1JmYYM-P6kfutH?w~%MO!~wnw>pA!clpT+Xyy@$SYPRZsivo zE=%U*_^lfcA;-3dJYHROM%n z9jO?RlEaJ#EK*tVl?5o!g{51k%i6R5odp0i=SdvIyQF_3wL-xI7~8d$rEm+txp&M% zulRkL`l+s8Zb(bN&DpOatYXFRauw0p{TG~D($Ho_Au}n%=TyH*FUmaH{tE#((A1G( z{0Jx>R->-k)sjw-6)%dvSz!C+4bU_kEocpu_n9P*!1vx0%) zyk}s>0x@7a&QaeHk@M1~nfdkyoyn5Y`xzhiqky4QE~V~|7;r=|1%O=^Wr&+CM?!wknXy;-A@c} zuB78nu5ds?PrW#T`>#GFc8l%!$pW7b~4m0}{SKLP95Dn)@72DGRdLg+(^0 zho18H<2U;RXZ!}ZbGtX?N|M0GyH}(M31ejxo28I>GR{uO3wqm+<&H@soTF6#5T04O zz1WOjLUtX7Jsy#F$Q#M+b%CA2GZ8E5p|5-`edRQTwP zUMkHTH2Eh|XUAlq(Zn{VSInx&lE)5xucmJE)?Hf0{dTzsrGY+aGWZAT(16Q=`9x{5HP1 zO3?l;)MBWKNf>v4wu3CadtJ%s_2|8KtLjCXozWhFOme_j{qP9r&LkTq0N^GN0?fwX zthobiaEk&J4VhJQqKWg~i1IJjSv$X$XC4~V-vWd=`g=HTqkwtnZPOoE7t!6GS;`}8 z3xR@uCBPLAuM%!D$RUqDNdn78emJ#b2|X+gxNrBx`AJIp7wda51Vr7T|AKlFDDPj! z@h=Q(wz**Uc zr3-y1oXs^zZ|0`y^|+7o;^O030R>%CjE!ouR~I&jBehK7E@7;uWBzh8u59*^;Cp7l ztbh6>|G^u5t%gUoV(U(IxCzEz9e?1BCvBjQy}BXbZmiDE1I$}VN8Z0T1LU$>MS^8% z;0!nzPx__|;Bk!)_E9zY4gxC|w?>{ft5N8&S&Dlt*>ix8&M1^CNcP+UAiSy(fj9Ym zIsD(5K<+L87m_`T$#k-$uJivWj9|Lz%bibDMR8miM~9K`f{}r*%p3v3!dSv@o7uP$ z$z(8P1X|QV|Jmmz=7y1HHHo31W;LT4kO#o)Aa%J{A(W+p1&Cyuz%Y4Cc>#6R7Fha- z=W@&Q*L4F?Pt5Y1IyS;5nzgt;s_vQAP&Hp#V)rzlYUv$$o8ht4}3m^2Kc`k-0ypyG}Xh1~hv!O!4l zE1Mg&6r*r=5xmy`7Ib}2e)E5j_ulbT|L_0!K}exaGBQ#W4dW<#S0Y*2n?o-ldy}1v zvXU7Yg{(57tZ-x{am>h$kWKbHzw4=9@6Y)D^ZWCAyM3Rx+pBtcp7VG<#`U-!_v?P$ zuP3wG$o*D^Gv&Xq#Stf%=n2u!SP20D;3eio|Y3)IxoQGk!fD_iX1rc5|txjQ@~E{34F0_k4N?-n41yU4Tu${@ii!1t)D5 zT{-+;-8ppXzy#%=?cQv5j;1H!rn#A1ZxW(#u>?`_myeF%$G2)`uu1E5=F)vGN}F85 zhfsIkv^b!oo!J5PAty+D+Fs<^?7nQvdCv#fCbIRAZjmFI0^?@-N3hkxkfTVy(1 zt009j#1LT;>>l)!2u)N(T+A8VCtV=5Z+ndS%kOcQq9haLekg4GPlZEKA@e zF42_ULklZY9{2w8=*pKiLOev5K4UAsr|^54;cJSl<6FI-DN(^W6}>?$$zGJwFVJHz ze+Wz^VJo&G8gNT4wjv=PgW)?~x00nKd*>5l(_+ndv2%w=v5W{$+X!~N0oBfqZoK62kL{(%C5pG!MFU(q>A79 zifa%VRekw7ruCESz&owabVov2Jd4YYm5H525JDjS+~XOEWzp_9N<(TEw?p?rf$4Q+1Hh}(=na0``&44Xl3Ie zmfKM1-iM|u3o-G>yF9F2?swGg%vwHugLr1;Al)K~q@J9LUQUV%C4P*sF<-1^@u`F? z6q0kZ2}o^DF=cN*YsSWJaQb2Ut(_}V1ilnP#gy}om-_m?VOM0F!#>gIt)lTw@!>q) zjj~cZIf#bVo3+{fciuKyEaY2GEj(wSbAQ}&{A6qrh(SsjIDDwje*EVKiNjW5=;l`) zIx=6rXcWH+Pj$VJ3_fJkGrt4Twhyq;c-pH} z`-7=bRn!`o<0$1@@HiQG9K#dgI^3mdkTsO`J5eo*M(7QsxHjOVRiBrMV=4Tv$h~#L zPyd7WHCSB#eW>@*l%g;Wj#yzu;~T`a*2H^Sn66Pimpz-MonbW^dr@H6QAnzyy+Pm* z>)rI)n;g6Dak@U(-je}~Hp`d}UG4(4k zRU8KQ@`=pLXKzH!UwB6k_NJ^*`D0ahirGpm&){{w@AI5l`kobIC*A;}w1a?OI&X}G z5ZZCZ`XWu&y=@&7iXm%W?H@xs@;i!1F6jv(orBBb%l{C@AujjeLf*6X|JpyL{?%i( z$M)QZlCm2QP--n4xB^}`d98Q!rQ`vZP0((TaC;FVbH<#Ka^=)5(huS#Z*Bm znQ8Myi=h-0`Vl}^oGP#+LD>=s`U2JElo0?z9*Ed;7s%&Q+C|H;ei_0XGJYj@k>4B7 z4AhXJ zepsk-cs!9$S##rP@1cG+1eC1}1s!jT*z$u>+a|vD)m=^@4BkqYuPET_yv5$ZKX0w0 z{OspnH6e5Vohd{Hyc5GDJX{zU>vC)ImAV?r(U-fD^Y zrW^6^1@ZE=XleOpVLT|y2i&V8ar%)OFIFUe-_35!YB*I5IaenpI zFM6aINj_*3J@K2DpkQygCe1OhMWgk!Rgugg!_YL0C_1S=iK`wp-pm$?j-p%$T-@n` zcQxGqkjU*wx2REBq0?~b9gnW>!W7BznR~H3ks5NEQzWJJ5@#iL8u`GUlJ(zba4>@8N__VIf$7KQDP` zt@5tF6>y*KDY?)mFV%E?l(>VJf&b=HixZKZy*??lau|ab4HpsXv+jL(U6s(UvotA&)pTf;-*1E!#ez7uMflU3eWJ`8Mst^Zb7y`p z!erU2Nn>B(K5uP#c&$zHS4)b8mfF=kF1BkM+uapsV|!=3$5iz0T_<5RRnuh)v!APW zZ85KxGk@D7FXxjq=Ng43GO-pQ+lGnFs16*Jr`vll^^jG|#bd~@29566*r8@O<=xwr zqv1MaD^o8kTHw=E{bs{z<@D6X z#$#0*9R*EJ(Hxb-QSg{{$dALo?^x1rMDvCEG@*;)>HL|DA1RqXaSP@;3X zfPYez%j1@4du*FDK6AA3TJ9#M8yTO+_Jdq|oUo>$q|(b6BPnm^n+awTY}9HY|6E?X zh>RlKv)C5?#`t#H4w!3mk+EhqyeHQ*bX~lk<*FI4l2ZhD6X!$Mj$`h6IV`cZZAGQ>P&Cr^*?+5Ek_R!W4l zUTKFFeRU@A*6Gz;D$1hMXuR4lq&dCX4iXG!XGV9iZfUx)X8h%1TN0GLTPM%#UddC@ zuSPO+t?G{#oU1qMF1N`pa-(e5lb!GaV6@Q>*$3)Qi|x(u8&y9zWjEsEv_ZvmR-#{m7Yiz8$WpX#lR>rz*)^=I7 zkqfmjrTg+(7sECw`yBPrh!@Gp4S2~tOZdC>wWd(aC1qj`K8M1Tmx$aKYxQm$h{_v$SbgV~ z9g%y?{+-{hDYTrHUHwLy@ey@~YSR)gyN*&BE_W|J`nK{$Sa~l!&sTbdSproo6q(7T zNrUn`A4j*p9xUDsgRvu1s?fZM+H(JVgULgSa>vu+{N9-U_RVJ;o~e||Ka*hvr;ND_ zZ>4GLjxIkf9&nU3rpU6!MjGU%9!EjGtEf##}+-trI$d7w8 z;8m?s`8(U|*X8i5$1GC7%x{2;HQ=)sRh;@KWi>h8V^HHhkJ2G2rscvkSi~G>bqVX* z*$Zs0_u4bWRC^{&QfS3)xXQYLk9{-qu4kNe?|KcxkLJ<6FNn`s-!gab@sX_58hVw> z6S?xHkJ+)HB*nbJRx@5<2Ncb(JG8H1PYd02Q0Vlow+$%Rag`DOQao1lq%@TGt?yi< z4vENZco|mCr)7D);$JKZ7W_ zrBs>v$#?NR`7@3M1%oCvhS%hwPn-GtIYPoB^r_XGH$Qssv|2N|$ibGpEvHVY71!5R ze7#v6^^5&XyOFoa_!>ou4~y0GuPh}K|AtuehV?@BuMbRCO^)tOuSq@$vCUO0lKLt6 zzE{dMuIM+*pn%*lTv&#`2Eo+Zj?Hy8f+IzilvGdb)Dct*Pq-8r=aOC#!=QT4gqHF9 z*D3G$A9vhFtol=?d=)*fasBu2->vnWnsO`VULW5`6WpLcZGL;oPG*tHS$h1$FI}b3 z+Am*u(^iKuT<6-AHlE+jBg&YuJ*GA>VdBbiioA>Fq~CXs?)V@v@XcoqjR5#32VHy3i-efhGMnJDSdK8r;L& zs_QCi*2y8mxB_Nfs6wf1M5Wot#%M`B4*3>4qcEOdoBS!yX7J)0<6YXlF)^OK=f+k0 zrh?qhUpm|I&y8GX+Bt;cjwdE2&*|RHF2AXFCiW34?Fo_F<5{gC?ss}7Zi7olFJss~ z*cQqw-E#8Jz%3V6JH%Ws6Iye>&?ZaaaFTlf)SYCH!SNbHGQaiSrUAvpC!H+`S zGwo9jtD*}&w9%>a#+Wm_t6v7p;M+#1_+?k}yvSr97e>2kpQ|tncSIjz?bRIX9tohh zO9n~MEV*7*K`(bUyrs zy|Yx7+wyImak0qLpoRhBBsQg8qyF%f-#sng`7L}XlVUW;=>0x;Bvzl+cG*hLwbuO&2kRA|{{zsbgF6QrJMN$chhsE^?jjn^a|D z2`1I?17SAK+FGW*BaFjD?zEVF(>$DOZG~67-WpOaT)n^HuHTl?%Nu0nRP&26=t+&Z zHwzq!S1ajS{n63SW-mUXLcN9c01p2g}#F}c@pR#f@ulc?O zXvFz2XHHBFtYbCbNh;-w8MxF`{5VWpCV7o0!5`-EwUf(&M&h9J9MsC;N2dr;x36ob zNDap|NbQ-1#BGEw?w-Qq@tQQ7m+Q74jgzBBEA^z}3>^0~KYU2+JJhSK=40^rAMgIu zhWT4Pr)9a@S+k^Xu{UbahBzO(#zZ{a&H&GeO5)(o;=k{>;;zBSfYPyMcm0@%Q;=1( z<=z`h^>KJFxE{*?ay^-%uVDB^YJ*;?=Ld$?pV~p zL->pawi_~~Mv@9fzP>)cVpXW}W>VM0jS1CGR@KnEz^!?f$#s|Bi6=0;0K@~E_nNy2 z`7@VopjF1R7f{?$w^8AVgZvjMq2Z+!0w&h&XMsWgMEk?yrnS)J{%^e1G^a(Rq$0Zt zl+Ak+xA^C(EuX_qGKgRP`MY1Ca^KB%d~Dk2XRgSMuEvzvM>%Z`nvk=!<%w7Fb8(tK zNYf(IP$f;8P1J+m$OTcD8#&@XN0lla)8eU;{?Y00jN5Lg*X!c#j7VA>MWR17YLn&I@rm>E{HmS| z!6bx1@1zsU7}sjunapn!p;k=nRaY*|hfKVyeH_%IrX76aUoAk%iqENO`e-YKnzB{% z?V0eqv`K1f^peP-&o)ANDQr z%F1@b`;ts!FHVi!$yuR;)9TyTB7T1f_hV+ICs4R~5yv+T28nA7?gptkjXQ!CFQ9h; zcgay{X0me+KAaK%^hoRkU5bx`gv~U4K(v>+xiLQfe5eWLLZN6n{NjTW|0iWDmcs$W zv(xT}Mvpus2GpY~p5FGA|1+`Qguq--8P(C0E`}Et32XIDHtoFmb~!r@h0Vow!_~=3 zcj=n2sfGF!DC}+awiv1WnNDUwq4ydaw)RXwDsP3q;keV2iHyf6-ZmuGiaC#}^3hu8 ztp2)>LypS9F{k%Vcuv8&ccSL~j7emASI3(#Li0_ee9)k?jJ2o{uI=Ef`up$1~MhOqv3 zpZ9}9HaUxJWdi7OjCUKEOKrYyHRDHuIrFC{(|> z*xm#9qF_09xGrxu??915yD-d?QF;W6aL!Dpa(qz6Q(QEhlvBDL&s{tL+oDi(H&bW_ zEU%T)pAb%TNlN&9E(vXC^HS0#=Fn?MOte^a7m-{iZtYj9$-i0ebA&P*bMP(uKtRLl z0z_y1C*ZXQuPAa!rAda9T}b>U!$n-vJeyi|^>QBK`f;VbYJvWBA$>6Se8*5fSFY+4_;W9fG;0p>12CgcC3R$xtonk6a!?!QvqH z(8t7oXFF)#$T5`^g?TA{S&(f^ex#$ka@uoJmA)xsd?Z{y*HK*lJ@H*$9)K_{*KYuGi`N!cdw6?6`kAO;@v5JPwgEJ|z>Z z?A^7AWE=BHq1_=SK@CPJ2-S1wH!?Ec;2vKf*&ExAt1>p2q1jk?eu-hc)4e)Z0fKwK z%W%B2pz%!_mf_VN-Kv^S&q*{5QxwaF?R)r|yBOY+bwYZe(SGAti+OkOLeyPy#$t(c z%c42TjK3BVW5`G2sCEWZf67+0l1U}SJN!_oDXY$0ps!m%FVuMY3Wn=42Y7$-rbFS5 z=RB0=#6Ov`9Pi0kd6|b^KBwto=A)#ii8T=Z`GKpaifNDxY*8|TrOZ3Hm*G(OJ8lfP zJ0{l~Z*p6E$mqYuJ8by-IC#IErN49}tS3m42xZUiq4fqrD+o*XG_GNNpLj-ioA_^O z_L#YSYS?`5{8J_x_5)ejPVjjMyS8Zqc&_;eX}Ea6YTlC(Pq`9Xb_pI~b4rzLz+;s? z?1ELfbLBDE(3T1{47*pW2*W$YR?ROI?yj3jMG&J#Wq-dZyY<75)AEKs%}Vl4#MuNZ zPZ~)(vA4$jX3wh4UQpi!AmVsR04wTz@7_vog_mB>L_kR2{R@~S#p`r_K~!X<+=n9t z=N9kj2Eak|v%izKTAh0~*=;y{XM;rw^Qq92hvq3H@?Y>8PJ7?1uPf=eU*NyoS*@hC za#52uVfD1-OfMgcxdY}$S9sTfhvP|qNZca)Tto-~**IKV%tY9?PV96!4t~YWDe+6@ zJ<4HGWCSCi+zh^53Hwr|G{HL-$jt08QsEIQ`Kr_e^23dBaLbUsa<6UE?%{2n^&jdmhED zwxQ%dfZY;p$FqI4KG3Ry-Tq-G7_n`)gv79S^NAQBc2< zQB{fvmOsmASgytQ2r6FqMXzmfx8_##gAoazQSWR1DXLA>*~4TgOtOX{b}ac9aoY-a zYp|4J=(duua_@tuf|E+`q82_2vv7bMv&E!4cw9~89?vs>9Iiz3v?zMn01F9yN->0-_yC1&KdTXdQY}*gRirQP6w`30)bL3Ad9-*{;9b>KT z??2Ri426v`5o#W|YSCmMFSkvpe1$nnK3A}vH^{=HrmUOOXX%F=*f8@c2fT|Z@d;F# z2sEB-U#)S|pOmlA!}f9@8Fla>@`%oj4Yn}{2)5w%={IrWufFtRa=Gdr9h^ov;oDeo z<$%kRKm9%^5;uP3YpNJUoH`0>Wvt$t}fCo{?zTTX&#%pQ0E%1QW0u) z>KA?8`wzz8On|O2A@r>xyEsyE_u9t?dkxl1Li&|Aaw(kE&46Z3ET%!@8^xP~a!8{m z^OAHvrd~LQ^x4F`Ozr|HP^L~ThE)z zrqCvz6;q{FPbi>W4Q*Ks4KaBc5?M`99jxqan+|YhZYsP_tdv}S1SS29xMiF!hDD;? zDE-pqjba|+-u5;zZ=DmCo%c1Kw2_Yy=ghp*2e$;)lJ{7~0Kmvo<4;gXW1 z;3)?8Z)3^+sB@OJPWBy665#hw2~4+T;_FN*8}%!fvW$Px2gqvDZgy}=-ET!7g5yE9 zKKksj=U%Yky|n{fY@S*@=ZlT?)S&TWvS?zFZq(loyFV zk>59__2-?<#io61(`xeLStdt)he|O0u(Qo<7U|=O^EMVoA|s`?-CemW*e{;5e;Tni z;}#P!3-qF^p#ZhEO`11Hj(hKyr$UOLkfrO*RuhxsPHi_3CGq#SY}P>eUxih+7D`vtD>t6xi%xO-&Mw zlNM3vCV#4ar;}F)A*$fwX`(JsgjgAni~s|Jdz^WLPybDF>Cg;(w8{Cc6fTFS*1n%V zjmXQ0D<6IJHp|#cq2^ZS!j?i0hvfDW`7*4=3#D{h-4xnN_UqsEIarBM3tu(wE`N*c z(!XaATbW+cOkIY!Uunx%uH9q#Y6%MYyqG2XuO5cczhvs2V7jy(h-bVQzb`YFW%x~j zoqLz+sP2N!W~EFG;52rn3d1ApzPlYw4ut)^@BA}yKXsQa>ScbWu#+RfdP4DJ@{N%b zK;m=;mF8t{ld6N?Yv6yE=$EWA4_q98=_GW~_dD28q=zjZ4x@1PmS30LKih__IV-lR zd7aK^z#jDlMtL<9{eev3BVx8byLHC8iG|XMj|N$Px2ucApiq;IkMG!tWq9HpdJaAM zXaZ@6!i1pMPn4ElTp?{z$cw?0kt~Tm2q>5W05$*AHaj?TvA1ig_ETIEzSvX%|AdpB zgRX*ZAE;pl{!7x1i^El$J*<201Eqq_kjkxhxtEP(`$CQW;H%4*Qbk=$B|a2n=Q2<4 z&v)`!;?@8jJd~fn$Z?M{UU;u_gi`!g^m8Rrvc^n7aDw2T!b40#)AMrrzh?W54c?Bv zP0pT+VF}+{>6EKMat=)wx$K*dN$s0>!92Q+WM2%Yh0Lzuq76GlU!nuS*w5+2dNv=C#{)JPOd;g3R&E+lqOgt^1EnrKu1R$r;CaOVa*1o+ zht>HcI;l}H8uDc^ZBK?69rFa;)h#GI0B#x%u8!d^WBRNEdiqpANE}2`u z4s5YX{sn?Cz1sN~0qCCi3J#il6&M`jU0Z4<<7z-$%j2y3w?87#%X07$$KeR^lkkFz zgdCG=L}rr%CWI$XPUQMrs{^0opK-I_gyscjXAC7!SMrIYzU^4-=ozJa0L9za@6}22 zp3b~$>8k$8*A{$tbMcsmOj!SqYVDrP=0{9O=7Fw*HRI;mVwo?&XV-mA+jK3OwLxt0 zqgWDI1Rx6Vx#Zf1=J%x7m261xJboHb2ak<~Z`!D3o&<_&BEOVvP znFk&!>wF5{1Hu7mED9@P_#TyDy+IeWSY{c&%Do-l@6bMAGdlO_;xuFxi9%a7Vt+h4 zR=sg;cg=WfcDXLnS9oRoFeC{f+LS+XIE=Ao0kU#B_J>f|$)IE z{Am`272R!ScW3>Hsg*Z6DiiamnKx_la{HZ?&J9xUJv0HbRpIQjm%gPN3qzsE!TC?k zw$Q;E0F+qzcAexR=3Vl2_SBp8nlEE-K#wfi&c$+0&6i!8Yzk=@ODD@sztYq>>w_8l zG2DU8^{M54UO+NtlIifc0-0=nJu(ma2wDsghiFR+2|5Ec?)zll3NW~GZx zuda?6EcOgfN$>uaC0OeDd5X49UF%nRmqHvJ{iuVws|`C9I&x+2;Wf+n-n!~%FK-wp zA?xL}RCSDPEZD)-l`hLpd3-gm%YA1(iHR`BU%q)E+>9Jw%rbc)Tzg)T#ILe6rO}-0 zDL>iB0+SU>W^d6zy>O*nQ5m6n>@(Z4qr%>O-}=3uDY?e>J7CU48HWAo93KynCg>%e zlTd74FBwKJeN=BxBZ)anCe{W}bS&p>{au_eXo#=oG zrJ0(VT5cUEg>3MZP5oh|YmeTqzVF`{=6gKe&8cx58?_S3b73`BX)N&vI#Jugf&O6CB0+$D3pQ{cknV zAg%ztysxQpIbXPP^%AFLX%dS(%D$2#XRg?GyzA3}DCFoLcUInm zSgm}#^JP_YRx8lCI+Q7_kOhWJihSrwmBU0An)aq`oqVE&mqE`&G$V+dpk4K#CGwZmGV~yeOh5*1Q&YK4vnmH29G+GZooYjteN94y44ps0L*+?-mq( zqi48tf&u$DOy}@iGE4Qh4S_b3ehRM_KHiCRDZ0|zq;(m5qg^3=Fv=>svGQIaj*I$U zG27P{wf)hkG60tb{LVRYReaKf>yD5cG)|Thv-EQ9V!vJBm_YX=P&{67M3~U=wAv{q zf{f<4-zg}v!?Jl#9aHi8Hb?snNR<>?TR1l~My`*#Wi&rjNM)5g+Hay*v$;rCy%(=( zxF1?+#Zvr~ZiAP5Da-H)!CS_wdt+W3z1;LA3Ub*H9zs&iJzXF0FfxhuK zMjY6fyklj90!gUVf9d1sCm*GuP`y`@YGr?-DG+33C{pQ9JZa zI?z99hLo)0BIb+|B?>O5j71B+ zD|At+c>@5tKVO}NXN@XDW}F^D)&iSN?wl;>bOmJKX!K_@5wI z))#f5DQVKk5n z=LU0waRbMAl}(U|l5%cRx2XRP&7aQF#ihYiBP>2lK}Ssd03E}@mr(FlFL1Cp91x99 zLdGxhyzggauqb@|=jKsJxB3OD!l{tkLJn~3_Z#;V^P5??kC0(wHH4E^V2Q+{36 zttW#VbO)CWq)Bj}a%UVl*%B!!#aoI^)Bp1=WQcTnn(>?n+J7Tge+}>ict7OAy#py6 z_97hG73xN|>AaZ2Z~u;9@>3EoxjEQdylfxXUtMjO09*mCWhkUip%>8|#oa0$ zFjUecvm1!47|K5q#9jRE-S1g?diwE{6{~I8oi}gZ?Dm&czcemUzp+TcnXEjO{gR^y zURRj#*tWDXiEb&eWN6+sshUIK^}xK5#P160g0AVgK{cncn_vi8(|h+4{WUq+L=Spg z7y~iUD78=9NOgK@9rLNu<1bQH2GGj`Zk=Z)8_#*ydCH0jtftsApp&Qs9-n`IV`Ee9>>%?5YLUNN9DB{VawvdoSPq3vv33RE#D}_3s zxT5Qnp)3>^yUa_y=iD=ahe(V|GT&Z_peU-n`2{RsUwDY3FMbAUnY#g|CKh0zkR&HF^P{& zrKhUmR8$v;kJBkHD@$-;!m$=Dfj6nmpr~h!^hi8@S1deFc{P=N zj8K*S!QPEy>!Zb(gh^J{G_!y`i{G~-?Ede6eAhzi2VIXo$J_GG8h$+r zM;$h-B%2`2a<)*S_MbgXQFMoV&k5E#gYhlqAc_w1jv z25OKItj*$prp&}!r2JOi4zdi`bsiI1; zXE5!_za5$Wx&mo+q$6~h;IZrc{r6xw82|RZ{R+~1ajXA;MpaUq#dH+C7ND z-QV6z7QbA9E`l7Bd|fLdwvruSUMbsIbrnravtu4n)lc9ZA`?7bl<<)gLyt5~t>N-L zJj50rD+icAbC!6;?3p}D=+XQtzY^Q0v?{*HBm)CwBW*v@u4(6wZcg%9T_VfnZ5Z%D zeS5xQTOYATZ$SI_wSo#0Wm_tiU%cQVnw%_lpV}#VV1D5>X=jvZa@&`o_lIqH@K5Hu zQ?n#)j|ywj|2vIrY-0zjZ=eM!dl~UtVu!OXDwWZp5!ChH9q`dF9VA@O95Quce*Mrn z#Ty4J!ig@5Hg7P><*_$w2;=+1^VDgW;A&_O=E&lr+%XjHAw=r+0Ym^$CoOI|aDcJ|6vKP36pP{(mg=i=XNNauDzC@`w3+fawB->HFgS$9x zzyf}BR>GKeC0a6w>X8c@0nMNwjVTM}_+Jx{a2L!MR|=l<1w28S@&4(9Pq!m#^wDQ0 z8Tuka-v3uWoseNOfJ#R>7t=0-#b)Kg?{N7$I9p_J40!-N9CCU7ou{kFW>N$hThB>4 zpr2-9;>DC{$x{9u8}>T#^!NPC48n@EWYkQ5pNR+h83ZB|@L8C`jm$>FZD zRTuUT`ou0E8{udcz&Uy}r@F!YKMXk1 zKMNYLdaMb6Xd1KU+uAbx4s* zW7EVYCIsQw8P-2=C_0xe1M&lTussPtf;@Cla`kqyO#QcphK8|#^~ z`H<9rutCV4n@$IifX(O?UtTt!yI-ANzm2Fh-DYJUtEnDaCNQH3Gt>ihy9~@L$F?`n z%XW*|+=IyuixfXrUcGOu;^bfR=$LmFZLBGg1sH1eYrn%iRW)p*{2w#3iC!Ka9v{!I zU11yX(Xiy@(IS4zS;df7I(h*~%|D(~U58J&E)?pboGxk}>QmAretjO(?l@%IJ9-Lb z5!$Mp{Si!+>dtz@#ShG1zL&Iei$BAWJt&NC#W&q!C-p;{1uXQyFCPob00eDgBq zu;^2|ML=-gzwswL@e_mB;DlOsUa^R@EoVU;P5(c_0<0?NJ08kw5_1^FFlxJ?(p&gAYuG!$Z?TLqqGFU$1Ag6oAYc0HSFkOA+d= zz;T8ILFXP;1|6&H#|NLbzl&g0Tq~Tv&e~d9)aYX@Rw}8QH5Zrxl{j9B(4a6r;Sw4r`BEX4-KjDZWt$=t)lOcB-fTG zgCL{{Op&m=kXXlZY9r;iB(L7=myJhcQoIq2Wn^5>*vGwXpD#ofsQ?^%xQ;Tvg7GaISi*saitF8LY9FX;v|`uu#TS1-9hq#%*eZ#1DLn zyur!JO&N|yT`GqhgcdxLmCdIj;K|QNC2FT`DkvfTjQHN?1LHgefsU=>abo^q37g?hXC5Nm zO;G<6mUSUyKmN(zec4j-zjb6BI(GmnfP(?NnjRIWqP*NYV`fy~t0?0C6A0^vVAYj}kdQxN^WC zjAULYI^@79`gc-vsF7_8p)T<4AV=?0Xrgdd%k)e7MM@@$E<%L6bxN7%A$t%Wver%S zi72t^LN*(yBqTGc)$c+l3g>A_dxmmheW zuzTm#M6QY{8&^r%GfZzLSK9EwL~scrL!vu4>$gcr2Gk8~p|S=HGb}_R^r|8oH?idH zP``kqzS`dz)?39dE3-i?3k3;VCFOCBOr5heQQJ5rBf{cpz<#gPn!jc4YYZ^^zq1RB z6?5q?Os_qjcdq)i798S!Nf8O{w2@`Bh^od(mlk}&DcL|>5y0fWA1!I1wA!X4B6oYD zVkNLPqO5nutG`s*qQjkfHSmV*+M0?~%WMq_reO9cIVBa5QjXgYL`4zX+mG;^&@Kal zAKI^5W=(v9Hwhj1HMLDFo~O-Kq4JSim+7cdWO>=+;fJd}di)Eyt_~ZWvr_YXoI)Lh zZB~)fX4M(&R-(OcIQZT|As8r99FEY;|0x9H9ZGTm5T1bWjj%;-o7bsSW`BDeg5*Q8 z4Ab4%xD~J51*8Hr6%!g?PTG#d=ok2`_9 zq;OO}M`Oc8@8|c_F@t5hmdKH7SgZgWXzG^l$WK=aOcO6dZ5>Qlp0KwQqq(`c(7s2i z*YD<1TC)AoW4=<7NbiLR+bUyt!cb~c>TZ%$`An&+n(?6egO4K7)hhgJ+j8}#X9HmJ zLp(PY8lX3$Q20-Kv_83d;}uH*VP3xHpT^VXPCh=TjjKfVKHSG4)oo*Qb6kc~4W)ZO z0v5fGFMho0_yuZlY?)tg9BR^lb!ZojfCy;x*?}kZI*!%{N`hz52s`T0U+bX{eD^3_ zfl8{zz5UVLn`8f$oK5rkFiH{T%$GqmpF{tc-1~Pk=W=DuhFB2+zi+AGa%zV#ml`4V zI<1#JHa+$FgT2%YU#F{{dmu9(hngaX-&^LYgk9e8pDXyrdD%sl$luu=LB~Pe^m0l& z@DS8Vu%xE!vdyHzo8{fNOlO|rFi!)si%`(e@BV|zO{%P!;YC08ap#Q%O30saPZ z6%d-qI2Wkvc|Gb0=~L?cuFz}K{{WI4AUqiQ^j;peV9m_O<>9gUJYHkN4a()5y>BCg zx0`Qk2>Q#Mgh`b#);$%%h5Z)aEkclljQ}^L?U0(3!F|i7&FH+gBwr}Cd;W1h%Q(3a zN(T?@p^4S|1V}Ad*Gr5@pTR!cSSayV-~zrpkC?dX((zsiEvz0a^zzu;O|Ru`&E3(u z$T84N2+!mUOjpepK^?yRRm&Hf7ly9pXD*vATD*%65x!k>TCUzihIwnyCKbpNbseXw z#gtc4pS=e+Tuz{abER619t=H*t<%b@FnI2g)F^M`)7{(qU~m3{H;Y`ox?Fb$l%%0h zVIVbx<1%YcC`jm|24Slm=;MfO4mwc}Ezs4Kci~)4J#`(EI<>a#>#;3b?Y=kDKh-Am zmY!jgp3p=%MVKY*)`WqB1LVr1bEUlG=>yDS0Rfct^#|2(gn(n?Q3tGKrGSl`S{!iYt{Mh1l!|3th@;cY1h7&wrEi#r~Z{>D2F2A z#_TbwInju)4^FGF_B5axDn${_u&t|A5MDp<_Ycs&KvI)@iM~B;)lJknYO}T1ZY-;2 z(-N_x$epN$P4&iz&VKL4LxxAimHL}fZdvg|m`>Eak8f_b_TA=P)ixZHCOsmGAo zYfu^QndOz$6oN$U^4D44tKPLy3X4Cg0=JNlJr6##jg*6Ban^*WsQB>BTnJk`NoOxk zfeMR?SnxQFu2k1Z@r+qGDg_i!Z<+NLL_ARuZ#}PEyES^Ls`~ff-u^t?;nt+`l$^L1 zB^Dj=d4R=@x&L?-Zt2>QQ_^_|3b5x!u=TH+vsR8K_k_4jJN)>s6-vz&LD6R;JUAhb zbj$}_iTrK{{KERp@hrj>pZVVXJQEd=^_p+DSXjO@`!!3LS)s>Ahec_UP9=UHCch&|*cOMu*O>?`_bA#zH<>8}>7l)XQ6VD-;`OVb^UgT8!&Oa9< z30An-CACvC=H|QT79=s4biCd8S;;L?{&S8EwsYk!Fu>FlgYwYb$GwF6mTa#7xJ1Fm z8uU(oy$Z{UxYP7cRFxMQh){3i>BiR=vkz^#>XrOM6n-R$6Uar~o{7G4sHTckd+)Dm zLgYVkk=s%FO!ZDRe>T!_US8edgGaI4qs^*4LIstg_+BW@(zO=)UN@P*VFZglxW^sgJRj9c}pZIj( zAzAKSV<26cLjng+`!6y6M*BS#H1{jvY_T&})M3Jinv z0~3Nxcp}tPS~(2OW_}^An=H^^xI2x+`g*#$y2iS?9w}rq1K!4XEiJ7Sh(r@U6JuY) zIz-wDU+gYUlpL2OC6k@!`c<0TIT^-xzjp}`h1Y(=SS)SLm&N_z#jsqi_Wpy>oHxU_ zwdG&JS3d=?b5Fy}Us>OnpsMC>dUt7uGu8J)d(U~YWTIl z)hiz%6<0KGb?fpTmCqm@UL}Y@$W&&MFlf6>`qU^rkGK~gJGIh%6^p?|@0d%*_V0(P z9ron6H_GCxIiBl3FIU}oBkwloK%&h+i?JB9P)yJVTEP!S^uSD2&RbgIN)##XBfxgc z#FPE^K_nf0^P}NJI;W#$7!(4XPUZg2=74A2{ju1DjEK@R`|U(yY6gqQNQLRL^A9-u zMUgVfxtHEF;WaVCcDz+n)n03k7cWsz&IFnU_1&={_WK||{<3zcgwxV|6O4Z2!An2+ z)xc)A{Ejfq$_DyR>$FZie6QJXIytG~d9rD#VH^D368ST~bBIlB?*i&2S&W*b+blbf0VlAvj?6+)7H+Pw| zQ}+7hs44$H^4>BosxRsv9b$+9$w5FsQb`HvZULnm=@10z6ltUz1?lcaX^>PxQVHos zKxz=A>+X@?bD#g)`{KU2_k2DogK*A1Yp-74wKiAfz3x= zqeZn|eb*{tgL`h#go}%g=qLjK8h=+&NZn)EJL?i3^q0-R&bIzV7AA(3Bu;@s{Kn@ zIv}9kX=tNLf0NwCtJ>@b$&2YEWezq@nW6`s?{cbc9wY7{Ji7nfv-7;-AlJ6}7d?Ou z`DDYqy{WM5@tHfZ?I`}TV>h0YIM_VRV*-W_qf~>jnxSd>9=w>M3aTC6r+-ti+<4gM zv!Y;ul!bX{shlj1OJe>7psVOc-0OsRRz7w8?23^2slt_xwxtek;O6sUT(aur$L?DD zSpj8t$ogjo7ZUb*2Gj4--zl_m5dKiT-#zt28r3_UT;lWaU}wp!`@A}-hoLu<=Q8R~ z(w3kS4D9OWYtDiMb!N5vV^s$WC^>?HFkk-3*9%{9iCh-u0=QXFVAN}0#34ul3?fl= zDbE+R?S7hWKK#CYq*&2Var8ru`Wwg176&GEy{}f&%#DwDUgSml*ze6iu5G+3%5ch) zsw#FjQ&?H+n~Dnkou+#KE#FS33V+g&v4U=NI z=d!5=;>;-gr9k=!KEk=%+A|8n1)zH*5}p|o<5>P}J27*RQJYg04HMnLfZx@#d8|Ou z0UZumd-u;xZO+a&6fqZ611Ek#_H{9|@}{C+Ms^2vi6H)I<6Z$Ya1Vh>C$xt{+N*K( z2f-i6;AFiQzZHVD2;y5d1as2-8Q>?OxkALr!*Z@9{%*h*g1Y$mE#A-de>cD<34&W5 z{pcD1GqELo!xb~Wqhysb6WKEA?E8IdtMu}B)>ZhPwDw)-#2@P451r1|AGbvJs}7Fr zJ=Y27d;KAD;Lx_70n^CXIu_fi7nJ7H3E*op(&fi~#XKj%88C;t@~I~>V>2GBHsXkK z1D{C*}58JTs_3mfb zBkzXd^HH^jqJwd25ZLDR^)*|Lqcam-i^c*Q1kzJEZIccNWkrmKTOQemL#h)}+U2o0 z^Z`7LaW*ovqO!8z_grm&zLd{a(+O1 zY#rK&K-97G5F1%eStGA*k~d>dU519aEVA;qs})@1<(qHT`s-*ZflX#A280;{f3h8& zi?4Sg`$A7nPA+U@fMcYM1;uceUCVm7|A^16qMnMNNl#jG9~0@`p8Kh&me_B;w;PlG)~(L zDrRSRyvtvqJP|%*R&w$+$E9i>bxkmVSNXgY_gye5%%;>t-WX>StT?Xvhv2{f>*?;#cVZ^cDMbQ?+Ph)xaz0 zLh=Y!C9DdMx%5um=+sMcsLgn2D+Cfcadr6@spWS^ifT{tsLzdrKAOzg@(Mg*duO)e zub7)B5fL|zv5IMegACFjeWHtH6&FUk!C{{%(=$XZg>A$dj>cu}7F`tL8yBgs0OQ`T z&t)ps7gUzd&ssjKHoNKFY{8z~K61?O67P%v4`%I0~If@v*Ik~$a$m=UH@4br?`}tQoLC%@z$BHo!bn8838T_HZn-vp! zI!Axh=Ax@GX<9U-{Wja*=UubB;$xA7zjb3#T@dG3q?ZqI{&r%M)~4gdOovJLxIa25 z^!2hA?<~i;u3&l>qxc0i2K7w!+SDei?eQB?p{Fljc5&D5_+|7B>2%Sng)9>Pi)bZl zCgj0GwZ%d;(~jRKWVO-K zz72At*A?O>1=4#TodsIO&y$X$slN|-THI`tM|9P@CX&GgE3so`$c2=q-Y9|{1hhEV zb_^StKW=@#Rqfv_PQV}N=n=TP5nqx23c)4}vz>Y~f@P2Nk|F+cs=I08Dj%Pl+~bp7 zll@7-MO#j({5vmJ>>a+?>F=d4vK+7{^*0~viJs5zWw^F@yu~N=xk>wbWe|iuXm7uW zCuZSN1S%?p50{VrDpnZC;qmH6zo%`6g@x_pW8)N?)9qb%(9&b>Bph71{oFvWL{WW> zOx8l;1Se#d0N*I;GIQGLe<^omifh|Qu0Q;9y%|ey2#xUNra2IPB8S2~gm{uUtY)4g z_1p&wKgIfqdRTNXxB+>Xm#Apwin|Fp6T)J~AA1-_upPzfnZ(P2gF+^a$g^zR^{P z>i5x`X*;Fi-@f7ox}uf8^C7a_ZzLKOKx>5V*VarTc%R_JAJeI>tC)L_vRw#V6nBV- zs*Qe9K}Um(BD{?u1hVB+t!{d`E`6umrNd%SrAN`3ZG|5K84LU^dW_Az87Uc!B0zbNwRqSGO=*iWKTv!iX4wpkCyp_SG;VV+jDW_J1)S)Qp19BQ_?NhI(UVx`lr20$h%iyI| zU<42el-)N77G z4gC=1>!MyW@e*j)lza3hTV#Y*4bwK`jMdgm7#goPGbq!kZMS(aF%v)i+x2WF@O-_R z2?1tDc~B<92x0~qMb>$?i~6wdHf>9(6;E2GZA3`p)nM1cr5Dr?6RCevfm|iTzvQrW zrkR$h`kBP+eIP#`LDe0FVV}pNRnQ<&n8-xH?G+a+s&ugjSlTK5$FYf&rml%cpI?1t z7iu#nXkH8WkN+SVM>luG_8o2}{a68^jbGI8(%$U(U3eT&->lxuo=kb{I&jZsRwou{ zm1wdohh9s&>!U@@r-a#}`!Of#YQ$Oi+KX)~fduIfsic(6sAGwG1=C<>XZlt?_t4ec`w9=GZ7Pf z0#D-T>kUb}B-xN7sUVbq@rN;}FF=Og0+YZ@*U)Y#x@2@v@n>3`7hZWaqNS5ppWO<` zlv>H)9kH)UQsCs(?Db)McVYpHHGiM+DgPui+JNRTZ-FkaVmsallVpDsP973_1+yU^ zl*WPc7I3}+1VVS-g$lX{rtK6^UTVL8))^VdxZM5{mWjnxn=JlA^d*iEjDdAX#=gk$ zJ?JA?ro_FxN~k|L{?j@Wu*E>YzLUkLADzwG!A)|Htfvr$6D8CGk$=kxOTRTNjw;5g zOJ!BQN%J4z{-(U1`P|Y0s^7f@W;4IY@5^%AqtT{r6!mR@x>4a%l2}ar@8ScF?>SN0 zbcBiGTLVHbj@g*~5^zF%R?g%S(f{)4*!~ZO|LJ)s zBs^vLq`Q6}3j#4OTJNX6Bl7u`Db|=9hm6Iu!&3zyCwy0Vm_g8(@sH3JhLBM2szTvm z2yQ?&lJ-lPeM2cKH&;c|whF_E12xTeS9{X*p^|C>558P;NZ|~s0rMBTjvbqOZ= z7;*gWcj+U`vX5EI4^0QXtK3b<_-RVhBuYRD!wr*qZm*DC?i(r46GVte7eOh8 zflXJ8vcdNycvD;%(ODe?1VPqe9a9vm7wXjRuqAlThbM7 zo}x6v{xT;%ZI=4@NY*T(3;SuoyUM(vM;GE!lh$3|)xKCavlHJ%_H4iA)ROQed=nPu zpRp40ch^)-5qSCaVImZtt}a6m6N*%KR7I=7OI8 zmmj>9yJwnq+>V#=(2Ohwo3%?C)F{%xv_9O5e$fXECKbCuv9SiKS|}%`F=TMTlL6IApzOAiG~JYVK3)>lA~l-V>% z-Au#ZTb7w>sWS8|FAdEBnvNMl0ee{Qba1lHf;vGUVge5fTj0UR>{8Tz{M?wFtAs3N z)efR&$BS(2=u~;35M{xt&bzlf9a^$tWsGuJv*`OU>w0}`hmXZPWMWMFF1prfj>ApT zKuuw8F|onSjZk4)H9MyKhr1v0H6!N4N`T&~Et|Q0n!AjJ%2i^VKgB~q`nK4uu%z|Z zsWW`0EbpH9DAAFCdoSGU_|!B*JX7(I`U8h?ebDf0yKqu-)HRstb3 zXV!Fl!X06jOal5w1tf#fW5$iKu^Z{FV=fxivfWfh#YS@TAicEZpz|7Bj_LMVXFwo_ zn+VY>>q&Jk+N&yjn?zVypRrEcX<`7v{I-gTvT~HkCusXrn>zHtqR&xroY@vG?x&iQZ-hPJjv;HmgQLXTg=r3cEhnY5y?dd!fgt&;6o6U134r zYsJ~?k35-~wUb_HZ0WSE5(ees4Pj~BD`Wp+^%4Qzz>a<4@AabK2(1oIuoDh57ebXL zl#T~iEx)$7sAvDS&P)mE7gv!?7We~s{5Rtn0+D77H%wDTqkMLbtKa5r*@Qh(KZxlns!yg@w%0UVIT=5X&qwSU{sKAoR)kC z5LsYm_FHE<^V>;@9^BpYsp1Ii29?^!?S72psJ65-vM`-dp~|+QF}E z6A%|qYCk<}j{`;CoqC(D4|kK4=rIjgvF_t7XtE|gJ$QZe(Y~OS7w~7{Rr|ILe}KO| zTX3<-(B+&r9|bQo@u}=j9qMf{*RE(u~Ox;z{}SoaF+$Lz&ComWBWDsQq*W=gjze z$jF_7PjW&|YpirxdJ-0BbA!O!yK(mXsmb%`^I4Op2J2PxtBb|_X`3Cq7#adXJ!2o+ z7Y*U+B@C|ywwoOXQ-s99pp?&SV8Xo@qPYFZLY~PMa#8N>Gbc7qHLGZpr0zmd zd;Xvm?xDL9`zHe8KzX^jYXrHu4-3LRt76YAjcjmpb8~>T%8#Q${BueO(c|4OfcR*f z{p0N?O7aEj@e?oX;!nX!1h||{;%08*&0Z=>wpN>IM`x9;2>AXrn6&FsRpsE3hU7k? z8=>0f1b5QLlS1u05qnjCZxA+;JcFrV*w{0DSB#lSbwe~QW89awV$+zq*Qv0_A~}Yu zlF;-)OXVht+~DA#ZCKwxGHHb!2@PObvec*F!}wZ+vbzmeE-ozuRw^ zJ2NB~NQ5&X`>G2y@o9JM&4|nz;Oj)=LVM!QLtXX}%LXTNb(X}n)+5HkcvuUI3>w6L z44B9`StM71@Xw3qlCkDBRw!q~F3(hKoYP^M-J@?-TYIaxLV&SaR1L&C+=JgoKRMlRx z*~{3j@hgXc6C3*2CROnUcH8g3fwk4XoDyFm<5j1zLv@Md9|Fz{%JGEAx3c@*h#)GPe~dQVyzSaQ*;ss5H1^FNO|JS= zE#0%ofkONm8xfGg5IAm~X$zwaqQ=)=ZEt@4z@f70(`*Of;KR~nLRa!!Lt^jI1-8Mrf%dLnXM9?g8_+Q%xI;NBC4 z@0l?1HwlU^-gKFvPE|GzNI*R9ypNy|lmt#zHhW0co#BV1gl8*}9BUi1?Q#E98wG!U zcAPa9HhKoSt`E_Xq6b>BL1yB~&rJCvHQuy}d+H^c?~W#V6=+ekql>a(8#60sGyTtV zdNkM`v8OdhhQY8MYseu>Jh!PV*)Ennw*9t8R^xS=qh3XMYC5@P5ElcTz>=s=#~|xD zC*2QJ&?u^XO_~>*hlw!h-!{yyq;HRI^$;hBkat1I|8bA*68NdU*O#OR+s0QylX+O{ z8|$kx5m_Z^PhCw%RWEK*>~Ao;PZA3f4PxoAj@xO+zu%3Ioxb^^w)?_k;&76YY*&qa zqanbwN#tU0ig${U0oaQSmd!TGu{ng34Kcir@hDTMl47hGq~7@0vZYWt%p=BJF(|5d zBJmHaC7g^KiX~<6LdYeTyz0=T$s+d59Ho2_>kv-zbtQacu;U+wN033_>qS&k+s9dItGy#~B2)xtc zF5CMN0dHgU1T&3URPUV_*=PhqN6Vj+)(OqQiLe%02+ygAkGzZY;@elf>S|Zx8R-hq z%n0Pe?+Bh5*?l>&;kc8*f`^Uwg#;byAK291?m#iKwfS8P&J8MtO(VLr#5C2}P-4W@&1Slc|<+Yr2ijWv&c zpb`wNos8};4Dqo@Iu*_Ppz56ooBb40%Bx&t^mJeO~Wm$WIitq8a@%joN3*B2% zsfoK6({|n-Q3)QqTMWTIM^PseUozIDq-^Ji_ZF^ z#=jw^%|A<57W?nN*axKC)D{ekzx_&k22x}ivu!}rv2n0JaN`qD<44Difj|frS43Y~ zfjnxk8Z^6-LPh55_)o*}RpA5aKYY7M_GRDs5gXJ%xt*AwtIu}6ezz|MVkU0$^Yd^J z(cPovoT4&e#X`#$=!uyi!>Pnxf(ro?=IZC?XDQzg#Z#&<1)j})mfDCZUE3Sr+I}0! zx{{iE$;duep7#~xSRC-dZTa!J*Y=R<_V!Z6X%lX6&kR@Y~X&xW}owdua6)>+uaP26-U2*{%pq87B+R9=19Qf{bhPPo%% zB|>rPlH1i!My&5Q^41)r<}XUeBpu6xTb46QevlN=IhSAGSz-(PuU_%h6^WfpF8&_; z+K>bT)9SJn!2Q_&IUs-ev-%f|5#Nw6``6fyp{%S7S$9fY^gC2qTP$a*2n&-hc+6Py z>!6x7j+sD3_vH0Ul<=nr1Z&nPd?M$_K|x&5)p11Fi~|||m*s^lbq2cswF&Ms0;l;nB46d|xMpfZ zf$V`vm(4P>hV10VmmNRyioP_A3vo;-Lqy2lM?1*@zxFIGfxGVUuOLFS>2HoV4O+CxS6SRFoCNkaO(oaD(PzEAIftky z|J;4Ze;GY8VXXvVc)T<)XQAt8J@tjuqa`e;_ql5x8y5nnNfX?{7?N|Gl^ck*{Jo#rxh8T09H5b%l$XtdXnQ8Ck*CwOSHO2_#+JoC4J9(TrG# zll&{{9hc*FSEtS(LPC>QaFDIUHB*xAx=#9b!Dz)yEgWH7p2mP3CwLyAeKOR_Wp50t}Y7$?@f@2TB zwcITW8)W+;-?UyoUN+meu(H~ihVx?!s#Q(CXP?9OaCbj0sufIr2j4(9;-E}iN$=Q( z#?dYyz670Id@q^Y)~7poygc^0-GO!(n=@ry6!HW_qyP8y|7Z65e@)c>r&wSQf`O2* zpc`D9$i9-W+O&!uYmBroX5gcIIA479r6?VvR~qD+Rh-7+Xn{tH>?;gIznKsuB($!- z$2c}Gr1%ao5qM=U#F$=INWA@WkiSA;##@kdG+5^=B^ZqL&r1r4qrKQkG3 z>1w&EJieO#%J;9kmF+f%(;-t#k~5ow`du2DfmttOF>+#K1x%iHf;z!qi7JYK!2J*w z)O+Z%Yj%W!;8hivcq9ULw23-yAKP};6vvh9#G9ksq5Qgl> z0Ul4vb1F&$%7x%VyQ`NyBQuCde)Twn!JXY zjzdfM90?}ovgHEP5c-C8^$7_9i4vq5-n9+I+8=tsl12 zUYXPC7ENxqvJx2%9JTZU4`pyf^xc*>FF5`A0iAw1O$M75?%cC6rIqttq}vWC5dCq# zYz$DX!D~{7$2Kbxcy=#*?Tzja>8!$n5U>dlXBRPvh?Wxg#kry}V-d`Q-#VJzZd8fJ z8fF&1PeF=^=mGDe5b*8}?>m#(0t(6Ze1n3t0@UcsakYUif%1NFGy;tISc%XLbSW87 zTg-X&b~otz%hNhj4j!?nldacst^8>{#dJ|AFqlu~k$Vs!J9(@rY4}tF4G?B$Jy5Rkpq!AI?&is@Z~_+Bpuzs`WqvV znOv-+nVixfjfA68%ZtWxtlDF>H?_T+)JZbOk4Q#`ZbS~?AboQXV3ja^&S5|Huxpzd ziGUH2JM0072@+C7nnbEGooQ5cQu*3|-MIf?s4( z4depqUeZ8*2}Gio*DX~=cZtXn4n*qM={P2wV&LP&0V7JZ0ZVh~zXX?ujuO2OulSuk zkAx~CJ3)CiMspblLLi;NYOI{?Uz=kT>RGi%b95a^@*7%kY`m}_r#JuDmk8-lSH5qr zWn)kU8foNR4^}a|`G|~#qtmy{iWB^QKBJpgvd8~m0U%){soZ2urU0q523Cc9B;Y5O zv^$h>xvEri{%a4%WNJf`{n8Vr0lh)Yp0*r2L0$GbBI0p3-Xt+_TrFa|I zRzM(H8qPiK!@Ny*M83~;tM}yfPo85MU0!F-s|wZ+jZ3Nxpd%`sbMFsP9@a26Hf=7| z(Rl|@D=XdZ@t)ak!`*voWl5e0E!&=WS8}sj&8I-9fjNoH1>BhPUZQ4VO6kH)Q-@-H z15vCNqJ8TUcgT~B8E>sO1A+gnPn90JvY2fGEyV@EFqn7!)QogQ1o(#@(CV18TMw4D-0Cu0$^C5 zH78v?=R!-0PxhK8qw8}vsh_#s75L%3*-Oao|xrBLHH9xa~@CsIqv_RxIe zE4!CBJ|wd%NV}Y#Mgi1Ko7Yi=HEwtFJq_~-B?-uDCO0FV3La6Cci&J`c6A*Ne1S32Cu@AwM~yy$(SL<{Q- z2r`lWzWy^6=UM>V9kG=00#iZW?ary1p3gf{vN0i|1ppeqkPQS;pI9|aFZWF+t*-pppI4|Dx2=Yn?YqL|GZe}botR@8Py%`+0iE~ znj}qxl;y)5TY}3#Qy}l4uSapI>Zp903?NB;ogJ@k!ozm(YScA+_FN{IC1#s!J3fE{ z_5JF5@H0{)sCW8U;_t8=-z+85ZQ=>L{EBN=SRP3g$8rq4(!j6rYrrL#3Q*CP7GL#Rp2L+L=3vZotDQsbwu6fX#x=NdV zy`-dIAwz=j~GH%gx-m~ZkgCl{H^q&MN1Ul92UKLnj=pHJCz}a07S^)*sGewhH}GwmieDB`7T@mi3<4Hf~Zg*5!(PGFv^^a?4pC zEjrp-N&G!`qJEVrKwCK$l>>g3YdMsv?tl=I{#Z3^8t}ilz8>%*lr_pvc}N0OZI>Yn zAMfn8Pyv{Y9}69I8OaoJ66b;c>bE;ih@oCj${s3};C8Y93cXB_ClR^8yarSAn-FQN z%{Y7WdRiFSeh<#stmsxkQmSkj-+%?HS8YVvLe@=D2ZNO?=EAKPRklZ&yFQZ`cpWKS z9GF{$Jdj2Proi!Daw052#UEObyK0lqJH2p#I@GVuy-{={O15nAGv`NX)`&9P%Nz-^ zM}0h89VS$%)a$<70=o^}DM_C{nqjmosN=P_cTkia7A3{`x`oQGPEaezKp@~(VJKR8 zHSahcU@v=i_R4E!c>62mO`2>wZ9>RzjC6p3VF5NbCD1w8m=_%)Eu>j?eZu&sTT7Co zGW$!qQ)F>G+VbS6bVPG^eq~aFCgGVU#>EdfyzfQYC7s5+@xjBr=jEaS-=EI8sBUf} zNB@B9*cB>~wyxY8Zk@KLse?j77JfMhsP&T+U6v#WG%*|L5G*+RKtGr%Dy#iiFRaNL=e0TmJel8U+O(JG&?$aJO#%mZL~IPDDrAiN7D zx5v5v@U^(Bz^>G%t*~M*>V-_z4dS)1W(;nqtc8?md?Nro#*?Q0N2o&fYC32v;!(OV z)y70Ot0C9R*^j7B44gz`zOzq(gZ=T#*S`%9b?C%zx_<@ldGtIFSqyedi@l?Ti)dMP zbMZ3wDN3nE%FU2o%OVO5y%Q9SrTJj8=I>SyU}SmKp-wv;2@Pn|@~~ayTw*2l)9h-= ze}f~yL?XJ~uN+lI!dejA&uz`)qB}BNeQ+`^5k_q<7oUPx{Chhvv%2S?4-E|NN{7Xv zfSRCn5?Mn>vs5r{h|6QW4H)s%E2uAEl3a(=8OtvN zk~_jeOxc?zatNdQJ}!fDzC_g|ceG)3iHTw;Vf3UI1?}_1#(xX!W$LQ@Iqm#-r_5TKpfb1Hxv(xT@%@0m#+S<5p`w8bU!S*M^ zzwJ>`#$aY{HDhnKLVWU%K3j@}x&T$+D(jN=B;oHBTcrmX_E%SuYU@Qb32oO5PhB@7 zM;1j7_?xhJwAaOrAJL+iout*deBbp-s`r(|oxpDQ&3>!J9}q2l92pYr9xZ**z8eTu zDcb0Kx^*6X2tXzw`--$2*K!icOd;z{x;pJD!IrQvUXzKrt-IuQ@_91+;nE)R2mg>uD zQIKkS4uuA{l6B8L_@2?N)$0)T=`J^_l2bMDUl#mWN|4WrQR6Jozn`w_#`dzhR0KNY zqP|HGZl8F!>WH+@Hm`-XL|H zgm1|Nl@IjJmo0#e(~h3e#2t10U@~z^Bb@|Ii$SB(PAoDZ8&g{&P*`hx$VBkMi60x1 z462*EC0!~6MDjJyi3JOVWP%p)sXiJ1QEiU%!ZODs_~7OdUfyyaH#!^nh^4zdc=;Ag zsNmjP7kW$>Y1T2pN_FQmLd{ygLKdMLU}x%s<>}@vr%v9~yMd*%6(6qOr$zB9Ry|kS z%o)1{0e0T+u}5{Kb$Uv;;4t+8<7SN{EHiWNN12-D9a`Ap!a#ST+E+gDik7O z>9Bl9{?LRrifY$1uA1sJbckXiK(T2;Nwm{v-^QaQCjeSAYAoP2KH9d268{HO!Z|Do z&9?4#kag1KmkKcN9NeGNomP_|h11BGSqkvMNSQ{1DfXJ@ZTSiz&GW(_9@2BbfWY6q zp+0}CsVW9=4bKLicngMt`@`LAQvw@%H5Mq>7QYH15q~_A0bPbT$O$xH4$G*{+<5KK z6L|#4dMhO-^hJMI5S6VGNZlinHRR3AM{KN65{{@Ua;$tL+-3ODG-UY@?VgFNhp)0J|&cL;_De4<28fSue!PR0X8 z^=HPa{v`2#T!SA!(-yXl2&_$t&BIoono-L}Y?x_zib%)hhC$7RV7Ll32s=?mNXJ`H zhE|bjgU7c9smz0pf(e)z924*mA&Y>85H+VegbeLgFVc^*eHR2why|ysD{hr3V(Ni&=D;^mM)48^NzrvypRcvDB#0-My=h3&JRI|C~312-v&I zGF2glQjIpwF)kIz0^`_JfP2k^$qq=w)aW@NU%|g5ygsccPlD|W%Y_n-CG9Z@R8LUk z9o^lxW7$)J`7QQ3{lD5~S!KQP$OX9=48IQp95u655~_cqCeFE%mOxAzYi3V8Vgew3 z#(NvX$>{6azZlYy3Hm#6yB)Hg9|^=!xZds#p&r*87q9n2AK+m)P;OG+p3u>&%FSli zsgO$gZ*0GtJNm+D$phVGal-+;;M8c>D4O&&EuFAAOzS>Z2+bt8Ww-*Fu!KJr*}s+; zFkR2q5Jgmt9X`0p#`SpHuSfv2F-hD5An%8#^mNpbeJ+H8@c7XB9C3B1^cof*GPE$R zgS_{ibGc*h!#q&*xV-?Dfo{S2`+bAD;zjKYjF8>7Pn9#XW20h>JoK2unoQ_qMS!h= zj6YO&huf-{YV<^aW2gSROz8fD&Vy1kb!p&Db#MYzGi_kdC3u< z(4lc8tj3|YihD0??lW^@BZK^7fc9?1Chm3Oe7x-t=4=c0!wJAZLg@~cw4_^9X_YP}Wfl2t@eH5Kbuo5sVk#sz9`YE5jI?b<7SH!ah zr{vT&01AJQjKdF_qZrCG*FP-;ISJt0bO-y7OOs3SST$p!K(79S_EUdq0p}t{mDJBh zQFibEE|k@3-2Tr%3vV)U=0BCyXQ;~&m^2IzZLzZP~Uw!Z)DQff3%mQ!Yx_DSuB14|< z0RSaR!Z2^A@X{3~L2kMRDeU8MUkXw!Jp8n9V{P;n=8@&!B8jAnzV0Itbi-#(&+Nlw zeKDj@(3w)d%m3tOei(fBuXJ_swmidJM_X(W5ZO1%5dZL3_4j(2tyIR;Xwbld6H)?E zfjJ~ZotB;I5&QZDX$u?DgJ(}an(-J}TRsFj=H9 zI;pNG3#I2b#uXRGZReN$ESR6hG5GRc&C`OeSB#LTlzWn@DyKY8@Gagd<0an(LBsro zmH?xYs>!>JvivH;KJ$dKd*DGGkK$JzLd{y8J+&Jm<_byje8h3OO}M2yYaP3n66ksb zHn2cPl~Vi&p-)-u6v8rt=w4l&2gMr)jh1vcUlSX8Lu*fyRYU93FlDCRXpDq|T zLh?;WSCdk3c3aw`5EFFV?j_5z_zem;y@t3>{vlQ_fVbK6NXI*v9ou=8XwpeosAj~9 za9G(qU;_}eaWH^|^)ZS_6k?+Oj@{}k0EiF}qADPoYIc}Z`~HVbr8QAos>uE_J>U0l zBN(qQCxBy%zVx-+&_ilW#1xH8uGMm>^OJY^CZ|5tSLqPQ7~Dg-?suzzzi;6WI$2-8 z=syDoZW5#x*sN=>;J1E{XuMV){Ox(-(+7sMeWes2clEu2WkL1SHI-JxbmC(+VUwgv z7(;)()-Tuh^1b_@hZeNpgm2HtG@x`yflMc9Jk^&_&#d6;{MAmlfG5FXS;22j)jspY zY(Q+t()}KM22NK!rZrjPz&7*F>NgA4srxcHk)75AbPAS#N}Zb(7`}fts!yv3*-MH%|TYNms9m7UoFTG9Cf)#>keHb+5XyeeXH| z#xO82vS=|C%*9dGee;&ixu)*Cm<1%1YNk`ux!?+RB6zMb0GxmNjDQXVR-fie)2m-= zoqa^2^ooad3dVk0G{_2qdPV*@6A^>UFC^uNbY3!B5EgC%)i+q%mV}ttJN)!0nWYEM zgv@8M^d1E<)`thd_%N^X(SJ^bqlU3$832X^Z<)F4zo!S~f?;@|=4lQhWZTS5^63M1 z5?PrKq)D#$v#Pkt^v;76nfQ#>nK-Q4iDiG#d{|up$S|Eap#ZvzfRyQ@m_z}kHRtt6 zXCV}27&r${6y*ZC+S_|%7^q0~T;WVRANv!*Oee_FLp#Dav$(#|l}`YC@9^}=^Jt0) zQ)SRL)V!}Kyu4n=>-vZa;IpEb3F8wBzzp}8t_8Bu%?mvMR|0qu3()6kiZx#MX!ZmW zXJt3rD~=KqK_T3M5TP4)BJ{9L7Sa@v0hnnSXeCZ~i$8dv|J(6kEzwId=n8c+_Dfg} zKr-c3-jHewP#mV7ywzM5b}nxbc|%v2 z=C6c82RQ7+S$=Rpr@3u9ABOMu~Mri`ej-q2{P(8gs-&;UI&VZwo3us&f z7%rB?{-rOBF}10&-u7ci1Yxa2(^~q1|pRUVc=5o#0=yashfb>PME`Wyu zz;jn>=c{$swGCs?)!?>N{bAXovmY6~ZDJ>Np%$or8&~R>L54FRe_m)wQcMy|2DutX zv{;X0wI9mo?^&fRBvr+lU-|=GWfr5s+M{{vni4P*=&+DzIDHjDqG8P%H7eGXNT&hR zQ5hce4Dj2ds_pfWmb+x(s^Wf&rJ2ba143LVM5ND0UOiDMzlEZvd;+M{O(_dTcsazx z?3gE3Pc00Ez;x%~QiqpqRQMZX`(`6QM^ze2kr3V!?49;pBzq7%GYpc8SOIUitrMe7 ztrgAMXP6#d%>GFQliJT=H~a{?4Di}CQWq`D#aEqN50EhSM+h+>?uxxPB{|Co}(zo@g{#6&~qH=5!xU{XNq10fjlNxkt+cqDko2z=7Rb;H5O+v9aXpj!v|;+TV+NGvh~H!>mGJArNU`;ae)pDWk2^ ze7AH_IHe5t#VEW*3a{Sebh{1+ByrH^ABe(y*txC8UYf8g9VUqA;uj%jIjP+ zdaS}vzXlm^vAf1Y{3Ssd1>l)C+H_iU{KP<2D*&_|ai@h|ou|OUpDMIm=4t%2z=4YA~K) z0s!B&HiD9!?lH0`VnUSF%6yG+;m<$=7Kl5562UnV%=5i^B!%k#UUB1&5dSZYRC|;Z zH#pYxWVAXw{;lZ~0^pQ%aNf(StPHKVh4pj@JF&_w>U)6#_u;hN92o$03HksfV;&Y?;AVm|Q0D(F8c3^XyAmAaJhLbJ4cQL%5xh}5W6VtioGu%VG!h@k;P}#GG0t+R7P;FPfN+fe6x$} zY<=PkpXvqcP{bt_$#uVCvGB@L-4*~cE5hnaCJU2TaQQdC|6JO){Rz$@02Mi7V5CYA z6PaZc9tZ#`_H?)k2L`e4*TnV?pQ8*Qrk|#Pm)7JU8Q0brWJ`+hBzfO-e+1>o$WJ<>tikMfGCP~hul)Fz-d!9nsmW3fjCvnK=&bVi zaR&P8nx?H5s?%jq#~+ zFPy1As=I#&766J4YX?Iw_O$Vk;(%nBOPm>nEJeQoP~@OQoEueCq)j$}y#O>sR-rxS zp+KqV85umTBO%q%##Tf&foNnvV=x>pp_1=FUi9F6OTBr%Wrt7G9KbR_?SHikw^kx1 zM+hffEb|Kf%e zC0ouD4-Zc9=v=D2e^W7i1llp0jpj#P6-6|vQ~=y|XJ#8Y6dhb2k#f^4DIl<_1lSuq z2;jH5_;(A-VoN`P7x?w*kMl-?`)D4>wO{0PnHaa6fj*=ZfZ<1uacLA8@mBT(trG>o z1^M=v@W5UP63r}%=feHKfdHb9e0S2ET;M4HOT2=LYBymQbX4(cHGh7QBWI$dl+ z-e#MzebYeHWTvBY4XdxRx4t|iT#9=k#W;v<^oXck#vX4USQ$~DS*BNO(biyEvAHdE6STZ9zQ()Xr1ZwJ$UW+Y7ug~ zK#y~{y$aY6fC&Mr_ycIP$bu6IsP$sd2GEvJw2!R7fMJ;^`M=t``e-P(H9m9-lQWDt zqI@KCMW|$4$=B4FA~7k3?#UIA>GX+-l8+{ZL5)L+iHH;O`WPyo$yY;S4sMcegpiMh zB*V~P&VFb3=dN}Cxc8jaS>1K_TC>)EXWn=3=iU4FJbTaHzx_O^7%;g4v)@L+{+`6* zgyMVDIx1*$kX)^-MYym!ke?Mxj+wku8_=GCGy)R_P;(|Ar~q0NFdn#DuiC}Ta%Ni4 zO=G%)%La+;A&&iz_utOnGF|g1p8@~`DOYanJj=?YcE*9H>baU zJ3f<;xGdusJ|h>xD*1(R-DWsSV-TzPprZRmDWIBnjW%{AbEVF&1N8e$@K**bd%V%8 zi!wDPsq?O@Ww*#Yu5!tZY20<2O2}@H<0f;9QZQjG3-*rmd~YvYC_I&=-4HLsDF1le z`6r7Xl-rQVFklv50KUf1%pR}?S9D=s zQH!}3ovRIoC+GTFn*$~eAx;UDxg7(^OMq&FP$%#eK(P_f|yZ< z6@b6I%=@1ATFP7F2}A{CLcV^!O=%Fk$N*$kXqBnu5@u5znEA5pnSDRQjyFK~H1Vl- z1H)q5OPzpjcVbIs?E_qNYE5k=aN1miyUvn*-usaeuUYW={ z8%9E7UOG!iRP3{J&>?z_>7$*&0T_aRe`(H$0XS^{=)__pA-9H;?W|{03K()ZI!4}r zrnn{!<19tddlVgPc23`}JqH={a?`4zXFHYRPft^*P07A_jO(6t*!Eu}*ZM0eOS@r@ zULkr0WX)K>{sIsPX)*c4{ZyFZJxHT$t?rur85$SXAf?(YG_0M_#yY5i$mlCw7+rbw zTw2Yx@S0#p!LS1n1(1S9%GzlGuz4-7wcqfkIlrRtHGS1a;}Z zW`A)C)j^)VN?5_gu96~=-)+^APGelk-F5~t3Cdd>gCyT&eR;U;=x`LqU%$|_vWAc@ zao}?Nc$+WUtbyHUx3EJo&aon9(8T~E-oY44Ez6&@G>hA~r83&&0tj+4fVA4B(|1iV zeMVYQy7xeM-E>m_C%{!5RrO#wC`DVINZIRJHbPWW`7RQvtE#*DYKu&A1Y^ZSnCDed zH@X{%IW1lU#q&Dz1j24Nh7?aG+-d~XCl4R92h{k$%)ISzW;xl(i92WM`u8xie+Q}i z&w11kct}f|2J65GL^w?GZuoR;(HcEBp-6g^pd*%(i#LIX1*labEl6IfIvS6`$MSJ=9(&aV zkZAC|nVA{x8oWxeoW35r{$DzRBWC67ylfscq6EK)3vf(^Pvh}-ZBUDDtftS+3O8#8 zId-cLK5H(MYnjiU`S@{Tf&S{6kshqJgPNpTt#l;K>aLYeq&tE3oEAS6@)9LMxJy9f zVD1uLPx&2w;5}QORDr+1(%P`k=PLh-KeuewM0*NPo}QR%@Yu+*z|?VfAKq$-Va#4E z!q@R>v0i7ZTiIj$IXrW&75dVtcXEt>$v#!aoKnI^z*=DNBH;0pkO}_l@c;^S-Xhri z@!OA+30kp>r+JS+n7sLmr{yj${oh5WFSoyurl{w@C2fyb^FX$ZF52aR?^%Zy@p(XQCzc>GCLnv9 zD4J-)zh9Sz{Zhf{-xO-O8HBY!7*QYd5P7fo^w-s&rX$xfK}xGXoQu z(@&qJF#}6@U>l`I;5D?@+f7(FT>YHW%<0y*P_ghLZ?+JPUid~1m;Sn{aPTB~+*=h` zQImL1_a62!qPgNqRn7i=q{`ui)Rj_Cx_{o)h-=RWwD^^t>c;5XkUKQ0}zC- zq?ckV?u|o<&2ii+dvky-pJ30KNfkjz))4IO4?#<4i-NhbgOTvht^J>5=qZkNY7jaUm;f@FwyK(jasKA=nu&~IFdX~OJ1m~Ix*K3EUbb9VuZ%{qsqrHA~RGmcbtkT(@f|fR~Wl zg4-Ious9UCA1^Bi=|mZbD+71eNE%L^qpw7fi;rAX{~{I;e7zGZJS5rqHx6x4bF{k2Pn$0&%UP8)!MYEslmIYSbIg~{pm8uBNa z?_t{$T~U{MC`0dGOaV|$++cQbDr4ya50)^Ad7c?q#XH6OnOw}1g*#g|!tuK^;FJ0= z7h>TU0Ntw3$QNR>CoeRw69W6xWhuXN`}JCPEc=JYcP`F{G-XJF&OkOyD-T zhz?QZHSeqb8xIFU57DN9Emuqt+1o+|X5c#Xd2Hf(=tF6gH5efnIyh^gPYLAr=BE)( zlxX6_En*(u3}Va_a?B53Ts504Rx;Ez1h(@n zX-S3olb*sO2z@R@=R>#7n zI(nxP*`;)o^(0J?a)DNvf{cNbNVDO_HR}HWIzW{Eli}yv!RNOl>Hjt9e_NB{gpw^+ VT?j%#*c7a9dv}wF1qAo7-vFpbPeK3y literal 0 HcmV?d00001 diff --git a/docs/src/figures/mat.png b/docs/src/figures/mat.png new file mode 100644 index 0000000000000000000000000000000000000000..d4f5c6f97763e00ac51e3febf2db6a9e5112863b GIT binary patch literal 92468 zcmYgY2RxPS`+q1|uTVy>O=*}7JM)m4j2t@04%vHeg`AvlD7=wTna4i%PLlPa%#N8+ zHXVEW-zW9{{!bqtdOYVI*L{ueb$zeLGwpjSG*m295CqYvsVeD05IHvlopGW#1MW1gsof#e&`o9P-Il2rrBkzO<(G_ias{u(~$_ zoo6pj`6?|9$S|EeY;%r#hQ<=%bt&ZZuNxt5ur~V++>!)O>Y&Wo-_KyT7r}s*h%FJG z=mie)le@L?R$xwF{cVY1b>5k$r!gnD596#5CfgT=)hVhU_dkI{l)r9&zJ*Bm{-JI0 zJghL*3a_9fo$Jqb^89BeSQ{fwU<8e__KZ38>t2Xo?$bIB?io8Ro`;Pq3MbEL%PGwK z2*YTAwOVqjp4_XF<+gk#(7CmI>SPxASi80^3hquBG^)Oa{AAzq>l8dg=V44>^_O_g z{kjxlTU$6=m1jsm`6O-%y*zH=b_I5c_+is%rxnvU*x>xB}^~2V4vh3Clw= zPcP30{_jJAK008Oj3Mp}zTexc;I-1)kO|ZKGY^wYkw2vQWrR+b@zRL^MzUDe4C`tB z8@TusmV2KChJA8!m}Uk*utolVc`jQ0Zyt6(FweqZ?f*upO|Zh7j9KK|Uxlf{e;=dX zx4s&3?QD~9%%4q|ZTm68ls|^n{62qqIZH$W*hJ07<*>Vf-r>JLSZq;G$$cNfFVe`L zh%rasG>?0q0X9s1A{~hDGn!79U;ktWIroHNswBU+<2i4kG1w4>QSqcb`7N~fcAeWY z!D^r0ReM|F7)&0; z_HkP(u?@(ai~y<1=K4eMc|VF=M*fpqdikZ?mVDM@MSl+Z`cPs*e#-wO`&qu;rkyYB zM6OT;_62cSv}F&;etkau8BU;ck)ye=-gF!Cnkil(aS3d0%GufWx{;zOWXE!rMvU$G z*{srb-?8bF2|KKO$LrHYJ8@bU9~QlBPWkQ|)cDq-eB9-_6bcopms^u|lsXvxiFw|F zLn=k(Bi=)-I^V2K&q3nCa<*5;dT=v2bz{O|t!xacGyFK(qv5rb%CCW|a*Yl6fB2>@ zugwk>xN}?Dwe018P^97>x^PyRRyU+mUs2d?-y&XI6J_g!`|p zh1lo5wX#bLZ4-Km>Cyd1hmqT|$>n;Ifj9>f->}zw0JlSLR_SD~B<{jJ1J`Jye?u4N z9o&#j1PBT_tGb>Tin^fmF^xZ9mzHO$SDFDzPY%OW_T|#u?8MchcFLxjskmQ=+_-&@ z(^D!2)!K)j$^YT1{ZQgUS5GV!c{#Bfg;MFJhaj^ruq$c1UUWnA{cbutpJU0ObYO^; zedTm(S9wzP10HbCRGu%tLhtI)WoFxuI5Aa(-eS5Ov`@{MEQLm$SGa%cFY`bEuc;LT06rRnmazcW_Zp~L|HGyGxY|ZT^qj@7=ZgR!dIj{_LHsp?kp+- z7|LgQ@vpF%I?I^WMo#);^U3`c=B;Sys}AfX(cQoD|mCet0QTXE27Y{MX0v{XusMcvu5@$AccG0be9#QTYgKAq(&1c#@#C(k= zi^aq^Vup;Xw4$SJ+a~1(7DJOAHm73QMB&N zVQ57TU)b~LGqE7ly=N-0FKrXdkhzswM3MN3o&Xl=tGAtDE0z3O)A~gW&y)_b09nRN z-=ka3VHh`@8ibo-lQfi2u`~Xj+#S15PjG3MJV);;&GtMs(~J>Cv_r93EEv`JNHMTn+(Rw8y5~O_2q{DS|&G=h! z8}U*5^<10^I{zf|?YzTHlYf@teQ$!%sa`obOI$d7huSesxVs|UE9utwc(%>f@9Iwp znUJe~bo9q=;MY+sPl1aamB$7diA6+ttOnzXRo6Tc(YD#Ze^6bLySHqZ7#{1-q(};V z=1rjhN>SS#L~YdkVWwbdQ$}-A*J8x_uJwz~e%ILth-nB;F`MpRtKkCYDbXF7p! zU0?LX?rcd*A_&*N^(cLmV^AW6aCJ&M;U(QtyJKx2N!F4(Q(8ypsUGKts zK@EPP&aJmq&rvG5HN|iySQZXjXBBLcp!#Cciex4@5ld~z3-6XSI;TJJ40WvYfChrP z;(iqMBnoIZ++`2o5OefL?RcGCli-n6XH2@|e+?y#HMfBv7kgt$?t;HY-!kB&`E8q| z((2+hqmkUBDlq=!IDX14rR-)!{#X}o=v^g~ zJ_~acP-GveYcv` zK=Q{RkiYV;*N?#1%Bfbo%dc5>0CtEq7l>DPe8ia$Ft3_9aywcB$(|yddeM=ZNBl3Y zSz3rAK5QVOxv9;Q0)qH}2}URUYpPXqPQJU~pNOUG`s*anT$a`%$NP#hDS)%8*>mGH zN@y0p$C(wr(5=O{@|x<-Zv|#LxV|;@i!S3ujf#Z4))0*CfcY|(f^|L?@irbz$#jq% z^0Vd86%&_JWqboX?E{$VLp9WzL_y6`RGD+zoyk7Fu?BcWug`0&YLSf}o$narCRC6w zv=ZZo(YFBFRSOsMmNCVpm)+`pi^q}0wr)k7hSJ-Stgx)qDyItPYh(^BnB}*f`|^dZ zx@*-qs8C#&(GrpAfVbaF$vP7&PQ%440pfr9!{(+rh4#_Sn>;Jq<|K|u?-INE;J{CK zXd7&WJ2(Om=w{#X;l;fo9%Ek3rO0@v2$>=eei;?9S99Wms8JXsX&P=cYX7Klb`}V1 z35oc+RZL4VGYTAPhCeMN@9go=v`vhtlFrw{FSoRJAnYXZ)_h}ey!+uMdAb$X6lTOo z+sLdG0)=}(9RFG_;yorDuqPl$x5T>Ne+v4{;&_&uvtK6U>{Ua-YFx`B&WvXct~YBZ z`UWzsnug1JVrxR%1j#Y6TOS0@lLGsbzy*njZwQj7&d`V0IMeFeQ{_A@6cygTDcx#q)&s- zBSvMfYl5kQc|R9D$BC*le*N2t6RA-s&A;MI-p!>z%+w>5@3MP7?e%%w+bFK zOuGmti>>{Oi`fVfnDW68g1&93Gb?A2Jq|P&j2<*P4TUJ%+Ed^&)Y~d5LasGD%?1Gm zmsKJHM_AdUUG{8|TimUImorGi`@%JD8uWKAuwAPu03_Cr}#&=#hNgI*YAlnCW~b}gNc2zKLrajA>tuz z6124efAYImF%aaSlRAqSv#=>049C!5m1>`(_I*<;T}d+-7k>a%2}B@}UeOMLJ)a%P zW8r5o)E!$7=kJf0fMb?ITMvO~ojs5L$isaJdY>@xw^BtQ(3GbbP0A3Ca*>edacb>@ z!wz2Bh}yBN)04G&(keo_R&#@n{HXkEAs^y?=WE|?&K^5Pf-z^+fwmETAy^!2;1=Hn@Hyozw%+P_SD4q*Xu(xJgn#B zmDNVZFAF395M04GGA>Vn;DJXlnX-;3LXd}Q@^-j`Aq75cARq=f8bM=Un(R`wWW@nK zhP=@v7mgL(0+Qh@1I$^r^u*P-#1air#63@VoE zUazx1pJoEz9s3+VWFv85(^1!8-nUVKZZ}v=AY@||m~BP2#P;g2bihcVfPe{(ks5;Z zu#Vi8DdJxe*>j#ej~gofsvMWsVe(E$Ypw%p#hyH{hM^fW2U8h;I|dR^-ss%(X?qoTK;-?+GLLrg;Ijz&RVv^WKisVG-SKL<$3D$LYWVsV%#Z-fX9r2Ph)2hdArp7eM{ z*|o5Icg6`@^6e*Knxf{6Yr(iFzs*qTp5u3st@r!GhQP*Z-q&w-F)+sr)?TFV=SMn92VIO4ad$q-0q7n4?t?j_ul7m8O+Cy28!V4pz1g!aRQ_<_ zZMY|kd$a?ceD|2GJRKs#kA;{!rN6^EwGS&75E0c#UI|N{28rd%(-+0^OjkSXMN%|t zjM)9HXxk>u^mcu5JRt`+d5#w2WOx?;;_2+B2g*Xc3}bOPecL+}h;@@Q*`gf&%!PW@ zH019FQ>sMm?`E1A6i{)M3hZkDBX5jCH4YSW{mO|$IC864*7~So${8Igxfij6w|mwP z)Zq_0;}3lu9yB5eoAbkxI(l2Pc>*~m%<}h+<|Yoz0}O3 zWZnYDU2d-dGTX(imzqj?utNQSKq+HN$rR0wqk#C+1IcGF?le?9i#5t5kbJfPe(%Ev z{AQ$k+`S${5W6EP!Z4ni5;cR7o_JZ$%N`mT`sFP{*|hxX=<|aq5e`E%T(d0al$d(x z&j$g+=b*MW0RG3yeXr2_C;0gDNJCtO`m|zO*_TJ`aAlw@wJetyu3V(GV^Lv!SuYky z!Hv#~@%D=*2GYD+C~!`+Wfo*mNCDrtmZ3x~5}`UWF#9(I)hAdX@@m#ZWyA7a<=VR8 z3PM-!&21&-zs?B2jeK{vSW!nMe5fd>b%Z1hEa(tk(G(y{`t~D|BuTw(pzvR{gtBsw zG#;9OP_t;GNP$q1@QR)FO?3X+2@@H>e2*;ZoR5<^Uz@eWhTm&!aU{dIi;~5*xX%Qr zjaY#E^H$6Ld}tZ`%y(ot3kQ^rF0&N85+*P-uU-k#U6Y%+d3jT1C3~ZfZjlw-b{we% zfu^v73t;!<#JfBGu0%(6Hh$UKW^&2RfT?Ft1k}-dBIt0VRZlJS3nZ>dsy?Qg;HQ4H z&v1P?-*L5&w5^&dv<*kjefa>ORO&u_#!~9=2mHA}6KQJbvj~>E$j*b}mDtz(n(rOJ z4F=j|+E~r|wWv8w3_Gl3fB{amRYyK)n_6^z#TP{ag^XP5R{7VUb2E?taMLQlO-Th* zEbOpBJu*Bxd9w^xR#iEvquFfx8l*XTy^l%_7@ahRw>ZWg2`6B|T8z)c0vJ&|!!mFZ z^X586rQwH!!Uo|~GHC7{5iVtgVGK5T!tO>sZV#fGKuHMg8l`ndRZmIcDrd?Oc?*!6 zQ-p#gevu0lOhT4F$9YDT87mht_kfJW0aVzGfB24P>qlDon{rHK^%hVk=qaRbmUaD5 zL$oY;0t^5a0b-0TnHPbi4OBknEwll~Hxvh@6b+^)*suTt{G$0iQb_yaKCD55Vzxgi$gH-5iArvs5a6kUA1yba&I6Xv@aZXLD>Pr3YA8}+CTjc8 zzLb|=?cFMnG?v?9OtQBOj6e-Ul(tV`iU7VXK+d!tZk-^cP7OmBHCau}7DAAw&K-?G zfzCB!P7790Zv;-Eu4urilW%N+;>qGhnI-SMS`QHfEYl!HnQy#p0(;a91^#IxBZJB< z|2qI+OV3wi4C2F!r^`YZ0BBOA*sULjD|8p+c=(SeJ~O6DdF5c40M;*!y^YyHoIDbpMEI_N#;PLr8V;291%)WVthiQ#@9PxLLXh9D7q5{|{>*?6z z?{WI|v%eNs4;GgiA5d5deyyX#Q2DV+l^QrU8w?-7yP?WYG02j^i_7-gIWrn6xyH8H z8B{&iR+GJPLvOZwC3TtV*eh>@yf!?qbhOPolwtJ-6hewm*^-!*P`87!7oVZtup~dz zXXFJ+=<|J0!1jzWX9uOHz46G++qPsNAf9Wr9k9BR0j>EeF^p^*DSy0J;EC)H0 zRd8L&;fC;Xm)AoZ)%vg$ESJM;DSIA{&jM7|_>yVDwkd1gCOb1@(E z&=Z6c1D4Z9$Uz8Nc)_HU&^3~pwQDjYZH$xo7|0#~j;E^u(?!Fb_gaD?MLnba-*+O_ z*Q8j^DyL;IzAA_vZV%GMJrUt~_ef)Xuhg!+|i! zJDrEFgX8~ZOsm1vwUWt&Tj*B5{elTfg!{-2HjzUiVm3wqULXeenZq#}8ERdJyo|Br z^n|y9{F)Kq)XScf{i}x5>=YF&6F$VwbVh)hS-!c=Py&)hm^$~k#Y1d&+x`N`Nwtd~ zepymb3g}JnUAPIVbD-7&n1FY2!f6lWsUdko5uvbrb2(M+87iXXEfOn2#PPHA>%hr0 z15rxzbZp`3?g~CAAF2yyP&#VW_^=@M`r^LrlVi*F$^$J@fX{3aL<|HC@75%OJRB+_ z=>mKx9I(nW|Ut0YzS7ed@Y<#tm~zDV5Z$tvsvq2da+IN2QSwfX;F- z1LZ6sN}h}5kBkRtvSW3bUuh*sGb5O407$kFy%CmwGhL$K=YxVoWP(ug#?qaSMCEwf z&6v%4Mq`$q5Gx~6!5=;lqrb#kbK${|Z}d{;R-JG{nRq0s3WbURiIG}MGsr~?xdjD` z#mFV?I!!ihqc;NDZ^gPIl?=D6fhYfD_mBc%bz2{f8G3oyhg0~~w>x|L{z;epRN$Ih zzycMh5@Zqd0^#i=E1B+=c$2rWmj|?FAA5$+wjIHCs<3w3B4ikt9T6jjORD0fSBq^^ z{FwwDxN-S*>y{v$zZn6%OVl`^Ie0se)+>>i$umHIc0oz-Dw(8;hQ1v~R!lYsP9pV# zAW^ckZ*sPSyZ^(-F%&*Dn_HeGBhQZk1wH%j*h7&oK~ikbyL%hnjey!}@2+c=Lf$M| zq-p3aGk}hd%v1O3X2yfZt7mhrZ6DaKwoWsmZG`*SDz-{UA(z{}sn(hkwXOy%j36NB zTQSa9rOv|ISk?2cbJCK(BP2b$pFuaLkhGDKtzNp0s;av8?Ioc zg3I+tk*1ofySMRL+`UaSTU7OTU#R=<4J;)+_RMJ9UcLN%p6yxq`la6Ln{qKuZb3>? zDxc=1ZD+^;uVt*Dj>MsPg7RX z-{$HN5e(~(EYH-v;425`=g8zM(_GGuaXdKe$(uW^L35uXU!3wHdW)HfZ}f)|y)015 zR#1F>Q6}1kt!|wX-0NfsDA->vryxiIqjq22_ICsr~#udY)zSlz$t zMpt#-01%j@A17<_fVZZxcy$_U=EiX$?as$cyH+Rl;G$W=iu%;mrQHX0%0@y<=Xb|a zW3lW2mc0${mZ4>0l3&4H)Oh7PGE}n64MB4?pTaPJ4TSrw3xCAwB-v0tdw_L`mH^Q) zqyX{Y66-b0sK>;oWT}*hn3^&Wo~XgY;w3wUH@GwO^FhWTAJ0VE-Xh3zR?G%p({aad z<@y8%tOca@wOc$qxEgYuO8|pGke@Sd*Si z;TZTzGZC*rPy(OBc~|7R3$W8_=^l`vt<|*ZWZw3qHB*4cV-hu&&>+bmUdCTC=-e9L zPUrmQ8$;O7!A=q`$Z!BfOuL(@Xs`Sl6QozSlBy;wFT&cW^0*W!eiOC>K)eeait&3) zFq0p?K{{VbPDV~oOgabS(DQg*8W|0o67i}Zcu9xDs{hAO7Fz^TbHjhn_da@ngJ`O7 z{y`LKohs&pmV+kR*QG}Wdb)A>*td;Q{TwiTe57d4dS_(x?+4&SGL%V_J1if2?mt#l zFP)Ox(kdRq$o`w^^efQf&LEZ-h*ysMc~~2ExCHxqUm;ZeEHJi};POAc0K{cLfBUB9 z{n@xaa3_E>rt&m^t-w9l{!pwQ$X_+ye>&Id8cKfA)6x7A{Q%%^^Gn>De%?AAO4V%Q zbgdJ2EZwu-*zjY-&sS(YdN!}&A1l0E`}#p}j+2B)lszRP;m|8}wz2Yu|B*@RY_RrN zjt#WR$&jM-aW#LJW0hHE>M)emO>mOEkXK;RXJntny`4Uhhy3aQ z`>&%~|KCv=cvBz-Ov3+AvMg_|3Eub4tsrUy7Rx2PE0l*kRWjE`KxBxJ zRCLTqPuORS3V-A#IA1N$c@zMo64vd z!q2lq=Mr3m)nm6~j^_RK`!o=#Lw!tQow!+g*9T2+WINW4H1v_?fVM(--}mzHcnKHh zNdhcZQLRfF;KF1IZ6ri6_LM+!*LhNlb}RN(R8%fQMA^S3_|$pb`*HZzv@Y1?d?7DG zb%OS%`m1-w<-YmX4maNDSsy;88Nf%EM?yFMbWvy*NGZ1Q*(jc{*brkLZ`i-M{5ZZg zl=km#0Qn-&38B4xVF4R9nl~r$N7Q>iUI5d;M0y5lss1idKzjLo@fhFK zG`Q<;;Z_v_8?R|C<~JTc|DOr;r+3C<{*0NU@SOa9mF1w zi3g-1ed2%SlJw(F3VG4RuzWsGIOU0A%@pa<(B2Q_+jVY@QzPRfBp6Ro<@Pd@f z0JkM@T3dgNH7V!MPb}YkQwL<8I_61*~%f(=oL! z7}-=JW(b$>^#}f^ICjHfsz~Xh7L)9u3O%9GUyKB}>ke3(KqofmNybGmKxnu=dU05& zJ6MYWDg4xhBuTg&DN~q(3<4Nv#g*!k7Qj$dQ^bjBJki$Yndh5JDWPI3W%C#0< zK5pBvem_DJjIm#Hn^s1(vAg;CbhuqM$WYuWrnX5L)#GDYiDdgkP1V%^L+P_nRwwEk;uAd$OL(GyB*&(TNgy!-2t`<0q!m!haI_4! zArBcHT70_}_h8whnohN5K7u9V*`e#+*N?VTY(-1c5}=r8cO#6*vS~!`nl&Ht&YI7J zGAJXfz#iE3zf{nb{B+=9O4a;#Rx;I!1WL&`7uPZNcF&e|tJgKASMwN<(UQ65Y@csa zPfmYH;`(`$r@OaR|MHCOmnK^!Y4!a|X;sdBji63*B{s17Qk0LSDS(A@2_ka7y6dIS zRQCsB`qnAIrZ$W~O`Pbar@tofDYk{uG|UdJNqaXBYK!LtMaIc+B*J-@h09DwZPK1@ zY+eD-RCWK(*MhR0Z}_C`Cm`b1E%JRYWA5}-k{&nH`Dm_G2g+qJMj$+=9~PEKtlf4V z4>&(j=X<`$G?<;9zNjIHQSSIBbF9^>hsZua?Gp@Jm{HS`pRP`_sh}j@Mm)It*uS`) zhgFWH^c2MU0j=s5#d^@^%h>OTjbXy3oE2?mcUOBLB0fNa6s?tP{}4p-W5Nn9mQ}+2^*hEAj1Gz5W56MGvuAY zW0{K|-|#!)1MDEsL! zWv+A}-Gxqg)g!a;we%ch#?ME|oi|Jy_Xr%YKM?V5q|BfYGbsu=ES!}P$;mj9A@CJY ztGP5(E;ROqBdy1L_lpQcRqqN}%C@5Jyhx&qrkEU+IqY&2}PFkM_PN5?X`T z9Cxbnu&tVc0xwNA9j1Ih4!ur1@?>!2VinjdTHXELS&gHHp8GpHIV(R)Y~CDBHS^AX z)}dk@U20~@C0oSW%P%saGssC}z|s5qy$4=ZL>_r{M-5*$v$;4#oTHV1pG|8?7N6+9 z^_~~l@u(?KuE)FN&}T8Q8My6O^Kzl8KZCTP?xs{~_hn8k>lbL2rxhhc;O(@PSZ*5d zW`Qz6V;S9vEbcBMi9&|Bu!AGFY=?#WEbV?rxYoJT*K?HNI%xrp zToR3nF_2E)`3Nj&uHq!}38rAf{#~ytr>zmmVP_XP!WP0hcby^ zRa8~H-m0nDN=f?efMP-1*5g`EdMgWNC743ZS!w>=UW5Utc9vtuQ9^{^1bo8B)rKBt^A0^m_cxTegDBwB9vU*O_cy3V>s*3|9>PM$t@peLRs(*xOv(Q!c;5e=?-VaJM2cXf5bW z;3LLp;7rm=*pMYXuDYy+c?BB9C(6KZ_UU!>ceiu+sp$uO^m?fRgn6RvSwNKE09`?T z$icuxoFd7L(Slg!MNk;$gX_Wc;q<+^-}2f)6m*?<62S=9(sDeb6%$`jC1dN)|3foV zy(fYfZ5DkW6o8;5RjTnqaP|l2p6P+1qJ_Wvqcg3~Psa)S0)aXZiQJB9O77}}4YNSR zPi?P@qr9Ei)H2yO-e7cdQ0jav+?!D)uKV2^U#=wz0^XA9FzIB6(E;!Z&xIQf;4-1u z?&&z|^X9>-Q2H~_RF;Q)#v7Kej1S!`l|4)Mk@fyZ`6d zcp$stTdgg3R*OssuZ5}N z>D6A)Q)lzCP!C4Mm8Y{Mf9P7XyBr0qi;3PkTx-?PRW;^i_)1FAQxjgY;+Ab5crRFD zpuv+0)5e3Fz*!si*x1DZ@604yFUR)$bS$Vqd1Ri`1q;*QOB32d?v`aNv`5LNBo>nG zhij!}Qo2mUla(-C`4w<>+iq`vCB{9|gFW=#8F&#+vdwk))_Ex1%Q!b|3C?Sx*+E({ zt}mPUKJ&u+6Q#6KW<`Uv3F+RuPeOh^_brASAb0$wguJ6SFEW{SMz4BQGfH}EszjsIP1^L zP2=$apu?;7EyzBXB&>t$Q#+6%R@L*nFB1ic7RM6#sf|mZTK8nu7}XjC*tC7PkWijSEVh^ z$p<;Nmp&t}K$AcV6^kH+jwpn+jnTsj4f3hV$=Om~Y@NSm(gzeWzFw#hpmi5_gH+-A zg9R&@i?)S0vbR6Dq0bM0Ww4VKUhFV^o*wsjiQ4;vk?^^FA^-PZ$gN%&IKI-zkUSiHlwm$b(~m#KbAipv-BRwOTadl-{WdAIh%}59h1>8t5@cf{rv%s zc2K>(W@{hny^?JjcnA+XWtU?mh_Gil3&|^~5V815w0?eaDLI?5fpnU&zd`NVUPeL% zP^e8K%3hO(-bhc%kroJw>J3*{El}{zz5q+j77I19qLrvAd_XymEJot}U(kVCcj0P^ zwFXiBfVCAmi*)G^7JMSLh0?`p&vK(_3%nI5s&BWj!V8!aRVN*8yrh@-%-gc~*nZW{ zD*o-|V<4Zv)lrqk19irkn;Ki$;5M-3LKezY`KRw^}-YG38vlD;QyqUs`{JJMDZ*V})o3`Hxkb}701&uH1 zJI_G*)_d{q4#s0Xo};nwV{c7R60!$Qvc>V?)GzmS*PkJ6?4Jk-G60>}v!Lg9PonlO z?mV6gBT+ayTz)-QC&E$H;NUhGSJ>`CYgSOv3W)$=mP8-+Jej!EG z{Yes!=40cv>R`|_iG5naaF~MHSr6|ytWWx+yJ5=6#B-n!W$`PW%h4O*=UyhKEcS0S zU8_E}mr8zaf1&d>aOuIwzqmo!d!oo$X#yl|jRrgYzM6_B9~wR(D zpDhKZ&bN3*drJx&Bx}u!<+U4taTuw1p$=kN*+a)hu##5l<6ywE2boO!umJ9XUHRks zhwO6G4F2wmQ)ZLLpzVbHWxzdN5IIIajMaKCf<1!xbH`frFEYr?Iv!(1YwF!}3b1<# zL;TcMdW*e`Eqt;=VFO0Mp1*Asjw()6zhhZD;{h@ryUFe-tHS&^ux@yGr=usY2VjX{ z2;3;CoQ3dU=J zW*AQKT@N*aUoCKl9R1)dt~Q7jg51+3&>(fcZhQQm7t_K`P_~kH95$i67B(6?@1R>@ zDi5D@Gi2mX8=dRpEu>^shff1Lk$C{tn$%JAtnPT+fAg;_nMh-M0n}+=BYAYXEk@!D zie)&N?j4gG7}s}N)*0==gx5^41b%mf6x6D9smDPXNWN$oXr6uLIqw`WZD^SMb zjO*_K%KKHrtZ8+*zcTzF^8#%6l#^t<)`kLJayhBkj7Zt%OM3|d(@L(zrLU^U)fx1t zCiM{WEFCXKn7+;pYu=$1{3$2*7Oo$lAhUDSHMwIiH?OK;eDqry~-z%fa%8R6h&h|EvG{9ePnBKEK&mRLm z`?QyI!h0nbl)K_9I<7j!%-~N$+Ia;LpeW4Pd||kY!)4Ld8! zZ}IunFkA1H(P#d0pdI6-r_~KkpcUn4^)@Ui-|{lnoVIGS?DiXR0Q!8}?S>WiTa*PT zEj0It@ug62V=T~AWAMoo={#B__!N|*nLJXW`V!WG8Rqf%gTIS3FDBuwmbTQ z<|0zM<;pKEr<7fNx0JL~^>h*>I|lW)h8o;w?;Xsv%H;xAtSiP@ z0y<)H#)LqHm2V~tGw`>{N2^J970?X!V6%_^noG4VS?(C6j1E(=%)D*(weT{9tQ?ZgGtq7Ff2)(2aPczj~f{C<2f>7sMA(FMKe0C>qG5fsgPcXNuKGT5f<;$uX;pCobdW zp>N`L1n92O(%3S7hVscFzM14@4SXLOh-oL3^Ha=fp!P)ig3gx1hw%+a{#}`9SpOPM zHgfx*`_TUXP+xEmy zxkyle{901Bv)V!W-ZWq*nK16ZI&>>1GDA`gFRcMyar!J_NCSGY`0X_-sY%oPWQ_DT z_4<6ggy4P&V>4->A-`4hFG?0GkiZM${bxWkoXp|}p71idH%n69pk;rl7P#tx8UUiZ zQ*IO1Cw6>CtNXe{w?P_J8Yi4W1pOj9hN~UpA{TonOYgEC9ujJ7B@UzR`nAjl+EBj-l4K2 zft)}h5nv$aKf-Of$Z!1h-z02ycbVy3@!cMcEvfqFsEFa>54F7uBYkng>=|4*Y~*gi z?~(yNPXFIR;JLNmfs&s(*|&PmF*I%PU<#L;xvPh(dA1~IL$Db+%WY`*{ljJ^Hrus5 zF}hZp{uQ<&Raml!bJ(6F!LMv)`1*w0R?gEK#v>m@zbUjQf;TsU{N_Md>$gOxBydjb zbIBgyBMO|m06)6bSC`?SFniG*yam%f?03b>YTb1qc1wYOe_w(~^p|;ZR_PU#3xouK z{9zV~V_T@EKRx(*T(ZRC!a>jZYUvate=8$ez<>#Mv@=dl z0IM%8SFl80*lmyVZJ!2FV=(*-=v1u;WFGuV-m%o*yrJDN$HgOSJ0j~sJ*kKYk-C>B z@L04rG$f)$1{p_APXI3m$=LQj3m+(IqP=$ZBjr-`xx8#mX3Df?Z{kN(BC{@smfEB0vkQVW0T zaw^wxzEy)S>!R_+==R6v{PNJE(Hk$qkd^cIBR z{Q+91Q`GA(K(=a{wQT;c1{5;*)r+Efi9|>yR)1R1FxI<2EHe8}IP_M4t6sH$dASs- z#BiJmcwI=_iRIbuvzyQYyIcn?Uv=nk>fG-OeL$r1Nk4D&eu#`Q23nUcqpQ80<6w{S zm#blJ+)Qp{yygDEoV3{S4PL3i;d^^Txx!FJVmdBghUF2#ZJ@cA2Su$z41#) zyXDP6Z?PN&4#cpx&f!8DVq1F#vUXH1=SJJ0MT_!`>zHafw^O?3{t-wwg6q7 zdg`9o$skKCetD>~Dyky$GY+RFMSD_khrG^H(&2Ph4EKEBCuDZalzMbM$afnztYUgu zZdLz}fp+$>TntJhz8gReqd+lNkFoF=@6~a^{f&tEX{l~zWD)WbCFBw9mEZpmR5RgfSt$UeU);fDRn6Isn#`>>Yw4?eIx$OKWPtbk& z;6?010GE8TZR>Bs(g8jKQ(gZ3f^Pcc@L=@je}pk4-e2*DOi!PzV1%tW66e(UZ%&;P zhA;~`5G~r{&JFS<;wAF>&phw-t$b4_R>(nzky1cSIeo$BtaE)?7h+nquvk4S{v(*u zfm_%1Yq$tg(xOY((D8MiBhNb)Z(bQ`09N~;J85;-8a~$i<-)j}4|tusF=cjFcrYM& z8yy6g;Br9Dekc#SxpK!hVmrn}N=8bGWk?29*oIs3njAkj(mgpo@u4S&@0(jw^{Y^t z@flm@;I#bpP8!myE`sJ`q1=0|wv{Bns80Saz`jb4LlxJ+J{e$peIo2B#b=PPSpP4n zgrhGSv!1jen?Sss>j!Rza$2|bPxfh+mqJxR4}>yleR2b0FNp$W8?Suk{~cAhSh>X* zk{0>NqS>i@KM81ZKw&IKeuCWs z5Ww-E_r zLZgP`5<*e34@*(lybE*sxVmS^p+KVYZrB?IB*sZx#RR zAAnYE?*Auh74X`2uQAn0}!0XHJ`E6-$)9CV!MY#IX0j4e|X^^;lBemH(aRL*cdcOQO67df<{92WK zGO@L>U$W4FI4h(t-y4_gA|K6VmOm{Ru7FJ5Hm5HyZOOAc$ zArDaoP4}Rd2(auc25FAW^y2)M1+nE!3-(O;H371?X?9L#$ZMfnFlhCYBk^x_fM!!C zkc)$Hv*`g@9L-O`h5oT@k_OmWW`I9(rlwE59~{j(=N62f{`Gjyo#91-1pJbdUI1ZX z(ExA_LlcMFBL|LkL5F)C+hf)4S5_K=w^K}miP?_-YC)h#ZQ6%6a(dUBDVh$3Wb%1s zZ`kvqUOh%RUpc)V$o!*`L%@NwaV`(FGr=?EW?s6DMoFZBu1!W-V9Yi-{Z6jI=1wJP zL4qUejz>R_rMpByRR=I~K29WpiCCJnd}_{#-z+AG&YO;NJ6p@ja>v9l3f4zYPq=9k zw7m<0tK7=roXP0^WPO$Y_6nBM^#Xka*r5OqN9_vqnaPoY`*Esz9v!oI&_o%{MofYu z1E{cf_ce#!7heVlNiO&899NfUVc+-Ms;h2C>p~Z&M3yJKYR0~h*1lxAEoba0=(GMK z*=KiPyJ=di-gjLyfbhL&ipVAc7_)nfudV%Bfs?-RGg$YnB@!nPpI6JEI!86SpG4|4 zhCbb-e}xtcQTna@P^W$L!?px_=koqP<@e`W6`Jp@N%TFiFVT_-{i{P5eb<8d>zZP3 z{Ct4f^V6F(tfvMr5ZpW7sx!{^)wya~&-tm=dInydy2P1Pa)!~%*Va`v$J~2%gI`zO zG>zJ=-pw(|eH7TPtaLCiys)Q#?)gg_8u>TImnb9`g67^csNRen5hr`PtiLvqQa-SG zrTtNpMKWl$xc(|`=9B)z>E&CO#LwFC+D=rxI(><=oL82LPF{g`Achu~>MX)SzttuTbj48x5OnMchO&w z7`@x=Z_{Ivpaz2dc`h}VI&Zc1CrU)ht#%ao6)qVB&=OGH^bF}0eaI`xcdi0`T>HCP9;fwtzv0%SJao?s>@*zdJ z0)6!6-eT(PqeqY483|dFox+XY8@x~uroyO!d6#i>uP^`fTw-~%kE;(ISk606qey+~ zuM6>Z0)j#<|K9Dj zsd|rnJO#b?QGG&P@NGJGpJ~%iCfcM?eemziTL#iyJzXl9H<<@^?ezHK`%Hpr1t)d3 z8QYq`$rqq@E-fFlzIZY1vv*+I6QroqZ!++93(KT|)+niR5nSe_9n>FkUS4tv7TNnk zB44qt50j!jCy}U78pj%?Tqm`+^=%tnChxmFw%Ss=8YDb^Jh@LGXvs$R$Q?@u5sHH8 z(+<}rHuGK@sX0xpUP?aQ{rof(@&svU+$gU4vjofp4g<_7WHjH8%wuC~K)F@38T`n* zAtyeu`d8)WK(e*i(_C_zwaWdAn>%}WgX(e<8T4W3p@gjDv&@#vtBIXsTF!+9g%hLr zu_LauFC^D()iZmQOYwEuovW@ktyFhWGo!8khhk@t=pB95jLx5+%+cb%TF)G-r*?$z zITktEmD)da!(YV@Uc!i%8xUT~g-sh?aX>;poAZJ(tSSFDz= zr#9|nSuIWt*drL@md6Zazv9Qd{-QB@8${AZ)Pjm*fwaddeVxPR{V}GkhC>tT_B}Nv z;P~b5T!IwWY?WRLtu*N;;E$4!?UlkRWxq%O;`)AYj>@%zrb&w1|pBjDa^%{k`i_nmWL+z!Xu6{w>2J^86ypz!s} zj;%QA(4FSx6~CI0+alUVgp2}JIr$MtyW6iWdM)p)R!;_rpUxO=+w>mLj2uDulciZK z>Jw<&7&$4H!loXBX?_#`hL)wu5V-&q2F8~NhV#45rlau3(LAKs(<*7tp46;Tb2}}h zl0AY(K!yl?K#s|~$+v5zI;Ci>KJJ#e9v$Yp>G*qAAX_Un2YhKvM8m8Sf3LVop0%)$ zP-tOlh?-54Mz>RS3>F>sxv8W`(U&Qu2cTv3)-xZ6EO}w$YHbPD3q1uTB%-y+{b_hQ z>+EG%!~Qa%uf>*PuoN&3GJYPR4)lUGrHUWv9AieD_kukj9WI52aWTPu2F9hB*%X&_ z^G!JXP>Vt)r3Qs9C!c)8(kUW5YOnPe-cs_k{<0sAn8{p-32z{8!?QOFhq9gxTk<|w+wXth{$8)DpkX9Gzrev+M_J%X z2wDOrp80}7Ag!VAN@_-A2wBMsB;;8G%QKGFAp<>7h)$oL_?{F#K7Fy4ExOX+hCq-P zy?-=;)~OL6>!zM>BjVqB4h;CC)~Lz#)z*Ekig4d=gjE;x@E$l`R~aTnY(D%>GxM&S z=^JJ5A>wj$3ByX|Tko<{+hrtu^4zD>lln|8Y2kG$1mZc1!sb%W_ng{X6=g)xELc06 zBT7gn&=L+>LHP5|JQ38Z)S3-EhGsb**Pk!9<=M9ymu}#aN zKhbjR>wa@3SRS_yeDehc%Gp$BX31S4lH$OA9wE&9^jFEU>1T3~CM3 z#=88)924!0UAGm;h(S-s_YY4c` zpT}5NMSZV;tC}Dx6zn;t>XLo<8se~^V{$_G&!!)eqhAtpX)(wdCFQ5mRx`|)*F=3^ zPX@})$kn6L(IF;VbT$i8*6!<8;vUt98^MSP=|xh@iH9ka8A?NwqV?1ik`*~<`^SB8 zvx-_ZpB&q-!)cDjG)J>asG-B}`iHWr@BQuRXkT>MlQcPbe3j_5O+1wPxI3(h+EoJ( zwE4I`bw^+8ZkMHgr=crn`s|#U<*2XL#qVW2le}7UOMWbMQDGC|$DXOaDAmM2pA&G^ z?80X6*+~My>J*uE-ZGK>-*ev&cZ;L}|Ht`ere;56r{&kS2a>NxIm=v1@$WRtsV;oA}AUZSt*=|zP*x%_++2{?B}ax+E8Nh`4Z2vZGag0N}* zQHmp1zom{+9*Z9to^a-0anf+5MIZ>7ayFM1n4Go_EUiaBxQYk~spc6uZJjAm$juhP zL~Tzm{_1vq?NOUD^jcd<8zKAimZW?FH|Hz_T7jxUYzl&z9LM8>?LVtBNctY~E@8$& z@9P$lsuq__JA%FppvL*NZhxi}eOX_slM~~s!7*#$)IrRR2!5g{KXuEan-*;h77W;% zhra8c8{of|d>(%Zfjuutzg*oe&G4oOOy;ZcWfar?prpFwO`+whaWh?5laHYxz>+^P zv!*3H^Q&a5dGS8W8W$otGI%VMauAe|`y46kMYmrD&RTs+fzr+M*=sfXytp?t@@Vt9 z;yv0nnD28W8O{p4!R!!Z?%&`-=vPVM()vsN8o7GG?`SB@XI{?A;`S&F)H^4_s|hxR z*0~8JA+q%OSJc^L&^4E>EE`Z4X)2JWTf8OJ99q zFH{WT=mXcWYa2jaE7oqoi5=6c_|d`+r~iGgrLs4{ypKWlku3uc;^7Doku3)MU-4yT zwrG0(b{yAUwI1Bj6EoQz zmTkFLpwtxJ#_DAd&g!rPZIgCn0XH={9Tnw;!y~KS-FYzpYf2q@JV`PwU5mX-I{P6R zQ$F+5CoC(*l+eEVnuAgnco~`iF%;>ijqtY`EUt515xk#1qH!+g8N#_;v2-NMjidgAy(%9n$9op&43$x!oU<0|YxjcZ zTHV-jIJ+54Z6IPf-qD=mC1hEs_~LmC1WPWRj+9upCqm*y3IkS5_%Wb>lM6ob=%QO5 zGWK1DMWO!udJj<$pYR!kpOOY^j0evT9L*UThC4>w(LFNHxnU1(9`o5nX~qlqmJp#ns0S zP86`d7nRg56#3|b0hrLy%X_lXT?_T$4bNkqAnNSwlnn?~IgVBm-Dat!yo}(Kwm2tc zy@AocRBv_KvAk@{&SB8w!0UZ9Yk0zXvU$@tW|y<*XqT84T})q>l&8}1lU#=S1rmw1 z$F{EKOywKFL6B}9-%;bdjCii_r^#S6e#`z;snRyFI5jvm487L5t;MKm41?dT3gy&L z?X<}f6GkPovTS^AELw~TTNuSoiDIR+N96C1SPA2kb3d_V06{znN9gAkXPwR0?-TU< zd3Z|qQ+Da*TJ5fZ*HM&j>{=j+_{qAP$jJ$0JCI6@H}|XObT;+L8X5!2IV$ z1+g1(9o^5cx-<1}$YrX(1kba32j8k-_f{*Utz>AOTGT)g{+g6-oU}cj-A7rVGpZgp zX$nxJt6*cE)W&zbX{g!l{Pa8=FQ$5U2K7h>iMV!gekqMvih;8iVwcD@hQt?Kji1B7}=iN3@J>ux)OjA!-@@aKBhd_{6*&&h_hf;Ya z-n4w#sfW7e{bW>5>`+e5p#?4??(GM(|H^rA_Ejc)d%t*oKiE_mRz2?CFvQJ;0b&Ua zyphReD9lW{olV-e84k4z-#uTB-Q;m!9>r*2si?-qDJrg>=+BxiHuQ<@F>0311Tu%f z9@@yJuT^r7^{WK%U%zBfmCrfo>RL|i%6ydjZZ-NYRW6)pNDt})CK_^VQGeuywoz{G zo7k|q;f;pe1r@fY6Llmj(nLZ>ulH9>ZtgFBig`FC5Whh_KEl6vp+e#K2-P#a|A<4k zU(f-34^E#(Tb$!f(QR%_w%i1oS>d9`E?lV*7nLjZL-Piot-eSFh0IICF=D_uk1iI~{x!Pmuz3>yO`n@1S%eK+qCZ6;% z_I>s{P1f9mH}!`T3jI2H1RD3xB@hQpMAjf0gpP+%UJ*1554A-vdFRrKXccCdvxv7@ z@PapbEc%I8vQ>5PJ8SEOfB*+Z|I+G-u$wnI=6T*Eb#-=TC1b2BD=#Kq_Ve%*JDTjT9O3Xup&p@h zwZca~V1@^!;n4bHDizIp^f_iSe~5Jq6Xhc-ypg5# z(9Ws}-+TN07q*!8##AER9L)!_2YOa&qUzXq5(Ze1$o#6zIbEba<^{!SfVRjl-EfiW zvF@i(s(%_^6HA%3cA;gBQS_m2@9pfg9d#t~v|3^EcVJvvhwQE%dB0Uy%4|8NWAa-KF7Z#lme=vYwwUKWQtLf7LuK9*EO?`Za<0qsaW zPV@E-gUAi=z7`Tf9q%wF6n#`Bs1;@n)wUxw7?bWx)mHu|AJuQf zO*glX=}xvBqgOMX`VzvK)f&Zkf7pu;5gbVgb0r-?)*R*CwSf{*o

        4w@Zg$yMp-)cbNb4H44i%R*ly?tt$lyY zT%Nm>kT7!302xZj=FoC9?lshLjTxggr9cyL=i-@(oH)hIm=0=uPQ20cUTco?so!6w zp7g58iN7Pr)nwg(v6`AuiS=zl>Ud)U*GjG&oR(?wuz}^QlaO{neT-IDWt=n8v4B!P zEyLqOcDjaA9%Y}1gRcgK`-p{XY{#rXOz&Eaw>83<4Nu}%E0)CDa~^R%OOY)Y%HcB& zyJ3Cjy-uUVPjrv=zD$OIbmw_0c!bR~rr-Xaq?g@&9rYfkr6}Hef-#DWpb}KDkR{o> zSY$@d=$HzPVqbB()$A2xY4w;2XhzndhTNu;%O_vXX3oB!oaC`N9%CDxd^NasR4RG0 zBG<+zm4=C+*I|v}?>^v@QNHASjX~KjLv#_>N?-%Jki}>l4(ajP*A;t8DN(5}mS5YC zwo0OjEr@QG4E~iwkt%!V^QTE)`KX$*wQ()9PDt?9$gem^ zt|vo37}}5b^ef0U;>c2*oAv%4mDf#X2VDjPz zpGp%_6T{H@7!p%{h)uI*lYh49BP8dhFy-{J_SReYOvsk;Z%+|M@6TNcr~zIQf(e~k z8aSQt6;Y}mcZN=@b%JbgPA!imiMbg8uSr`nIDDhU|mVEV` z8aza(FgZG+xm)d*?0ydlB#QE5y~C-%xoPiYKC>Go8qzUJ`b{aaTs%0iJaXEwjDZ=! zxr$@g!g>}@AN|~85M>KA{0DB^p3a2%lA%L%5a2{NeYS{vF9JN^OYV_3^q;P@PEKRY zzIAi@$*d^Ga&iub(J{G4HVzt^9KwXH>P#jslF$_VG4k=&|OJ%cjYHqWzZnTquRC3&gI0e7d%NL4-MCeVQ3tc zhNqTxF2_Da%@xTzRp2WskjWvCHCFiK6BmYBZ&VqC>ByB@@?M$uSOc)4v!{zFa{SvS zi>}En;bBV5N+rxly_J@3sNf12KD8!#zY@A8)0lEz385TCheKtjk>PzeEsko;{Gf008K|M4Nx{u8<=^a{fX*dE`V{7!iSNYje?Dt#`e%0tYa z`7Vk@bC208VeLKeq%cirN7x@nnqG#QQeU1#&@1`~QY}JV_lDto{Y@aUqAx!@F3Sr} z7jetWTO}Y4p#Q?fZ}}IwH===x?Dy4h2X6T1$J;K}7m74lLpE=LH$q=Vo#&ux;C}KS z&I)3Fz?NSrZhsBJPe^f1UEs>S)EQ_#{l4CO$+mEJ{P85b%Ut8{Q=0F=?|=Tg;V~t{ zS@*1yg8%&LtnqgPJ7EkI(!VcG&3`_9=Mu+R=u$yG_WQc5Va=a-Yd{BFZ)YiDOC+t^jp@~PoORr)Aj!{Cvxw8`6w)uhcM-@rg$k+hF@lnWR&3N%@3 z$87n1z1nR~YDR2*xT@~nMa&UZp^uDS%C2el;*#m4PKvU@qtaJuoid2Fp9@<=Sw4EC zK|6XjS~2s)zn%a_km=t-cUEYmtgK9S{}fyVToUDniHV7BsB4Y9Eb14EEdNq-{J5C^ za})=HaF9l_RgWIc!be}6v^aQD1;IvFIXvmTZvOe(6v@SWs->u!|9k|YuiX2ODB?*2 z{;VJ3c`nGmIF&tyAJ`1Dztz0q$ zQmeCn6=UjwDuLAZtG)l|Yz)^Vl?O+TG26Z~6>h zia)Wd#T~f)zpsk;{wLvV^jH27>;LnZyYK$ebsF~m6(Mv2|9qW1mkyhjdj6kL{)Dd= z1Y2GuCjPHwiA*93sNqZdtN)~F|4^^|@M!*7-~Xw=W;nD;nSJe+FG#}Vj(q++vw?9z z2hZImVEWIm2z*e>iK{a%|488}$v@VAK7vDLr_p zcyR=IEQ7yUx}8|%$d2LAu=H3}Nhy=Jr8j;9!_wt|t9+mx!XcSJ9EkT?3_;VWf5pWUnYGVITyaeqb3K_ZyNL=Zxm96jPlhUT)lm=KIQ1{It8 zV+RK!;7~o=4z?n;uOp(e+Rz%fICSs?yKmRGu9W-`%FK&>j zZhLJ~4^YAJvbXG%6h?#zFy^eX?ZswB*ATszfiQRFq3o5HH-@qi!RX=Q8$%|=gGXqf zJ;l|Lh;1mnj7G6!8oh&Ry%-W(+>nw#iK}Cd+?aD>kD
          _r&jN_2NpBM8y6k(D0r z_)(D?Q9jsf*{O_q8>kWj-or?4)qwtuEhrMaT>rcZEcRcue$_NU&cbp5;LMG zy_pc7obWOs!QbB3UbZn?p}O{vKMLbj+Yv?sXR?L22Vgoa%*+mRkq3^V(C}-qf%rPv z$!KveRhGMcB9@(?p#s#6=CXobqUTfgRS3w_0DwVX@YQJEsKlv{hfxs;bpFzW4MU*s zv(}*#xnrH!CgouBMqpMyG8I)T$se|~?J*E)eb-Yf2$%-`C&$PwA9E-!(b+O!{V=yf z?|oTm+Y*k0DC+ZqH@mTp6CzLeY z?c$V1Bg|1<({&t=Z3^pjr|N}sw@ojm>a1vSjBq~wo!7$v(bc8ajeH(t$!*Nmg#9yq z#uO#VyKQY7iSCRXKelh71v0fR^DlM5VJ?P{4VG7xJNwqvb_v83NlpNkk!j?w7PUhy zc0lGIoErH~u|7jtbL`R4rPQ;u{2XkmfSNMb8nO%R>iXhp?_G6Venv3dwQaox07xI} ziUdE6!@WHw*xffOj|Q88!UMI(P@!m_x%$ZF56EMum8i%OqG(l9DiS-+FjVhAr=o&h zTqO-Au|r4O&>{Bhp@%-zLB5)UY$G#F9$j@)-~ZKsAlK_MFNP-SYxl6*ppOyqxp>s3 zqT;zt_g@1x4_qF{_06fS2RFYYGiYctei336uU9)L{UHi>kL;S}%|ADb9uKl6Wsoe# z<@>N)Q5c5o7qo&gYHbr0k(jE1i~uax9|Crnxpx(GFww>nK+J#^F9V*5W(AC47H0lSao71bCx|#{d=nBI74p= z7-gj`O}PlIyFjtf>qa8+c3C}SD2Vw+Y%x8Zv?%BQ6RqEvy z^p|hiywJTlCO?!NI*9ou93Q&Fw)|$x2<3PmEdt0w&l_B;v3iMQ$ZuHaK$`P?0oqI< z$dB4Ghd&+?`H=U|e1J@0adYRn>&`r9DUyzZo`BW4G2Q9T0qi}5taxny94UdPLMQaChYRA6R>sopVKmntLK zx706x@M7Di$V|aLfYmeQsyb87>MH%Es33yfSlo_jl{xw_M`w^z1S@5D8Uc#|OG6a!-?*fQxdB znMH&}hrvPu+r;&stI_>5-+h{&`^<5K*LHYxy>tB~kDv#mcvQ-{84b+pXG5bIt?xo> zV@ljGr!dj_J5pPMn7OV%_ssR-9^Fogco$Mn{DTnEE~9_C;_ix0RM=bFYJ?*1L|F zyxqwix*2$|IQq*&?Exnu_%$S#x1&nD7j>srsF%ac2BltWGv020u&u3*A-p}xA1?Bw z2=o9RR&J7Dm6X*pe-KJ0kFc;f;q?9E+gAcB`j7X(*L+Sr6?`YQ)IUTKL1Ko{ijMlX zBa(u?^$V|Z)`FUX!$ z<5S{)kSq`+>2-+^LQv5GXgQg#CKK~aJcTxD$_7o zNInRMpRQsXEG=g#pal=q3TSEyge1OD;1n*MxZB)gMaK)S2zX-1xeb+s7DP6WFU0ug z%LYyy-hYvQ9o@+SwaaND-7m1FxFI*x*cFE};vUi}P!5I8#E2zYlQO4EnV2aG5P83GttX?Kn3)56$F80ff~ZFM>?M8YEe&SX;j@J~m^CqNWQF&YC7 z3mVF@;1zldtRG|bB)KSuGY!xtvlRXKV!=Md+Ky-AI+`|XWu+e)R*!)3mh3XWUUP{87Y&z zAz+sM{6qrT#wgBwMu%F1)P+Yi2Op zhjzb@6mn@qMnkUEioMR|7nsO;jQWs$fBfVV%1c%q0iMdL9+&J0QQJvv2i7tlr_aV% zR?-%)k$euXLEa3TR!vly#BZ?_cw3QWkKN0}Mdt-F-E~Ehs*>J zPkbMo=|fnUp2&k_1*>7)dL(*%W}W#_HDIAeulhn=ZbCiwkV%x%xZh()wmi6LRoeb4 z)1{A86QtbT*Af&d2u@xeu_Kb6?Wc>Qa2pb~OY&pCZH5HLmXxD2)hu6Z1fkB|C*f-7 zq~4etVoK?oXHb}J_fGBSJYwp7C$%>yV)y&$D+!KN z$w-Pv?`MEPl*R(K(kxA)ykyyg6fbMA+5Pp7`78x#RqN7T3K}CvxaLyT`V1a_?aH`+ zv4NMzH|aB&UHq2zd(q*qIogn|$foxmuQL>=VX)A^y}ENP&U7{#XUnE@ZCoN$V5<#X zQ@Es5IJ$+Efr0bj1Qzy;k&)Zj$;IhdatE)V`k(46$(6xF(N;k~M(#P{4kuI#56MI& zdFSmi{!mcjGh__Z*lMF6$+n8|xI~H(AS{ee$iqPuHFKA6qa9B+85r9z@7zHfgN$5e zjkdsXrWPmQhf2L0x*xvv#NNG>7l|FOJr{-nX|n3zBbCky*T|IdG}3E`IH=3~iWlz9 zP^Bi#2t;y)@1S(`?o%H=`;PXo&5n@LrhW>38Li@)*xE3_<(rhef96eLs7+d9>`C9; zcu6G%;&~2>WGk~f5yH>kS#xtlaQS8?m5s7IpQ##6Y43pIT5z)%`MHPZ=fZ+`d=Xw9 zd>e$^bNb@95jV87FE|ZQX27tgKVi-ia?1T2Sdc=u)X+Rlk$`F)15 zc7s+34tMrvcGm9rVN__!00l#01Dwch#j-^PELsuFzHS`RWRwnqu9F%<0z!U1=?6+7 z!IRk-0;ys^)U)S;}$K|(^u{SWo_(ua2#a1~iNvqk>9?cGTE|8vQUYXBP&(Z#nb!A)_&GyB zyl7R=n|}RHL(R9F0*8i3+Y7C&JGDFl2THm#Dvg*u`)n2(H{$v+cStvM5rPD$mJx6c zs)JDsY?X|OkDg<+K24y2LALU8#C80J26?gF%tf z|E=m`^SK4MQ+??5L>rccP-dP%PL|mZ)xKuq77Jr@bAHxKgX~t^qnl!#W}Jw99$!Kb z?N>u5mF0KOOT<(J0431A@1Tk?)t8A)>oCiC9?{$YnS0c##A_LYs^A@`sFK#DeS^9A z0yJu`)Fzmc(Sy;T`^Uh^<>j1|WVsgxIu>pj8KCdc)y^gq46eHO zDZnjPN|5srk)BB++@=x-|Je7GE78y6vX>*bOL0{`7vv}=`D z?;6(q6rPeVUdsKXD5}DdWohrCj)yow7%0tdpW!)9zE0|Fbb`cG$?w|9b{)KGELRJ@ z6ecGL+R4bNDQPFZm;?&9mE_1vTbE)$2{`G5`K1dv*yjY^Jb`YoyhvE{1L}B>xv&!$ zhQ*L$2zy+IV`)|Rq$NI+S?Gk)+yg{~^Y5GWJTbG{@DQWF9^aG~WW_PdIa)hf8aH1R)`-pFt5_dE5SMBG}s?|TSikszv;>_#n0ww#tAjg9wJDU7O8oauH zA0ys%F3bVk`17AWPAgt${sxk7sxh(ohA84<|JDl08BSg+3kURV^Pay)Yk9(s?R(XV z0c_t)^)?4-q1L@+H5tw;Q%RlFAdwwf4E0n!f1U8!T{Y*}a{wTiFXki27%Xwc=i3A7H$sF$dQ9=h)mSAa zEjgYrd(&5{O&u3Bi8bVOQbV?cm=D(%@kHw0yrJJAOz-$$D?x-2ZkgC-0GkF#pcICe zt=-+724wH`f~z&f*xM}A#ZW=7@hQAb?XoHdp*EE2VQ7eW`;~OQg1ayxw*5{#w9g3MG z=;6J4_8mwXLzru;u&`fx!`|JC?3_+<6^|OiT#R;=qi*NN5v2Sr#(R-Ai?|a@{R+0b z6U@f*b?qLA%~<~Ff*DW{CJF6JUx4<~$i4+#0l?3MR&w%_C+y~J86wGQMKMOuFl%vP zvmy`;C_5Z>ZQQmPR`3u9f3ki?whQj|6LYG53W|D=^8QREd_mb@RQs31N)&W zkDCg%y`z+VQsjj(ln4|0975&$3)^kq*poDJ$*##Y60xrrF~XfQv`T{YT$VTcoV4@O z<-5-euAViL2fZTHaLQMy{k|jQl>SgP;S4o%ey!HfnLFg*mh@knzKt8rat8YvzjxjK zFj4zMiQ3?2?V<)!zgse6vQ9k|hWWp~0h0V#yAh@A=(586GDpW?iWV z+?$|Q>^^Lx8a_7_fnY$$Xm$=>L6Fj0d83||Jo^sF|M)$35y0#(L5FOg@Pm?#<1WDa zTQ-!QZWeBOL5V!T^!pC+^n^1{*cArjwc9N6EH^w~ll3o20q>AOcVX8y^bF@!bWMM~ za>plJKS-p+us-Nwux*zM@Hn_hO`-eB*ot{a9zC@xKw)vMI0otHz)&~3$JQD_G6jQ# z09I#ntZH)T->LugZO9zXSlvB`Fhrw|z*G?9AgvmFrIW?gaQ1h97<#SgCG&c1MIA~V zl}sH5`8OIx9O@q-2T$eN?y?~6$jh9SLV95+`-9prD>N@^57{6k8$ezW$tYePo;J^LPr?%SSuL2Lj1LaWkBlN}49h?$iA0mOU5g1Z3477|!uy=Iv?V{<) zQ%I;}<1GwWyFBH$T84)nhUx@}={NMhYkB^s7)IYBRVt{VUGBgXRGVm3n3#|d()7%R znAq`nw49*s&-ORCm;d)dfTt@pL;vgsOv%jb_71A2d(xMvTWH788b=nF z23=I38&7(Xy$*RdymY445XgtnP(^o&k37$4x7||c#?ZnhxC2-jsE%%`7+{l=fq}TX zOa0n0KVj_p^+S)N7Mz@%yjHgtkH%w)HFfP7QLi9qK2v9nvFA(d8Ysw~sQw_Tn}K+! z8#%9G}WwYk^D7hC0p+u1QquI?Uy zZ;Vl#Rq+$1`w1Y`|OGm6b62FItC+l-sVmcx}lmzWv z*bC`;%V+C=g^upm_ApgpGDvJBApw=bwa1HmN0G)si5vxqFK~>#!4|w2l<;Rw3p}yXRWOl zH1sH6?L~>Icl_0JX~_Zz6D6peq%2Fdl}7U>6v;d(t>$IV_eRYmWyv8kFUjRmsY^FP z4Yl;>;bEi*|1DLreIo3)7|qZ@@H1Wn4N;zp8jGuew+J0stuSV8Y+dN|iXQL0xbo(u4Pl@`#v=e+=Z z@%IdgzWMyQ${)*QsSgq`S*-%G`m@SUysi*pj{6ptv2<~eFsJq`aNNd225jX=ad z05A0WYPfUh?SJEzBW{7%9Upx(ny1q5hW(@cV0=y8o*T`xp9S1sVRnjNZY6 z%botk5EZ-3QDgPdim9@V%`CWI0q#!ss<$dGEp=g|)84#>`}BH@;;BjT$kE32n}| zE9(pJYfMt}e?V2`z>yl3!5#-sk7%duC5W+u7U4$_miEvIKm(#Af@rtE;P;{>pjm z8RX}op&?mmFOT*Pv2Ww!C5U)Bfbf9 z^1!#toM(HtwEFMK0}apqmCL`9`!5jJ;YoO=x)tN(+dslXM7^mc+0;J2Rv-7p^FIQO zV_9>3ujb_A`{yG@t()in%Gv}F@(E>6{l5YiEIfU^^9I!P_x0Z#hkuC||8p2ClC2?E zTmCN0S@g-Q=^Yrg`0*LGYUZ)kUvUS{+adl~qG%nK34HZnCL&wr{Nc`DjE zEW@ox4jVf;byLIBpviHvtuN4*Zb9FoIKQne&q7R0gB{`X7@!O)c^Yz=jR!akzax1M?uekizP90a-6^OFI&ACvU&D zD9v7n;K%u_y>A=FgD@MKKm+L`#%m^n0=tr)f30&bxkS9nv5t6kJ%MMH;Zd_0PHa&L zZqhZzN6qP1n2o@E}euprR|eq$KXbZnIw4&iW^g3!m6Rl?QRZ5xD1_-JQ~qd_Xb@ z`~0bAIFOVtcB2yA32ym45wv-jV^AoUJ`(70>;f40Ob>d`c<3-}f3HsvK|YdQ@=bj} zPsblgHtELrVTR4>sjI`+9=WA!X$HPjy;Uu(dtN1{x;dTa4)!Z3h*L*tuA%pT5e%(fenyHa|hurY9rp>?!6c#;*+zGuMo0ymgXJlOnK8eQs0m&j(7F@BRqqDxd z^tc9cQb7GR3pGv5v)1xARti)kDJAHmy~I|XlLzB-K|lbH14_Pai!itH7DM`$0q*;* zTL*XF>l;mtw8hAUg4$e};|j6$?H^;F?ORLe5|L&%nq=H4IEnFY89=dl>ByRbVO)c} z6oxh;U7Nu_3FOWpv-apvt{apH8|y;ryFtih>g9So!LFYBFBLN*w;3H+ zFG5>s?YtdLh9dtf>Ll0m{kmt=p5_iO-7Oq8IW-dhCnLA6?~yyidF$$NIa;6k+LlP- z$}Nm1k|65H!vTw@I2n@+6_sKVT<40kMTLY`<8_|Tj+Op0UKXWh={05);@Je?{$8E| zCVKppqdIEY=9kVSnS%`*)VuQk0fW&yiz9-wkDs5w#)twAc5}O{6TyLMk(B#kvfP4x z*W+xl#1?d(Jp>pHpkqRpVrC__sf!<}fg${{L-AX4AWL>&d{uS1=Z1eBU77v55uok6=>6* z6)D{iY@mU1M<-I|zV+|J1YmA^UBiZ<>~IFbAAq&o@!?+DB<6!hAcTI?>9c-Hkqr4> zk)?|$Gz{&tZ3asfX4;=U28@HfmHx@2)8&aCRXyE9=7VRSXwL`WZ_jfugjdBp!k%A0 z?=eLBi$wi6k@{uT)2OO|nQ=+lWcNHLytWJr(Z}g^PXyGJXzpq$&|v-a`;;UZt5LQ~ zJ!Vu>zI)=8(RUd8_VaVwveeQ#8&S5asaNN*^=na6+-_EmJY0SUIzz`?bCW&ELiK_~ zYoXttyU97AkahOe;`U5p)ZAxU4ODanP;ZYbsZJgr%E9>&-TBw2-QSudU&k3w7S#4m zxFoUDoahHUP_>Xnx&KFZ+%Wq`qZWIZ@x={cF)#IP*2juVKom9 z*5aTfaF=)a*Dvm)?USAwsgq0vdcQHsQp?#1M5izbmnTzU_VKcML7b@s44*N0DJ67{X;VY;AW8b5c%MxAv#~V4`WZ$VQzEg|P zV(rSM)*-n0SX%dWWBUSW4s!0ZKHUBJjl<>r=pj||WqLBf_bnfDsUqmn4~F+X89nf> zzC6eo+voyuKRzGh^Ms z3fA4&ASa~4E!Aq5$c@?N<1%@5jFO%7W9ztMOY6G`mcR=n@$!@Q&-^j#hFLk%^B2ck zb+Jc8Og>GWH^ThLs7Yt^dR~T2lpg6OTKw3oguI!NsZF&}*n8w5jB+0&PPi+eMVS1;#pZDB?QH?c8cJ!-sk@Ny_R+k$%DNBzU( zMw{bMqg#G8dm}=axY1`XejoK(r1px??0BUCflLo>d_kOJ=Z;qS*txa-$?SRco6d?9 zdGW4xoIA;x8^aKxVb?Z!=&?(@OOacW05)t~#}j&-`TGtL)!e^J}7 zErt}@m=qEgifq!MoRSM~jm+^{Ci8K$DSEpxuuX!duW(lG2 z5-*fBg(_)&W{Uw%ic{@!tu|AzTQL}Pb+-bE^|=-dn0*%95WP0|KI@j4kSf8r+wvX% zwoDG&Mc3+>$(5nPVFj8d`w)@ccQS)%h>OgQvgJIq{&z{(l+=2}h8}x0W%go3QMD1!z3TD9X45Ubh_snYU*+g{IP?C`4{9sL z@2ObTURBb?i#zT~1kP>8r%LY&`dRWE_!7Es%-AjM*d3U-F5B?ZSFv+6 zaradfC@?yTS=U@UB3;Ean!7_r1&^bzG-#I&dZraOXYc5)2-|FR#9!3W_`FAAG8p_3 z$zkfenzQ^fme;zM;Xj(K-h}tT|B{o-ckGcUF5|hY)QIXNk`g(}sS|SbIPEpv`JiDT z_Oq_<8uIUbi@agtsFdt_JD2Y{I5U=pjvto;!X;uGvE*fg;zrxZTP;(u2efTzE#ov^ z1;5GECeZmgGcU2US+%CT&=${GfK-WiEH<#=wZUtxSn;uOO=03o)`iRErpZFL4)YQ? zN-guPR?Bs0W$f0p;g~^$IUDfrx&8 z96RRD;jInSy~*)!qpGL2e(jX;dk`$OQ+5^Hd(QId)o`!Z8r;ACnte5DqxfB4z(&KQ z@8Kgoix4pi-%}S4u?Fb}Y(xHU#HPK=%G8@zUCt&AqzMgx5-Oa<%TTzO&P0G~7Etu+ z@f+8ln8E%pIimcZg8fnHgu_%9q)Tk)qF+Qux_;_}0 z4TenRSg`{67tb0eecRWh@*#?rSY%xwDPlx^9Npk&F==tX^j)LqolMJ}X|T;F;KVUL zo~4T4n$4Fy8Wx9R3dJeF!$(BQgg9Cv>zkqCOhido)jOCFlE#3iP3by17S|* znSkC4ZAr8i<;(NYv3T~*s7;i6(n$g$Ci<+r zGLE^Dr&=;9+$y@!G@EQ~gjLY^)fL;~KO4@|bToKAf5x`YjT!HLK0#JN*;9nRU))0m({(V?@EkVcj)aSj$+Q38(KD8{$PWC}){Fd4P$drq!cwP7$%!U!7RNd(KH^n4Tlh2%~ zc=T`PNGy0F@}JQ5ODhwG#`a0sr`Wu5uiHt;GZHa~=dWI3j_&m+w(hwE()nco(g~)! z-8zizW9OH{X^b_XHM-9=v9$EyhIyzayXlpt#;E{xWNi0rqtPfs`7=9strFx1?T^fa zSyB3TgT5ZXWX67GqBm!EoBoqghv3pnb(R^5*e zUsO+3a=#7!==Gpeyk#Iw;Lbc~;><6$7gZya%;?3mJkITyB7~wy<)(NDtD1}%L zFwHY72jByWg(EcrM}2t@3QP~MaMCl6LK&dRhK5<(2G)JSFbxp7=$z(iV|9#*8r?-6r{lzIIh=H3FT%B|}k-HIY5DWxEy0wRq; zhZsmJNJt|fDc$Xn1_31mNtIGcN;;$^m99;9BeBW7?|eM(dB5-8?;HQ|k9)_sdkl^T zczD)YbImp5_nT`iYuCNBhOT@m{AvNJR|5QbRIh?$)}nl(nZ2YhskhqSN59F)k2sKz z8^A=LDqiU5&4yKS%8mMm+xLMJQLhL;5gL=Mpq8XsZoUjnRx)PAQz02;a>@x+iw^q! zs{Xja6Rn|fZSL8euipz^s@yAFSzc@>Vuj;S=kfwd$;)tnc`rYBauN zl%!$qImNFwul|{9`0LB{jx}Eo>I6E5%m7ZyHa7W}9%irsET7Ih>xYIem+;H;{U1?L zK5s~QE%j|O0+RRPnZ-p|3%na z1w)~Ge$q!?yG?Mwuoml2vL*Xk}qD zpjF6j(+W^~4~^H8{Iy3RiYfmr6MvQv7pe!J`SAt7wG0aij(Uq#ei~biI};Q1-GM%&dhQr&<24f78^!zWp8k|4i%t`r*Vwo^<-ZLS*ntzabvF8aRWEhQ zqrP{%DRlQae?GSIryZY;4l&t*2!5K-jLP4Mi=>sV_la>GxxTU!%S?+#i%LdN^2_yG zh5-*=sFtM%R_x<<+ECGubYCQ6ovR$8P)_2oo}#O1{g=c1+~Pd*g;tVx9UtGh<9W{n zx2CqXEXS0dY5eJ4jb~CeJKxRu)Rp-GNXag`NK)hR76J^LH#*R%fp( zkdS)tzyr(fxrw34_oKUBywZ{rvHs>*V{9u|f{>;RTblY=#M zx{G-=bTOFAQg(EmOkGWkX=;mkc0MKD_ZMXi8@*u>gh7P^A3)#!gd9a<6`M}TT9+49 zH_*2!@vtu2x0j?q zXR=0+^F)EddFQN$Qb>qgv9~xUH+P!f=^zvKh;z-lubKl!t^2C&m73~e)NT15rG38N zg9N5{a)0@V2GtdqM-S2TOasw7lLv1@tId~?DES4KE}hrWhM zHIY-XSJrX-!&_=ebxT^#(Xbxc&?X!Wr@~wh=3(u0Ov1%y2@G)u#$P1o!F-r2`4X3SO=l4K;j>U zENPtYpU!V%XIG?S)2te>trIjfsnczAW*z)MnUmucx%+!dSOPTEi_z;tSWdM)jpJ1> z@mnc0uOS?Zx__*EO8>Ii{2pMX!)Mo^g{5Bpvl9=xJK^d;Uum)di(;2F(35v6-x|6m z;se6X9lecK{>nP)^o6*5+-vIvAMKNf&>JHOJfs2aInm$kbl1Ohs$hw$*N(L*iHTvYM)44Z`a$>|Ozo^QD-3jFRbV{*w0z#G-T8=8 z31CmS*1|(k1-By`ZGYd`#NbkjmG^7)!LVj$tRHwd+LYp+?W&*ZBX2eeGFcd2(wx<@ znd?<^d^-46noAf-KMVlA(STLp%{W#@{*1Wz3+b9$q#wKTnR1i|1zRsw*N-pj@_TE) zFlIRw;oY6;xrKrT343u*s@kMFefN|chvUeT$%2A@N?U|?4@apC2kDOHoYOwRsbf`N%2rquxXg)s+G`R50e&;=UZ?dN_DbJ%5Wum^IRVz+puyA45=jy=gEm(ysYi)~|PaHsW-^^oY8Z>c+k2Z;+V`)jw8` ztm75SKVDF(&r{DqJLiS94zE7XexLFOy!g5ly5CSe2cc!FPI4b;c69M1H|Hu&;t^4(ceAi|y1n}7$~7Y` z?-s{-vh-zkA|dqLqXc_Ansh?&@-=tr5e0rJwRdneL*JYuMwZ8?wX{Zs?Z@&p4&GSI zrSbumtA=N0bkTUGrE*VKtLHsm*8i@WtwjA+uRs@o-AU0vU^P&a82_1 z6FU15jAQ^)^)d7MJ{VUKnr5p-!@iYVfwY~JV#;c*F2*cYDRq2k^^))M^0MFM>b0ZL zxtHBR9LvgQL_vlf3e4jd9E?bolC_<%Qf1j*o{K{*{m(r#^u~A=wySDoHa#g-Vll}U& zq!VUmW_-Z{^19Dn_>O1KmKUK14THK|sMg}?din;~YnXY@Ju5ML^~T4epC?Ej`ml%G z>5-V0T|I#~+g#E1c!&5h-O}x8jKo}3`0+EgI){d!!sG&{Jox1w1?)~3?=`(yH@q~Q z>8^&lm0o$N9OsC5o<};T z1$gN71ET$28pEpZz0S+yCTG*vv~k>i2~#>*H+@8$Y)ZWyAlyEG({D3*!1)U;_v=AV z<6o_eff(uMZ5X~+H6p->-o8#wEs2)YC1;c^sD07y{*@NyPV##beHM0ORpR$wH6jBe zv$v;)o@WcQfE|rx1pM$;egw{>Kh+=*uac<-$0w@kehzfuhS}%-^6`UmJDUchS>l{- z)bory1U5EjVsK{a z#}1&(6Km9k4aAv6i1`u___ixWP~%7L)GZEy&aB0yLF_0gz>kw#t|EZFYIz?+an>47 zNgb@huaJr)$b43}ee}+>`D86xSa5WFWOUGtLdu0kYEa=YVQfp(mmDAn)8vx7hxQ`c zIy#E8=QS}kl_sNl7cFeti$pDo7LET zDp4k{$0|}nji~i`AV(7Sw0|(dK}uf_cgpy4@|UKjx@!+x>2w?CgZ&De?*VP%EJr{% z8Z;)`^BAv^oOq1~@2j*P8yQbtZe3rW@q)IK-5Gb#(3|hx>)}cunUkqFM|B^?Le6&D zdkBVnkJ<6eyu;MrlAIB;(E*2 z`faI1#F`jwlc)H$d23^1nv9cF9J#*`WwUqQZGmq|4fCzjMP19QPgY*5cJXxH&9L8e z3>99}a0JH-hnx7%x?|Xc+`FZQ=#_z4LqFBj(0G{5iXR(OePYZiK~SD?VrQG^z#vhP z%^~m18J7~l`0w{9Bncb88qARTB|&?=F@a|5^i`)h1TJY`531%hpE+gFQYPW{@=1-> zC=>G-Q%$+Vnwq7+fY!t=r|;K9%>Iop?BP-b?w!ZJ`>?8Tmwh*$jpm!&#j|o z`$~L_+1A`fWGw3AjAhs9tq;a-6iITt`9N65wwav29u7Svex#y6C@-fc z(uv-ak26hLp&_O}s3Kslq!f>Ng)|A*Zs?~U)V1sg){ev#r6m6h8El>Nlez4(-yM%4 zV)h;6&%q?UsZg1uar<6J94R(C;Ou=w$wuh@vR7w}sp|dI4x{JUrkhVxc5%n!stWsG z*4M|kPF!aVqXwUpMD|*hO>=?w<);tx!TIKd$a(`}N|Jf%k_8X8NAIWgxLv?*eD(KH zj#olH<_pF0#Cl^lz8yMUpZWIV{P$D(_qP=&vFF#r6RW+gmQ1g?+@#l#+QfXrcr1{m zI;THUsowSQIap-MBIR-@eX+M)j?oTsJZq0=v;ZWqvL9+v$n=5_nn8?=>O~S4HHW;; zu>}CTW-U*8viI(&->xH@X2PY>uyyM2!!dz_9dVp0!DNf#x22^Di6o3}$oJlZM>@aq zaM%UE_w66BIvmW$`@GsJ{(G45EOs#_pFDpWVPD=Mjh$tQrZ0UTfvT}2CMI4U7#Pr_ z?T18FIrvqPz;Wb5R0OLcxm(R86}?Q;d6^Cuaa7Oq>{j0OTPR-r3Y=&D;}tl~=FdkJ z`u+F+rOWcJ)awnzkXf}9`kqK) zF?tlj-+m~4Tse$Jd#Y~V?;WbEtJ~qMR`kY}kW0DTZi=r>)IH^FVr{){Okj|)u>7~s z%$N7q*4Faw>)lJFrjRoK=eDK)Cy@2OCZA$xkVeWyc$vq;)3b_qIgij7hAr?LW9#fZ zu&&s#y1J?pz85uLQBhITxnOsVxr=glch^aZfJfS6SP6cb_uN4y$_GQ{4^LK+Ft5yb zhG9b7;4&ozml)pmLf@O_kNsV7K0{g>8gx6HNx|5oF2#Q;8@~h-F^3Z!l>a@xT1JlR zz8*S12TuR}{aWllMO(H1XH)y%>B=oBg5~>q|5CO`jIYV-ra$uht8gxp^xr4s{O@Se zf7jgqx@vw&`v?c`H077&7gI@TkTF{}zx#GWe8=%l6tlWf&&HE!eQ@J$v)M zf+se>Y%f9b-j}G&>-@~#aM`xH6vLo^3(@w@MIRgdLpxL92olmcOHIFc!b!Dmp7sjhPom;s&$;p;0p@ z3gxU5vexh)FX@FoSrHguzHx-GeZ5#SW;NbfDkgF@gjzjLqUw9+z{p@2P82;lxU?GM z=>@Ud%};q`C`p8KN3HoIh06^$E_zq|SenYpYO`^5j6TSpQ@#Nis|>c3joFv&@5rOZ zG~}k1?U|Jx-NR9tM7;zPFU$S0-&eh#SWlTb+Z$SRogzm`?tt}1t;N7QyKyqGb73cD0YlM&Jb^7$4-p{#BBd8rr7k; zF9HBFfq0Z$1^LDXRntgUxF!!==uL)xbj`2+Ty?VE-f^SC)5Bt}LhS1Y`8f%Ty*kT! z)14aX7U!$qCOn1BX;&boF@>^-U03&&4cEc($b0jXDk8q*VRx6kI3ULP;8;UVLwiH+ zLo>7QI1*PhF%}H(oJ2j5gG)&i-&5@DCqrIb^0Ad)hVQ`!Z(Rg zti@&dJ6gIZS7>>RvFa>T!8y}Nl0xarq{`vS@<2zNzDD>f?1e;%dSo=;?h@cS-){r2 zIk~-JxVMF7f`6wI6%I(29fT)mwrAik@~PJS+GXJWx>gx*rH?|J1gq9h=qrP=EW;PB z)x5?H#)mWpRXkyy>8(0@z2d~)mB&|*lAmHPPn!BCR+qk29;mBzAe9?f4~(>+kXV`R zA$nv98V36L2&!W?K6}ROn2p%}Hp$+Shcn8PK%^;xuPbk#L)lQda=gw1iXRFW_NG^v z{QSe~-6F}LoS~4F$q7~xmU={xFCA8x3A1l6%DElolJ16?M~Yy|C04^uqhhH*LO#!6 zBXI4L#^@%V9<}p`meh>lqD#t49#v7dtcQ(1FV-I>Wj>O-kos#%ad3Qk=sh7w?~~>v z%@-)Kfa;|BAB!^m8H0?(s7K1k`|JQWa3~ z;VgQiTnGHV8%BGEXe$Av+NK--S3RN$P{On2)QlBB%sPp>6oQOY6l{+CeUe#a_32%; z^Iegk4It8v_bCg-ce>x-LKhaPRYEL!1XVvj7afAjzFaW(wwhlJ^Q0W{IDvXZigf5L zbjZd6CMA5@eEP)Jw{AfNMW5?|@)T@$oC5T4Fe~d}^M-7e?|rNb5-Bcr1$v%-p9dvr z`v(_G@qn(6_FLDlhLxkOblXzO9H5k@_=+7E3l6@a#{#z9S14M(NtDdzQm z`)xu`X7^U}gB|j^buSF&BSQ6dk-)sjE0~eTT4o&U!og-Hy&t`yI0Kaiq0~e=*94faqNu=j4>O9N z;Wr`I7J?~f@D`QTa;?;=KjD4ZrDKRoQz&`Z1X*dr0p_$1@lfY3 z3A~|ONlx&}M2^LP$=>p48PZdLD`ywLuF957E7(I}!nba$jCLFus+KXZctNrXkWg8e z!>GT&6bD&?QN18;p9U0rd|%Ts$zD&_shm&YCK+ZtBH?FH6Kk3GuOQM=18FsV14nsL zYLL;Ziu;8|Pp@Nb4VaE6bjKwi{UUoQXT;c@v%e3QLZ{I%rXte>F=*}8B!BY$Lp@mH zawBIoE(qaaTi5(U4IE_U#t!^{F@L+N_&9#I5``*Ep;5^AXKt_JJKb@rKo67c-W5A@ zR8hB(3Ii(Y!-VHZoV|UwBpGn7B#;s zp+GA2ba!+GTxt>w6vUrSJ%Evv;^XHfDmHVsh=Ua`N|vTpXJljLQ)U%5=bD|**PXo~ zbprMLA(!x<*`b1Fu7J;aL00nYMRG|ARdp*6t`ClH+(MYU#yDjAD36VipTNb$d3-m*x*P zG~MY}gR>kU7;1m7=^&3d2ToX`p>H+#mPK1E*_me!AdZ;c?n)B5Yq0IQ;8fA5qYz}g zLi6o_0W~oP3>aj@C}Sv|)Q`I76Jysz@HoT%&90rQRLjf0dHQjVUw-3MC?7NB5JgEu z^a?~j4ED0@HR<$2nOgH&4)&^>biLO4Bm841&PyB8LF1j zqEOEjxP+MzO~VIGV|`oD7S7C`6~^I=4VFp%#qae%Eg26r3SGSU&olbZhT)W#N9W-C zS`X;P-(mLuJw>X$_rAmZzMj+l$vG+mxuHSQMIEgA&Q7mj(Qn<*5|SA4o*(C zwkaa5v2pU<7v4c753-K5=fLQ)@v}B%i^Jkc&~%XcN22l#!PCF4pYLULvM;Coou&}V z$grl)9D$FDz4ecDG6zv~#`-BsN6{V~&zFK%%9vJw!eC?d$`h|{Vsf1ehB~lr`ac#pjd;A&faCne7r15{0Ja&p0b21 zmPB5=l&k=5+bVK5SaWVTi8vc!R^-hzAbz_>)C+yKPk1j^t2Ubfm!heaN!zI1k@(Pqht;dwcqK6Bzj^+i$m2Uk z${e1Jw#N8}&>w4m^+y$C-DyYK*X>~+%{dsN_&*(KGU(f+Q^r5Eo-&e&COOp&h}$31 z>lRBCAH7<%J%}VYm4@*SGABVWg9sfN?tdniem=CfACO`VWG2gkIl@#^&%g7j? z+E%Unm$v=ec1)iqV7~C$e^=U5O_Qu!H+NJY2RVczfq}lWNfSm_4&q*}6mP~?9&oGi z9j^CENNca5HCvXG#eHYULdxF0g;S*_?^qb`%?jAQO%29h^8;k3XuT#zjVasz!Ij6l zs@$RYsB01b$u1fOnOq=U@dqdk|A^VQxUabU>@Jl_tEl!utzty$9!6EnrJysP5q)jh-4}^69tJuk6O~e<-Y~;1Q}Zbj4l2l<34!zYZuf?uLBz#OpUjw=dWjtw)TSwd&X`4 zz)lL2Y~l?Uq3nD@7%bGR8v5xo6i{Vwt!m|O7nl|g%);Rm_X_E(hjpGsIWfO452h}p zC`L)u>Q@DDaywPWP&0!QUWMI(@_LLGzY^9J5Rn9fNG^!L{6>E3zHL%6G1@vb>k42p zyMIA3!b)1`Xl{R7D=8S>6dyPsLh0`=sf9dW`TYcml>nHUPD3$ag`&EMP!uB)g&yNbFJ z7{s)T+dSOdvpF|OZPW7m*M>p)9R}L_f z6+7)tC4S(Ej&_FmPj1m}>O#%VKZ(K=pev}uFq+qbj8umM+oky`vj@5t#x^Z~||V>%i92X1AQ5eAM# z(dU3Xs7(y|fL1cg3a{V8TNT+t=JyEO{Fv)m&dsF1N=0sOF?4{^0A(g|t~uA)NUXmd=`~8e+m^16&g4<$oww5N-{>8Hh0e6BV$q18@(Nh+=Yz9kZ z*1c^KYiH4Ws{XOI+0l_Acd9=^gj!o!R)t}{cWyMp?WY<#u5^fvfWH-lEhh+*v$SNl zG*AHZ4BXR);?Y)NlZ^2!^18gv(G#dxy~zHLiEWCIhG)SXSV0|JZDPpkUtWN(42QhJ zqKmq~i601WtV0gFwg2wYm*Xuhg-=|}9^unK2q|XmCDCmtc9-2dLA}~fA z-XospShKUUo&`B2P&o;D9@VOX=)V*Em~}LhNda2}V`YtbJ*d-Js=kjOsG(>A0vl8b zR2b?%eTW@w(F#G)YmmcZ#Y=n8)lWahDrxRnT+6k9eZyO4oUOLPyc!Vj$7vi#if z_WGg&dMZz_- zNFu%)SJy%r=pQQn$lB{7o;1uq7~T;N$J|->YDqlx{_Q6^sT!cyy!l?`ok2+k{lT`v zz>?K9CcXhI&j3N>7`7$KUP=DGueY&K+59%ho2}}bN)04OJAW-EW^cJ!Ehtpd>NdE; zaX0VVGVi;;OodEh0v}9Ll_rcgjL)G8$)!t!J1bI1}NfMju zEBE&(6)~(nQsZL0iVu&=oyi(-p2h`mE$8C{^Iv)ZN&O)lQi}Z5u8;-J%aDrjXXd42 z(FMm-okK;@i+QU~r&0O{0sH4UeeTAstDnnC?fc`BK)wmkmrHt|7XH4d9fkUG|Gy&t z|6k+!%O|VXwg8^jBi<`7E(Ca+QA&g7K$xxzV0WCSr{^)#74g*!dL7bPK(1Z^$?*rn z$Hk*(AKgx3fo%sz$-oKV)&t=R>)gCN+`vk1Fy#jJ``38@_bnRcU1~BZ)|o~lObGEr zNXoU4L??;A&C`z{%1W*8iym zfXkS^C9SWmwfYe~xkvE%pS|G!XJkj8@-N!jM0cQWb++&Se???KL2PW5A9T=B>xjELafY17nrjhXZ61?yn~$N|1Ay(7Ve+V|1VyMOdJaQHsBhJQiG(-qNL2( z%{X)zPNL&6!K5R>qyi2nfs~t0%fa6Ix!SoVHS5rPM!lui5*JO5*s5%s9y2&s1{sE_ z->9^Feszo~{4RmWX4;sGgUGyud!Ms6gCDtWg#-#UVJ-ELs1VSStE3A_GbBfa*_gAW zhq!hDW?x#{4a_q)by&RZ@uh>CG7KU!ellhSO$^die9{DyjSz4C+RlxA8jNXHtdwT0 zl8|aTrkFb3Y(0jHQqY9Vad8MwQ2iYpnV=I^vMK1haWj?yh4RrXpMBPBn;k&zG>yZ( z01HklScO`TPy>q}SyPj9R>L*lMk-He(G9@p=SKf< z6_Y~>O$_Sl?-1|OY0L&Fe;(61ll@Ja^N?hP9ZIkJl4Ffe7`ZkaPA^`qxaf|#w~k#v zbBO7!h4>+_{;=*vU$01Dz~=v^rW8b*7oZzkK>o~MsksFA(h29UE*09=rGcmW3FRECTf48D8IY6@ zVE*@V9Y4#*&+kaDfOKdiX$%oKG3=Of{x<0{n(6OQApEGXXASm>|5Ny3yB3~EURUGS zheAC`B!Bz)ymPb})OSGr1;T~Aq)ron?W9uBQ2?u(6B#tZS8bf8Q>@qfY2gTn(K6fm zG#OfvFuQ*d1mseC8=Gx?_!xjz z1Y}3h#D%*AIfr^De^6iotl-{*C-0#bfjR2Qvx$d;t4;`Jz2`A@(bVzgi~2}Bh!+f? zqPM?x-By&xP9zEa>?bG$3$;kDXvbj!vfDEI*z9@;W8o2_AXq0`7l@*v%(gHrc)`(d z2+BJSe#B0%HQZ0Hnf2_1ElztbPHq_(=|E!Znw2l2q&bH<8!5$a`@R)Bdy67Z54nPq zdm=MuCrYPIa!K#`=?Dlm5@6G(clF^FK?@Z4YvP6iWfD;Of75 zjD3VKK=%IvVX)=HzHGq+oPy`->fm4Y?&-AZRGZiql1n4WBhyEW=pcyfT%5O^dI!81 znp2gYyTJ_)HUYj7qp}Q=I1$oJDg@X~Uayuv)z0t1&_Ck0KX6!62cziEh&u;s>@9uq z!Y1rK`)Xr0h>Q(zd{^R213-}ki|oe!MfYfaj0xGOjO@R)_1<3VOE{dyz;Yc$R|8_f z0}W{~2;w+Mqwu%$__?5oe|Fpn8Y$^MvmsBE8f)akMydMtmZl#DDg7I^S!a-qL zsG;Six05A_Lgb#4EebZ|B4#!+G{vb9-;C)MHY)Mwiaj!!BWm9bp~4oUb-?KrFP{ z0NZ9$uvL=1t9Fu-i%;E0>Hrr0=!MB zK9^vDKj%yec~`NJFVCp{+r#YFfl2?4I3e>9P#W*8Jw+~2oj?aBJ1-&&0d^ciq%L!~ z;k43wCGZqVpBwDlL+#fI=OwE}yyBh5o|(O-OBL^XPYrKn! zwT!_c)m}_vll`y^fD1}!yB{ugg~n=4^GHy0?D&oSXp=TzpOfgvYsbt;x7G-l>E#$! zSk&To)hs*Zvf@R-a6Nrt(_dT*J7eNp=%rjnaZ)^$@$>Masw5ws*!NgOE$FSnDe?y{A5c%57cB&nw8CI@Z<`+3K;5+XV{1BwfQn*VHO?=7SP2{qQ zrAQWV{^>kx8?vY5fu(vnKhseay4mYSFzKwLf!_$$bYmIW?puD}I*E))<&`<&5#T*C zxs^YBzU16q9RNlJ=(OPggg;Aa#|A?z^?s_err}u);Qy`o^2{}b?M|8#YU+G8atQ%Q z?Hf=xjoQ5=GY*Tc3AU*{IWS9E@UgMa7eum^fjH0Z?5I(ISqE0Cys|BeK9*( z8THbyfDYc6G~8JaM^cPGyM>*KjUfuYNWlE#obw%05)Y`+;;0&S7=&SK9%eLengCic zDn+7v#waC5pG9iiv^gY<~O&WS)xxsz@bQ2|*c0+}z_F#?VnuSHj8w(kzN zDJ9l}aV?3Vv-c1c$x*27wLf6rasPIKpC#}Zj3!KKqqf2wpyARzyF|cYiZp6dpirUG z1Tr!A+Tw*tccmOG1OhL*s|JXehU_F$_YbDN0L_v}tTVokv>* zp>|=Cohppnhm%Llr2Q;KuAc&VQc1m?8`|J*3Ekx>wsmw|OrU{tv){mi_Mq&*o4#jE&^NzV&0J(2;d@)3flKer)Odms6Ie2=% zq;NN*=-u+S>u_P1)fI|NPw_nG3;jri+h>!zH1@X&-+kj%lRPd#Jn*7}zh17^!?UQ` z1ac{UOZ|=RMTeEYRt#%FSxaIFdY8OtX1+!xH}4__;92hTIm!@GG}Ei$A3wdS{JGpA zNyn3VSRMQ-<|TCo`>Xh2x&1iN+Y=y<{Ii@>RJ<(4D@R#vQ_T%^gHeeeieolKRQ{MY zq07Fhde447lez@wFW3*a;REW*D>dx8r1^pssu(gHQ$Ksi^_P~~>o;fk<;#G=n0?RY zgUxUNd!>%ormz_5tGLs1*m6iPgS%3R$c#@s6URL}UjQ*waT0bY`VFOW2KULFs;JZT z;TF9Gv?w1Tm>^#s4n-z5wS?i*ai8Z*ioilC2)*)>zxCpWFuAfGpc;x9Dx-Z-8cn3X zz97dvwbL_S6$?TsXr)@xOCUTKYQ`S_IM!TvZg34pOKezZ?6C0p00PI_1PZC{NYjw3 zzU}>G@;a|85_d17P~>$G;67{rwWxj0p3gpIWHo*DWVU5XRKNM|@1-xXA#qgf63MM& zNJDJQS3eisYkjx5_4!~Q%1&%91i6*DzZiYPHao8}%P0Npu}Q0<#6ha>9un}&f*I+m z_2DthVDeOU3LZvoks6}hDcHjPPV<#ly=Z^+|jJbKL4c%DfY%|e|APF zVNhr!>al;ajTB7vy`kV&bsXJH$LYn;(1ujJ75b%ZPU8uKiI9T5FwJ3QhhMUj!jt3W3x^WwT{|O+#jY zvX_F5VEqakhF$nN_9oXD)4AHYLBy8ggp`zpQ%gk`YZlGF&UG5PHaBOh)3KI!$0EtQmW z2q459sq|O%o-koicF!{t!B|DferkP1$NHAwr!xM{d-Rbk$ohz|)*sFHSogA3SGL2v zQ=#=!^K9oMe?t7Zuz0PcTX;Km97g}-{4Kf$UVj&tOyWj3x!ra_&(`z@mRCkw;H3_{0!p)*W0dK`UD>Y6vy!009 zy}M@hImhCdb~{9}g`e(~1m%~MFV3#37Y0X`Q@Rcss!wGc5^b;yz9ZL@D*Y>WR*rzp zv;U0uI>(^5xij^XH_;7(bQfMpZr}0$Dn1ugOY^=vp-W}9HTY4T&#h0Jn-``Th0<=f zc@Y(;e8^yXNTp2HIz*>+Fb{nmy5c+2iv2z&;#?FO!$&w)JPn~(ZjK3_p5la>nNXTW zR>5Ze{Cq;P-lKw&@YngEQNbUdyD1SLREN%m#W35k*0Q?80`$aV`?i)@+kIsk^Egdp z;S9&N;Mt)ohJ_ne!%s-S<}^B~9GvAO;Jmd?)AN=+YO4ZaCu?`kwhQ^n>&+&Rd%;aT@SgoGAKO{X9dLCyz7a-4YuI?r92!ha zOo&BCRLef2r3o^TB)53ObakF`rL(A9HNjg@JiVUc-Gi%xm(9X~P-AA#{dU&7pFcy^ zM&HHnk@K&6BC3{l-1<95(AhtL=;qZXQgHh#@=LN)R-d_R#w9()>RXMnAC_z924^e! z#u(zaJSvksRIQs^lm0xu%uCl;GVtH0z(HU0v=xA-i^jNo@u~G%sc*oID z35N?&fuVl=JbF&qI60g-!zdB%SJpRIwcqZ1czDKrWHEI-`pa0)+b#R`^l=CWHqLgV zm&6R0_V@i{lm709B!`wOi(j6Y1TLcIW7_SR;By(5zy+C0J@o%k8ZhmZ=9GjL`_UnK z+6=Hj)Hu&pkye#1Px`IXW!~1eERuH8>oP8h@aGYI9^T6La28rVH{(R~8|;9&Dx1Po zQ@2b_QaAUGTa~hi65Mju@qS_r&S|RRN{0%JM;Bc)&}!>Bs?WEke|J}zj+yP&cxcS3 zL@+KMguEoWNv6)EI=5dsq8&V&=KfUg%h47%h3%}S%*6FKWjD%*?#v()`Ajl)1%9q^ zHo*yQm9fR??ysj>kD1MN2WPPwqMYQ#m5+F4I}b2YT1sqm!8LEQ?$) z1mRFiJ$xWbHSzNICyq9bw8{{}7NgyqMzBG2H4f-I#(TSuCOy646i+Je22u3p!wXpaHJp~tkID?s z^c=J|`PLTkF%O&sEO$cX8w_VPJH^e|HO=A9PQ=-ou4n^@H)EpM#HQ5Jf;NR~R|YAM6Dv%NxMf^Cu26&-1nR1pTq3*{dS?_nyv zx8OR^kiO`?xN5wwg^PS9c=Hy{U3w#u(Mb>Q%q+>yF~;Bc04Mj>_euY%>}fyv9q&L1 zw&Qs)&^BSk<} zIL!)~;i}oRKks%myD}kB_*eQ|;&bFr9W$n$oX=mhb;s#Vv{9qLkW{GUtdu4!+&^obsUB{y;W*1=AgArMG@K_!3j1*7?S+GnfS zR|YIbPEu8jJ83JMp`aiTl0 zqL;^Zy>skY8p6Z9+DEiC0%2uq=sbPhOy<=RPdv0O$O>j40KU3$_UVvs&i@bAMVp8W;bxlS%aRq4=;c66liT#R?%a zQEJL!Mqo>&K)(IQ?1i%t=*jMgpKa;QS`ms-&y%jNwx9O)*q(kO$|NCrWIMPxh`!0z zk)Q|H+Gc+?j_&b|rz^fj{`uZA^fVL{CTOe!&dkeWO}OGzQ#~XxcqsGv0;rtgqS0Vm zrI4iw-pugv8LG4KbMv&hRNQ_F;Uyf2oSsP-572LI)yBA)A46cIo#W*;_r6bS=#~p) zi)Qr*ymx*-AmmvVd9vlD`#p%X77M(;mY{FV54O4~Z7JS>U;nZynQmJg5R$GyU(^x- zvwrhJhbiY+Stn~7I?!d+VPtg#yw8xv_>r`mxUO+k#tpqn?>x^POAjG2`enG1yJiDp zKWM6p*?0LQ3Ym%VRbQiQ<=B$K{OWzq%U(f&Y){wYll1}-wXbx@GqIfjx=S;s|8=zq zEMmWXKUk_$il0C(W+l)dJ96po&|5{y>QOjtCM+;{>$aqmVv6JSj}Eu*eCzD85}l8Q zw>7$nEfMc5TRDX~TwX7>F)L7Zj2~FsZ#kP~Q9P`w&zOInO;7Qo<5;lT@=DVMV~b+V zUfLyrA(9J=`-6%nQiDYyYyvDo>rGviw`I1qRa%hVb2@tZ5IhPBTH1MAFI$S7{-<*i zDtSCjbw@`fqc_anfn|u2ysot``ogC@r>T4Mmd$}y1v;$Xm4lnR^M=y*zU7d|@mZd2 zj+Aw2>_4g1YN~d(C|-4eT*E>DTEuuaZ&55>r|JS`NsvWD8 zaiQ5r;PP0yI|QLVZx_Ba6+|mmiB)wbTgvLc!5_M^ci<7)VNg!H z{n{PsT{4N7ua@Mhkg`QLxW2DzF&WTd*dtxMN4e6@Mg!{IMDsR4zGX7f$u1vx79q~>i6 zu<3Z0uR0wI$%mi0w+mg!s=J~bqEX0boc#9@vhalct4O|9+9vG@M% zW7oLrm~8aKzTirH^4c&7VDowaKoOb!9w4NM{tOXxQ?AAI$Frd%3&Ld!`yed7OlmZ^ zDCCo(4XogwPpt+uwbbtTCiDsUMEU-68VVi9tj53cj{JXbqcQyVHkzh?@7xLf_W>$& z97xEDe0Y&k%s(If?Eh$~@A9*Zg$IBC95p+6^?&&AzjsRfr`B)LQgmIO8j$gxZfk2R zlkx7lK0G#t6}p^6gC@;gt$EPkO~09xNLcK*xMSHcfK$JB)P;OAQ|Nn!Vsl)&a^Cx1 z!y#dDz+wt6rFnF8)Vrdx(nE0S#X*SqFK-(|akx&o(3i96w zW-;QUNejS!dg~Nog`(Q5JhYxuKC`i$s%7iyDkQMDPS|7R?tVBQ=yGdC46bO#j(jeK ztvziN)~;~G_>P-YhXK((rvW@7JY6vl53o8=f5fGX1FM<$iRRP}ZQn=Sgw`JChvY8G z0=kfHCESTf0r)Hxx-cZ^>FVn4Y_{eDY>a&VL#?U@3^3#WPI%YtTwV8b57+(vt*rl{ z6<>A+OptsRK6@97u?N|a`~m`))WCIE2*_tN_i|f4(Z5pX6yMQ*E`|P|SJ(e&>mU7> zl!5cTb!av{|GN(T^U!}9&Hv6K;W~##wG{mE3jLR-%sREk?l(+A>ecBR-*oClwly3Y z&9*3{(dN=-;6~#F7h*aQn_fN*j%P)bs>^PyARb!@o=7CYX6sr67LMl+<$+bq3Mk-* z+x-F}%yorDbP=@IW{$TKap3O)5(5V*gxiyfE^S{H!>d2(Tj?!c{wgETr2_otTL1a> zhSW!kPu*C5(~txlZ~b~Izj6@zHB)eUn+fV8Z*x^WoDNXjbBXj87mim~me}m4gBhJT z5N4jKi09s`FOfA+F`3+JjKvlwHqPg4i;WRoNY@*v`xz3?O*HG$P5q#97~I$Ez>>wA zqa%DrhjBjDHj9l4cbgd!QCs&9F0OYM=3mK9Vmu&2J%Ay4{OX$zH}n{aW0MF8Os;5p zce@e>7y6TsIb?80BJ95L^Ls?p-^!gXkAI`3H>m$c#oZ{EF{~TPr+t1-=3b@jIOZ#d z1LZ@-4`AhU_yXkW9i)DIjTJ=y8O>kKwpP05({EIJ^xdlV$Z8jW{5}PANL+#Hwk#*t z#Tr@Smiwo>62DINH66@3-K)%x6?J9z@Nn#T8)Vo^$o~!G>)dUxpKnT(y0O_8tH|LI zvewwpY2r&|bGip^{*XPdht<|sWtxT*&xd)G@J9|Zq-dImykZv*H>dO9N{e`Sdh|wc z?}M#Ts7olBdv~7vGB4&I*Md8QZ>xv6dGeXt+dt0Zd0?2&x1^=bWon&mT|EYI2iBh{ zSavOP^|T zx~*<&@mZDI44M~DZLi4PJE&a1TGSw6WNh;KzJ*~vMX~0K-jYSmtJM^O)Y3S(Q1^2x zu4@22+AmhU63QX{nn%Pi(3(zf_~$7ARDf-$SWYrGFBw64kztatIlFz;p5l^{iEWvP zPiCLFZN{(Are}sv2EWo!)TG{D6QGH-mKJW@-g2yKRxEXZy#N;&j21=jQc%EgtdKbB zhClyA|GH5Gg2jMij->ky-Wr_mv*QzjS3MpicnAeS)L`Kq4*%W8jztu>X3+2Wx!qj9 zDb2;|bdoJ&ob@5nV}YU3-Q(%DF}`~y95kXshP=NTtpF8I^lP7tV*gF6}X$D<}S4?x!2oqQY3=YRlX2>u*=SNaJgWhon!y@C5)QwV?ng z)0i>uzN`HCxv+4$7*-7?)>VGkt7mBQXQxFNEUcOTPCNZk zZ|h_WOyi!-HZwfG9>hhyBLZ={1Q{-{zGjS!xLo){UGW;bQPC5x9f{v3TP(0RyDZ^k zmF8q|(!&I|Vz8NVZvFIl$8d8^>gHgc>xr4hMpp*xjyP8I=+n>5S!?ej8m2IDk_*k2 zX`5+(W}h8^_BId{&*IF8kA-3jTRPQW$Ggqe^)&2WiV{=;5j26=8yrT6HXkh-9{u^v zCfm|X+mF7&I4Vm~1>*Zx);Wzz9h%4Uo-+49dq=K&(jSNe(84c!YH&wfAnkT{EfWgc zg+#y5bR0@Q_9p4!EensK<1*1TO7!^B){l(6)$nou7p89KDR5?=AP0KH0=@bHy&8ru zZ=*yvI7U|ed)-aWiYB%SM2PhVT~f){s>b|TE+b{qOSvSUjz4E{4st_dU4Zj zo$UK&L!Bv`iHB zkej`{w#8inDiy%htMuBbf)SB1v23E%0se*M__#FJyv@U;^I(=X=K~~uD{5q?brFOF z3Z#HAq72w(!^6V;RSROzCo0?$ilzpiM{5XRTM7}h`cN4ChF&-^DUnnwDpB?dO;vKL zdv${8j|dt?e%iq2EJjPWFW*GlUH*O0JvAxGKBAZ@ki$k>P&BeCc(i={^>?;}2!*TI z-|H)T$LP&FL${?#RczhfuuEMNR!pz1aY~1Io z@M%#3)q?2aWeX3XkNn?z4=X%YYPJr9;^V;VD?_6J&?x;^c*6c(<$|{-jZ&5IKIpu4 z#_y2r!mXW~J++Ktg%|l9L-+nK!rlX%%kPaJf77xmGD2l`dA;uY9R5S&*G&3j z6W@KC%3=|Fw7)~m#kRQ5ck|qDB_F774#Wvye{2HFuB-yTdI5bT4mp#Q^Zxu~XOc_D z8oZ(xo2k)GKYb-_l{ z{rc+$DlRMCktB$@hEOlfp zd4v(Nq7OD1Q5X@5C;F|&SNE?ib$+>OmiZl^t#{>5p zyBzEE#cS`h^S2itn0(og_iLhv)DrpuN^hq5WolXJKhpDhw)+#ESTRDn4^V?u?J5H3Y_6>G-{w6lq2d`myXT1bbMP8f);$f^7hAB9> ztygZf)2m%~BgZ0C>`|0+kINnjd8?KZLQt+|xcYYTXW^EX;bM5sI7ZgdC%AyIq5g(> z>p^9Dcu8!V55x-~1QT9(1&7kFeg!nFE@R@-#w2IfK6-PGZSr-(rw41I@2Op@iGB7y zNo;!K$2(5{m!XIEZU`K*F3Lsj1hl?Js-2RZ%uG5f2s6*wSXOS4V{fYZEZ|0BW<+G- zA~nsfOwwZ?>sMzdC&<-8CVN`6{7>A2{;N6ra5Z?dB$zA=g5FPNcFxwdKNUCe8_VGr z7VHvW-b>5b@>vkA?Sl`Hlir#ND3Q%aTjst8Pb?xXJY=d(hcrw3Cy@qYB4e z?(|3RDKK++{UJR$PUM`Y9tl@%h$g$WSJ4)2*pMievA7h5AJS&qfJQJbip;|r&^OMN zWZLROoC*HBS1hBqk=Ki-H;Kx~}nBMMh$xBIB4hjlhTei>XGm}1$^2ynxL^-c5hZmUBzX+nfq#m(y8t_YP zMsFdu@+dz|l))nS>JJi_J2osSH+T``PQ4 zwqnTaTU|P5pXXGr0rb|?pjx$QFRcV*$a>ueHZASBgyOZVP;Ky2Kg)qTwe}r5x)D5X z3(h2<1cLIg8#UyPlDulae;l<-DJaiH}d#{N;LeJmZ97{NvO(vX05W8{ws@5MD3a!_eM0w z^FE6A*7rEsEO4cQU`ud%Bh5$ zy>U-)dN8ufI?cwXn0Y5imav?}5$TrjqXSlCY%~j7QM6^ zyqFC>U5IJQ=Q^9^8W0SCdRujQ$sj)yuuYaH{HJ8+&$eTe%BnChqj!eZ*PYH48~;#e zcWq<4+0O&3L*BUD*Nga@ltW6t4o^_sA*b9-*jG2Ez1K`SVMY$wa;VPB)O2VUlF+ME zrn&WrRwqQ|7Dt&|+4zqFBj&Z{X1nVw+AFx;b!Ji{bv%VSdJM8X-6b<{qW1wvk(I`h zX&kQ#Pu^Tnboj`uf161o!|U~|YETOEI7maiEsrzVd_hZWgDjMNwl22V?KS`oqw;Ng z=e>l4C*LGoCo;ney_=y9I54V{j+*Yu33?AUnpr{Ny|B$S?LmL8tl8NoI5y=0Z?hw) zjVjL>e^V_F*^1 zPIht7F4Hz`DmCJbv=4pwZRgh`JVy`f-XmqL727_~QL*F37+;}8zY4e>FX?iU;CGOVFNj)Zu|P}Q zu8yD^{B|x8P9RDsgC!%?GvTyn^Nr%duF**jE=nQes~7l}ekrtX!_X)q?PDi=)=k-Te zR8k$4TqS9lMw^s;s(r}FHZ$;L&b+*Sh|Ji}mPfdyzP^AQC1R4RG>40qbRJRieYQN$ zb}qt@fPZpK)L%kkLv&oWC@5-`jDY?i@9JcaU9Rjf*k?J#BkAabRYx+u3J@nfz!I@n z#7WWuGH-&?wT7wz=n0AQ%0Xbdk@9~5VGOCYO6--`@L%%2(21wGx5x_AFKzc^;?*@c z9>1T%* zK+BY55G^|~nVPUl=(h@cTqP0{KD!MzFM(-p?dFw-GYu~eo(ua+@TtD;w^VHNs4G6L zup4}d<-rjKhXh%RWEqRaej<+|XnBkSD3uGznFJg1{Kh(3z^9asUl~(s_Ny=U34yoP z1Tk~yEOd;*M*_)^X~ors!+GU|{|OSA_2~o@tK84e>$Xh!P7l&`=-P+fSH6;7n|yaq z>z4FE%h`x(Fyt!jtG$ogNFU5If_P);ywe^S7}i5R80-eM*rScD(`RQ}1?F2cx&*rp zqghI%r@prGiPnV>H%3l#r6wOvo|YYhMQWam(l{V_F6??U%i?VYFg8=(ArfxF)1OPQ z%nA7Qn^$JuN^HH`*uPw=-6eiE$fYKUm^+f++im24FUjAHzb?K^i@jUmprz}$P5oRV zamo5m%It!GLK;k*G^Wr8y`~=&uDV`iNM+C(gVNB;SMSi4xTaFh@s{+OQfriLYPNij zc->ReTllg0UrD!XE=O$?d^&f~!lMdKa4XkOt!S>GoazMYvRY)K$)H)iWuIPuE9PPP zgt}(U#)C1PzX>5EMvmC9s7s7bDpaZA#CrYiCK<_=s+MZp^3KwJCb(2Q-$*<7fx1P`%Wht+EfBWgRh2EkY=si(~Nr{Q5Z1ZVhLAk013r_ zO+C+t&jEZ6@$V1;CJRK31{Upm#QQ+)lOD2U@PtR&o6ezU!fedj*3i>=(YY6(Z$4R? zuxw<`+OnIe-o@!B_i-c(NF`dY(ByC^C$;(I#!xr2EQj&0O21**8|ozbf)Haxujf_9onqa|!2Ah%fRls*G9ic9$3-|Q~+ z*haL9E&19?k8glY{0K2_+y*6C&fxLT(qB^FMcN>ADzpViC29 zys?lMPm80q+`b-tTtj!Jq;uL}M#{dxW>zKSwI{Q|DK_u%Iec^H1v3Ae@pa8hwe|S7 zhQh7R0<7Sq#wTMC@l<972zB~~Ocr*$w6#~ljQc+OBC_nW@5U908@v$^f}m*RK)ai{ zR~9J7E|(pJCdHe82!ye0fs6D=b3U@oXJ|t5H;AfvGTHbLS>RRH#TEy@p8@ zS!S3n`@k2jf8C-ize5foAod83FX_IrOm@fkVqaU-$H=EmeZYnn%PL28@9&G}p*=%3 z(^*d6Igv@dxap#|ttX}Usj>I`*z;ambtKT?vmlh zff_}tJ$-#GJ?4Y#vV^9FtO-Q*r3z(#As#1ho~j{Ne@OCbsS3c~VzCj9#34n};ooMp zGQTa4v#v0g?`_#QG90N6ym)F?Z1K%*XrbMF(A_Crsc>lBVX_V_`o&hGM}wVqLU#+- z_;8$FVC+mQwmWRE2CGmqa-E#U-D%lI~qJLb1s$GX8hc(LD-$14|omr zwx?X?5@5KmS&=pn`ISAttg=|(hh2DIyv&&RT>uYZgiUI>E#O}e2@!KKOT_WKQLU=N*LLNV#H#Jxxh(wHGe5_AEhY3Ke1Q371JOtB2wfj*amWp?$O~Ngb z*-WtR{s_BXHGZ`u!eAi1_}K|V75Lpti{LenXca37&0os=O}%T&=Mpz18-eJqe%m?m zeG`ENBFkd4RbJoEXouf6yFo_ygwlaEJmcgGmi69@lU850QMiL zt|EAOl2;|ymWI{zsWgAnC23=sGamZ1nDN=%G@7FCoSfVdfTL}sQ#E(MCs80-o3^=; zz26B`KIPr3bJWQs`v*IxvvvAJY42wqStM57gAr}wUOp|5ImWqs0FZ;07zOoJqO%bu zfxT6=nYW=UrF-Xn&RWWnJrhpC^gvGL<^{mlcHbEc~v}5C>A`)3J`KIn- zq0KXIM`kY~PWMbhf8U{fE1UZQxG#e#g(!aIS*}q2I8bTbH~B3##g4)TCx zwm+PI^DMAzUW@d`*S0bl|HntI+7bZ;_45z~@^Sg(kCW54NY2sH=;6jT%OSY7nb0qx zz@F1~8thEww^T9<$?q}@G9W%qSf!~9gM?cNSnb4jVjNF;G^9*JJwa$N`el_VI=KU> z0F^_~%@Fg@yKo2k+4qTya@1Xm)jbj5OlnpgQGs-+)uS{lh3&xKL7?K}#)%g+_&OoM z72&5+SLN#ZXPvhsZfi(aI~?7FB^kYoUEX|ulEr|Wz-B3{0_M-OLshg^;I^#L*qxzB}RZ(nUm-GK8+1;oIcUPrS_NwLt-Ni zRw^^2UcTz^8l0~^*D-SvXXJhp+D8DY4SQSV6G5ZQE`Q! zurC>76QUqCU0~_-2WTM|IjLx)s?PH_u>h{);x+NdnJShaTkb`8)D`xZtjb9Ub-3uS zE3PlpeOJ#Vh6Dj?mtXV{jNw@J8iWQ>N26vstgQe$)-U_kR}>bLobpHc|CR^`>RV$L~a7=Ha`Uo|WA`88JV$ z*N%budJS=DOSK`hay~Qao%y#{I?ARVv+z|fsj664D%RO2J-zrwJoEa9g8wb67s36; zM+Pg8C8`xN#|3&?K7BIl97{KSF!J#`Y_oxe8}i98%?@@_uisvKOs-!!27O2ghHwsP z|1v8Y8es8;DoL=D(Bfa7#9+Qq{ATps=~ddPI7en3r{PUDp_&&~riKrF>>|tEVil!C zs+yV_mwT{2OcEVae2f7Bb$hQy#(w@P$W1;L4=KQojp>qT2)X zvj6zpQ9ONZ;asj@U+P47!R(vystvN&83F8!_mC7UV= zAY2v%z#~OWhDbMP)9k~)E2T?PZ?%>_tAq4Pq;Qx>zWnoLv9^B^21RiY00D4{uSf19aH z6GQ|YSVc!i9}W3#?T;h>7@yK^0xkZuauncu#5p0n>yOjx`7_Wh>QjR$Hh+NReCAPB zQX)i`BtmqFB;u70IgL2Bb?kbF(3jQJ2(SLVLcOy3>+rCPt{rVoRx5sS-Q~sQ$9$ly zNMR}iNxV&Q`27=#zLye#_>kguk(%!Jzy9BYaEK|yySuuCuU3L%gOvJhHX^i_uO@3H zhVgp{?5LjOec}!o*`iX7xrQsyfVg-a|CrnGn`qjxH50h!eA7FwfoM(ZBSdW>iqC!nOsaO|QBqK+Mj ze%!FYASK2O#%X1(miH_p_(y+(q72+JsJ(SS4!<9&-&lhjRjBy%8qgVPMzbt;z&~6GQ^6;KPiFMGGzlL-F>-qukemfrl^iByS!FI?_&nnM4*#!7&mT6D0?P|EQeF zX1Cl97VU5;vMAUcoY6w1KoFPM>pr6bL8r@Qj0CGJ?u8B9Z6kiZiLmfB+}rkmE_Iee zBQkYgPd-*%UFcNPx9?GVoCK%q(Xo1~p2IA+hnm^@ye-1$V~1$&vmW@t(@VZiZ}5=A z4_3E?tXaF%N@R1#qH-(DHK@!A;9#-pJp8^ks&9j7t`z#{jn9*?s{L8q0K$D**we!< z0lq4@8w^Lo-E>xhGy!=%bnLs-)RGcdhgX<0hPd^XS`X?F z1H=NMM}(p61&221pR2zeirb6(Y)HIio&JNO(8Ezbml~0UB?c#G9kE?_U@@>~wFya$ zTDio)2LT`E@n%I0e5hKb;NRh+*F{C`v@EZdS^yMk3(>?Yn}M}tsCpt$$ugD~!G!|! zO6ztEw#dtC(^FM-D?pxu8*{tyE`+m6K#VEGX+i+}%mFQ$u$m8|2drwVAYLCOpU$mu zz>nUES)7>IH}}}7u^n)za^GALfDi$V$4H1kPeHmXt$vz9$;kKs7Rn17rb!utLYRk4 zc!oR@ez7yvulcM+2nh=T3R^;Af3t|hk-V$C>|H|HqTN**55>ox%c@{A#46HUDLu)4 zB>}TH(chM7mD6dfn^>{1ni+gIcVGgxtx+r2qyXD;*@IvJ`Ytt3PK{G=u-Z(P$wvm~ zUOoOF`Uh;gnnTH~fUv;`%fZzPAw zQlF2|+0L4%ud%rvH+G-oVi?r{F9^(jbKTKpehqgB6^YBmM3ulzb^&N`8`!h+y1f1b z3jn)M8!30SyLYuW3wumEZuIO`i1A0&+X0uM1Aq24Dt#V^-j@)@P=`CtG?EWb9&TSPbWt+xe`Jz{gW@<_EH2G6{q&Mtc;%72 zAg3$BSgtovYHV>TzUjv9Ixj`JHnwIC63_>!Rcze@%f+GFXZAsVeI|!i@iNIWgle2S za-o!Jk#e@$;a8)@guD_kCsmlk7@cnmbkk2@pb;tWd=K^;OJ5ZhNp?fHXu(3EMSr^< zWN}up{GzjogcrbP8{{jN<<`x6LGS**&JB0)j)rk@{c;ByCP#9r8t?`r*_`w?a?J#_+?7 zQW3m~E`OQ+_O(Y4uXb+K6(*(F>Kbpy``^60sy;ENEQqQ5AhHm=tuxCtC*kBh{0rMA z^Ag`m4u_6}I?t|{$U>Y5A|#I}{XR4eG83ZpcUQa~m5m@_1QRf6t@FvlK}kF*W)9iv z(l{aW@CDo+Hcpp#^74(dPIhh)Wwf>?Kp2N;#BJ(lH$n;;N`)$t?#l1~2K9jE+2ZfXn z_43MG_*kvConq|EjF;VGz#IBy@a1AAoVRKsg)@2@vsFEpr{7U5s1H=-K!2r#!SN35 z#O!@-P$Mz7#N2b8UC5Y0EX;j!9t;SsfYDCgOwXKGg<~1>E2^u7F8{KprhB4N*jLcb z`bVg*EPA~`ld9Vmql)vOLoasu)p9F1jm~MXz+88Ze^#ctjN!Eq7aw26R(bXW^%TT? zDqdH>$k2_e#v45mYb*XX!Y3l(3eygZZ~c;7ytwO3&aIrU5q%Kxq(yH!)TxHyMZ=Gc zJ=&Oin3xV4b31o{r9kr34FQq%N=h46bR>*sy^(R0VOLC9T|oY(-f!_Cb*|Z zyDu%>^o%hK9q`FbJUL>NqQ-}#4l$tI@=C*ayQzDnFll8Y43~y1%Sse)`BbpLIh4>z z-4o$aV0KtkhBL;ImN1GX^Phz;aojy&JCLC8Ci2PtjwXD?z#MKI2<ph&Co4=>vA`m4)!yT|m5!uRF1^{{p;+F5@fLy!8{%`leSeu8Kh>UUuNZ~Ieo2=uV~TUM&Re3 z6U2hfbAx%VUm}aKbtg-RSJQ6J{*)8|8Rg`6m7_&OH(ffc4rR>N)sd)S>ibKztdTQBw z1VcVE34aZY77()5`LXx;3H9i;KtjzurPM=qQNl0zV6kAJVfPkFcp~UE|CvzSB;gk= z!oaiGK5&HTSHE4sI~B*ND1Q5B1-W6DBb?3Be%zFAOAmg zNX=>L>!HB3gVCTDqTKW`)4J|R#w(wkRAp!ZmrPk-uK~{?H6D3j$Xe45k~L$?1Q*Ua zXMb$|9D;&dH4;KN;C2m1Fm0P80Tm%x|oaN|PN z^XTg7dECQtPV~fQs(IS;6tJNC+MKk-isImI?g$~8ML&~0-pDKs{r;?Rat+Vh)FI|v zwDkpL6f#})PB3n(E|V4L>HqL-JAMc}$D{y1G@P<}jZE;#ao{w__*EgVsk22)z<4lN z01krNW?~uOO2TtG*8^h-$fh-sbP5#d8w9>O90EB)c1N+|D;Buy0vQp~3{#t->VCEY zdX)mbLet0xJVjv7Nb8cq-MV}#%##Ga)?H$x1|=pX5ho8p2{Y~h(K1>ZN^9D;=dgxQ z;!HJ3XG{3w#r=Gne(+ZxH$sIF*%{?4tnFb8adKQQYT&-t%-rh;z8J49NH<_yOlB~F z&@(c6)zvxP{s0G6|2QV&@%=jxKL5!AJU-ncAMfUg#0hflboknRK*W!xpUObuCu-aZ z0VIw#F9LLAeKt-RK&L#y)Rp7uBsPr<(;199%Pa( z;L1Xvd=sa@iQ8m1dZP&|)zokM4WsQ|K^~pg$aM}4d-CFFi(9CaN{IBo=5Kim=J;J8 z{~UHZM$0d*<-!-aRV}B5%TOa&}C}ZZE!B2VzLaW(-QBBwTQ{6 zhga3*1Eo>JhTND^E(zU#d0;!xe80i0)yR0qW3Xc;2URa|77W}=x3gJMPtjpWqO4gP zQCsY{WzdqZy2xktPK%cUW{418fD0O{|FC|Hh2k&+zneBaEKm5uFfcCDjE?-GNAYcC z@SzkJXz}~K=dI26->gNt+VGx@2Y-(cuBk3qv_Hb_aEyMVo;%ah3TXWxb88|uHc5=iso{52h|uot?zQpAe-7($p__2==&cvoWLrxWnm0%44+36ocpIpBOn zQdmzPAUg=~H~?b>$2~jdHo6!<{o>_Xp#b0HA8atUn8tuzaPZU`l-|0%Cpi>OzJkhs~yo!Z}(ywFOBNZn&Gpm2ZtA>%F zzNv$S)3fMTvBYwQJePU=3o$p%Z20gcZdcMm7&FhJ$E$4h!lB94=Pl0ZE@Q^%mx#Z! zA)Se#Z{q$K9)K@0j%TfK-!Is8fTU4mF<~i*#6zhtD=d4o=;DurH@s9vR#@Hng{to%?3BhR*#a4JNZ|8Erf~s1cbt*sOQK z(8?k11UYD15rWMsgjbav=r4{E?f}D}!uR2xmYyH*VWglRunn<+juarXgDfIv%g8b= z5deRnvnGIa$d1c(;B6Q2_<3XHu%&@pL+u%KSQ|#P0FueR?ZLuv1?Kz#5rI{4tsauj z>BPUKDc=dxbbotp<>&R;jY&h6x=P`lK^PW}Yp^x%QLVHSL)f(;iY8zzzYqBVGG8^! zWP$AYi|`oD-c$zgG8G?~dJKh+gSSvouW& z1gj4K0SjM6<{80wHGeEx!Esk4uT?j4g=T_s-i{GfuX>3mZWw?Q0PK`FL6=8hQzJ;Q z3c13lx8Ff*?( zMA?zF&1-*BOYLTIzn^X3?fw!3go`?ydK~g+cvvAA+WjOJFJCtZ%CC6*cCqz2(JhNkmM>PdjjI8 zx@p1)o9?y|Z+*xI0rLUSe|hZgkDlAvO=;1WcdN(LwP zzP8p;mBCW&CG#OODaL^6@*6z|6q~#EKhKztE02|Oy^c4+HV5^@iJHkd`U2@VvX@M4 zXLFzxK)B!|*2_UO6>TZCSZ$wQv4Es`MmX{y8-FkkFb=L0B$8iMAigFHOGO@k0cw1vph4@bJ2r3YTH=H7dtRX44qstt7Ko(k`PL)FY}21 z*m8wX09sOsxFbQnL%OyD7l+V7dsog0u7CD*%KZVG1(G-7t$x=&^#FtkTO=YJu@^|r zw7;(-0RggEt;B5WtfH;^V2DF89@_~j_v8xHqDZyF)}_%>>inKd11&@R976O=Rk8Tz zn|2w)&xt&p4^?ES_4DO}3M|ZQwzwld%*$`AeQayszB#IJ{KhmtJI5^w4AEzqzYYpMTD?t_db@SWJIB3_)x-NIRzvK?8^?DAU7B7bwQgp~E>AE4eK@8qB4Ij`4!+zPkd3#h=U zMGA-diRU3^am&$s;D8{-faG)j1)G|MKdnbOs+3Vph3EVMK~bX$=x2X1|A1+x;XAn`-g>!fYnAXBH=Nbza-n(8|hcL@U0ZRwm5zPXETZk5JbNmgu?N z`(boPi5c^rk$jgInrpo2WDX zxi0wYDT+6xP5)uaYS1f(0kAm7W!WTxYU(ET%L!svEX;3xhm{-e;0`SJEk#5~{DpX&ko)^jUqa{YeL_Wrl1 zQMq!E66Q?m#@fzx&oNbDR6Cz6e(@uimHL}hCQZw5oXkz5WfPFL=@(n*^npib1svA; zR3C(Xw8I~3wM>s!b*FJ<>s8$je+;{b=v--8+1BFQ+3i^5r}DE~7hEc$Lt1nq^h87r zL~aoN0m(CT!syHf9+b=e6T-|$ypVGQRhmQ6@6L0y^l{hR?R@eOpHrbfTrdy}22x^J zhyUe-s1YZL=k4(c7^PZX?h)f5Uyb2{gWg~`DG1P>diFNPh2qit zD28h?Veb~MZ$Q({8RHvQj65sDckUipZs~88U>|u%{Ghx-IaaQE8Q!Pz7`JkYT2jH2 z4Y{^k6`J^F;Go3Q&TZQz^~7l}@^TscwQmehQC{(1;PSQ8SCY|2G@@c<`RgS=HqX;c z*h^HCgdjpc4QFC%_xvI%V2~Ma8mOOMhI7qk8|NPtP8i`eoc7YLkiqXeL(QSCQH+%w zXvitr>F{fI8lH(~#gMsr7b@_w{ASHJBI?BZy~F-_LJ!Tv9-N;wq(#u0^67&R%$3+@1rsQnfqMifv+n_skIGht?cza;(e)mU-(i_3B05MgXhY(AB9TcJxTr4uY={R zz7a;^3IYwmQr%DVWserR_WAQpAFl%{xuxw0)6b; zBBBB!Zo-2fpx3BKA7(*~lh4yJ*wiV5ZZ!K=Ak&kH{Ii z751wa?QLOjL@i;+Oh2^pJ-CF2l8=!Er=++9*Q4{CZIAuxDouCyIPv?dWM$A0fN>yI zX7aZZOsY@dR{bL>1if+VuM5VL>TYzk!Kd| z8h}?$K_cwVNOM>T6Xl{>C`#V{^~-cMqcn;iV#3B+FFfJCc|bN^=_`5!!Jlha;b2nt zMWMr;MbXJ0`{{b zHhIE*v3kyIQA=&=)1)O_Qnx_v{qFmD6*vulQHwWwMUJ7odY{PZ&Uo`3(IZu@pQ|~` zCyyxW_K8eqS6(IkVvGMK7X;t&Vtxm$b$2*?knA6PakZnZXZ6>{(wAW0iooTuwvfi9 zm68=~l=%|FP3eQ?r%#I;`fNwy>{!pAF7^dYr^;9FKwSp1MppuDf?v%S9NnYT9>7~ZJyzB|8C@oNQO#_5ZtSF3;y~#YRt`z{ zq8^Dc@u7_}>F3;ZIH?q=&%9^X-C>3vs!_V7W&iEGvs4q};La8vu`PJ3%H3+NzCs|H z<$E$E2oW1>M`&{K+em$WG|Cd^_oTqEp$2?5LXhr4PW0rcfo2p`$62|e~{#%D0=AGKpX5&gw*78-qH zXK5(Alclpif39=K!|1rGKq7k!P(wa7sU|#Ihr@z7(%6e%UEyGAr6F1Al*3)j5D>jb z;%k%<++ZJNjI3-t>YwwoH(#s0%SbkdFZb?#YTxVY_)&xMYR4AG%O!~ckwG*{1jLxonD5dxDr>XBSa0dxJ3sJh` zj#0LizaA@a&oJZ}TLW@OCc_poV#P2GM!iPZ94#sr*49)K_J3K7MLU#io%Z4np_9?x zBwZWj#&YI(ZH-bfKn{|oaVQXRaNc1aOX{|?2-iciHC$$YCjg)K>f1a;)zdxRI!v90r>^5oPqaOSe4}5)j~>~%J(>;qm$QIL=~3djTDKmq1UT4DB+Gq>tBa? zl{eL%c-Me9s8-K;;{oyWEIx>9aNVYE^U5KW$(8U#2rH#BOE;g z9{2Ba0Vd1bH$?>!gIHQwwJEiovHQW*i74p4DSc^M>AZWmD&gKD&a>hi#2qo{EKVD@ zuvku*`c@HLqOmIfksD>4Yb&Vk-u{}R;1u;Ksy*UrMov8B(S5r|-TQU(!X zt3EO=S`YpB#~b$;D-`h8j4&r#AFNZbAf`2KtLRR2^Z=fT?3Ais92~xgfbvThVIjydp;Jjg zFXwvWxfY-z=6P?nKF1C?k(FFamF9O~z`qjghv-#6PpOgq9#MmHFe*zqaij=yU;$Tr zCV{@5*f}y4+WK3(LIq1|b6>c1x?SlE$-Vlp#4#mzpM~%*-L6Qbs`@lV0Tne*4wF}* z4>i%!84}%klH(rdQS!3+BSj1{sn4)-Y#%FYKM6uSuX{Nq9Y&Y$f+zWJw^mfCfN$f8If+`BO3LUq6LlpRr}K;5=3rFAKd0} zlXx!14+I#=PN|7gT%m05L;AH^{+|7-G@ab}@DdGbEpo5;^8VT8K+)`eZenlmoljd; z1s81ICj1a51Y3$CO}6l>?aDBkH^iyr#i*Bt-GWM}0-6?QxJXo{yxo*H;V4qw_)$TN*P7spL2ql$$Ny1*Q`EMD6Gl|&_1l@ zB}(bl$ZbNS&U&}G_!yFtBQts*Qd=62$9ZvB+jZx#*n9pmTzwX`vbFTRy1;}R)tOuA zDK1EOfyK_w?F%1&q+tyEth=+jog0%O;@&jpvVR}alN7xY57+HhYKU7#+g(ZdQYvPo zNP-?^ZLL&2g-Csp(a`1OR-Ajj<}G?UlyTK#{Nfe-hXy8_r}|IQ6tZgmh&k?FjIr)Z z(8wDC8T#F>sSgNI>A9|g>M1UqcrROHzfk^us8I)3oFu{XMw#6s%#>^3i9TK9#G_;@ zZ2b7~zWwu@&@+(vK^bDgg#C!HznY>nF0uK& z-JFLuI~1-Dfa{=~^^dnqUf3WuxBe;(M4+=7c{rT2?q7PwxKK=LOV2~ZHen3)HxI`| zHT7cm+71?;4^*?X3Lg)BOu0tQc_D3A)h=oCG%Bsil^RR~UOJgDbzz^I?;f1GJ-}zE ztLv-@|C`eLkf@aJdJzB(Uf6=jDCPU;Xlf=bzRx?=GW)}Hg%}k9`*6~>-qr7VRyIvuBjPkWKAF6rujQ%`O+i;IdCF43JR z_4<&sCBhM$uZY9RdPfz`2t|oG7vt2`(Tz_VvK9^%FN zyh}kgtNR6612Q)=(DLToU4^7Kfd?L1^TT=tx=A~UmGsI2F-{L zu5I57qkVwN8r2o?bNBslxjlz^4>J_5wd!(<`aZm%efO;z3h8vtI*TB~bcr*TghLh2 zv|Dgc07eQ)1~nF3GxND8*LQC5jBrQz-t=C_i+lxSkfKFt;4LbTJ27zbP-aaqUwl`r zho)sV6zj4Z4Yq$*zs-2eDJS6XW9g58yqLuN;^9@D<0icNiuv zYJq-H^Rwfl_BW+cTz3g}9xmM#((cd4_L}E7n=T7>BoN-`jqlptT^({ZC6s>wS34CY zd6v5xVYC)^HzBT<<&3i)27uHJ+cxe3!J%+f;p+6Rh9q%fB@c#@T>no$R&X7njfXc& z_a4IkZBB~H>vn`we+}&k4m!WpafJZt;K<3d?Oo3c@8yx(ikj$<(pOOHv)jsw0fXeM z@A2jy_~Z7`RB7jVzm>e1Y?yT>4!tgU^^VU;g*!A8*(FFqY06O z&3&9_8E~FizNG@{b0il4{B0~Wwp?d5G$@++^xaiDa$0}S#MPJVb!6Pxo;jGvxM#G$ zEI?*LnIt;CR_BOiu+Hi7vDlt?!4lCk$@-A8$`SVd`8d+3g-7JWMYjQ#QVYf)RZwT% z=4QvQc>9(V)Tt?YakGur?QQi1&+WB%xP45MJA1qbGvoq0e8d397%K1B8(uq@-k!;| zn&=&VqkWRL%9U~4ZL=$-CVZtB+0i^^c@{!Yq`L`+A)hsOzJOl8I9T_JeW)(y(X%51 z=jnBt2lY=kTw&LG$Cwy#lk7l)+=vN0nFa1I!2ZzfV0#g6h`aRe4##!$H3faqLl>os z+2KF-TSh0Jpkml%!qN!cc0_|jo2T={1MS+n%OHre zi+w~I0z5TI+bO6RSF^Yv`qNH}7t**D#_41@gRY~vSa2mg%E0%#NH8wds5rJBwZd*n zxF-c68E@=+Uk$!g*U|H+Tqpgz&4(Sk?7*~)gJL7=Krl3d!?k!>0lC`3fp$VtLTKJy z5tcD%_Nsw1eXY|Mzw;OK5#-{fzdGDPAgEi{`>^pKeY$+;Q8WB?_d@Zw(H@cetTv_? zT5Wvpa*Q*G)2bNzP?7gT&)M6z)$_kR;(Y+N1-2ZLi?>*5Xomm8FUBpn$$NDkHN#>6 z?txp<=kb)v;!AF-(}=6fbq#l634oZI)?X}x>q%RMUPcm)ZvciMSo+m6muB{>M-g^T zqZZupT@UF3AM&n9(#oWMt>&9cXwhT*D)M457bTx>hAqBKR4VgZD*=*%EA52+eWRVf z`e+piG)|pNH%{XNg0t z8O>g-?$%EJVdQyQajoTtf<$$Y?*SVJVt+cblO~Olj6L7WQ8eFtuTjLfja8o)69rJj zY5RN05)wuOp@Lr}JTG9hJ!i9?uy;5_mgIU`qsdyp-CTV&(BDPiRlXvBT}m+;w8 zDmo=x^g^qz6Wr%g>Q2A-v+eEs{8Q(ONf^-eZ`$D0OcY1hBg)sVmQyZ0B2AA48U>2H7>3ESwRJ6zY6 z>sl?}J2rh&BH;$kX)kB{A*!@Mj}#&e?X0+Rq?wUb7j)coSCvpHQRM${$e62+XT{AS zRN}bk*bY%meZcA=d$}1_fV=}`G}Hl!9)nV^sURmfC>rUWRjUSHtZi~_f zzHqT&|86a@@7!v8=qRzT-RG2bCYe$voUSdokNd!;wckyz^I|gPXpAd1|EOH+gKO

          Wi*ONIP0pF?xC~_8m$za={3qS6o10aop{`G_9)%ko&SGIOJz+Qe(u}{}GTV|s zcAa`)_7t!ePXb-B%ck_i*=>9IC)XcbyK)TZt1W4&ZNM%Iir7IQE7p#Qd?8n-i_FmP z-=Qqcbp8&2moM|9mff_p#oy9Em2nqP8EKt)^-ooJGXLE*r|$p2C9!z82wc%bEoJpc zmdRD*lu6)PJY*icu66KnJ2<-Cc*JbR_Bq@YJD3*X9oFk;Cx*hBZwUkW5civ$iN2LI zvwpu8e$fmP_^u~TVSBe=b?16C!bY5y>*prhvS0w)Ir}2aNQj-gk#6i{mOoGs(FCja zKOIvN=Z+}}!>thwpl-$^xi6cy)_B7ciN4#`t^G;M%gfrIx(JdfH-<+>_N>47r1bdj zOsqd2dBXlW+7;WmQ*wTK{e-2J@@hGp$pQZ3@4h#iC){jR)-Fa6lz@No;OYi>n4Ul0 zo5OP)8tnq));j<5(!y|2&L2X+%jJ?O{#qBQz6JhY`DR^T`gde%T=U-Z z2(-zQS{(B`bg`QC8#$Yt*D%En0KR_=%i%ArpLBbsD@IET>$GxV{1str2=P#^Kb!)3 z8{?vtVjy_$rNi@}hQT-J(%D)2>`kW5tCN#xpA>ZIFS$TZl9Q<0PuV2)8EpX^YI9-lUd< zTe8BpbH!e1*g&AvUj{_nrH{}l!F$v9OV=S5Q+)ub%EU*e$~`TBl}z;uCBe3xslbXd<~ zFZiWzYBww;ewZxwuDmmUv0GW3T!9tS1eIq)b7Nb!~+0Il6APF69kf zN2xsg+@=k_4xo{Zf7~YhEdV*It=wM$7`nZrJGnl!blA6+f<5YQ$*=%Upra!Nvie1~ z4ZdMNbBaI zhbfxQ+As!S@s)eCtF`tN1Oy#hNA^dWnoM}>(k3Z3;aK9!8lU$_y_h*NZtIt)!yz~hIo4v1$iV2V}sxZ@_uzQpMXV^Ll>lS{&`SXCGZR@-955e!Fp$z z0^U*_h};VnDWiB{ZiKGJc|eICUo6=&eITPlI;Fp-MSuw1e%kQv$gCx2LpPwQ>#*f` z?spE)CE&Akc?MXC7|yTHR)G320g|rvZ$gsn4f*Rc=zean722v@s=E1BNiHDbrcHzpW42|pX&Gh zAEgrBR(4q-D?|vHNsfrhI<_+2*;_Uxm61I{2xT4Q*jvM1m5{BhI7W6H<9og6{dxTU zf#2)>sCVae&b_Yty081XuIF{#x6toBAn4Zgix5w-u0=fyJb_xEfJ%aNu_QoN7aI2T zYubjik3bo*vDEsDUhRmiTrSZmfmXgLmA9oPU-1H<3BU0Rz!(4?bOVp)$kR>rsGB0N zn#tJyS;RBVIC84GjSQ(T*1Wg|`lgFVs|{s6ip0}+84L-tHT3ZFlKa%GRyG<9+xmPCzQBU8NO5MSgywWk8 z7D_ExU0`W;?`;*eNHnLDAg8>#1xIpEreUlMN-igfmnk&p1`P;Xg?xa1h zjrx!-g>#ktWSx6|;+&StzEUr_G^Z#`l4NoG(mI+b|%w;m_*D9}$n*SVH#q3qUk$ zg=9L$XiT4Bo;Z!~cMWdWWR$-VHIu)$=RSrftPslu^C8H%BH^oPz|O-#d?UVpTTSD8 zH6lfadn-Imyx6_rXj;>s&c{xHl($yCn*l5KinM>i7MH?t2K8}29o)5io<_O5$jojQ zI?8oyV_gtc{p&!Y%AtOMsud*Ih_%o&zDdSNnV5_Map_4q;zkMqIRR~LLp;XA0%-{Z ziUW`*F}7{5;W``m9$RQUke36vGcECq46wG=lmZH1fqhci}#v!Q?^MXhD@&xRTGbT>S% zN+`QfPj?p)W9~IZqv_UD3%f>RAAvGM0$PfBe1tj5w$m%BhH!`B(fp(@!lafz)Km!aoa~4o!D@C#p!I42g>R^7 zj5rXqrl!0ce);pR*CI-Qw@3 z0`Exeyz>@?y~S4qc53ij*ZB#Mf(=bXM~B8gzq=udWR<&w+YUIbV)oxiDsU4y7c#HA zq%J&j9!prnM4z!Yhms-yG{y2HI~v;j;IP5yb$gejmQesROq@7N?kw8v_E>g+Xr+MS z;%plo&v#r*Yh!qqBH#wo^U~K8DY1GAq-R5AfmF%3{13$}7j=G*=|zYnV6%2?wjwt# z7}bS*@407a`5M^(Zr>)-Osywrxly1;N?S@S@&Kl%QgXIxRQ|npZ-O3~OUANQ1E5=7 z2)gy<=4OfNkp;4(3hhbsN&Il!Z#I>T5tl*i)Fl<>CWB%Y3*(Kw1^?<=3;=N%F`J|J zN98JuQ72(D*xfPm`$O37Qs?%Bul2Pvx1C4}Py+5Z<%-b>`8ch0EicF%DSR`JRUrG$ zx4p;A$*y3<1er8+P{_%977~AUCbC@@Mmot5YJ(~!I`uJ^?n%#x+|sQ}-@{aTFZ1H` zX~-4dKG4^JWw2`4bG1+IKwNF;u#q7ztpK~q6#1LuRY}4Z3c31hnxqWJs<0q!m)J*I zv-sIFv*Iy)ss!!l1Kh89BFqLfMC&M1uBHdse5XWI26&OgbN&?iMBHC%N|UDK4hbVF zIw$xzxM{-CZQDQx=v|kL|76DX8IOYh{>l;l9nH{Tn5oMTc*$8nP_QZ*#-=re$E_sKoTM=g zepyOhQTgsOJ`w}5XaUJJ*Tusq9Gkun9f(*hx&dfE#^8gRibWQfCJBk+%cFve%#$5 zp(~AKHiA-5GgTAVVBsH%eXWO-5C$m+JQfnw1)e z2<#^}`VRE!)1)u4AJloe00y@PsQN@Ow(MJ(x}!qtJxPV2Js?}0T4#w39^GZxOC?3@ zyi|aM-4(5+kvCc99xe)U63rL&)+&Lme&9v-j6u^Nf@ip<{H?y8o`BSe`-K-r4ceuJ zN*pdyxV&}HX!g@-P|l3ns;~5quH>GbBO+^d9-rE_J2tET zo@vx8wkrM=sb>Mx_q|M$_j}jiBA%DIsw%Uv?$5Jl5yPf?6boQ8?z}i={F&dca9YiL zeCk{fSh|h>V&gFbFDf^s=(xB8;Et}GCCQMLY4zLG1kg= zuJLzJtg@6N(z>!HW#iu^CR;&TOqceh_sf0Xx}Th#c)uF5W^M%m0;s_O0WQR$!6Vri z);3gWcvHmlUF}cXRt(>|!a>F|r$Yv?NMqO3D_nW?s-~E^4sB8ii`(T>h8}kQmS882 zEq57uRbrF2-t7t47j#;-SSWQsux+eO(N-YfHh@hk_v;L-Vw-oy_+e?;7dV7#zw?yU zMP9O$vpm6#$T?#Q3a7bT79=&|YZA+7bP;U3@hSrD!oE!t1$M8C5ji^24T9E+@)!ZS zX=u+x0kKQ@FC+*ebtD^LJhT zBaHl!7cv{Y8xHX*ni&G#3;G?4kE|PCUcdo=reCdI+wA~DxON%O<{c5hf_7XSTD}30 zK;Ox=f%vlY)9aiV4L=-5&D| z0*pAusol>Dv^h@eHj6&^>4d!Ap^rd>($x80>R~shENqJE(}w}V%Lw}N@;8(gX5akv=N;w|Wui37;e) z&2Jaj%o$nzAVM@pMI6;8!zz+p_VF9Du$eak)*ns;Cn5*DhWV6rc&~)c){viyT(DRs z7S0feVwWiPqoZN0zQHqHnj)ily{j_rT#(P*4`N!1L|Qzg^&8p*ek+dUbbiOWPhDwE zu0&+F)A%&4g3tD^{FnL>07ho$M4g2C`!gL$(f54qIX((-^;4XZPgVM4Qfg)~Vp)97 zh7*;+M4hhhKWFsE?Sf>9u#=5=G6MBI2NZ8i=;b?*fLfdep)!l3j~5DWkapin;RAx6 zAf`;!5#>uHM^iQkMVak|wg4tND1*bY@lHN)|3)3wC!nJkY^@StZ9&=q+IPc-5R1kJ zV-!WaZU~)E6fv9ncVHhr5n;-z{XVUe)PTo+O29P{s*n^5(`l^zM4&-+_&qfB6W3xw zPt2AkeWRS^$R||UWlf1pQ4_Z9=J@-us8@$*>}|H9^58JMr+YBeOhDTXTsfPhZo_8Q zTG>D&AF$H;XxqOHH-CkVD!m_mr`;by$9+%dF$8`PTCjznxx0J=!*Ow3IF_^PWT#-06Tpfsd9> zzYf#W!<}@vg1d(C8;v75CJ`+3j-$S zS_1Y2NXTOCb@u)HTBsQ`WjOEmtuC8%CnwV?2>mBP|M5Z^Z(cZrV0g!= z1aN-}zro6XU|oAbI_LGS*4a)*YAHZ5^GjtpixE)a^Mr!Z?j>ev%_wk;?Jexz{!;@= z&)nKxCcZ}I`5vaMuj~-k#0>cC4!M|Xd2?5jFf zf2(BvMjFUj0#P=Dh<%I0NK0cflZbs5w*TV3Yj<5UH~u}0GVk>|V=1qL`eRg{w!y*S z9-3MM;p2E;|1%I%txsR@Z-rcQc1H)|e_a*(8=4N=9XLv-vYtr{?tL7L(8>z|4RN|2 zhe&4W)}7CC=gR-!5Tv`q$@e{61dtiyo-w-raB&5R6^JP{5Jn@DXohxlkTZzzlf#q@ zH4vRXa0ZVMM}~vU5CqP*J~~}|*@r_HFX zTxemowsGw`0@&eE(XmdsivSWdJ~Tw{E?T0^&yTm%#8esxE_t0(`eb8g5A>M1R?+m+ z4nP67Ukn48-j7ZS%XmfO;2@*H9Dk2aJlX*(-dmJ{Z`FQ~q?7dd&UG$xbP5TH@DprP z{}}k^Vgy2MXSr!A4WwGRt&2+35dD2dt;cH}O&6B3?zO%VXRe34op?t#8aGgaSN71b zi9W|nfi3SUDlz5xc1s(QPRyp;?n79oa1*vk*MqQfj3)AI%kia+xaUb^pePe??3c0l z(o$Ut4ETU5rz>N5!Wg@EYuUp89stRM$2Utj1%<`(PAkT*{z&3%yVdvREm*qZf!?q7 zJX0;+$hKkc43zk@UXGG(mG%CIO2RW@4g)Py0g*m_imA~D)_iSsT3PC- zbgJRDsI447Hj5aK78N+uEY(z@qgYwn3+Q<=`Rmy0!^yVP2-N!K9SgBN|4nPhWz)jT zeAV`7yA#T?N`J9chKM{!K8b_-A`mnO9#wH%T;zXIcmBi4cu;*nHa_-&o7JE} zBTr{t-r;_8_`$Tf^Cl&j!&J!m5AdYlftF&t79oBdRkx|G%>+GJ&-xUP`HyCrkMh7? z{AAsV=sSl+2xf+^60?KidiY76WS$Z_a1M9tKeRTWDn4uOQ+NGYI|3AzhDt8pwaTIQ z^2PjBEFGM-R(}3o-Mo`90=t5L@CpBTWA$p(kQ{&COZYlSxpVz2q}d+!Hzo< z5GGZ6U;T&%8mj-JVL|)j$d_3CEF`M&hzf}YZX}BFeJAw{?)-i6uKGAT5^Qj2Bp<#@ z*NVAHfqgMg9W)}eqW>RBg%u7)PF_ZTP=u3 zvd~p5ynwDkcrT5ffL9L4|<;5Qo{vSlsJ}ghb5TSp9T@=q{qVZp@WV0!7 zESaOGv#BN}b^2TIJ;J*8ZLHg2g^Thf1)<6zBMpYIYkq5jhi3%fIah2$Vv%xE@*)J{V=|H~q&Y^u5vsO$> zN&AlX2U?@kGx-bcE0i znaN_f46vz%qTv94sHY*>cK8v?62|Z>1Wm;r)NgH%KYd6=RRi^ULweu|c4#T`0;%lM zi=nDz0C`G>S>+DAFDxoSwwCV_*aZMlLu{Spfv+5>#P4omtm2}CurlP{Lz>6BAJ^J5 z!6r--betg*iKGztE^#?G3zAY6H5LIBZvt2?s_4tBPbTDd#F@e?8%ob^C3PYH3s*K< zIq(v%w}NhPFCv4rLYMsrHAD3QP#|kq`tv^AvL&bwu4UzVWe>Sb+Jc3sUOhd%FbMQT zBK<(iR`B0L(S`ZML!#l0df~<(62(pLc+R$+()C{AoP12z9L2{+5rBtOB&AdI0zL)l zb7K`89Akc|l@QmrQ3l z1oc*qpQw~H(^P18zj7tQf1EczbA93pfR|?jD zdV5C)w5KE1<(*RWK1y!2RB4drU6`AivRPpDf$LHltbtYGhzEuI5C(VwY3Y zVj(n4F7!2&9-*dPd2P> zukOz{zM|Owx6??E_ZcexqB-`n z3TA)bKm4-iSi}5h`|bT@p{1m)1t*x(?c%&D=Dy`VxmZ(i?EZsb z49~k0mVun+^>7U1%{QkSxx$BYSbZf!w(2}q>bDAGrzYR^nokXTIJ9U}?pK#r@877_ zr-N#9a?AQQ;^2?m*WDJXYze8jIG?lb&Z`^aS8ghZ+zAv@`%+rXmaj~WP2{5rAXR1F z7!MVnhapaiH@%T6VDlOG)^&aMM^c;O#W{H-T1e}z)Y`$zdj!XJ_WWv>>-ZOyv)5AS ze3uuOX?@#@Z1Qmp{O?Y*Yd+zevP8*k&FbV&KW3ab`+x>-WxUcnr|jEVmMuE@tK;_W z#*d;ava;RY+bE^o0pxrsf}8}`{sRXoJO#Xe(IC4k7IVGt<0ge{vkB|Iu9oSZ0IgoA z$F&U^L2tr@OKYfBfetUK+s%<(akcIA1j@U&H^O&vV`7R{{I-m~Y+(4s7obi}R;$zb zH@bJy{-OA}w`3a@Tb0wR*O^F!lY861m(%dTxGBj3&A5%_Pu}JVf6oY zN@UjZqaq?A_)T~dXM2Ard~ze7fmyt+EPaVm=3)$D|-@s=mZ37e3+k{ocy)7_qvu={e9y1E}BWp0G!qvPIL6DNGdA13jKmOQlE43b_E56 zMN>w)*op>S*Mb`b>-fFzd*3^T-^m#m82RgW=RtJ=p(<8efs&E;yLZY&EyKEtcxR3Y zJ87#b+Q;v%X$KLgOWe7rH{eNfz1i-2b^~G^O;9U$FRAllWVI<-)cxZji6Ffj0HioX%XH&*R|w8}i4ui<<3 zyHNEsYJu5OFW#a6JK#tghf?IW=zh{!7fhMXu3XKwMIk%GHOchaRBx}<6v>A&!0OR1(k_L!9=V5i7^_67u z+x^$h%{R*coT~&p8dCXkl6cI7;Wz_6?Q{)$Q+?p`Jbmg_^+8fiDx%AuTMYw1oP{$C z$#(C*Z>epuJlr>J871@SQ|6fOt2tFwo=CYyKQ*m&kp%620Rq2!;5^t7p>&j=ES~kk z1(W*@{zbKYozF6bZg>tPT-67nM;{Rk-SH`&d{U7P ztPZ1`2Q^<@kecOd=G9z#6@+p-TBLp;GqwBJpv}(0KnDBwlJ&*1tmkHA=(d8-7Mj&D zp<46taIa*6;RKE#&z9}AyN{1;iR{$xwbtOhD1{{`r&;JK&dO8RE%{MGAC^5Z-j9}$ z{J`f?LABL!&;H!ypw7v!ts;54P3s)4^E>tXJ5ZwJtCYKEelle`kHy5Dp&_C+iqzF@ z;+)FI(mOWOtg#0&F%M!`eYXv}RV}}-tT+K*{%8RqgESUc;;ArTGD++DG{i>!jyn;y zeT^+MDC3eb3zfQI}VJ3gc-FfHd6Nkrqk5zVu->Rz^u(xVZ$@ zWcoOLFT*UZ4K+4-qt}GQQKxTKK^cDiKrl;*vvGql*Ikstxi*EoCPd+|1C1|t|Kuk& zUm5+@82f8g8uHZ4OAw2DnxXOs%5c(0vT4>5PI4`1N2x$32dvWX zy_?Ygr@OUk==!0m-F}I3!0{Yz+qwDoGaG{O0xUu-@$(tFn^FWx@=X)lhKq>YC_y=G zY`=(TAd?fZJfW)f)HCw-G^cq6GqveyEwa=eZ^Vp8eiK*ug7%NSwuWL7DI(h85q&#< z*!1CtZ%q*vLZ)~4UIlex7vRtfUhi`RpY3&bbot=xZ}Fn|xb=2pg3H?NzI}@4WPRPP z5gC@;nzbCfndte7`4#q%TvLvgawfMHvc*x9=~vxU6nBJ=C)NfEt}(l~Oc!KvIOp-X z%_DOvd7^ttioIvR{<#f7jP6qn6BvJtgyVGc$Vm9A&K)Yr<)|)1Pl&xgp(a@9RDa7V z%ixS$NSBCGp zbnUccNnZX6KAwn)vaU$6k-TzEn+tx}$vsf~LRUCATjx1t`dPQ_^*%cyVTEC7gY!-2 z)+A;-&}zi44sL5K_)&y>2)CR`8RdPkeK-+u`0r8c7$BY+HMQyL z30VWu>V2jHQ~O8xNI-YG(wwcVp5bC}HsPZCur@~c{QMOPk@?slH6M8*wSJYrr;Mk^ z6O*C41fq6Rsya=5<0I|MWPJVSSNwf_Y??dZdY(-QK$s8G3PQF>KpPoh>2$O&V5>3`tW;FVul-j-)O61 zwI0ZicJQDdSv^A)O6sv%t=Kg1`nX$Nh}eAm|K@*~!OyX#hH}xp!0(CS(AS@%wJql% z9}*ZLFREnc=Z9C$IwI#GAWLAZd-0U-r9GgjPRl8*HF*Gu>f;fUR9|t+lA%*D;cI!^ z<1lM=od?JYO(&bf$bKeAI~F>GH4TS)HcboNB|Bw$kCUCS@)`Zd5_aeplXPcYI_c{6 zx-PN^4I5iu)=k)&c!Mw6p;oPD@s{QbDU&89H#Nn#{AezYL)zpg43H0gGFz&sAq5j9 znAO34+14(;gagW(rnp+NUa9ba88$auym(P??$PnLZ{1O?N^A_KtKzHG?8&8O%dS$z zrIoQ;(bkBtNwhm}we_ayO|9bO(EQl^JQT<)JmgH8_-c0M8Z(EA;bI>vt|I;4ae{~) zo@h6>AWJF1;?iIA3Zig5mIY?()nud#~K1T%6o-nHK&G>^p8jLS)o@gIUo$8vxW!i&oyaGKzDeH0@DopX?TXZz zr6t!;Z6&bAR%Fxs9LYs@eeb}2mc}c?K{vRiy zr*y-q(rs_(I z>q@3~9H82qNY{8)Mv0P2#|sX|_ogojO^+>{p<3zK?C!_a(g)xMg7Wn;yMdsN3=>RQ zlBdu2Im0}Z<(*v^o2TQ_F{v&@R$?CStGOG^o)_U@m(d_C)XmVF*Pr*zf_CXxFrON9 znVxzg>5xtEDMp5sG#xiP;?k(9ph=yju@3LKGRece&Qs_sqkm)jsP}aV4s9g*5TZ6R z8d^He^b)pgCazV%_LjzAI*%*x870ZZSi_}hHs~lXhxfW zP6;&IogMQJnfa$P%SC*i`mkH)C#j>diTkxxm5^g?e(ZnD(Cfsldk`lk&HFque%0?B zVB>1}QTEO{gHa$c)GhB6IB?m~0o#79s! zJ=Mz9Wu};3^%WqrXz>Gt+kw+4Sg)QP$gKjlcyosmNM|ibt9PKKW$FuJ0=^4_1@p2p z!_9t488cGnVfH zrYF*q^_|Lyp2sw!7U)(8FIGP8`A0`tz#UD+wWVi~29^r!za(sM{A@XsNoc8bsQC)p zg{vsO-PC!m62OZZNW9uz(>(PJp`qhJnz{Lw&ejl3G{~TXMIS@MO3|g+y5idi$!AnA z1+v}^GXZ{ke(Y)zbtPxc#}hAi4Bsh6*EGy zQw!HWk+$7{@lBLAdfB<{p+jsiWd2>SE9(KbgA~pfUv8Pr^#959G>xJGKMS$Om0|}P zhWFK4CecNA4Kv@doi!awiVcD^)`o<-Qvq*L$>XfOZ}$eCJ=9t+gQiN0jbDJKd1Yr+ zl7jmPbM(XncjLuB3JvHC_^UH-ji}>uZ)LA72jf!)!^bBr z@~C5xQz;3Y-&DR21@kuiOz)qK+T|)Gr*&NqEqccemAMC`hE$~{txPNNaEOK~mKk(%V{i;u#%-4vVU4l*SS z3i>UTu3q%MNT421?axeATwGpVA8olQtM1?Iu44Sfa%auj+lt>K2T8nayq@*_L+G<- z(5D;MoWfB!@+Pq}caOz=$pPIMVPxsAGuswBG+=3DGjH9XTA2z#wRMRmyE3~%=?VQT zh%S7+(M~)0x$c|UHaCZOg-7|w(#byeq6OmETql&uXe2?&MZ3b2;@^=f(gv@mjTUK0 z%u&C9;_{5aqy>{{iOuaVIdB(<$E#n~H-r_7HHx)kF<*+?o4MFyW~o;EErf^yLM2N` zDj0#A3HZ))acfL6Rx`auGmR26QhfoTVbO|R=o+*}j5c@@W8ZoxDBq2VY*QZm>#t9D zXGy3FK!5(unPrb`VeoKYoK9ydDh5F=flwBnm<&>p2d>n2{-4jQ$;ublX| z=#BB0Wl3_Gk9^nx%W)Ryq6M+e=w|R*(Nq0kMSw$i5{`7XPOzUGUOxD*0%5}F$Z=cH zA`gEvA>yhq6BDH7iUcLY(`c@u_|MR@XA&^r!xsUA*`(M+&MC%R=G6r3-xS40+n9i} z8r04z@fJmp<1&al{3(H{5VaY9OksI+A%v39R3vNeW2z!5BObTNsK`&Ho{~brl zG*f?QRFHtF?B9`Fq2!HIIHwq9{`Q2DIx>Nq#PgrEzvuH!ICHWKHTxV=vvu+13emew6(Eq_`n)Y4w^A}#wR;m^B zrs_P?AXj-hjf-pzt;phiJ9v9f!9qpW*oeOaiyhEpEYPvx;_wT7W@W|VV2R$m`@G{X zAzuEJ>c82F#Uxk^u90dXRwq32_fd+*3h@>~UsGC*2md0S0qFB2p4ZXwbs)hwIY1qS x$29Wg-;p;e&Haaya;yG_LnQK`{_=!r#y)F;ia|jh#oRnn5Op0+URE@H^nY6dK6wBD literal 0 HcmV?d00001 diff --git a/docs/src/userhtml.tex b/docs/src/userhtml.tex index eb1d7943..770f9194 100644 --- a/docs/src/userhtml.tex +++ b/docs/src/userhtml.tex @@ -18,8 +18,6 @@ \newtheorem{corollary}{Corollary} \usepackage{listings} \usepackage{algorithm2e} -\usepackage{minted} -\usemintedstyle{friendly} \usepackage{microtype} \ifpdf \newmintinline[fortinline]{fortran}{} From 497cd3101801585ea2879bb60297dd6424cc49ba Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 16 Jul 2024 13:14:02 +0200 Subject: [PATCH 114/116] Fix configure --- configure | 1 - 1 file changed, 1 deletion(-) diff --git a/configure b/configure index 0832aa87..0c59d9fd 100755 --- a/configure +++ b/configure @@ -7550,7 +7550,6 @@ fi if test x"$pac_cv_serial_mpi" == x"yes" ; then FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; CDEFINES="-DSERIAL_MPI $CDEFINES" - CXXDEFINES="-DSERIAL_MPI $CXXDEFINES" else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MPI Fortran 2008 interface" >&5 printf %s "checking MPI Fortran 2008 interface... " >&6; } From 13a402031e4f5c650aaac92b4a645c641975547f Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 16 Jul 2024 13:20:36 +0200 Subject: [PATCH 115/116] Fixed docs. --- docs/psblas-3.9.pdf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/psblas-3.9.pdf b/docs/psblas-3.9.pdf index 5140f9d2..6d104b9d 100644 --- a/docs/psblas-3.9.pdf +++ b/docs/psblas-3.9.pdf @@ -37021,8 +37021,8 @@ endobj 2388 0 obj << /Title (Parallel Sparse BLAS V. 3.9.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() -/CreationDate (D:20240711130925+02'00') -/ModDate (D:20240711130925+02'00') +/CreationDate (D:20240716132003+02'00') +/ModDate (D:20240716132003+02'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40) kpathsea version 6.3.5) >> @@ -37162,7 +37162,7 @@ endobj /W [1 3 1] /Root 2387 0 R /Info 2388 0 R -/ID [ ] +/ID [<47BB61B7C67E5662CD653809E7A13A98> <47BB61B7C67E5662CD653809E7A13A98>] /Length 11950 >> stream From 08a69985c861726f26e773f76eecce8f773a803d Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 16 Jul 2024 13:22:47 +0200 Subject: [PATCH 116/116] Take out unneeded file --- docs/src/userguide.pdf | 1 - 1 file changed, 1 deletion(-) delete mode 120000 docs/src/userguide.pdf diff --git a/docs/src/userguide.pdf b/docs/src/userguide.pdf deleted file mode 120000 index 7b032aa3..00000000 --- a/docs/src/userguide.pdf +++ /dev/null @@ -1 +0,0 @@ -tmp/userguide.pdf \ No newline at end of file